-
Notifications
You must be signed in to change notification settings - Fork 213
Add pytorch style dataloader #463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 20 commits
Commits
Show all changes
45 commits
Select commit
Hold shift + click to select a range
7360f91
Add pytorch style dataloader
dayo05 3de29c3
Remove GetDataEnumerable from interface
dayo05 857f9e6
Resolve review except get random value
dayo05 954fa8c
Rename method and create reset method
dayo05 63568f9
Add copyright string
dayo05 d8d249f
Use new shuffle algorithm
dayo05 ab2eb09
Add summery
dayo05 ca49340
Make able to create non-shuffle dataloader
dayo05 2cc2300
Make able to create non-shuffle dataset
dayo05 4cff460
Change tensor tuple to dictionary
dayo05 124790d
Merge branch 'main' into main
dayo05 17a9022
Replace files and make dataset abstract class
dayo05 d9dddb8
Merge remote-tracking branch 'origin/main'
dayo05 07d2689
Merge branch 'dotnet:main' into main
dayo05 2315e66
Merge branch 'main' into main
dayo05 ab6bd3e
Make dataloader disposable
dayo05 9631ed6
Make count priority abstract
dayo05 43dfcc1
Make dataloader to stack data as end of tensor
dayo05 225b9f8
Create simple test for dataset and dataloader
dayo05 a44ba7d
Merge remote-tracking branch 'origin/main'
dayo05 6e336ca
Make dispose enumerator
dayo05 5b5d9d5
Rename methods and add copyright notice
dayo05 49d7afd
Rename reset to Reset
dayo05 b250fe0
Make Count of dataset to long type
dayo05 703cb01
Make type of Count to long
dayo05 65f06cf
Make Count to long
dayo05 5f1707f
Rename methods
dayo05 3bda582
Make move tensor automatically to device
dayo05 22d0556
Make able to use custom seed
dayo05 eb53d61
Edit test for long
dayo05 921bb3d
Create test for custom seed
dayo05 8733f10
Merge branch 'main' into main
dayo05 168f87c
Make dataloader tensor dispose on MoveNext or Reset
dayo05 4db9b64
Change GCD algorithm
dayo05 5315175
Merge branch 'dotnet:main' into main
dayo05 eab28eb
Added document comments
dayo05 0deaddb
Add document comment for classes
dayo05 e9c20a4
Make catenate every tensor once
dayo05 cc5dfe0
Update doc comment
dayo05 dfff08a
Make able to set custom shuffler
dayo05 9dffab6
Fix mistake on creating custom shuffler
dayo05 991c377
Add fisher yates shuffler and make that as default
dayo05 2efce39
Fix mistake on shuffler
dayo05 00e16ad
Make dispose dataset once
dayo05 9291dc4
Undo changes on global.json
dayo05 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. | ||
|
||
using System; | ||
using System.Collections; | ||
using System.Collections.Generic; | ||
|
||
using TorchSharp.Utils; | ||
|
||
namespace TorchSharp | ||
{ | ||
public static partial class torch | ||
{ | ||
public static partial class utils | ||
{ | ||
public static partial class data | ||
{ | ||
public class DataLoader : IEnumerable<Dictionary<string, Tensor>>, IDisposable | ||
{ | ||
private Dataset dataset; | ||
private int batchSize; | ||
private bool shuffle; | ||
private Device device; | ||
|
||
/// <summary> | ||
/// Create pytorch style dataloader | ||
/// </summary> | ||
/// <param name="dataset"></param> | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
/// <param name="batchSize"></param> | ||
/// <param name="shuffle"></param> | ||
/// <param name="device"></param> | ||
public DataLoader(Dataset dataset, int batchSize, bool shuffle = false, Device device = null) | ||
{ | ||
this.dataset = dataset; | ||
this.batchSize = batchSize; | ||
this.shuffle = shuffle; | ||
this.device = device ?? CPU; | ||
} | ||
|
||
public IEnumerator<Dictionary<string, Tensor>> GetEnumerator() => | ||
new DataLoaderEnumerator(dataset, batchSize, shuffle, device); | ||
|
||
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); | ||
|
||
public long Count => (dataset.Count - 1) / batchSize + 1; | ||
|
||
private class DataLoaderEnumerator : IEnumerator<Dictionary<string, Tensor>> | ||
{ | ||
private Dataset dataset; | ||
private int batchSize; | ||
private Device device; | ||
dayo05 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
private bool shuffle; | ||
private ShuffleGenerator shuffleGenerator; | ||
private int currentVal = 0; | ||
|
||
public DataLoaderEnumerator(Dataset dataset, int batchSize, bool shuffle, Device device) | ||
{ | ||
this.dataset = dataset; | ||
this.batchSize = batchSize; | ||
this.device = device; | ||
this.shuffle = shuffle; | ||
reset(); | ||
} | ||
|
||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
private bool isFinished() => | ||
shuffle ? !shuffleGenerator.hasNext() : currentVal >= dataset.Count; | ||
|
||
private int getNextValue() => shuffle ? shuffleGenerator.next() : currentVal++; | ||
|
||
private void reset() | ||
{ | ||
shuffleGenerator = new ShuffleGenerator(dataset.Count); | ||
currentVal = 0; | ||
} | ||
|
||
public bool MoveNext() | ||
dayo05 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
if (isFinished()) return false; | ||
Current = dataset.GetTensor(getNextValue()); | ||
var currentKeys = Current.Keys; | ||
foreach (var x in currentKeys) | ||
Current[x].unsqueeze_(0); | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
Dictionary<string, Tensor> dic; | ||
for (var i = 1; i < batchSize; i++) { | ||
if (isFinished()) | ||
break; | ||
dic = dataset.GetTensor(getNextValue()); | ||
foreach (var x in currentKeys) | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
Current[x] = cat(new List<Tensor>() {Current[x], dic[x].unsqueeze(0)}, 0); | ||
} | ||
|
||
return true; | ||
} | ||
|
||
public void Reset() => reset(); | ||
|
||
public Dictionary<string, Tensor> Current { get; private set; } | ||
|
||
object IEnumerator.Current => Current; | ||
|
||
public void Dispose() | ||
{ | ||
dataset.Dispose(); | ||
} | ||
} | ||
|
||
public void Dispose() | ||
{ | ||
dataset.Dispose(); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. | ||
using System; | ||
using System.Collections.Generic; | ||
|
||
namespace TorchSharp | ||
{ | ||
public static partial class torch | ||
{ | ||
public static partial class utils | ||
{ | ||
public static partial class data | ||
{ | ||
public abstract class Dataset : IDisposable | ||
dayo05 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
public virtual void Dispose() | ||
{ | ||
} | ||
|
||
public abstract int Count { get; } | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
public abstract Dictionary<string, Tensor> GetTensor(int index); | ||
} | ||
} | ||
} | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
using System; | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
namespace TorchSharp.Utils | ||
{ | ||
public class ShuffleGenerator | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
{ | ||
int maxrange; | ||
int prime; | ||
int index; | ||
int offset; | ||
int runningvalue; | ||
|
||
public ShuffleGenerator(int size) | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
{ | ||
var min = size / 2; | ||
maxrange = size; | ||
prime = selectCoPrimeResev(min, size); | ||
offset = new Random().Next(size); | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
index = 0; | ||
runningvalue = offset; | ||
} | ||
|
||
private int getCurrentValue() | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
{ | ||
return (int) (((long) index * prime + offset) % (maxrange)); | ||
} | ||
|
||
public bool hasNext() | ||
{ | ||
return index < maxrange; | ||
} | ||
|
||
public int next() | ||
{ | ||
runningvalue += prime; | ||
if (runningvalue >= maxrange) runningvalue -= maxrange; | ||
index++; | ||
return runningvalue; | ||
} | ||
|
||
private const int MAX_COUNT = int.MaxValue; | ||
|
||
static int selectCoPrimeResev(int min, int target) | ||
{ | ||
var count = 0; | ||
var selected = 0; | ||
var rand = new Random(); | ||
for (var val = min; val < target; ++val) { | ||
if (coprime(val, target)) { | ||
count += 1; | ||
if ((count == 1) || (rand.Next(count) < 1)) { | ||
selected = val; | ||
} | ||
} | ||
|
||
if (count == MAX_COUNT) return val; | ||
} | ||
|
||
return selected; | ||
} | ||
|
||
static bool coprime(int u, int v) => gcd(u, v) == 1; | ||
|
||
static int gcd(int u, int v) | ||
dayo05 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
{ | ||
int shift; | ||
if (u == 0) return v; | ||
if (v == 0) return u; | ||
for (shift = 0; ((u | v) & 1) == 0; ++shift) { | ||
u >>= 1; | ||
v >>= 1; | ||
} | ||
|
||
while ((u & 1) == 0) | ||
u >>= 1; | ||
|
||
do { | ||
while ((v & 1) == 0) | ||
v >>= 1; | ||
if (u > v) | ||
(v, u) = (u, v); | ||
|
||
v -= u; | ||
} while (v != 0); | ||
|
||
return u << shift; | ||
} | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Xunit; | ||
|
||
namespace TorchSharp | ||
{ | ||
public class TestDataLoader | ||
{ | ||
private class TestDataset : torch.utils.data.Dataset | ||
{ | ||
public override int Count { get; } = 10; | ||
public override Dictionary<string, torch.Tensor> GetTensor(int index) | ||
{ | ||
return new() {{"data", torch.tensor(1)}, {"label", torch.tensor(13)}, {"index", torch.tensor(index)}}; | ||
} | ||
} | ||
|
||
[Fact] | ||
public void DatasetTest() | ||
{ | ||
using var dataset = new TestDataset(); | ||
var d = dataset.GetTensor(0); | ||
Assert.True(d.ContainsKey("data")); | ||
Assert.True(d.ContainsKey("index")); | ||
Assert.True(d.ContainsKey("label")); | ||
|
||
Assert.Equal(d["data"], torch.tensor(1)); | ||
Assert.Equal(d["label"], torch.tensor(13)); | ||
Assert.Equal(d["index"], torch.tensor(0)); | ||
} | ||
|
||
[Fact] | ||
public void DataLoaderTest() | ||
{ | ||
using var dataset = new TestDataset(); | ||
using var dataloader = new torch.utils.data.DataLoader(dataset, 2, false, torch.CPU); | ||
var iterator = dataloader.GetEnumerator(); | ||
iterator.MoveNext(); | ||
Assert.Equal(iterator.Current["data"], torch.tensor(rawArray: new[]{1, 1}, dimensions: new[]{2L})); | ||
Assert.Equal(iterator.Current["label"], torch.tensor(rawArray: new[]{13, 13}, dimensions: new[]{2L})); | ||
Assert.Equal(iterator.Current["index"], torch.tensor(rawArray: new[]{0, 1}, dimensions: new[]{2L})); | ||
} | ||
} | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.