-
Notifications
You must be signed in to change notification settings - Fork 581
Open
Description
When I generate a large amount of data using FasterKV (with the number of keys exceeding 300 million), and then save a checkpoint and load it again, an error occurs, and I can no longer recover the original data. What could be the issue?
Here is the source code:
using FASTER.core;
using System.Diagnostics;
class Program
{
const int DataSize = 300000000;
//const int DataSize = 100000;
static async Task Main()
{
CleanData();
await CreateLargeData();
await LoadDataAndVerify();
}
private static void CleanData()
{
var sw = Stopwatch.StartNew();
Console.WriteLine("Cleaning data ...");
if (Directory.Exists("./data"))
{
Directory.Delete("./data", true);
}
sw.Stop();
Console.WriteLine($" - Cleaning data done. (Elapsed: {sw.Elapsed.TotalSeconds:f2}s)");
}
private static FasterKV<long, long> CreateStore(bool tryRecoverLatest = false)
{
var log = Devices.CreateLogDevice("./data/hlog.log");
var objlog = Devices.CreateLogDevice("./data/hlog.obj.log");
var store = new FasterKV<long, long>(
// 1L << 20,
1L << 18,
new LogSettings { LogDevice = log, ObjectLogDevice = objlog },
new CheckpointSettings{ CheckpointDir = "./data/checkpoint" },
tryRecoverLatest: tryRecoverLatest);
return store;
}
// Generate a large amount of data
private static async Task CreateLargeData()
{
Console.WriteLine($"Creating large data: size:{DataSize} ...");
// Create FasterKV instance
var store = CreateStore();
using var session = store.NewSession(new SimpleFunctions<long, long>());
// Write data
var sw = Stopwatch.StartNew();
var lastShowProgress = 0;
for (long i = 0; i < DataSize; i++)
{
session.Upsert(i, i);
var totalSeconds = (int)sw.Elapsed.TotalSeconds;
if (totalSeconds - lastShowProgress > 5)
{
// Display progress
lastShowProgress = totalSeconds;
var currentSpeed = i / sw.Elapsed.TotalSeconds;
Console.WriteLine($" - Creating progress: {i}/{DataSize} ({i * 100.0 / DataSize:f2}%) (Elapse: {totalSeconds}, Speed: {currentSpeed:f2} ops/s) ...");
}
}
// Write checkpoint
var writeCheckpointSw = Stopwatch.StartNew();
Console.WriteLine(" - Writing checkpoint ...");
await store.TakeFullCheckpointAsync(CheckpointType.Snapshot);
await session.CompletePendingAsync();
// store.Log.Flush(true);
writeCheckpointSw.Stop();
Console.WriteLine($" - Writing checkpoint done. (Elapsed: {writeCheckpointSw.Elapsed.TotalSeconds:f2}s)");
ShowStoreInfo(store);
// Dispose resources
session.Dispose();
store.Dispose();
var speed = DataSize / sw.Elapsed.TotalSeconds;
Console.WriteLine($" - All done. (Elapsed: {sw.Elapsed.TotalSeconds:f2}s, Speed: {speed:f2} ops/s)");
}
static async Task LoadDataAndVerify()
{
Console.WriteLine($"Loading data and verify: size:{DataSize} ...");
var loadSw = Stopwatch.StartNew();
var store = CreateStore(tryRecoverLatest: true);
loadSw.Stop();
Console.WriteLine($" - Loading data done. (Elapsed: {loadSw.Elapsed.TotalSeconds:f2}s)");
ShowStoreInfo(store);
using var session = store.NewSession(new SimpleFunctions<long, long>());
// Read & verify data
var sw = Stopwatch.StartNew();
var lastShowProgressTime = 0;
var lastShowErrorTime = -100;
var errorCount = 0;
for (long i = 0; i < DataSize; i++)
{
var (status, output) = (await session.ReadAsync(i)).Complete();
if (!status.Found)
{
if (sw.Elapsed.TotalSeconds - lastShowErrorTime > 5)
{
lastShowErrorTime = (int)sw.Elapsed.TotalSeconds;
Console.WriteLine($" - Error: key {i} not found. (Status: {status}) (ErrorCount: {errorCount})");
}
errorCount++;
continue;
}
if (output != i)
{
if (sw.Elapsed.TotalSeconds - lastShowErrorTime > 5)
{
lastShowErrorTime = (int)sw.Elapsed.TotalSeconds;
Console.WriteLine($" - Error: key {i} value is {output}, but expect is {i}. (ErrorCount: {errorCount})");
}
errorCount++;
}
var totalSeconds = (int)sw.Elapsed.TotalSeconds;
if (totalSeconds - lastShowProgressTime > 5)
{
// Display progress
lastShowProgressTime = totalSeconds;
var currentSpeed = i / sw.Elapsed.TotalSeconds;
Console.WriteLine($" - Creating progress: {i}/{DataSize} ({i * 100.0 / DataSize:f2}%) (Elapse: {totalSeconds}, Speed: {currentSpeed:f2} ops/s) ...");
}
}
// Write checkpoint
await store.TakeFullCheckpointAsync(CheckpointType.Snapshot);
await session.CompletePendingAsync();
ShowStoreInfo(store);
// Dispose resources
session.Dispose();
store.Dispose();
var speed = DataSize / sw.Elapsed.TotalSeconds;
Console.WriteLine($" - All done. (ErrorCount: {errorCount}, Elapsed: {sw.Elapsed.TotalSeconds:f2}s, Speed: {speed:f2} ops/s)");
}
static void ShowStoreInfo(FasterKV<long, long> store)
{
var sw = Stopwatch.StartNew();
// Get FasterKV store info
var entryCount = store.EntryCount;
var indexSize = store.IndexSize;
var overflowBucketCount = store.OverflowBucketCount;
var memorySizeBytes = store.Log.MemorySizeBytes;
sw.Stop();
Console.WriteLine(
$" - Store info: EntryCount: {entryCount}, IndexSize: {indexSize}, overflowBucketCount: {overflowBucketCount}, "
+ $"memorySizeBytes: {memorySizeBytes}, (Get info elapse: {sw.Elapsed.TotalSeconds:f2}s)"
);
}
}
Run result:
Cleaning data ...
- Cleaning data done. (Elapsed: 2.66s)
Creating large data: size:300000000 ...
- Creating progress: 8333810/300000000 (2.78%) (Elapse: 6, Speed: 1388968.26 ops/s) ...
...
- Creating progress: 299706408/300000000 (99.90%) (Elapse: 2520, Speed: 118931.11 ops/s) ...
- Writing checkpoint ...
- Writing checkpoint done. (Elapsed: 76.84s)
- Store info: EntryCount: 300000000, IndexSize: 262144, overflowBucketCount: 42710337, memorySizeBytes: 7247757312, (Get info elapse: 16.72s)
- All done. (Elapsed: 2619.05s, Speed: 114545.54 ops/s)
Loading data and verify: size:300000000 ...
- Loading data done. (Elapsed: 0.85s)
- Store info: EntryCount: 1835008, IndexSize: 262144, overflowBucketCount: 42710337, memorySizeBytes: 67108864, (Get info elapse: 0.02s)
- Error: key 0 not found. (Status: NotFound) (ErrorCount: 0)
- Error: key 8532049 not found. (Status: NotFound) (ErrorCount: 8532049)
...
- Error: key 299672862 not found. (Status: NotFound) (ErrorCount: 299672862)
- Store info: EntryCount: 1835008, IndexSize: 262144, overflowBucketCount: 42710337, memorySizeBytes: 67108864, (Get info elapse: 0.01s)
- All done. (ErrorCount: 300000000, Elapsed: 175.54s, Speed: 1708998.12 ops/s)
Note: After restoring the data, the EntryCount is noticeably much lower, and all keys cannot be found. (If DataSize = 200000000, then it is normal, with no errors.)
Metadata
Metadata
Assignees
Labels
No labels