Skip to content

Commit d4d61c7

Browse files
arijitAD and timwu20
authored and committed
feat(cmd): implement offline pruning of state trie (ChainSafe#1564)
1 parent 88d303e commit d4d61c7

File tree

18 files changed

+557
-32
lines changed

18 files changed

+557
-32
lines changed

cmd/gossamer/flags.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,29 @@ var (
264264
}
265265
)
266266

267+
// Flags for the prune-state subcommand (offline pruning of the state trie).
268+
var (
269+
// BloomFilterSizeFlag sets the amount of memory, in megabytes, allocated to the
// bloom filter used while pruning (default 2048). Valid for use with the
// prune-state subcommand.
270+
BloomFilterSizeFlag = cli.IntFlag{
271+
Name: "bloom-size",
272+
Usage: "Megabytes of memory allocated to bloom-filter for pruning",
273+
Value: 2048,
274+
}
275+
276+
// DBPathFlag sets the data directory of the pruned output DB. It has no default
// value; pruneState returns an error when it is left empty. Valid for use with
// the prune-state subcommand.
277+
DBPathFlag = cli.StringFlag{
278+
Name: "pruned-db-path",
279+
Usage: "Data directory for the output DB",
280+
}
281+
282+
// RetainBlockNumberFlag sets how many blocks, counted from the latest block, are
// retained while pruning (default 256). Valid for use with the prune-state
// subcommand.
283+
RetainBlockNumberFlag = cli.IntFlag{
284+
Name: "retain-blocks",
285+
Usage: "Retain number of block from latest block while pruning",
286+
Value: 256,
287+
}
288+
)
289+
267290
// flag sets that are shared by multiple commands
268291
var (
269292
// GlobalFlags are flags that are valid for use with the root command and all subcommands
@@ -276,6 +299,9 @@ var (
276299
CPUProfFlag,
277300
MemProfFlag,
278301
RewindFlag,
302+
DBPathFlag,
303+
BloomFilterSizeFlag,
304+
RetainBlockNumberFlag,
279305
}
280306

281307
// StartupFlags are flags that are valid for use with the root command and the export subcommand
@@ -354,6 +380,14 @@ var (
354380
HeaderFlag,
355381
FirstSlotFlag,
356382
}
383+
384+
// PruningFlags are flags that are valid for use with the prune-state subcommand
PruningFlags = []cli.Flag{
385+
ChainFlag,
386+
ConfigFlag,
387+
DBPathFlag,
388+
BloomFilterSizeFlag,
389+
RetainBlockNumberFlag,
390+
}
357391
)
358392

359393
// FixFlagOrder allow us to use various flag order formats (ie, `gossamer init

cmd/gossamer/main.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"os"
2323

2424
"github.com/ChainSafe/gossamer/dot"
25+
"github.com/ChainSafe/gossamer/dot/state"
2526
"github.com/ChainSafe/gossamer/lib/keystore"
2627
"github.com/ChainSafe/gossamer/lib/utils"
2728
log "github.com/ChainSafe/log15"
@@ -35,6 +36,7 @@ const (
3536
buildSpecCommandName = "build-spec"
3637
importRuntimeCommandName = "import-runtime"
3738
importStateCommandName = "import-state"
39+
pruningStateCommandName = "prune-state"
3840
)
3941

4042
// app is the cli application
@@ -115,6 +117,18 @@ var (
115117
"Input can be generated by using the RPC function state_getPairs.\n" +
116118
"\tUsage: gossamer import-state --state state.json --header header.json --first-slot <first slot of network>\n",
117119
}
120+
121+
// pruningCommand defines the prune-state subcommand: it runs pruneState
// (wrapped in FixFlagOrder) and accepts the flags listed in PruningFlags.
// NOTE(review): Description mentions a positional `<retain-blocks>` while
// ArgsUsage is empty and retain-blocks is declared as a flag — confirm the
// intended usage text.
pruningCommand = cli.Command{
122+
Action: FixFlagOrder(pruneState),
123+
Name: pruningStateCommandName,
124+
Usage: "Prune state will prune the state trie",
125+
ArgsUsage: "",
126+
Flags: PruningFlags,
127+
Description: `prune-state <retain-blocks> will prune historical state data.
128+
All trie nodes that do not belong to the specified version state will be deleted from the database.
129+
130+
The default pruning target is the HEAD-256 state`,
131+
}
118132
)
119133

120134
// init initialises the cli application
@@ -132,6 +146,7 @@ func init() {
132146
buildSpecCommand,
133147
importRuntimeCommand,
134148
importStateCommand,
149+
pruningCommand,
135150
}
136151
app.Flags = RootFlags
137152
}
@@ -411,3 +426,39 @@ func buildSpecAction(ctx *cli.Context) error {
411426

412427
return nil
413428
}
429+
430+
func pruneState(ctx *cli.Context) error {
431+
tomlCfg, _, err := setupConfigFromChain(ctx)
432+
if err != nil {
433+
logger.Error("failed to load chain configuration", "error", err)
434+
return err
435+
}
436+
437+
inputDBPath := tomlCfg.Global.BasePath
438+
prunedDBPath := ctx.GlobalString(DBPathFlag.Name)
439+
if prunedDBPath == "" {
440+
return fmt.Errorf("path not specified for badger db")
441+
}
442+
443+
bloomSize := ctx.GlobalUint64(BloomFilterSizeFlag.Name)
444+
retainBlocks := ctx.GlobalInt64(RetainBlockNumberFlag.Name)
445+
446+
pruner, err := state.NewPruner(inputDBPath, prunedDBPath, bloomSize, retainBlocks)
447+
if err != nil {
448+
return err
449+
}
450+
451+
logger.Info("Pruner initialised")
452+
453+
err = pruner.SetBloomFilter()
454+
if err != nil {
455+
return fmt.Errorf("failed to set keys into bloom filter %w", err)
456+
}
457+
458+
err = pruner.Prune()
459+
if err != nil {
460+
return fmt.Errorf("failed to prune %w", err)
461+
}
462+
463+
return nil
464+
}

cmd/gossamer/prune_test.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
"testing"
7+
8+
"github.com/dgraph-io/badger/v2"
9+
10+
"github.com/stretchr/testify/require"
11+
)
12+
13+
func iterateDB(db *badger.DB, cb func(*badger.Item)) {
14+
txn := db.NewTransaction(false)
15+
itr := txn.NewIterator(badger.DefaultIteratorOptions)
16+
17+
for itr.Rewind(); itr.Valid(); itr.Next() {
18+
cb(itr.Item())
19+
}
20+
}
21+
func runPruneCmd(t *testing.T, configFile, prunedDBPath string) {
22+
ctx, err := newTestContext(
23+
"Test state trie offline pruning --prune-state",
24+
[]string{"config", "pruned-db-path", "bloom-size", "retain-blocks"},
25+
[]interface{}{configFile, prunedDBPath, "256", "5"},
26+
)
27+
if err != nil {
28+
t.Fatal(err)
29+
}
30+
31+
command := pruningCommand
32+
err = command.Run(ctx)
33+
if err != nil {
34+
t.Fatal(err)
35+
}
36+
}
37+
38+
func TestPruneState(t *testing.T) {
39+
var (
40+
inputDBPath = "../../tests/data/db"
41+
configFile = "../../tests/data/db/config.toml"
42+
prunedDBPath = fmt.Sprintf("%s/%s", t.TempDir(), "pruned")
43+
storagePrefix = "storage"
44+
)
45+
46+
inputDB, err := badger.Open(badger.DefaultOptions(inputDBPath).WithReadOnly(true))
47+
require.NoError(t, err)
48+
49+
nonStorageKeys := make(map[string]interface{})
50+
var numStorageKeys int
51+
52+
getKeysInputDB := func(item *badger.Item) {
53+
key := string(item.Key())
54+
if strings.HasPrefix(key, storagePrefix) {
55+
numStorageKeys++
56+
return
57+
}
58+
nonStorageKeys[key] = nil
59+
}
60+
iterateDB(inputDB, getKeysInputDB)
61+
62+
err = inputDB.Close()
63+
require.NoError(t, err)
64+
65+
t.Log("Total keys in input DB", numStorageKeys+len(nonStorageKeys), "storage keys", numStorageKeys)
66+
67+
t.Log("pruned DB path", prunedDBPath)
68+
69+
runPruneCmd(t, configFile, prunedDBPath)
70+
71+
prunedDB, err := badger.Open(badger.DefaultOptions(prunedDBPath))
72+
require.NoError(t, err)
73+
74+
nonStorageKeysPruned := make(map[string]interface{})
75+
var numStorageKeysPruned int
76+
77+
getKeysPrunedDB := func(item *badger.Item) {
78+
key := string(item.Key())
79+
if strings.HasPrefix(key, storagePrefix) {
80+
numStorageKeysPruned++
81+
return
82+
}
83+
nonStorageKeysPruned[key] = nil
84+
}
85+
iterateDB(prunedDB, getKeysPrunedDB)
86+
87+
t.Log("Total keys in pruned DB", len(nonStorageKeysPruned)+numStorageKeysPruned, "storage keys", numStorageKeysPruned)
88+
require.Equal(t, len(nonStorageKeysPruned), len(nonStorageKeys))
89+
90+
// Check all non storage keys are present.
91+
for k := range nonStorageKeys {
92+
_, ok := nonStorageKeysPruned[k]
93+
require.True(t, ok)
94+
}
95+
96+
err = prunedDB.Close()
97+
require.NoError(t, err)
98+
}

cmd/gossamer/utils.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828

2929
"github.com/ChainSafe/gossamer/dot"
3030
"github.com/ChainSafe/gossamer/lib/utils"
31-
3231
log "github.com/ChainSafe/log15"
3332
"github.com/stretchr/testify/require"
3433
"github.com/urfave/cli"

dot/network/message_cache_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ func TestMessageCache(t *testing.T) {
4242
ok = msgCache.exists(peerID, msg)
4343
require.True(t, ok)
4444

45-
time.Sleep(50 * time.Millisecond)
45+
// TODO: Cache has issues with timeout. https://discuss.dgraph.io/t/setwithttl-doesnt-work/14192
46+
time.Sleep(3 * time.Second)
4647

4748
ok = msgCache.exists(peerID, msg)
4849
require.False(t, ok)

dot/state/bloom.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package state
2+
3+
import (
4+
"encoding/binary"
5+
"errors"
6+
7+
"github.com/ChainSafe/gossamer/lib/common"
8+
log "github.com/ChainSafe/log15"
9+
bloomfilter "github.com/holiman/bloomfilter/v2"
10+
)
11+
12+
// ErrKeySize is returned when the length of a key does not match the expected hash length.
var ErrKeySize = errors.New("key size does not match hash length")
14+
15+
// bloomStateHasher adapts a raw key so it can be handed to the bloom filter as
// a 64-bit hasher. Only Size and Sum64 are implemented; every other method
// panics with "not implemented".
type bloomStateHasher []byte

// Write is not supported and always panics.
func (h bloomStateHasher) Write(p []byte) (n int, err error) {
	panic("not implemented")
}

// Sum is not supported and always panics.
func (h bloomStateHasher) Sum(b []byte) []byte {
	panic("not implemented")
}

// Reset is not supported and always panics.
func (h bloomStateHasher) Reset() {
	panic("not implemented")
}

// BlockSize is not supported and always panics.
func (h bloomStateHasher) BlockSize() int {
	panic("not implemented")
}

// Size returns 8, the width in bytes of the value produced by Sum64.
func (h bloomStateHasher) Size() int {
	return 8
}

// Sum64 decodes the first 8 bytes of the key as a big-endian uint64.
// It panics when the key holds fewer than 8 bytes.
func (h bloomStateHasher) Sum64() uint64 {
	return binary.BigEndian.Uint64(h)
}
23+
24+
// bloomState is a wrapper for bloom filter.
25+
// The keys of all generated entries will be recorded here so that in the pruning
26+
// stage the entries that belong to the specific version are not deleted.
27+
type bloomState struct {
28+
// bloom is the underlying bloom filter; put adds keys to it and contain queries it.
bloom *bloomfilter.Filter
29+
}
30+
31+
// newBloomState creates a brand new state bloom for state generation
32+
// The bloom filter will be created by the passing bloom filter size. the parameters
33+
// are picked so that the false-positive rate for mainnet is low enough.
34+
func newBloomState(size uint64) (*bloomState, error) {
35+
bloom, err := bloomfilter.New(size*1024*1024*8, 4)
36+
if err != nil {
37+
return nil, err
38+
}
39+
log.Info("initialised state bloom", "size", float64(bloom.M()/8))
40+
return &bloomState{bloom: bloom}, nil
41+
}
42+
43+
// put writes key to bloom filter
44+
func (sb *bloomState) put(key []byte) error {
45+
if len(key) != common.HashLength {
46+
return ErrKeySize
47+
}
48+
49+
sb.bloom.Add(bloomStateHasher(key))
50+
return nil
51+
}
52+
53+
// contain is the wrapper of the underlying contains function which
54+
// reports whether the key is contained.
55+
// - If it says yes, the key may be contained
56+
// - If it says no, the key is definitely not contained.
57+
func (sb *bloomState) contain(key []byte) bool {
58+
return sb.bloom.Contains(bloomStateHasher(key))
59+
}

0 commit comments

Comments
 (0)