Skip to content
8 changes: 7 additions & 1 deletion litt/cli/litt_cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ var (
Usage: "Source paths where the DB data is found, at least one is required.",
Required: true,
}
// forceFlag is the boolean "--force" flag (alias "-f"). Per its usage text, setting it
// forces the operation to proceed without prompting the user for confirmation.
forceFlag = &cli.BoolFlag{
Name: "force",
Aliases: []string{"f"},
Usage: "Force the operation without prompting for confirmation.",
}
)

// buildCLIParser creates a command line parser for the LittDB CLI tool.
Expand Down Expand Up @@ -280,9 +285,10 @@ func buildCLIParser(logger logging.Logger) *cli.App {
{
Name: "unlock",
Usage: "Manually delete LittDB lock files. Dangerous if used improperly, use with caution.",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add some sort of confirmation flow where the user has to type "yes", with a force-unlock or --force flag for users who want to skip that flow?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. You must either type "I know what I am doing" or include the --force flag.

ArgsUsage: "--src <path1> ... --src <pathN>",
ArgsUsage: "--src <path1> ... --src <pathN> [--force]",
Flags: []cli.Flag{
srcFlag,
forceFlag,
},
Action: unlockCommand,
},
Expand Down
20 changes: 20 additions & 0 deletions litt/cli/unlock.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package main

import (
"bufio"
"fmt"
"os"
"strings"

"github.com/Layr-Labs/eigenda/common"
"github.com/Layr-Labs/eigenda/litt/disktable"
Expand All @@ -20,5 +23,22 @@ func unlockCommand(ctx *cli.Context) error {
return fmt.Errorf("at least one source path is required")
}

force := ctx.Bool(forceFlag.Name)
if !force {
magicString := "I know what I am doing"
logger.Warnf("About to delete LittDB lock files. This is potentially dangerous. "+
"Type \"%s\" to continue, or use "+
"the --force flag.", magicString)
reader := bufio.NewReader(os.Stdin)
input, err := reader.ReadString('\n')
if err != nil {
return fmt.Errorf("failed to read input: %w", err)
}
input = strings.TrimSuffix(input, "\n")
if input != magicString {
return fmt.Errorf("unlock operation aborted")
}
}

return disktable.Unlock(logger, sources)
}
6 changes: 5 additions & 1 deletion litt/test/unlock_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package test

import (
"fmt"
"os"
"path"
"path/filepath"
Expand All @@ -20,11 +21,12 @@ import (
func TestUnlock(t *testing.T) {
testDir := t.TempDir()
rand := testrandom.NewTestRandom()
volumes := []string{path.Join(testDir, "volume1", path.Join(testDir, "volume2"), path.Join(testDir, "volume3"))}
volumes := []string{path.Join(testDir, "volume1"), path.Join(testDir, "volume2"), path.Join(testDir, "volume3")}

config, err := litt.DefaultConfig(volumes...)
config.Fsync = false // Disable fsync for faster tests
config.TargetSegmentFileSize = 100
config.ShardingFactor = uint32(len(volumes))
require.NoError(t, err)

db, err := littbuilder.NewDB(config)
Expand Down Expand Up @@ -55,11 +57,13 @@ func TestUnlock(t *testing.T) {
return nil
}
if strings.HasSuffix(path, util.LockfileName) {
fmt.Printf("Found lockfile %s\n", path) // TODO
lockFileCount++
}
return nil
})
require.NoError(t, err)
require.Equal(t, 3, lockFileCount)

// Unlock the DB. This should remove all lock files, but leave other files intact.
err = disktable.Unlock(config.Logger, volumes)
Expand Down
15 changes: 15 additions & 0 deletions node/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,21 @@ type Config struct {

// If true, then purge LittDB locks on startup. Potentially useful to get rid of zombie lock files,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't quite understand the scenario in which zombie lock files appear. Is it due to some sort of ungraceful termination of containers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expanded the documentation here:

	// If true, then purge LittDB locks on startup. Potentially useful to get rid of zombie lock files,
	// but also dangerous (multiple LittDB processes operating on the same files can lead to data corruption).
	//
	// When LittDB starts up, it attempts to create lock files. When a validator is forcefully shut down, lock files 
	// may be left behind. At startup time, if LittDB observes existing lock files, it first checks to see
	// if the process that created the lock files is still running. The lock files contain the creator's PID, and so 
	// LittDB checks to see if there is any process with that PID still running.
	//
	// Although it should be rare, it's possible that another process may be started with the same PID as the
	// PID used to create the lock files. When this happens, LittDB will be prevented from starting up out of
	// fear of another process trying to access the same files, even though the original process that created the 
	// lock files is no longer running. If that happens, this flag is a safe way to force LittDB to start up
	// without being blocked by those lock files. BE VERY CERTAIN THAT THE OTHER PROCESS IS ACTUALLY DEAD!
	// If two instances of LittDB are running on the same files, it WILL lead to data corruption.
	//
	// An alternate way to clear the LittDB lock files is via the LittDB CLI with the "litt unlock" command.
	// Run "litt unlock --help" for more information.
	LittUnsafePurgeLocks bool

// but also dangerous (multiple LittDB processes operating on the same files can lead to data corruption).
//
// When LittDB starts up, it attempts to create lock files. When a validator is forcefully shut down, lock files
// may be left behind. At startup time, if LittDB observes existing lock files, it first checks to see
// if the process that created the lock files is still running. The lock files contain the creator's PID, and so
// LittDB checks to see if there is any process with that PID still running.
//
// Although it should be rare, it's possible that another process may be started with the same PID as the
// PID used to create the lock files. When this happens, LittDB will be prevented from starting up out of
// fear of another process trying to access the same files, even though the original process that created the
// lock files is no longer running. If that happens, this flag is a safe way to force LittDB to start up
// without being blocked by those lock files. BE VERY CERTAIN THAT THE OTHER PROCESS IS ACTUALLY DEAD!
// If two instances of LittDB are running on the same files, it WILL lead to data corruption.
//
// An alternate way to clear the LittDB lock files is via the LittDB CLI with the "litt unlock" command.
// Run "litt unlock --help" for more information.
LittUnsafePurgeLocks bool

// The rate limit for the number of bytes served by the GetChunks API if the data is in the cache.
Expand Down
2 changes: 1 addition & 1 deletion node/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ var optionalFlags = []cli.Flag{
EigenDADirectoryFlag,
BlsOperatorStateRetrieverFlag,
EigenDAServiceManagerFlag,
LevelDBDisableSeeksCompactionV1Flag,
LittUnsafePurgeLocksFlag,
}

func init() {
Expand Down
Loading