feat(payments): Implement leaky bucket algorithm (#1853)

litt3 · web-flow · commit 6a7a24bc1c11 · 2025-08-13T18:10:16.000Z
* Implement leaky bucket algorithm

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Improve docs and clean up

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Simplify leaky bucket by using basic math ops

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Delete unnecessary utils

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Use better int types

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Specify capacity directly

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Use time.Second instead of 1e9

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Add bool to return signature of Fill

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Reorganize comment diagrams

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Reorganize packages, and add package docs

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Add doc about leak rate

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Simplify computeFullSecondLeakage signature

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Augment previousPartialSecondLeakage doc

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Make time.Second less opaque

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Revert change to accept capacity in LeakyBucket constructor

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Add doc to bias field

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

* Panic on unknown enum val

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>

---------

Signed-off-by: litt3 <102969658+litt3@users.noreply.github.com>
diff --git a/core/payments/reservation/CLAUDE.md b/core/payments/reservation/CLAUDE.md
@@ -0,0 +1,11 @@
+# Reservation Payments
+
+The reservation package implements accounting logic for reservation-based EigenDA usage.
+
+## Key Implementation Details
+
+- Reservation accounting is performed with a LeakyBucket algorithm.
+- Each instance of the LeakyBucket algorithm is configured with a BiasBehavior, to determine whether to err
+on the side of permitting more or less throughput.
+- Each instance of the LeakyBucket algorithm is configured with an OverfillBehavior, which governs behavior when bucket
+capacity is exceeded.
diff --git a/core/payments/reservation/bias_behavior.go b/core/payments/reservation/bias_behavior.go
@@ -0,0 +1,22 @@
+package reservation
+
+// BiasBehavior selects which way the leaky bucket implementation leans at the points where it must decide whether
+// to err on the side of permitting *more* or *less* throughput.
+//
+// Consider the different users of the leaky bucket:
+//   - Validator nodes should err on the side of permitting *more* throughput. Processing a little extra data isn't
+//     a big deal, but denying usage that a user is entitled to is something to be avoided at all costs.
+//   - Clients should err on the side of utilizing *less* throughput. They should do their best to use the
+//     full capacity of the reservation they're entitled to, but should prefer slight under-use.
+type BiasBehavior string
+
+const (
+	// BiasPermitMore means: when in doubt, permit *more* throughput instead of less.
+	//
+	// This is what a validator node should use.
+	BiasPermitMore BiasBehavior = "permitMore"
+	// BiasPermitLess means: when in doubt, permit *less* throughput instead of more.
+	//
+	// This is what a client should use.
+	BiasPermitLess BiasBehavior = "permitLess"
+)
diff --git a/core/payments/reservation/doc.go b/core/payments/reservation/doc.go
@@ -0,0 +1,2 @@
+// Package reservation implements accounting logic for reservation-based EigenDA usage.
+package reservation
diff --git a/core/payments/reservation/errors.go b/core/payments/reservation/errors.go
@@ -0,0 +1,25 @@
+package reservation
+
+import (
+	"fmt"
+	"time"
+)
+
+// TimeMovedBackwardError is returned when a timestamp is observed that is before a previously observed timestamp.
+//
+// This should not normally happen, but with clock drift and NTP adjustments, system clocks can occasionally jump
+// backward. This error allows the system to handle such cases gracefully rather than fatally erroring.
+type TimeMovedBackwardError struct {
+	// CurrentTime is the current time that was provided.
+	CurrentTime time.Time
+	// PreviousTime is the previously observed time, which is after CurrentTime.
+	PreviousTime time.Time
+}
+
+// Error implements the error interface.
+//
+// The message reports both timestamps in RFC3339Nano format, along with how far the current time lies behind the
+// previous time.
+func (e *TimeMovedBackwardError) Error() string {
+	current := e.CurrentTime.Format(time.RFC3339Nano)
+	previous := e.PreviousTime.Format(time.RFC3339Nano)
+	delta := e.PreviousTime.Sub(e.CurrentTime)
+
+	return fmt.Sprintf(
+		"time moved backward: current time %s is before previous time %s (delta: %v)",
+		current, previous, delta)
+}
diff --git a/core/payments/reservation/leaky_bucket.go b/core/payments/reservation/leaky_bucket.go
@@ -0,0 +1,279 @@
+package reservation
+
+import (
+	"errors"
+	"fmt"
+	"time"
+)
+
+// LeakyBucket implements the [leaky bucket](https://en.wikipedia.org/wiki/Leaky_bucket) algorithm as a meter.
+//
+// Symbols "leak out" of the bucket at a constant rate, creating capacity for new symbols. The bucket can be "filled"
+// with additional symbols if there is enough available capacity.
+//
+// The standard golang golang.org/x/time/rate.Limiter is not suitable for our use-case, for the following reasons:
+//
+//  1. The Limiter doesn't support the concept of overfilling the bucket. We require the concept of overfill, for cases
+//     where a bucket size might be too small to fit the largest permissible blob size. We don't want to prevent users
+//     with a small reservation size from submitting large blobs.
+//  2. The Limiter uses floating point math. Though it would *probably* be ok to use floats, it makes the distributed
+//     system harder to reason about. What level of error accumulation would we see with frequent updates? Under
+//     what conditions would it be possible for the client and server representations of a given leaky bucket to
+//     diverge, and what impact would that have on our assumptions? These questions can be avoided entirely by using
+//     an integer based implementation.
+//
+// NOTE: This struct doesn't do any synchronization! The caller is responsible for making sure that only one goroutine
+// is using it at a time.
+type LeakyBucket struct {
+	// Defines whether we should err on the side of permitting more or less throughput.
+	//
+	// Practically, this value is used to determine whether the bucket is initialized to full or empty, as well as
+	// the rounding direction when leaking.
+	biasBehavior BiasBehavior
+
+	// Defines different ways that overfilling the bucket should be handled.
+	overfillBehavior OverfillBehavior
+
+	// The total number of symbols that fit in the bucket.
+	bucketCapacity uint64
+
+	// The number of symbols that leak out of the bucket each second, as determined by the reservation.
+	symbolsPerSecondLeakRate uint64
+
+	// The number of symbols currently in the bucket.
+	currentFillLevel uint64
+
+	// The time at which the previous leak calculation was made.
+	previousLeakTime time.Time
+
+	// The number of symbols which leaked in the "partial second" of the previous leak calculation.
+	//
+	// To understand the logic of how this value is used, see the inline documentation of the `leak()` method.
+	//
+	// Since the leaky bucket uses integers instead of floats, leak math isn't straightforward. It's easy to calculate
+	// the number of symbols that leak in a full second, since leak rate is defined in terms of symbols / second. But
+	// determining how many symbols leak in a number of nanoseconds requires making a rounding choice. Leak calculation
+	// N needs to take the partialSecondLeakage of calculation N-1 into account, so that the precisely correct number
+	// of symbols are leaked for each full second.
+	previousPartialSecondLeakage uint64
+}
+
+// NewLeakyBucket creates a new instance of the leaky bucket algorithm.
+//
+// Returns an error if:
+//   - symbolsPerSecondLeakRate is 0
+//   - bucketCapacityDuration is <= 0
+//   - the resulting bucket capacity is 0, or doesn't fit in a uint64
+//
+// Panics if biasBehavior is not a known value.
+func NewLeakyBucket(
+	// how fast symbols leak out of the bucket
+	symbolsPerSecondLeakRate uint64,
+	// bucketCapacityDuration * symbolsPerSecondLeakRate becomes the bucket capacity
+	bucketCapacityDuration time.Duration,
+	// whether to err on the side of permitting more or less throughput
+	biasBehavior BiasBehavior,
+	// how to handle overfilling the bucket
+	overfillBehavior OverfillBehavior,
+	// the current time, when this is being constructed
+	now time.Time,
+) (*LeakyBucket, error) {
+	if symbolsPerSecondLeakRate == 0 {
+		return nil, errors.New("symbolsPerSecondLeakRate must be > 0")
+	}
+
+	if bucketCapacityDuration <= 0 {
+		return nil, fmt.Errorf("bucketCapacityDuration must be > 0, got %s", bucketCapacityDuration)
+	}
+
+	// 1e9
+	nanosecondsPerSecond := uint64(time.Second)
+	maxUint64 := ^uint64(0)
+
+	// Compute rate * duration / 1e9 without letting the intermediate product overflow. The duration is split into
+	// whole seconds and leftover nanoseconds: the whole-second part needs no division at all, and the leftover part
+	// is < 1e9, so the product only overflows for rates that are far beyond anything realistic. Since the
+	// whole-second part is an exact integer, the result is identical to the naive formula whenever the naive
+	// formula doesn't overflow.
+	durationNanos := uint64(bucketCapacityDuration.Nanoseconds())
+	wholeSeconds := durationNanos / nanosecondsPerSecond
+	remainderNanos := durationNanos % nanosecondsPerSecond
+
+	productOverflows := (wholeSeconds > 0 && symbolsPerSecondLeakRate > maxUint64/wholeSeconds) ||
+		(remainderNanos > 0 && symbolsPerSecondLeakRate > maxUint64/remainderNanos)
+	if productOverflows {
+		return nil, fmt.Errorf("bucket capacity overflows uint64 (from leak rate %d symbols/sec * duration %s)",
+			symbolsPerSecondLeakRate, bucketCapacityDuration)
+	}
+
+	wholeSecondsCapacity := symbolsPerSecondLeakRate * wholeSeconds
+	remainderCapacity := symbolsPerSecondLeakRate * remainderNanos / nanosecondsPerSecond
+	if wholeSecondsCapacity > maxUint64-remainderCapacity {
+		return nil, fmt.Errorf("bucket capacity overflows uint64 (from leak rate %d symbols/sec * duration %s)",
+			symbolsPerSecondLeakRate, bucketCapacityDuration)
+	}
+	bucketCapacity := wholeSecondsCapacity + remainderCapacity
+
+	if bucketCapacity == 0 {
+		return nil, fmt.Errorf("bucket capacity must be > 0 (from leak rate %d symbols/sec * duration %s)",
+			symbolsPerSecondLeakRate, bucketCapacityDuration)
+	}
+
+	var currentFillLevel uint64
+	switch biasBehavior {
+	case BiasPermitMore:
+		// starting with a fill level of 0 means the bucket starts out with available capacity
+		currentFillLevel = 0
+	case BiasPermitLess:
+		// starting with a full bucket means some time must elapse to allow leakage before the bucket can be used
+		currentFillLevel = bucketCapacity
+	default:
+		panic(fmt.Sprintf("unknown bias behavior %s", biasBehavior))
+	}
+
+	bucket := &LeakyBucket{
+		biasBehavior:             biasBehavior,
+		overfillBehavior:         overfillBehavior,
+		bucketCapacity:           bucketCapacity,
+		symbolsPerSecondLeakRate: symbolsPerSecondLeakRate,
+		currentFillLevel:         currentFillLevel,
+		previousLeakTime:         now,
+	}
+
+	// leak() assumes that previousPartialSecondLeakage is the partial-second leakage that corresponds to the
+	// sub-second part of previousLeakTime, and subtracts it so those symbols aren't leaked twice. Construction time
+	// is rarely aligned to a second boundary, so seed the value accordingly. Leaving it at 0 would cause the first
+	// leak calculation to leak symbols for the part of the second that elapsed *before* the bucket existed.
+	bucket.previousPartialSecondLeakage = bucket.computePartialSecondLeakage(uint64(now.Nanosecond()))
+
+	return bucket, nil
+}
+
+// Fill attempts to add symbolCount symbols to the bucket, after first leaking up to the given time.
+//
+// - Returns (true, nil) if the fill was accepted.
+// - Returns (false, nil) if the bucket lacks the capacity to permit the fill.
+// - Returns (false, error) for actual errors:
+//   - symbolCount is 0.
+//   - The leak calculation failed, e.g. with a TimeMovedBackwardError if the input time is before the previous
+//     leak time. The underlying error is wrapped.
+//
+// A rejected fill leaves the fill level untouched (apart from leakage), i.e. a failed fill doesn't count against
+// the meter.
+//
+// Panics if the bucket was configured with an unknown overfill behavior, and the fill would exceed capacity.
+func (lb *LeakyBucket) Fill(now time.Time, symbolCount uint32) (bool, error) {
+	if symbolCount == 0 {
+		return false, errors.New("symbolCount must be > 0")
+	}
+
+	if err := lb.leak(now); err != nil {
+		return false, fmt.Errorf("leak: %w", err)
+	}
+
+	// the fill level that would result from accepting this fill
+	proposedFillLevel := lb.currentFillLevel + uint64(symbolCount)
+
+	// a fill that exceeds capacity is only acceptable if the overfill behavior says so
+	if proposedFillLevel > lb.bucketCapacity {
+		switch lb.overfillBehavior {
+		case OverfillOncePermitted:
+			// overfilling is only permitted while there is still *some* capacity available
+			if lb.currentFillLevel >= lb.bucketCapacity {
+				return false, nil
+			}
+		case OverfillNotPermitted:
+			return false, nil
+		default:
+			panic(fmt.Sprintf("unknown overfill behavior %s", lb.overfillBehavior))
+		}
+	}
+
+	lb.currentFillLevel = proposedFillLevel
+	return true, nil
+}
+
+// RevertFill undoes a previous fill, i.e. removes symbolCount symbols from the bucket, after first leaking up to the
+// given time. The fill level is clamped at 0.
+//
+// - Returns an error if symbolCount is 0.
+// - Returns a wrapped TimeMovedBackwardError if the input time is before the previous leak time.
+//
+// The input time should be the most up-to-date time, NOT the time of the original fill.
+func (lb *LeakyBucket) RevertFill(now time.Time, symbolCount uint32) error {
+	if symbolCount == 0 {
+		return errors.New("symbolCount must be > 0")
+	}
+
+	if err := lb.leak(now); err != nil {
+		return fmt.Errorf("leak: %w", err)
+	}
+
+	// the symbols being reverted may have already leaked out, so never go below an empty bucket
+	symbolsToRemove := uint64(symbolCount)
+	if symbolsToRemove < lb.currentFillLevel {
+		lb.currentFillLevel -= symbolsToRemove
+	} else {
+		lb.currentFillLevel = 0
+	}
+
+	return nil
+}
+
+// leak lets the correct number of symbols leak out of the bucket, based on when we last leaked.
+//
+// Returns a TimeMovedBackwardError if the wall-clock reading of the input time is before the previous leak time. In
+// that case, the bucket state is left unmodified.
+func (lb *LeakyBucket) leak(now time.Time) error {
+	// Strip any monotonic clock reading from the input. All of the leak math below operates on wall-clock values
+	// (Unix() and Nanosecond()), but time.Time.Before compares monotonic readings when both times carry one. Without
+	// this, a backward wall-clock jump (e.g. an NTP adjustment) would pass the check below, and the unsigned
+	// subtraction in the leak math would then wrap around and drain the bucket.
+	now = now.Round(0)
+
+	if now.Before(lb.previousLeakTime) {
+		return &TimeMovedBackwardError{PreviousTime: lb.previousLeakTime, CurrentTime: now}
+	}
+
+	defer func() {
+		lb.previousLeakTime = now
+	}()
+
+	//	 Previous leak (N-1)                      Current Leak (N)
+	//	        ↓                                      ↓
+	//	   |----*----------|----------------|----------*-----|
+	//	   ↑________________________________↑
+	//	          fullSecondLeakage
+	fullSecondLeakage := lb.computeFullSecondLeakage(uint64(now.Unix()))
+
+	// We need to correct the full-second leakage value: the previous leak calculation already let some symbols from a
+	// partial second period leak out, and those symbols shouldn't leak twice
+	//
+	// If the previous leak calculation was within the same second as this calculation, fullSecondLeakage is 0 and
+	// this unsigned subtraction wraps around. That is fine: adding partialSecondLeakage below wraps back to the
+	// correct value, since within the same second partialSecondLeakage >= previousPartialSecondLeakage.
+	//
+	//	 Previous leak (N-1)                      Current Leak (N)
+	//	        ↓                                      ↓
+	//	   |----*----------|----------------|----------*-----|
+	//	   ↑____↑
+	//	  previousPartialSecondLeakage
+	//
+	//	 Previous leak (N-1)                    Current Leak (N)
+	//	        ↓                                      ↓
+	//	   |----*----------|----------------|----------*-----|
+	//	        ↑___________________________↑
+	//	          correctedFullSecondLeakage
+	correctedFullSecondLeakage := fullSecondLeakage - lb.previousPartialSecondLeakage
+
+	//	 Previous leak (N-1)                      Current Leak (N)
+	//	        ↓                                      ↓
+	//	   |----*----------|----------------|----------*-----|
+	//	                                    ↑__________↑
+	//	                                partialSecondLeakage
+	partialSecondLeakage := lb.computePartialSecondLeakage(uint64(now.Nanosecond()))
+	lb.previousPartialSecondLeakage = partialSecondLeakage
+
+	//	Previous leak (N-1)                      Current Leak (N)
+	//	        ↓                                      ↓
+	//	   |----*----------|----------------|----------*-----|
+	//	        ↑______________________________________↑
+	//	                     actualLeakage
+	actualLeakage := correctedFullSecondLeakage + partialSecondLeakage
+
+	// the bucket can't be emptier than empty
+	if lb.currentFillLevel <= actualLeakage {
+		lb.currentFillLevel = 0
+		return nil
+	}
+
+	lb.currentFillLevel = lb.currentFillLevel - actualLeakage
+	return nil
+}
+
+// computeFullSecondLeakage accepts the current number of seconds since epoch, and returns the number of symbols that
+// leak over the whole-second difference between that value and the epoch seconds of the previous leak time.
+//
+// Sub-second components are ignored entirely, so the returned value must be used carefully. See leak() for details.
+func (lb *LeakyBucket) computeFullSecondLeakage(epochSeconds uint64) uint64 {
+	previousEpochSeconds := uint64(lb.previousLeakTime.Unix())
+	return (epochSeconds - previousEpochSeconds) * lb.symbolsPerSecondLeakRate
+}
+
+// Accepts a number of nanoseconds, which represent a fraction of a single second.
+//
+// Computes the number of symbols which leak out in the given fractional second. Since this deals with integers,
+// the configured bias determines which direction we round in.
+func (lb *LeakyBucket) computePartialSecondLeakage(nanos uint64) uint64 {
+	// 1e9
+	nanosecondsPerSecond := uint64(time.Second)
+
+	switch lb.biasBehavior {
+	case BiasPermitMore:
+		// Round up, to permit more (more leakage = more capacity freed up)
+		// Add (1e9 - 1) before dividing to round up
+		return (nanos*lb.symbolsPerSecondLeakRate + nanosecondsPerSecond - 1) / nanosecondsPerSecond
+	case BiasPermitLess:
+		// Round down, to permit less (less leakage = less capacity freed up)
+		return nanos * lb.symbolsPerSecondLeakRate / nanosecondsPerSecond
+	default:
+		panic(fmt.Sprintf("unknown bias: %s", lb.biasBehavior))
+	}
+}
diff --git a/core/payments/reservation/leaky_bucket_test.go b/core/payments/reservation/leaky_bucket_test.go
diff --git a/core/payments/reservation/overfill_behavior.go b/core/payments/reservation/overfill_behavior.go

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+// Package reservation implements accounting logic for reservation-based EigenDA usage.`
	`2`	`+package reservation`