Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/spire_agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ This may be useful for templating configuration files, for example across differ
| experimental | Description | Default |
|:------------------|-----------------------------------------------------------------|-------------------------|
| `named_pipe_name` | Pipe name to bind the SPIRE Agent API named pipe (Windows only) | \spire-agent\public\api |
| `sync_interval` | Sync interval with SPIRE server with exponential backoff | 5 sec |

### Initial trust bundle configuration

Expand Down
23 changes: 22 additions & 1 deletion pkg/agent/common/backoff/backoff.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,15 @@ const (
_noMaxElapsedTime = 0
)

// Option allows customization of the backoff.ExponentialBackOff
type Option interface {
applyOption(*backoff.ExponentialBackOff)
}

// NewBackoff returns a new backoff calculator ready for use. Generalizes all backoffs
// to have the same behavioral pattern, though with different bounds based on given
// interval.
func NewBackoff(clk clock.Clock, interval time.Duration) BackOff {
func NewBackoff(clk clock.Clock, interval time.Duration, opts ...Option) BackOff {
b := &backoff.ExponentialBackOff{
Clock: clk,
InitialInterval: interval,
Expand All @@ -30,7 +35,23 @@ func NewBackoff(clk clock.Clock, interval time.Duration) BackOff {
MaxInterval: _maxIntervalMultiple * interval,
MaxElapsedTime: _noMaxElapsedTime,
}
for _, opt := range opts {
opt.applyOption(b)
}
b.Reset()

return b
}

// WithMaxInterval returns maxInterval backoff option to override the MaxInterval
func WithMaxInterval(maxInterval time.Duration) Option {
return backoffOption{maxInterval: maxInterval}
}

type backoffOption struct {
maxInterval time.Duration
}

func (b backoffOption) applyOption(backoff *backoff.ExponentialBackOff) {
backoff.MaxInterval = b.maxInterval
}
22 changes: 22 additions & 0 deletions pkg/agent/common/backoff/backoff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,28 @@ func TestBackOff(t *testing.T) {
inRange(t, expectedResults[0], b)
}

func TestBackOffWithMaxInterval(t *testing.T) {
testInitialInterval := 6400 * time.Millisecond

mockClk := clock.NewMock(t)
b := NewBackoff(mockClk, testInitialInterval, WithMaxInterval(33000*time.Millisecond))

expectedResults := []time.Duration{}
for _, d := range []int{6400, 9600, 14400, 21600, 32400, 33000, 33000} {
expectedResults = append(expectedResults, time.Duration(d)*time.Millisecond)
}

for _, expected := range expectedResults {
// Assert that the next backoff falls in the expected range.
inRange(t, expected, b)
mockClk.Add(expected)
}

// assert reset works as expected
b.Reset()
inRange(t, expectedResults[0], b)
}

func inRange(t *testing.T, expected time.Duration, b BackOff) {
var minInterval = expected - time.Duration(_jitter*float64(expected))
var maxInterval = expected + time.Duration(_jitter*float64(expected))
Expand Down
62 changes: 62 additions & 0 deletions pkg/agent/common/backoff/size_backoff.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package backoff

// SizeLimitedBackOff defines interface for implementing a size based backoff for requests which
// contain number of records to be processed by server.
type SizeLimitedBackOff interface {
// NextBackOff returns the duration to wait before retrying the operation,
// or backoff.
NextBackOff() int

// Success indicates the backoff implementation that previous request succeeded
// so that it can adjust backoff accordingly for next request.
Success()

// Failure indicates the backoff implementation that previous request failed
// so that it can adjust backoff accordingly for next request.
Failure()

// Reset to initial state.
Reset()
}

type sizeLimitedBackOff struct {
currentSize int
maxSize int
}

var _ SizeLimitedBackOff = (*sizeLimitedBackOff)(nil)

func (r *sizeLimitedBackOff) NextBackOff() int {
return r.currentSize
}

func (r *sizeLimitedBackOff) Success() {
newSize := r.currentSize * 2
if newSize > r.maxSize {
newSize = r.maxSize
}
r.currentSize = newSize
}

func (r *sizeLimitedBackOff) Failure() {
newSize := r.currentSize / 2
if newSize < 1 {
newSize = 1
}
r.currentSize = newSize
}

func (r *sizeLimitedBackOff) Reset() {
r.currentSize = r.maxSize
}

// NewSizeLimitedBackOff returns a new SizeLimitedBackOff with provided maxRequestSize and lowest request size of 1.
// On Failure the size gets reduced by half and on Success size gets doubled
func NewSizeLimitedBackOff(maxRequestSize int) SizeLimitedBackOff {
b := &sizeLimitedBackOff{
maxSize: maxRequestSize,
}
b.Reset()

return b
}
54 changes: 54 additions & 0 deletions pkg/agent/common/backoff/size_backoff_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package backoff

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestRequestSizeBackOff(t *testing.T) {
maxRequestSize := 1000
b := NewSizeLimitedBackOff(maxRequestSize)

// Initial backoff value should be equal to the maxRequestSize
assert.Equal(t, maxRequestSize, b.NextBackOff())

// After multiple successes, the backoff value should cap at maxRequestSize
b.Success()
assert.Equal(t, maxRequestSize, b.NextBackOff())
b.Success()
assert.Equal(t, maxRequestSize, b.NextBackOff())

// After a failure, the backoff value should be halved
b.Failure()
assert.Equal(t, maxRequestSize/2, b.NextBackOff())

// After multiple failures, the backoff value should keep halving
b.Failure()
b.Failure()
assert.Equal(t, maxRequestSize/8, b.NextBackOff())

// After success backoff value should keep doubling
b.Success()
assert.Equal(t, maxRequestSize/4, b.NextBackOff())
b.Success()
assert.Equal(t, maxRequestSize/2, b.NextBackOff())

// Reset should set the backoff value back to the initial maxRequestSize
b.Reset()
assert.Equal(t, maxRequestSize, b.NextBackOff())

// validate lower limit
maxRequestSize = 5
b = NewSizeLimitedBackOff(maxRequestSize)
assert.Equal(t, maxRequestSize, b.NextBackOff())

b.Failure()
assert.Equal(t, 2, b.NextBackOff())
b.Failure()
assert.Equal(t, 1, b.NextBackOff())

// backoff value should not go below 1
b.Failure()
assert.Equal(t, 1, b.NextBackOff())
}
20 changes: 18 additions & 2 deletions pkg/agent/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,18 @@ import (
"github.com/spiffe/spire/pkg/common/rotationutil"
"github.com/spiffe/spire/pkg/common/telemetry"
"github.com/spiffe/spire/pkg/common/util"
"github.com/spiffe/spire/pkg/server/api/limits"
"github.com/spiffe/spire/proto/spire/common"
)

const (
maxSVIDSyncInterval = 4 * time.Minute
// for sync interval of 5 sec this will result in max of 4 mins of backoff
synchronizeMaxIntervalMultiple = 48
// for larger sync interval set max interval as 8 mins
synchronizeMaxInterval = 8 * time.Minute
)

// Manager provides cache management functionalities for agents.
type Manager interface {
// Initialize initializes the manager.
Expand Down Expand Up @@ -132,6 +141,8 @@ type manager struct {
// fetch attempt
synchronizeBackoff backoff.BackOff
svidSyncBackoff backoff.BackOff
// csrSizeLimitedBackoff backs off the number of csrs if error is returned on fetch svid attempt
csrSizeLimitedBackoff backoff.SizeLimitedBackOff

client client.Client

Expand All @@ -148,8 +159,13 @@ func (m *manager) Initialize(ctx context.Context) error {
m.storeSVID(m.svid.State().SVID, m.svid.State().Reattestable)
m.storeBundle(m.cache.Bundle())

m.synchronizeBackoff = backoff.NewBackoff(m.clk, m.c.SyncInterval)
m.svidSyncBackoff = backoff.NewBackoff(m.clk, cache.SVIDSyncInterval)
synchronizeBackoffMaxInterval := synchronizeMaxIntervalMultiple * m.c.SyncInterval
if synchronizeBackoffMaxInterval > synchronizeMaxInterval {
synchronizeBackoffMaxInterval = synchronizeMaxInterval
}
m.synchronizeBackoff = backoff.NewBackoff(m.clk, m.c.SyncInterval, backoff.WithMaxInterval(synchronizeBackoffMaxInterval))
m.svidSyncBackoff = backoff.NewBackoff(m.clk, cache.SVIDSyncInterval, backoff.WithMaxInterval(maxSVIDSyncInterval))
m.csrSizeLimitedBackoff = backoff.NewSizeLimitedBackOff(limits.SignLimitPerIP)

err := m.synchronize(ctx)
if nodeutil.ShouldAgentReattest(err) {
Expand Down
13 changes: 10 additions & 3 deletions pkg/agent/manager/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"github.com/spiffe/spire/pkg/common/telemetry"
telemetry_agent "github.com/spiffe/spire/pkg/common/telemetry/agent"
"github.com/spiffe/spire/pkg/common/util"
"github.com/spiffe/spire/pkg/server/api/limits"
"github.com/spiffe/spire/proto/spire/common"
)

Expand Down Expand Up @@ -116,14 +115,15 @@ func (m *manager) updateSVIDs(ctx context.Context, log logrus.FieldLogger, c SVI
staleEntries := c.GetStaleEntries()
if len(staleEntries) > 0 {
var csrs []csrRequest
sizeLimit := m.csrSizeLimitedBackoff.NextBackOff()
log.WithFields(logrus.Fields{
telemetry.Count: len(staleEntries),
telemetry.Limit: limits.SignLimitPerIP,
telemetry.Limit: sizeLimit,
}).Debug("Renewing stale entries")

for _, entry := range staleEntries {
// we've exceeded the CSR limit, don't make any more CSRs
if len(csrs) >= limits.SignLimitPerIP {
if len(csrs) >= sizeLimit {
break
}

Expand All @@ -148,6 +148,11 @@ func (m *manager) fetchSVIDs(ctx context.Context, csrs []csrRequest) (_ *cache.U
// Put all the CSRs in an array to make just one call with all the CSRs.
counter := telemetry_agent.StartManagerFetchSVIDsUpdatesCall(m.c.Metrics)
defer counter.Done(&err)
defer func() {
if err == nil {
m.csrSizeLimitedBackoff.Success()
}
}()

csrsIn := make(map[string][]byte)

Expand Down Expand Up @@ -180,6 +185,8 @@ func (m *manager) fetchSVIDs(ctx context.Context, csrs []csrRequest) (_ *cache.U

svidsOut, err := m.client.NewX509SVIDs(ctx, csrsIn)
if err != nil {
// Reduce csr size for next invocation
m.csrSizeLimitedBackoff.Failure()
return nil, err
}

Expand Down