Skip to content

feat(webhook): add rate limiting to webhook endpoint #1210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"math"
"os"
"strings"
"sync"
Expand Down Expand Up @@ -216,6 +217,11 @@ func newRunCommand() *cobra.Command {
log.Infof("Starting webhook server on port %d", webhookCfg.Port)
webhookServer = webhook.NewWebhookServer(webhookCfg.Port, handler, cfg.KubeClient, argoClient)

if webhookCfg.RateLimitEnabled {
limiter := webhook.NewRateLimiter(webhookCfg.RateLimitNumAllowedRequests, webhookCfg.RateLimitWindow, webhookCfg.RateLimitCleanUpInterval)
webhookServer.RateLimiter = limiter
}

// Set updater config
webhookServer.UpdaterConfig = &argocd.UpdateConfiguration{
NewRegFN: registry.NewClient,
Expand Down Expand Up @@ -271,6 +277,9 @@ func newRunCommand() *cobra.Command {
if err := webhookServer.Stop(); err != nil {
log.Errorf("Error stopping webhook server: %v", err)
}
if webhookCfg.RateLimitEnabled {
webhookServer.RateLimiter.StopCleanUp()
}
}
return nil
case err := <-whErrCh:
Expand Down Expand Up @@ -337,6 +346,10 @@ func newRunCommand() *cobra.Command {
runCmd.Flags().StringVar(&webhookCfg.GHCRSecret, "ghcr-webhook-secret", env.GetStringVal("GHCR_WEBHOOK_SECRET", ""), "Secret for validating GitHub Container Registry webhooks")
runCmd.Flags().StringVar(&webhookCfg.QuaySecret, "quay-webhook-secret", env.GetStringVal("QUAY_WEBHOOK_SECRET", ""), "Secret for validating Quay webhooks")
runCmd.Flags().StringVar(&webhookCfg.HarborSecret, "harbor-webhook-secret", env.GetStringVal("HARBOR_WEBHOOK_SECRET", ""), "Secret for validating Harbor webhooks")
runCmd.Flags().BoolVar(&webhookCfg.RateLimitEnabled, "enable-webhook-ratelimit", env.GetBoolVal("ENABLE_WEBHOOK_RATELIMIT", false), "Enable rate limiting for the webhook endpoint")
runCmd.Flags().IntVar(&webhookCfg.RateLimitNumAllowedRequests, "webhook-ratelimit-num-allowed", env.ParseNumFromEnv("WEBHOOK_RATELIMIT_NUM_ALLOWED_REQUESTS", 100, 0, math.MaxInt), "The number of allowed requests in a window for webhook rate limiting")
runCmd.Flags().DurationVar(&webhookCfg.RateLimitWindow, "webhook-ratelimit-window", env.GetDurationVal("WEBHOOK_RATELIMIT_WINDOW", 2*time.Minute), "The duration for the window for the webhook rate limiting")
runCmd.Flags().DurationVar(&webhookCfg.RateLimitCleanUpInterval, "webhook-ratelimit-cleanup-interval", env.GetDurationVal("WEBHOOK_RATELIMIT_CLEANUP_INTERVAL", 1*time.Hour), "How often the rate limiter cleans up stale clients")

return runCmd
}
Expand Down
30 changes: 25 additions & 5 deletions cmd/webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ import (
"context"
"errors"
"fmt"
"math"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"text/template"
"time"

"github.com/argoproj-labs/argocd-image-updater/pkg/argocd"
"github.com/argoproj-labs/argocd-image-updater/pkg/common"
Expand All @@ -26,11 +28,15 @@ import (

// WebhookConfig holds the options for the webhook server
type WebhookConfig struct {
Port int
DockerSecret string
GHCRSecret string
QuaySecret string
HarborSecret string
Port int
DockerSecret string
GHCRSecret string
QuaySecret string
HarborSecret string
RateLimitEnabled bool
RateLimitNumAllowedRequests int
RateLimitWindow time.Duration
RateLimitCleanUpInterval time.Duration
}

// NewWebhookCommand creates a new webhook command
Expand Down Expand Up @@ -190,6 +196,10 @@ Supported registries:
webhookCmd.Flags().StringVar(&webhookCfg.GHCRSecret, "ghcr-webhook-secret", env.GetStringVal("GHCR_WEBHOOK_SECRET", ""), "Secret for validating GitHub Container Registry webhooks")
webhookCmd.Flags().StringVar(&webhookCfg.QuaySecret, "quay-webhook-secret", env.GetStringVal("QUAY_WEBHOOK_SECRET", ""), "Secret for validating Quay webhooks")
webhookCmd.Flags().StringVar(&webhookCfg.HarborSecret, "harbor-webhook-secret", env.GetStringVal("HARBOR_WEBHOOK_SECRET", ""), "Secret for validating Harbor webhooks")
webhookCmd.Flags().BoolVar(&webhookCfg.RateLimitEnabled, "enable-webhook-ratelimit", env.GetBoolVal("ENABLE_WEBHOOK_RATELIMIT", false), "Enable rate limiting for the webhook endpoint")
webhookCmd.Flags().IntVar(&webhookCfg.RateLimitNumAllowedRequests, "webhook-ratelimit-num-allowed", env.ParseNumFromEnv("WEBHOOK_RATELIMIT_NUM_ALLOWED_REQUESTS", 100, 0, math.MaxInt), "The number of allowed requests in a window for webhook rate limiting")
webhookCmd.Flags().DurationVar(&webhookCfg.RateLimitWindow, "webhook-ratelimit-window", env.GetDurationVal("WEBHOOK_RATELIMIT_WINDOW", 2*time.Minute), "The duration for the window for the webhook rate limiting")
webhookCmd.Flags().DurationVar(&webhookCfg.RateLimitCleanUpInterval, "webhook-ratelimit-cleanup-interval", env.GetDurationVal("WEBHOOK_RATELIMIT_CLEANUP_INTERVAL", 1*time.Hour), "How often the rate limiter cleans up stale clients")

return webhookCmd
}
Expand Down Expand Up @@ -238,6 +248,11 @@ func runWebhook(cfg *ImageUpdaterConfig, webhookCfg *WebhookConfig) error {
// Create webhook server
server := webhook.NewWebhookServer(webhookCfg.Port, handler, cfg.KubeClient, cfg.ArgoClient)

if webhookCfg.RateLimitEnabled {
limiter := webhook.NewRateLimiter(webhookCfg.RateLimitNumAllowedRequests, webhookCfg.RateLimitWindow, webhookCfg.RateLimitCleanUpInterval)
server.RateLimiter = limiter
}

// Set updater config
server.UpdaterConfig = &argocd.UpdateConfiguration{
NewRegFN: registry.NewClient,
Expand Down Expand Up @@ -273,5 +288,10 @@ func runWebhook(cfg *ImageUpdaterConfig, webhookCfg *WebhookConfig) error {
if err := server.Stop(); err != nil {
log.Errorf("Error stopping webhook server: %v", err)
}

if webhookCfg.RateLimitEnabled {
server.RateLimiter.StopCleanUp()
}

return nil
}
27 changes: 27 additions & 0 deletions docs/configuration/webhook.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,29 @@ to apply them yourself.

They are located in the `manifests/base/networking` directory.

## Enabling Rate Limiting

To keep the `/webhook` endpoint from being flooded with requests, which could
cause the same updates to run over and over, rate limiting can be turned on for
it. A client is rate limited once it exceeds the allowed number of requests
within the configured window, measured back from the time of each request.

The window and the number of allowed requests are configurable and can be set by
editing the `argocd-image-updater-config` ConfigMap. A cleanup interval can also
be set; it controls how often clients that have not sent a notification in a
while are removed from the rate limiter's storage.
```yaml
data:
# Enable rate limiting for the webhook endpoint
webhook.enable-rate-limit: "true"
# Set the amount of requests that can be made in a window before getting limited
webhook.ratelimit-num-allowed-requests: "<SOME_NUMBER>"
# Set the window of time checked
webhook.ratelimit-window: <SOME_DURATION>
# Set the interval for when clean ups occur
webhook.ratelimit-cleanup-interval: <SOME_DURATION>
```

## Environment Variables

The flags for both the `run` and `webhook` CLI commands can also be set via
Expand All @@ -168,6 +191,10 @@ environment variables. Below is the list of which variables correspond to which
|`GHCR_WEBHOOK_SECRET` |`--ghcr-webhook-secret`|
|`HARBOR_WEBHOOK_SECRET` |`--harbor-webhook-secret`|
|`QUAY_WEBHOOK_SECRET` |`--quay-webhook-secret`|
|`ENABLE_WEBHOOK_RATELIMIT`|`--enable-webhook-ratelimit`|
|`WEBHOOK_RATELIMIT_NUM_ALLOWED_REQUESTS`|`--webhook-ratelimit-num-allowed`|
|`WEBHOOK_RATELIMIT_WINDOW`|`--webhook-ratelimit-window`|
|`WEBHOOK_RATELIMIT_CLEANUP_INTERVAL`|`--webhook-ratelimit-cleanup-interval`|

## Adding Support For Other Registries

Expand Down
17 changes: 17 additions & 0 deletions docs/install/cmd/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ Secret for validating Docker Hub webhooks.

Enable webhook server for receiving registry events.

**--enable-webhook-ratelimit *enabled***

Enable rate limiting for the webhook endpoint

**--ghcr-webhook-secret *secret***

Secret for validating GitHub container registry webhooks.
Expand Down Expand Up @@ -225,4 +229,17 @@ whether to perform a cache warm-up on startup (default true)

Port to listen on for webhook events (default 8082)

**--webhook-ratelimit-cleanup-interval *duration***

How often the rate limiter cleans up stale clients (default 1h0m0s)

**--webhook-ratelimit-num-allowed *numRequests***

The number of allowed requests in a window for webhook rate limiting (default 100)

**--webhook-ratelimit-window *duration***

The duration for the window for the webhook rate limiting (default 2m0s)


[label selector syntax]: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
17 changes: 17 additions & 0 deletions docs/install/cmd/webhook.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ for images can only be specified from an environment variable.

Secret for validating Docker Hub webhooks.


**--enable-webhook-ratelimit *enabled***

Enable rate limiting for the webhook endpoint

**--ghcr-webhook-secret *secret***

Secret for validating GitHub container registry webhooks.
Expand Down Expand Up @@ -186,4 +191,16 @@ default configuration should be used instead, specify the empty string, i.e.

Port to listen on for webhook events (default 8080)

**--webhook-ratelimit-cleanup-interval *duration***

How often the rate limiter cleans up stale clients (default 1h0m0s)

**--webhook-ratelimit-num-allowed *numRequests***

The number of allowed requests in a window for webhook rate limiting (default 100)

**--webhook-ratelimit-window *duration***

The duration for the window for the webhook rate limiting (default 2m0s)

[label selector syntax]: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
24 changes: 24 additions & 0 deletions manifests/base/deployment/argocd-image-updater-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,30 @@ spec:
name: argocd-image-updater-secret
key: webhook.harbor-secret
optional: true
- name: ENABLE_WEBHOOK_RATELIMIT
valueFrom:
configMapKeyRef:
name: argocd-image-updater-config
key: webhook.enable-rate-limit
optional: true
- name: WEBHOOK_RATELIMIT_NUM_ALLOWED_REQUESTS
valueFrom:
configMapKeyRef:
name: argocd-image-updater-config
key: webhook.ratelimit-num-allowed-requests
optional: true
- name: WEBHOOK_RATELIMIT_WINDOW
valueFrom:
configMapKeyRef:
name: argocd-image-updater-config
key: webhook.ratelimit-window
optional: true
- name: WEBHOOK_RATELIMIT_CLEANUP_INTERVAL
valueFrom:
configMapKeyRef:
name: argocd-image-updater-config
key: webhook.ratelimit-cleanup-interval
optional: true
livenessProbe:
httpGet:
path: /healthz
Expand Down
24 changes: 24 additions & 0 deletions manifests/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,30 @@ spec:
key: webhook.harbor-secret
name: argocd-image-updater-secret
optional: true
- name: ENABLE_WEBHOOK_RATELIMIT
valueFrom:
configMapKeyRef:
key: webhook.enable-rate-limit
name: argocd-image-updater-config
optional: true
- name: WEBHOOK_RATELIMIT_NUM_ALLOWED_REQUESTS
valueFrom:
configMapKeyRef:
key: webhook.ratelimit-num-allowed-requests
name: argocd-image-updater-config
optional: true
- name: WEBHOOK_RATELIMIT_WINDOW
valueFrom:
configMapKeyRef:
key: webhook.ratelimit-window
name: argocd-image-updater-config
optional: true
- name: WEBHOOK_RATELIMIT_CLEANUP_INTERVAL
valueFrom:
configMapKeyRef:
key: webhook.ratelimit-cleanup-interval
name: argocd-image-updater-config
optional: true
image: quay.io/argoprojlabs/argocd-image-updater:latest
imagePullPolicy: Always
livenessProbe:
Expand Down
91 changes: 91 additions & 0 deletions pkg/webhook/ratelimit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package webhook

import (
"sync"
"time"
)

// RateLimiter implements a per-client sliding-window rate limiter.
//
// Each client, identified by the key passed to Allow, may have at most
// `allowed` accepted requests inside any span of length `window`. All mutable
// state is guarded by mu, so one RateLimiter can be shared between goroutines.
// A RateLimiter must not be copied after first use.
type RateLimiter struct {
	// mu guards clients and lastSeen.
	mu sync.Mutex
	// clients maps a client key to the UnixNano timestamps of its accepted
	// requests that may still fall inside the current window.
	clients map[string][]int64
	// lastSeen records when each client last called Allow, whether or not the
	// request was accepted; CleanUp uses it to find idle clients.
	lastSeen map[string]time.Time
	// window is the length of the sliding window.
	window time.Duration
	// allowed is the maximum number of accepted requests per window.
	allowed int
	// done is closed by StopCleanUp to make StartCleanUp return.
	done chan bool
	// stopOnce makes StopCleanUp idempotent.
	stopOnce sync.Once
}

// NewRateLimiter returns a RateLimiter that accepts at most numRequests
// requests per client within any span of length window. It also starts a
// goroutine running StartCleanUp(cleanUpInterval); call StopCleanUp to end it.
// A non-positive cleanUpInterval disables the periodic clean up.
func NewRateLimiter(numRequests int, window time.Duration, cleanUpInterval time.Duration) *RateLimiter {
	limiter := &RateLimiter{
		clients:  make(map[string][]int64),
		lastSeen: make(map[string]time.Time),
		window:   window,
		allowed:  numRequests,
		done:     make(chan bool),
	}
	go limiter.StartCleanUp(cleanUpInterval)
	return limiter
}

// Allow reports whether the client identified by clientIP may make another
// request right now. An accepted request is recorded and counts against the
// client's limit until it is older than the window; a rejected request is not
// counted.
func (rl *RateLimiter) Allow(clientIP string) bool {
	now := time.Now()
	// Nanosecond resolution keeps sub-second windows working; truncating the
	// window to whole seconds would turn e.g. 500ms into 0 and disable limiting.
	cutoff := now.Add(-rl.window).UnixNano()

	rl.mu.Lock()
	defer rl.mu.Unlock()

	// Track every caller, including rejected ones, so that CleanUp can always
	// evict the entry later (otherwise allowed == 0 would leak one entry per
	// client forever).
	rl.lastSeen[clientIP] = now

	// Drop timestamps that slid out of the window, reusing the backing array.
	history := rl.clients[clientIP]
	recent := history[:0]
	for _, ts := range history {
		if ts > cutoff {
			recent = append(recent, ts)
		}
	}

	allow := len(recent) < rl.allowed
	if allow {
		recent = append(recent, now.UnixNano())
	}
	rl.clients[clientIP] = recent

	return allow
}

// StartCleanUp calls CleanUp every interval until StopCleanUp is called. It
// blocks, so it is normally run in its own goroutine (NewRateLimiter does
// this). A non-positive interval makes it return immediately, because
// time.NewTicker panics on such values.
func (rl *RateLimiter) StartCleanUp(interval time.Duration) {
	if interval <= 0 {
		return
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-rl.done:
			return
		case <-ticker.C:
			rl.CleanUp()
		}
	}
}

// CleanUp removes every client that has not called Allow for longer than the
// window, so that stale clients do not take up memory.
func (rl *RateLimiter) CleanUp() {
	rl.mu.Lock()
	defer rl.mu.Unlock()

	now := time.Now()
	for client, seen := range rl.lastSeen {
		if seen.Add(rl.window).Before(now) {
			delete(rl.clients, client)
			delete(rl.lastSeen, client)
		}
	}
}

// StopCleanUp stops the clean up goroutine. It never blocks and is safe to
// call more than once, or when the goroutine has already exited.
func (rl *RateLimiter) StopCleanUp() {
	rl.stopOnce.Do(func() {
		close(rl.done)
	})
}
Loading