Skip to content

Commit 18d055f

Browse files
committed
Added a flag to disable tracing of health check requests
1 parent 0e74699 commit 18d055f

File tree

10 files changed

+427
-68
lines changed

10 files changed

+427
-68
lines changed

internal/gateway/gateway.go

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"google.golang.org/grpc/credentials/insecure"
1919
healthpb "google.golang.org/grpc/health/grpc_health_v1"
2020
"google.golang.org/grpc/metadata"
21+
"google.golang.org/grpc/stats"
2122

2223
"github.com/authzed/authzed-go/proto"
2324
v1 "github.com/authzed/authzed-go/proto/authzed/api/v1"
@@ -26,6 +27,9 @@ import (
2627
"github.com/authzed/spicedb/internal/grpchelpers"
2728
)
2829

30+
const healthCheckRoute = "/grpc.health.v1.Health/Check"
31+
const healthCheckHttpPath = "/healthz"
32+
2933
var histogram = promauto.NewHistogramVec(prometheus.HistogramOpts{
3034
Namespace: "spicedb",
3135
Subsystem: "rest_gateway",
@@ -35,14 +39,23 @@ var histogram = promauto.NewHistogramVec(prometheus.HistogramOpts{
3539

3640
// NewHandler creates an REST gateway HTTP CloserHandler with the provided upstream
3741
// configuration.
38-
func NewHandler(ctx context.Context, upstreamAddr, upstreamTLSCertPath string) (*CloserHandler, error) {
42+
func NewHandler(ctx context.Context, upstreamAddr, upstreamTLSCertPath string, disableHealthCheckTracing bool) (*CloserHandler, error) {
3943
if upstreamAddr == "" {
4044
return nil, fmt.Errorf("upstreamAddr must not be empty")
4145
}
4246

43-
opts := []grpc.DialOption{
44-
grpc.WithStatsHandler(otelgrpc.NewClientHandler()),
47+
var opts []grpc.DialOption
48+
49+
if disableHealthCheckTracing {
50+
opts = append(opts, grpc.WithStatsHandler(otelgrpc.NewClientHandler(
51+
otelgrpc.WithFilter(func(info *stats.RPCTagInfo) bool {
52+
return info.FullMethodName != healthCheckRoute
53+
}),
54+
)))
55+
} else {
56+
opts = append(opts, grpc.WithStatsHandler(otelgrpc.NewClientHandler()))
4557
}
58+
4659
if upstreamTLSCertPath == "" {
4760
opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials()))
4861
} else {
@@ -85,7 +98,15 @@ func NewHandler(ctx context.Context, upstreamAddr, upstreamTLSCertPath string) (
8598
}))
8699
mux.Handle("/", gwMux)
87100

88-
finalHandler := promhttp.InstrumentHandlerDuration(histogram, otelhttp.NewHandler(mux, "gateway"))
101+
var finalHandler http.Handler
102+
if disableHealthCheckTracing {
103+
// Only apply tracing to non-health check endpoints
104+
finalHandler = promhttp.InstrumentHandlerDuration(histogram,
105+
skipHealthCheckTracing(mux, otelhttp.NewHandler(mux, "gateway")))
106+
} else {
107+
finalHandler = promhttp.InstrumentHandlerDuration(histogram, otelhttp.NewHandler(mux, "gateway"))
108+
}
109+
89110
return newCloserHandler(finalHandler, schemaConn, permissionsConn, watchConn, healthConn, experimentalConn), nil
90111
}
91112

@@ -143,6 +164,19 @@ func registerHandler(ctx context.Context, mux *runtime.ServeMux, endpoint string
143164
return conn, nil
144165
}
145166

167+
// skipHealthCheckTracing creates an HTTP handler that skips OpenTelemetry tracing for health check endpoints
168+
func skipHealthCheckTracing(directHandler http.Handler, tracedHandler http.Handler) http.Handler {
169+
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
170+
if r.URL.Path == healthCheckHttpPath {
171+
// Skip tracing for health check endpoints
172+
directHandler.ServeHTTP(w, r)
173+
} else {
174+
// Apply OpenTelemetry tracing for all other endpoints
175+
tracedHandler.ServeHTTP(w, r)
176+
}
177+
})
178+
}
179+
146180
var defaultOtelOpts = []otelgrpc.Option{
147181
otelgrpc.WithPropagators(otel.GetTextMapPropagator()),
148182
otelgrpc.WithTracerProvider(otel.GetTracerProvider()),

internal/gateway/gateway_test.go

Lines changed: 158 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,24 @@
11
package gateway
22

33
import (
4+
"context"
5+
"net"
46
"net/http"
7+
"net/http/httptest"
58
"testing"
9+
"time"
610

711
"github.com/stretchr/testify/require"
812
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
913
"go.opentelemetry.io/otel"
1014
"go.opentelemetry.io/otel/propagation"
15+
sdktrace "go.opentelemetry.io/otel/sdk/trace"
16+
"go.opentelemetry.io/otel/sdk/trace/tracetest"
1117
"go.opentelemetry.io/otel/trace"
1218
"go.uber.org/goleak"
19+
"google.golang.org/grpc"
20+
"google.golang.org/grpc/health"
21+
healthpb "google.golang.org/grpc/health/grpc_health_v1"
1322

1423
"github.com/authzed/spicedb/pkg/testutil"
1524
)
@@ -48,11 +57,159 @@ func TestOtelForwarding(t *testing.T) {
4857
func TestCloseConnections(t *testing.T) {
4958
defer goleak.VerifyNone(t, append(testutil.GoLeakIgnores(), goleak.IgnoreCurrent())...)
5059

51-
gatewayHandler, err := NewHandler(t.Context(), "192.0.2.0:4321", "")
60+
gatewayHandler, err := NewHandler(t.Context(), "192.0.2.0:4321", "", false)
5261
require.NoError(t, err)
5362
// 4 conns for permission+schema+watch+experimental services, 1 for health check
5463
require.Len(t, gatewayHandler.closers, 5)
5564

5665
// if connections are not closed, goleak would detect it
5766
require.NoError(t, gatewayHandler.Close())
5867
}
68+
69+
func TestGatewayHealthCheckTracing(t *testing.T) {
70+
tests := []struct {
71+
name string
72+
disableHealthCheckTracing bool
73+
description string
74+
}{
75+
{
76+
name: "health check tracing disabled",
77+
disableHealthCheckTracing: true,
78+
description: "gateway should skip tracing for health checks when flag is true",
79+
},
80+
{
81+
name: "health check tracing enabled",
82+
disableHealthCheckTracing: false,
83+
description: "gateway should trace health checks when flag is false",
84+
},
85+
}
86+
87+
for _, tt := range tests {
88+
t.Run(tt.name, func(t *testing.T) {
89+
defer goleak.VerifyNone(t, append(testutil.GoLeakIgnores(), goleak.IgnoreCurrent())...)
90+
91+
ctx, cancel := context.WithTimeout(t.Context(), 10*time.Second)
92+
defer cancel()
93+
94+
// Since we can't use a real gRPC server, we need to mock one
95+
mockServer := setupMockGRPCServer(t, ctx)
96+
defer mockServer.Stop()
97+
98+
defaultProvider := otel.GetTracerProvider()
99+
defer otel.SetTracerProvider(defaultProvider)
100+
101+
provider := sdktrace.NewTracerProvider(
102+
sdktrace.WithSampler(sdktrace.AlwaysSample()),
103+
)
104+
spanrecorder := tracetest.NewSpanRecorder()
105+
provider.RegisterSpanProcessor(spanrecorder)
106+
otel.SetTracerProvider(provider)
107+
108+
gatewayHandler, err := NewHandler(ctx, mockServer.Addr(), "", tt.disableHealthCheckTracing)
109+
require.NoError(t, err)
110+
defer gatewayHandler.Close()
111+
112+
// Test health check endpoint
113+
req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
114+
w := httptest.NewRecorder()
115+
gatewayHandler.ServeHTTP(w, req)
116+
require.Equal(t, http.StatusOK, w.Code)
117+
118+
healthSpans := spanrecorder.Ended()
119+
spanrecorder.Reset()
120+
121+
// Test regular endpoint
122+
req = httptest.NewRequest(http.MethodGet, "/openapi.json", nil)
123+
w = httptest.NewRecorder()
124+
gatewayHandler.ServeHTTP(w, req)
125+
require.Equal(t, http.StatusOK, w.Code)
126+
127+
regularSpans := spanrecorder.Ended()
128+
129+
if tt.disableHealthCheckTracing {
130+
require.Len(t, healthSpans, 0, "Health check should not create spans when tracing is disabled")
131+
132+
healthFound := false
133+
for _, span := range healthSpans {
134+
if span.Name() == "grpc.health.v1.Health/Check" {
135+
healthFound = true
136+
break
137+
}
138+
}
139+
require.False(t, healthFound, "Unexpectedly found health check span when tracing is disabled")
140+
141+
gatewayFound := false
142+
for _, span := range healthSpans {
143+
if span.Name() == "gateway" {
144+
gatewayFound = true
145+
break
146+
}
147+
}
148+
require.False(t, gatewayFound, "Unexpectedly found gateway span for health check when tracing is disabled")
149+
} else {
150+
require.True(t, len(healthSpans) > 0, "Health check should create spans when tracing is enabled")
151+
152+
healthFound := false
153+
for _, span := range healthSpans {
154+
if span.Name() == "grpc.health.v1.Health/Check" {
155+
healthFound = true
156+
break
157+
}
158+
}
159+
require.True(t, healthFound, "Expected to find health check span when tracing is enabled")
160+
}
161+
162+
require.True(t, len(regularSpans) > 0, "Regular endpoints should create spans")
163+
regularFound := false
164+
for _, span := range regularSpans {
165+
if span.Name() == "gateway" {
166+
regularFound = true
167+
break
168+
}
169+
}
170+
require.True(t, regularFound, "Expected to find gateway span for regular endpoint")
171+
172+
})
173+
}
174+
}
175+
176+
type mockGRPCServer struct {
177+
server *grpc.Server
178+
listener net.Listener
179+
addr string
180+
}
181+
182+
func (m *mockGRPCServer) Addr() string {
183+
return m.addr
184+
}
185+
186+
func (m *mockGRPCServer) Stop() {
187+
if m.server != nil {
188+
m.server.Stop()
189+
}
190+
if m.listener != nil {
191+
m.listener.Close()
192+
}
193+
}
194+
195+
func setupMockGRPCServer(t *testing.T, ctx context.Context) *mockGRPCServer {
196+
listener, err := net.Listen("tcp", "127.0.0.1:0")
197+
require.NoError(t, err)
198+
199+
server := grpc.NewServer()
200+
201+
healthServer := health.NewServer()
202+
healthpb.RegisterHealthServer(server, healthServer)
203+
204+
go func() {
205+
if err := server.Serve(listener); err != nil {
206+
t.Logf("Mock gRPC server error: %v", err)
207+
}
208+
}()
209+
210+
return &mockGRPCServer{
211+
server: server,
212+
listener: listener,
213+
addr: listener.Addr().String(),
214+
}
215+
}

pkg/cmd/serve.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ func RegisterServeFlags(cmd *cobra.Command, config *server.Config) error {
184184
otel := cobraotel.New("spicedb")
185185
otel.RegisterFlags(observabilityFlags)
186186
runtime.RegisterFlags(observabilityFlags)
187+
observabilityFlags.BoolVar(&config.DisableHealthCheckOTelTracingMiddleware, "otel-disable-healthcheck-tracing", false, "disables telemetry tracing for health check API calls to reduce trace volume")
187188

188189
metricsFlags := nfs.FlagSet(BoldBlue("Metrics Server"))
189190
// Flags for metrics

0 commit comments

Comments
 (0)