Skip to content

Commit dcc38fb

Browse files
committed
Added a flag to disable tracing of health check requests
1 parent 7b70ff2 commit dcc38fb

File tree

10 files changed

+427
-67
lines changed

10 files changed

+427
-67
lines changed

internal/gateway/gateway.go

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"google.golang.org/grpc/credentials/insecure"
1919
healthpb "google.golang.org/grpc/health/grpc_health_v1"
2020
"google.golang.org/grpc/metadata"
21+
"google.golang.org/grpc/stats"
2122

2223
"github.com/authzed/authzed-go/proto"
2324
v1 "github.com/authzed/authzed-go/proto/authzed/api/v1"
@@ -26,6 +27,11 @@ import (
2627
"github.com/authzed/spicedb/internal/grpchelpers"
2728
)
2829

30+
const (
31+
healthCheckRoute = "/grpc.health.v1.Health/Check"
32+
healthCheckHTTPPath = "/healthz"
33+
)
34+
2935
var histogram = promauto.NewHistogramVec(prometheus.HistogramOpts{
3036
Namespace: "spicedb",
3137
Subsystem: "rest_gateway",
@@ -35,14 +41,23 @@ var histogram = promauto.NewHistogramVec(prometheus.HistogramOpts{
3541

3642
// NewHandler creates an REST gateway HTTP CloserHandler with the provided upstream
3743
// configuration.
38-
func NewHandler(ctx context.Context, upstreamAddr, upstreamTLSCertPath string) (*CloserHandler, error) {
44+
func NewHandler(ctx context.Context, upstreamAddr, upstreamTLSCertPath string, disableHealthCheckTracing bool) (*CloserHandler, error) {
3945
if upstreamAddr == "" {
4046
return nil, fmt.Errorf("upstreamAddr must not be empty")
4147
}
4248

43-
opts := []grpc.DialOption{
44-
grpc.WithStatsHandler(otelgrpc.NewClientHandler()),
49+
var opts []grpc.DialOption
50+
51+
if disableHealthCheckTracing {
52+
opts = append(opts, grpc.WithStatsHandler(otelgrpc.NewClientHandler(
53+
otelgrpc.WithFilter(func(info *stats.RPCTagInfo) bool {
54+
return info.FullMethodName != healthCheckRoute
55+
}),
56+
)))
57+
} else {
58+
opts = append(opts, grpc.WithStatsHandler(otelgrpc.NewClientHandler()))
4559
}
60+
4661
if upstreamTLSCertPath == "" {
4762
opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials()))
4863
} else {
@@ -85,7 +100,15 @@ func NewHandler(ctx context.Context, upstreamAddr, upstreamTLSCertPath string) (
85100
}))
86101
mux.Handle("/", gwMux)
87102

88-
finalHandler := promhttp.InstrumentHandlerDuration(histogram, otelhttp.NewHandler(mux, "gateway"))
103+
var finalHandler http.Handler
104+
if disableHealthCheckTracing {
105+
// Only apply tracing to non-health check endpoints
106+
finalHandler = promhttp.InstrumentHandlerDuration(histogram,
107+
skipHealthCheckTracing(mux, otelhttp.NewHandler(mux, "gateway")))
108+
} else {
109+
finalHandler = promhttp.InstrumentHandlerDuration(histogram, otelhttp.NewHandler(mux, "gateway"))
110+
}
111+
89112
return newCloserHandler(finalHandler, schemaConn, permissionsConn, watchConn, healthConn, experimentalConn), nil
90113
}
91114

@@ -143,6 +166,19 @@ func registerHandler(ctx context.Context, mux *runtime.ServeMux, endpoint string
143166
return conn, nil
144167
}
145168

169+
// skipHealthCheckTracing creates an HTTP handler that skips OpenTelemetry tracing for health check endpoints
170+
func skipHealthCheckTracing(directHandler http.Handler, tracedHandler http.Handler) http.Handler {
171+
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
172+
if r.URL.Path == healthCheckHTTPPath {
173+
// Skip tracing for health check endpoints
174+
directHandler.ServeHTTP(w, r)
175+
} else {
176+
// Apply OpenTelemetry tracing for all other endpoints
177+
tracedHandler.ServeHTTP(w, r)
178+
}
179+
})
180+
}
181+
146182
var defaultOtelOpts = []otelgrpc.Option{
147183
otelgrpc.WithPropagators(otel.GetTextMapPropagator()),
148184
otelgrpc.WithTracerProvider(otel.GetTracerProvider()),

internal/gateway/gateway_test.go

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,24 @@
11
package gateway
22

33
import (
4+
"context"
5+
"net"
46
"net/http"
7+
"net/http/httptest"
58
"testing"
9+
"time"
610

711
"github.com/stretchr/testify/require"
812
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
913
"go.opentelemetry.io/otel"
1014
"go.opentelemetry.io/otel/propagation"
15+
sdktrace "go.opentelemetry.io/otel/sdk/trace"
16+
"go.opentelemetry.io/otel/sdk/trace/tracetest"
1117
"go.opentelemetry.io/otel/trace"
1218
"go.uber.org/goleak"
19+
"google.golang.org/grpc"
20+
"google.golang.org/grpc/health"
21+
healthpb "google.golang.org/grpc/health/grpc_health_v1"
1322

1423
"github.com/authzed/spicedb/pkg/testutil"
1524
)
@@ -48,11 +57,158 @@ func TestOtelForwarding(t *testing.T) {
4857
func TestCloseConnections(t *testing.T) {
4958
defer goleak.VerifyNone(t, append(testutil.GoLeakIgnores(), goleak.IgnoreCurrent())...)
5059

51-
gatewayHandler, err := NewHandler(t.Context(), "192.0.2.0:4321", "")
60+
gatewayHandler, err := NewHandler(t.Context(), "192.0.2.0:4321", "", false)
5261
require.NoError(t, err)
5362
// 4 conns for permission+schema+watch+experimental services, 1 for health check
5463
require.Len(t, gatewayHandler.closers, 5)
5564

5665
// if connections are not closed, goleak would detect it
5766
require.NoError(t, gatewayHandler.Close())
5867
}
68+
69+
func TestGatewayHealthCheckTracing(t *testing.T) {
70+
tests := []struct {
71+
name string
72+
disableHealthCheckTracing bool
73+
description string
74+
}{
75+
{
76+
name: "health check tracing disabled",
77+
disableHealthCheckTracing: true,
78+
description: "gateway should skip tracing for health checks when flag is true",
79+
},
80+
{
81+
name: "health check tracing enabled",
82+
disableHealthCheckTracing: false,
83+
description: "gateway should trace health checks when flag is false",
84+
},
85+
}
86+
87+
for _, tt := range tests {
88+
t.Run(tt.name, func(t *testing.T) {
89+
defer goleak.VerifyNone(t, append(testutil.GoLeakIgnores(), goleak.IgnoreCurrent())...)
90+
91+
ctx, cancel := context.WithTimeout(t.Context(), 10*time.Second)
92+
defer cancel()
93+
94+
// Since we can't use a real gRPC server, we need to mock one
95+
mockServer := setupMockGRPCServer(t, ctx)
96+
defer mockServer.Stop()
97+
98+
defaultProvider := otel.GetTracerProvider()
99+
defer otel.SetTracerProvider(defaultProvider)
100+
101+
provider := sdktrace.NewTracerProvider(
102+
sdktrace.WithSampler(sdktrace.AlwaysSample()),
103+
)
104+
spanrecorder := tracetest.NewSpanRecorder()
105+
provider.RegisterSpanProcessor(spanrecorder)
106+
otel.SetTracerProvider(provider)
107+
108+
gatewayHandler, err := NewHandler(ctx, mockServer.Addr(), "", tt.disableHealthCheckTracing)
109+
require.NoError(t, err)
110+
defer gatewayHandler.Close()
111+
112+
// Test health check endpoint
113+
req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
114+
w := httptest.NewRecorder()
115+
gatewayHandler.ServeHTTP(w, req)
116+
require.Equal(t, http.StatusOK, w.Code)
117+
118+
healthSpans := spanrecorder.Ended()
119+
spanrecorder.Reset()
120+
121+
// Test regular endpoint
122+
req = httptest.NewRequest(http.MethodGet, "/openapi.json", nil)
123+
w = httptest.NewRecorder()
124+
gatewayHandler.ServeHTTP(w, req)
125+
require.Equal(t, http.StatusOK, w.Code)
126+
127+
regularSpans := spanrecorder.Ended()
128+
129+
if tt.disableHealthCheckTracing {
130+
require.Len(t, healthSpans, 0, "Health check should not create spans when tracing is disabled")
131+
132+
healthFound := false
133+
for _, span := range healthSpans {
134+
if span.Name() == "grpc.health.v1.Health/Check" {
135+
healthFound = true
136+
break
137+
}
138+
}
139+
require.False(t, healthFound, "Unexpectedly found health check span when tracing is disabled")
140+
141+
gatewayFound := false
142+
for _, span := range healthSpans {
143+
if span.Name() == "gateway" {
144+
gatewayFound = true
145+
break
146+
}
147+
}
148+
require.False(t, gatewayFound, "Unexpectedly found gateway span for health check when tracing is disabled")
149+
} else {
150+
require.True(t, len(healthSpans) > 0, "Health check should create spans when tracing is enabled")
151+
152+
healthFound := false
153+
for _, span := range healthSpans {
154+
if span.Name() == "grpc.health.v1.Health/Check" {
155+
healthFound = true
156+
break
157+
}
158+
}
159+
require.True(t, healthFound, "Expected to find health check span when tracing is enabled")
160+
}
161+
162+
require.True(t, len(regularSpans) > 0, "Regular endpoints should create spans")
163+
regularFound := false
164+
for _, span := range regularSpans {
165+
if span.Name() == "gateway" {
166+
regularFound = true
167+
break
168+
}
169+
}
170+
require.True(t, regularFound, "Expected to find gateway span for regular endpoint")
171+
})
172+
}
173+
}
174+
175+
type mockGRPCServer struct {
176+
server *grpc.Server
177+
listener net.Listener
178+
addr string
179+
}
180+
181+
func (m *mockGRPCServer) Addr() string {
182+
return m.addr
183+
}
184+
185+
func (m *mockGRPCServer) Stop() {
186+
if m.server != nil {
187+
m.server.Stop()
188+
}
189+
if m.listener != nil {
190+
m.listener.Close()
191+
}
192+
}
193+
194+
func setupMockGRPCServer(t *testing.T, ctx context.Context) *mockGRPCServer {
195+
listener, err := net.Listen("tcp", "127.0.0.1:0")
196+
require.NoError(t, err)
197+
198+
server := grpc.NewServer()
199+
200+
healthServer := health.NewServer()
201+
healthpb.RegisterHealthServer(server, healthServer)
202+
203+
go func() {
204+
if err := server.Serve(listener); err != nil {
205+
t.Logf("Mock gRPC server error: %v", err)
206+
}
207+
}()
208+
209+
return &mockGRPCServer{
210+
server: server,
211+
listener: listener,
212+
addr: listener.Addr().String(),
213+
}
214+
}

pkg/cmd/serve.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ func RegisterServeFlags(cmd *cobra.Command, config *server.Config) error {
180180
// NOTE: cobraotel.New takes service name as an arg rather than command name.
181181
otel := cobraotel.New("spicedb")
182182
otel.RegisterFlags(tracingFlags)
183+
tracingFlags.BoolVar(&config.DisableHealthCheckOTelTracingMiddleware, "otel-disable-healthcheck-tracing", false, "disables telemetry tracing for health check API calls to reduce trace volume")
183184

184185
loggingFlagSet := nfs.FlagSet(BoldBlue("Logging"))
185186
loggingFlagSet.BoolVar(&config.EnableRequestLogs, "grpc-log-requests-enabled", false, "enable logging of API request payloads")

0 commit comments

Comments
 (0)