Skip to content

Commit 85ce900

Browse files
DNVindhyaejona86
authored andcommitted
gcp-observability, census: add trace information to logs (#9963)
This commit adds trace information (TraceId, SpanId and TraceSampled) fields to LogEntry, when both logging and tracing are enabled in gcp-observability. For server-side logs, span information was readily available using Span.getContext() propagated via `io.grpc.Context`. Similar approach is not feasible for client-side architecture. Client SpanContext which has all the information required to be added to logs is propagated to the logging interceptor via `io.grpc.CallOptions`.
1 parent bb39ca3 commit 85ce900

File tree

17 files changed

+686
-125
lines changed

17 files changed

+686
-125
lines changed

census/src/main/java/io/grpc/census/CensusTracingModule.java

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package io.grpc.census;
1818

1919
import static com.google.common.base.Preconditions.checkNotNull;
20+
import static io.grpc.census.internal.ObservabilityCensusConstants.CLIENT_TRACE_SPAN_CONTEXT_KEY;
2021

2122
import com.google.common.annotations.VisibleForTesting;
2223
import io.grpc.Attributes;
@@ -124,8 +125,8 @@ public SpanContext parseBytes(byte[] serialized) {
124125
*/
125126
@VisibleForTesting
126127
CallAttemptsTracerFactory newClientCallTracer(
127-
@Nullable Span parentSpan, MethodDescriptor<?, ?> method) {
128-
return new CallAttemptsTracerFactory(parentSpan, method);
128+
@Nullable Span clientSpan, MethodDescriptor<?, ?> method) {
129+
return new CallAttemptsTracerFactory(clientSpan, method);
129130
}
130131

131132
/**
@@ -248,17 +249,11 @@ final class CallAttemptsTracerFactory extends ClientStreamTracer.Factory {
248249
private final Span span;
249250
private final String fullMethodName;
250251

251-
CallAttemptsTracerFactory(@Nullable Span parentSpan, MethodDescriptor<?, ?> method) {
252+
CallAttemptsTracerFactory(@Nullable Span clientSpan, MethodDescriptor<?, ?> method) {
252253
checkNotNull(method, "method");
253254
this.isSampledToLocalTracing = method.isSampledToLocalTracing();
254255
this.fullMethodName = method.getFullMethodName();
255-
this.span =
256-
censusTracer
257-
.spanBuilderWithExplicitParent(
258-
generateTraceSpanName(false, fullMethodName),
259-
parentSpan)
260-
.setRecordEvents(true)
261-
.startSpan();
256+
this.span = clientSpan;
262257
}
263258

264259
@Override
@@ -461,13 +456,20 @@ public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
461456
// Safe usage of the unsafe trace API because CONTEXT_SPAN_KEY.get() returns the same value
462457
// as Tracer.getCurrentSpan() except when no value available when the return value is null
463458
// for the direct access and BlankSpan when Tracer API is used.
464-
final CallAttemptsTracerFactory tracerFactory =
465-
newClientCallTracer(
466-
io.opencensus.trace.unsafe.ContextUtils.getValue(Context.current()), method);
459+
Span parentSpan = io.opencensus.trace.unsafe.ContextUtils.getValue(Context.current());
460+
Span clientSpan = censusTracer
461+
.spanBuilderWithExplicitParent(
462+
generateTraceSpanName(false, method.getFullMethodName()),
463+
parentSpan)
464+
.setRecordEvents(true)
465+
.startSpan();
466+
467+
final CallAttemptsTracerFactory tracerFactory = newClientCallTracer(clientSpan, method);
467468
ClientCall<ReqT, RespT> call =
468469
next.newCall(
469470
method,
470-
callOptions.withStreamTracerFactory(tracerFactory));
471+
callOptions.withStreamTracerFactory(tracerFactory)
472+
.withOption(CLIENT_TRACE_SPAN_CONTEXT_KEY, clientSpan.getContext()));
471473
return new SimpleForwardingClientCall<ReqT, RespT>(call) {
472474
@Override
473475
public void start(Listener<RespT> responseListener, Metadata headers) {

census/src/main/java/io/grpc/census/internal/ObservabilityCensusConstants.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@
2626
import static io.opencensus.contrib.grpc.metrics.RpcMeasureConstants.GRPC_SERVER_STATUS;
2727

2828
import com.google.common.annotations.VisibleForTesting;
29+
import io.grpc.CallOptions;
2930
import io.opencensus.contrib.grpc.metrics.RpcViewConstants;
3031
import io.opencensus.stats.Aggregation;
3132
import io.opencensus.stats.Measure;
3233
import io.opencensus.stats.Measure.MeasureDouble;
3334
import io.opencensus.stats.View;
35+
import io.opencensus.trace.SpanContext;
3436
import java.util.Arrays;
3537

3638
// TODO(dnvindhya): Remove metric and view definitions from this class once it is moved to
@@ -42,6 +44,9 @@
4244
@VisibleForTesting
4345
public final class ObservabilityCensusConstants {
4446

47+
public static CallOptions.Key<SpanContext> CLIENT_TRACE_SPAN_CONTEXT_KEY
48+
= CallOptions.Key.createWithDefault("Client span context for tracing", SpanContext.INVALID);
49+
4550
static final Aggregation AGGREGATION_WITH_BYTES_HISTOGRAM =
4651
RpcViewConstants.GRPC_CLIENT_SENT_BYTES_PER_RPC_VIEW.getAggregation();
4752

census/src/test/java/io/grpc/census/CensusModulesTest.java

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import static io.grpc.census.CensusStatsModule.CallAttemptsTracerFactory.RETRY_DELAY_PER_CALL;
2323
import static io.grpc.census.CensusStatsModule.CallAttemptsTracerFactory.TRANSPARENT_RETRIES_PER_CALL;
2424
import static io.grpc.census.internal.ObservabilityCensusConstants.API_LATENCY_PER_CALL;
25+
import static io.grpc.census.internal.ObservabilityCensusConstants.CLIENT_TRACE_SPAN_CONTEXT_KEY;
2526
import static java.util.concurrent.TimeUnit.MILLISECONDS;
2627
import static org.junit.Assert.assertEquals;
2728
import static org.junit.Assert.assertFalse;
@@ -317,6 +318,10 @@ public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
317318
capturedCallOptions.get().getStreamTracerFactories().get(1)
318319
instanceof CensusStatsModule.CallAttemptsTracerFactory);
319320

321+
// The interceptor adds client SpanContext to CallOptions
322+
assertTrue(capturedCallOptions.get().getOption(CLIENT_TRACE_SPAN_CONTEXT_KEY).isValid());
323+
assertTrue(capturedCallOptions.get().getOption(CLIENT_TRACE_SPAN_CONTEXT_KEY) != null);
324+
320325
// Make the call
321326
Metadata headers = new Metadata();
322327
call.start(mockClientCallListener, headers);
@@ -738,12 +743,10 @@ private void assertPerCallMetrics(double expectedLatencyValue) {
738743
@Test
739744
public void clientBasicTracingDefaultSpan() {
740745
CallAttemptsTracerFactory callTracer =
741-
censusTracing.newClientCallTracer(null, method);
746+
censusTracing.newClientCallTracer(spyClientSpan, method);
742747
Metadata headers = new Metadata();
743748
ClientStreamTracer clientStreamTracer = callTracer.newClientStreamTracer(STREAM_INFO, headers);
744749
clientStreamTracer.streamCreated(Attributes.EMPTY, headers);
745-
verify(tracer).spanBuilderWithExplicitParent(
746-
eq("Sent.package1.service2.method3"), ArgumentMatchers.<Span>isNull());
747750
verify(tracer).spanBuilderWithExplicitParent(
748751
eq("Attempt.package1.service2.method3"), eq(spyClientSpan));
749752
verify(spyClientSpan, never()).end(any(EndSpanOptions.class));
@@ -797,7 +800,7 @@ public void clientBasicTracingDefaultSpan() {
797800
@Test
798801
public void clientTracingSampledToLocalSpanStore() {
799802
CallAttemptsTracerFactory callTracer =
800-
censusTracing.newClientCallTracer(null, sampledMethod);
803+
censusTracing.newClientCallTracer(spyClientSpan, sampledMethod);
801804
callTracer.callEnded(Status.OK);
802805

803806
verify(spyClientSpan).end(
@@ -867,10 +870,7 @@ record = statsRecorder.pollRecord();
867870
@Test
868871
public void clientStreamNeverCreatedStillRecordTracing() {
869872
CallAttemptsTracerFactory callTracer =
870-
censusTracing.newClientCallTracer(fakeClientParentSpan, method);
871-
verify(tracer).spanBuilderWithExplicitParent(
872-
eq("Sent.package1.service2.method3"), same(fakeClientParentSpan));
873-
verify(spyClientSpanBuilder).setRecordEvents(eq(true));
873+
censusTracing.newClientCallTracer(spyClientSpan, method);
874874

875875
callTracer.callEnded(Status.DEADLINE_EXCEEDED.withDescription("3 seconds"));
876876
verify(spyClientSpan).end(
@@ -1046,18 +1046,15 @@ public void statsHeaderMalformed() {
10461046
@Test
10471047
public void traceHeadersPropagateSpanContext() throws Exception {
10481048
CallAttemptsTracerFactory callTracer =
1049-
censusTracing.newClientCallTracer(fakeClientParentSpan, method);
1049+
censusTracing.newClientCallTracer(spyClientSpan, method);
10501050
Metadata headers = new Metadata();
10511051
ClientStreamTracer streamTracer = callTracer.newClientStreamTracer(STREAM_INFO, headers);
10521052
streamTracer.streamCreated(Attributes.EMPTY, headers);
10531053

10541054
verify(mockTracingPropagationHandler).toByteArray(same(fakeAttemptSpanContext));
10551055
verifyNoMoreInteractions(mockTracingPropagationHandler);
1056-
verify(tracer).spanBuilderWithExplicitParent(
1057-
eq("Sent.package1.service2.method3"), same(fakeClientParentSpan));
10581056
verify(tracer).spanBuilderWithExplicitParent(
10591057
eq("Attempt.package1.service2.method3"), same(spyClientSpan));
1060-
verify(spyClientSpanBuilder).setRecordEvents(eq(true));
10611058
verifyNoMoreInteractions(tracer);
10621059
assertTrue(headers.containsKey(censusTracing.tracingHeader));
10631060

census/src/test/java/io/grpc/census/CensusTracingAnnotationEventTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ public void wrapUp() {
166166
@Test
167167
public void clientBasicTracingUncompressedSizeAnnotation() {
168168
CallAttemptsTracerFactory callTracer =
169-
censusTracing.newClientCallTracer(null, method);
169+
censusTracing.newClientCallTracer(spyClientSpan, method);
170170
Metadata headers = new Metadata();
171171
ClientStreamTracer clientStreamTracer = callTracer.newClientStreamTracer(STREAM_INFO, headers);
172172
clientStreamTracer.streamCreated(Attributes.EMPTY, headers);

gcp-observability/src/main/java/io/grpc/gcp/observability/GcpObservability.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import io.grpc.gcp.observability.interceptors.LogHelper;
3737
import io.grpc.gcp.observability.logging.GcpLogSink;
3838
import io.grpc.gcp.observability.logging.Sink;
39+
import io.grpc.gcp.observability.logging.TraceLoggingHelper;
3940
import io.opencensus.common.Duration;
4041
import io.opencensus.contrib.grpc.metrics.RpcViewConstants;
4142
import io.opencensus.exporter.stats.stackdriver.StackdriverStatsConfiguration;
@@ -58,7 +59,8 @@
5859
@ExperimentalApi("https://github.com/grpc/grpc-java/issues/8869")
5960
public final class GcpObservability implements AutoCloseable {
6061
private static final int METRICS_EXPORT_INTERVAL = 30;
61-
private static final ImmutableSet<String> SERVICES_TO_EXCLUDE = ImmutableSet.of(
62+
@VisibleForTesting
63+
static final ImmutableSet<String> SERVICES_TO_EXCLUDE = ImmutableSet.of(
6264
"google.logging.v2.LoggingServiceV2", "google.monitoring.v3.MetricService",
6365
"google.devtools.cloudtrace.v2.TraceService");
6466
private static GcpObservability instance = null;
@@ -77,9 +79,11 @@ public static synchronized GcpObservability grpcInit() throws IOException {
7779
if (instance == null) {
7880
GlobalLocationTags globalLocationTags = new GlobalLocationTags();
7981
ObservabilityConfigImpl observabilityConfig = ObservabilityConfigImpl.getInstance();
82+
TraceLoggingHelper traceLoggingHelper = new TraceLoggingHelper(
83+
observabilityConfig.getProjectId());
8084
Sink sink = new GcpLogSink(observabilityConfig.getProjectId(),
81-
globalLocationTags.getLocationTags(), observabilityConfig.getCustomTags(),
82-
SERVICES_TO_EXCLUDE);
85+
globalLocationTags.getLocationTags(), observabilityConfig,
86+
SERVICES_TO_EXCLUDE, traceLoggingHelper);
8387
LogHelper helper = new LogHelper(sink);
8488
ConfigFilterHelper configFilterHelper = ConfigFilterHelper.getInstance(observabilityConfig);
8589
instance = grpcInit(sink, observabilityConfig,

gcp-observability/src/main/java/io/grpc/gcp/observability/interceptors/InternalLoggingChannelInterceptor.java

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
package io.grpc.gcp.observability.interceptors;
1818

19+
import static io.grpc.census.internal.ObservabilityCensusConstants.CLIENT_TRACE_SPAN_CONTEXT_KEY;
20+
1921
import com.google.protobuf.Duration;
2022
import com.google.protobuf.util.Durations;
2123
import io.grpc.CallOptions;
@@ -33,6 +35,7 @@
3335
import io.grpc.gcp.observability.interceptors.ConfigFilterHelper.FilterParams;
3436
import io.grpc.observabilitylog.v1.GrpcLogRecord.EventLogger;
3537
import io.grpc.observabilitylog.v1.GrpcLogRecord.EventType;
38+
import io.opencensus.trace.SpanContext;
3639
import java.util.UUID;
3740
import java.util.concurrent.TimeUnit;
3841
import java.util.concurrent.atomic.AtomicLong;
@@ -92,6 +95,7 @@ public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(MethodDescriptor<ReqT
9295
// Get the stricter deadline to calculate the timeout once the call starts
9396
final Deadline deadline = LogHelper.min(callOptions.getDeadline(),
9497
Context.current().getDeadline());
98+
final SpanContext clientSpanContext = callOptions.getOption(CLIENT_TRACE_SPAN_CONTEXT_KEY);
9599

96100
FilterParams filterParams = filterHelper.logRpcMethod(method.getFullMethodName(), true);
97101
if (!filterParams.log()) {
@@ -122,7 +126,8 @@ public void start(Listener<RespT> responseListener, Metadata headers) {
122126
maxHeaderBytes,
123127
EventLogger.CLIENT,
124128
callId,
125-
null);
129+
null,
130+
clientSpanContext);
126131
} catch (Exception e) {
127132
// Catching generic exceptions instead of specific ones for all the events.
128133
// This way we can catch both expected and unexpected exceptions instead of re-throwing
@@ -148,7 +153,8 @@ public void onMessage(RespT message) {
148153
message,
149154
maxMessageBytes,
150155
EventLogger.CLIENT,
151-
callId);
156+
callId,
157+
clientSpanContext);
152158
} catch (Exception e) {
153159
logger.log(Level.SEVERE, "Unable to log response message", e);
154160
}
@@ -168,7 +174,8 @@ public void onHeaders(Metadata headers) {
168174
maxHeaderBytes,
169175
EventLogger.CLIENT,
170176
callId,
171-
LogHelper.getPeerAddress(getAttributes()));
177+
LogHelper.getPeerAddress(getAttributes()),
178+
clientSpanContext);
172179
} catch (Exception e) {
173180
logger.log(Level.SEVERE, "Unable to log response header", e);
174181
}
@@ -189,7 +196,8 @@ public void onClose(Status status, Metadata trailers) {
189196
maxHeaderBytes,
190197
EventLogger.CLIENT,
191198
callId,
192-
LogHelper.getPeerAddress(getAttributes()));
199+
LogHelper.getPeerAddress(getAttributes()),
200+
clientSpanContext);
193201
} catch (Exception e) {
194202
logger.log(Level.SEVERE, "Unable to log trailer", e);
195203
}
@@ -212,7 +220,8 @@ public void sendMessage(ReqT message) {
212220
message,
213221
maxMessageBytes,
214222
EventLogger.CLIENT,
215-
callId);
223+
callId,
224+
clientSpanContext);
216225
} catch (Exception e) {
217226
logger.log(Level.SEVERE, "Unable to log request message", e);
218227
}
@@ -229,7 +238,8 @@ public void halfClose() {
229238
methodName,
230239
authority,
231240
EventLogger.CLIENT,
232-
callId);
241+
callId,
242+
clientSpanContext);
233243
} catch (Exception e) {
234244
logger.log(Level.SEVERE, "Unable to log half close", e);
235245
}
@@ -246,7 +256,8 @@ public void cancel(String message, Throwable cause) {
246256
methodName,
247257
authority,
248258
EventLogger.CLIENT,
249-
callId);
259+
callId,
260+
clientSpanContext);
250261
} catch (Exception e) {
251262
logger.log(Level.SEVERE, "Unable to log cancel", e);
252263
}

gcp-observability/src/main/java/io/grpc/gcp/observability/interceptors/InternalLoggingServerInterceptor.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
import io.grpc.gcp.observability.interceptors.ConfigFilterHelper.FilterParams;
3232
import io.grpc.observabilitylog.v1.GrpcLogRecord.EventLogger;
3333
import io.grpc.observabilitylog.v1.GrpcLogRecord.EventType;
34+
import io.opencensus.trace.Span;
35+
import io.opencensus.trace.SpanContext;
36+
import io.opencensus.trace.unsafe.ContextHandleUtils;
3437
import java.net.SocketAddress;
3538
import java.util.UUID;
3639
import java.util.concurrent.TimeUnit;
@@ -91,6 +94,8 @@ public <ReqT, RespT> ServerCall.Listener<ReqT> interceptCall(ServerCall<ReqT, Re
9194
Deadline deadline = Context.current().getDeadline();
9295
final Duration timeout = deadline == null ? null
9396
: Durations.fromNanos(deadline.timeRemaining(TimeUnit.NANOSECONDS));
97+
Span span = ContextHandleUtils.getValue(ContextHandleUtils.currentContext());
98+
final SpanContext serverSpanContext = span == null ? SpanContext.INVALID : span.getContext();
9499

95100
FilterParams filterParams =
96101
filterHelper.logRpcMethod(call.getMethodDescriptor().getFullMethodName(), false);
@@ -113,7 +118,8 @@ public <ReqT, RespT> ServerCall.Listener<ReqT> interceptCall(ServerCall<ReqT, Re
113118
maxHeaderBytes,
114119
EventLogger.SERVER,
115120
callId,
116-
peerAddress);
121+
peerAddress,
122+
serverSpanContext);
117123
} catch (Exception e) {
118124
// Catching generic exceptions instead of specific ones for all the events.
119125
// This way we can catch both expected and unexpected exceptions instead of re-throwing
@@ -139,7 +145,8 @@ public void sendHeaders(Metadata headers) {
139145
maxHeaderBytes,
140146
EventLogger.SERVER,
141147
callId,
142-
null);
148+
null,
149+
serverSpanContext);
143150
} catch (Exception e) {
144151
logger.log(Level.SEVERE, "Unable to log response header", e);
145152
}
@@ -160,7 +167,8 @@ public void sendMessage(RespT message) {
160167
message,
161168
maxMessageBytes,
162169
EventLogger.SERVER,
163-
callId);
170+
callId,
171+
serverSpanContext);
164172
} catch (Exception e) {
165173
logger.log(Level.SEVERE, "Unable to log response message", e);
166174
}
@@ -181,7 +189,8 @@ public void close(Status status, Metadata trailers) {
181189
maxHeaderBytes,
182190
EventLogger.SERVER,
183191
callId,
184-
null);
192+
null,
193+
serverSpanContext);
185194
} catch (Exception e) {
186195
logger.log(Level.SEVERE, "Unable to log trailer", e);
187196
}
@@ -206,7 +215,8 @@ public void onMessage(ReqT message) {
206215
message,
207216
maxMessageBytes,
208217
EventLogger.SERVER,
209-
callId);
218+
callId,
219+
serverSpanContext);
210220
} catch (Exception e) {
211221
logger.log(Level.SEVERE, "Unable to log request message", e);
212222
}
@@ -223,7 +233,8 @@ public void onHalfClose() {
223233
methodName,
224234
authority,
225235
EventLogger.SERVER,
226-
callId);
236+
callId,
237+
serverSpanContext);
227238
} catch (Exception e) {
228239
logger.log(Level.SEVERE, "Unable to log half close", e);
229240
}
@@ -240,7 +251,8 @@ public void onCancel() {
240251
methodName,
241252
authority,
242253
EventLogger.SERVER,
243-
callId);
254+
callId,
255+
serverSpanContext);
244256
} catch (Exception e) {
245257
logger.log(Level.SEVERE, "Unable to log cancel", e);
246258
}

0 commit comments

Comments
 (0)