Skip to content
This repository was archived by the owner on Apr 2, 2024. It is now read-only.

Commit 5c5dd5a

Browse files
committed
Fix Jaeger trace querying by converted Jaeger tags
When translating Jaeger traces to OTEL format, some Jaeger tags get converted to the to OTEL specific fields and gets removed from the tag map. This change takes this conversion into account when querying traces using these tags.
1 parent 6dc1a15 commit 5c5dd5a

File tree

11 files changed

+357
-82
lines changed

11 files changed

+357
-82
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ We use the following categories for changes:
1919
- `-enable-feature=promql-per-step-stats` feature for statistics in PromQL evaluation
2020
- Add `readinessProbe` in helm chart [#1266]
2121

22+
### Fixed
23+
- Fix trace querying by status code tag [#1384]
24+
2225
## [0.11.0] - 2022-05-11
2326

2427
## Added

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ require (
4040
github.com/thanos-io/thanos v0.25.2
4141
go.opentelemetry.io/collector/model v0.49.0
4242
go.opentelemetry.io/collector/pdata v0.49.0
43+
go.opentelemetry.io/collector/semconv v0.52.0 // indirect
4344
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.32.0
4445
go.opentelemetry.io/otel v1.7.0
4546
go.opentelemetry.io/otel/exporters/jaeger v1.6.3

go.sum

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2074,6 +2074,12 @@ go.opentelemetry.io/collector/model v0.49.0 h1:mbUSNgpaBE3GWmzGsRb5t0xILpXIVYv7s
20742074
go.opentelemetry.io/collector/model v0.49.0/go.mod h1:nOYQv9KoFPs6ihJwOi24qB209EOhS9HkwhGj54YiEAw=
20752075
go.opentelemetry.io/collector/pdata v0.49.0 h1:aYj5rOlRC0x7lGXbc185LMsMMoY/pjOTXr5s1O2SzXs=
20762076
go.opentelemetry.io/collector/pdata v0.49.0/go.mod h1:YwmKuiFhNgtmhRdpi8Q8FAWPa0AwJTCSlssSsAtuRcY=
2077+
go.opentelemetry.io/collector/semconv v0.52.0 h1:ogYkQ6WL01xQ4GGSwWQejNTQwy/Pwcd6jCKFLSd7svA=
2078+
go.opentelemetry.io/collector/semconv v0.52.0/go.mod h1:SxK0rUnUP7YeDakexzbE/vhimTOHwE6m/4aKKd9e27Q=
2079+
go.opentelemetry.io/contrib v0.20.0 h1:ubFQUn0VCZ0gPwIoJfBJVpeBlyRMxu8Mm/huKWYd9p0=
2080+
go.opentelemetry.io/contrib v0.20.0/go.mod h1:G/EtFaa6qaN7+LxqfIAT3GiZa7Wv5DTBUzl5H4LY0Kc=
2081+
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0/go.mod h1:oVGt1LRbBOBq1A5BQLlUg9UaU/54aiHw8cgjV3aWZ/E=
2082+
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.28.0/go.mod h1:vEhqr0m4eTc+DWxfsXoXue2GBgV2uUwVznkGIHW/e5w=
20772083
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.31.0/go.mod h1:SY9qHHUES6W3oZnO1H2W8NvsSovIoXRg/A1AH9px8+I=
20782084
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.31.0/go.mod h1:PFmBsWbldL1kiWZk9+0LBZz2brhByaGsvp6pRICMlPE=
20792085
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.32.0 h1:mac9BKRqwaX6zxHPDe3pvmWpwuuIM0vuXv2juCnQevE=

pkg/jaeger/query/get_operations.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func getOperations(ctx context.Context, conn pgxconn.PgxConn, query spanstore.Op
4141
args := []interface{}{query.ServiceName}
4242
kindQual := "TRUE"
4343
if len(query.SpanKind) > 0 {
44-
pgEnum, err := getPGKindEnum(query.SpanKind)
44+
pgEnum, err := spanKindStringToInternal(query.SpanKind)
4545
if err != nil {
4646
return operationsResp, fmt.Errorf("converting query enum: %w", err)
4747
}

pkg/jaeger/query/trace_query.go

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// This file and its contents are licensed under the Apache License 2.0.
22
// Please see the included NOTICE for copyright information and
33
// LICENSE for a copy of the license.
4-
54
package query
65

76
import (
@@ -12,6 +11,9 @@ import (
1211
"github.com/jackc/pgtype"
1312
"github.com/jaegertracing/jaeger/model"
1413
"github.com/jaegertracing/jaeger/storage/spanstore"
14+
"go.opentelemetry.io/collector/model/pdata"
15+
"go.opentelemetry.io/collector/pdata/ptrace"
16+
semconv "go.opentelemetry.io/collector/semconv/v1.6.1"
1517
)
1618

1719
const (
@@ -106,6 +108,13 @@ const (
106108
%s
107109
) AS complete_trace ON (TRUE)
108110
`
111+
112+
// Keys used to represent OTLP constructs from Jaeger tags which are then dropped from the tag map.
113+
TagError = "error"
114+
TagHostname = "hostname"
115+
TagJaegerVersion = "jaeger.version"
116+
TagSpanKind = "span.kind"
117+
TagW3CTraceState = "w3c.tracestate"
109118
)
110119

111120
type Builder struct {
@@ -148,8 +157,7 @@ func (b *Builder) findTracesQuery(q *spanstore.TraceQueryParameters) (string, []
148157
}
149158

150159
func (b *Builder) findTraceIDsQuery(q *spanstore.TraceQueryParameters) (string, []interface{}) {
151-
subquery, params := b.buildTraceIDSubquery(q)
152-
return subquery, params
160+
return b.buildTraceIDSubquery(q)
153161
}
154162

155163
func (b *Builder) getTraceQuery(traceID model.TraceID) (string, []interface{}, error) {
@@ -192,6 +200,50 @@ func (b *Builder) buildTraceIDSubquery(q *spanstore.TraceQueryParameters) (strin
192200
operation_clauses = append(operation_clauses, qual)
193201
}
194202

203+
if len(q.Tags) > 0 {
204+
for k, v := range q.Tags {
205+
switch k {
206+
case TagError:
207+
var sc pdata.StatusCode
208+
switch v {
209+
case "true":
210+
sc = ptrace.StatusCodeError
211+
case "false":
212+
sc = ptrace.StatusCodeUnset
213+
default:
214+
// Ignore tag if we don't match boolean.
215+
continue
216+
}
217+
params = append(params, statusCodeToInternal(sc))
218+
qual := fmt.Sprintf(`s.status_code = $%d`, len(params))
219+
clauses = append(clauses, qual)
220+
case TagHostname:
221+
// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/a13030ca6208fa7d4477f0805f3b95e271f32b24/pkg/translator/jaeger/jaegerproto_to_traces.go#L109
222+
params = append(params, semconv.AttributeHostName, "\""+v+"\"")
223+
qual := fmt.Sprintf(`_ps_trace.tag_map_denormalize(s.span_tags)->$%d = $%d`, len(params)-1, len(params))
224+
clauses = append(clauses, qual)
225+
case TagJaegerVersion:
226+
// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/a13030ca6208fa7d4477f0805f3b95e271f32b24/pkg/translator/jaeger/jaegerproto_to_traces.go#L119
227+
params = append(params, "opencensus.exporterversion", "\"Jaeger-"+v+"\"")
228+
qual := fmt.Sprintf(`_ps_trace.tag_map_denormalize(s.span_tags)->$%d = $%d`, len(params)-1, len(params))
229+
clauses = append(clauses, qual)
230+
case TagSpanKind:
231+
params = append(params, jSpanKindToInternal(v))
232+
qual := fmt.Sprintf("span_kind = $%d", len(params))
233+
operation_clauses = append(operation_clauses, qual)
234+
case TagW3CTraceState:
235+
params = append(params, v)
236+
qual := fmt.Sprintf(`s.trace_state = $%d`, len(params))
237+
clauses = append(clauses, qual)
238+
default:
239+
//TODO make sure this is optimized correctly
240+
params = append(params, k, "\""+v+"\"")
241+
qual := fmt.Sprintf(`_ps_trace.tag_map_denormalize(s.span_tags)->$%d = $%d`, len(params)-1, len(params))
242+
clauses = append(clauses, qual)
243+
}
244+
}
245+
}
246+
195247
if len(operation_clauses) > 0 {
196248
qual := fmt.Sprintf(`
197249
s.operation_id IN (
@@ -205,15 +257,6 @@ func (b *Builder) buildTraceIDSubquery(q *spanstore.TraceQueryParameters) (strin
205257
clauses = append(clauses, qual)
206258
}
207259

208-
if len(q.Tags) > 0 {
209-
for k, v := range q.Tags {
210-
//TODO make sure this is optimized correctly
211-
params = append(params, k, "\""+v+"\"")
212-
qual := fmt.Sprintf(`_ps_trace.tag_map_denormalize(s.span_tags)->$%d = $%d`, len(params)-1, len(params))
213-
clauses = append(clauses, qual)
214-
}
215-
}
216-
217260
//todo check the inclusive semantics here
218261
var defaultTime time.Time
219262
if q.StartTimeMin != defaultTime {

pkg/jaeger/query/trace_scan.go

Lines changed: 2 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -163,11 +163,7 @@ func populateSpan(
163163
ref.SetName(dbResult.spanName)
164164

165165
if dbResult.kind.Status == pgtype.Present {
166-
kind, err := makeKind(dbResult.kind.String)
167-
if err != nil {
168-
return err
169-
}
170-
ref.SetKind(kind)
166+
ref.SetKind(internalToSpanKind(dbResult.kind.String))
171167
}
172168

173169
ref.SetStartTimestamp(pdata.NewTimestampFromTime(dbResult.startTime))
@@ -202,11 +198,7 @@ func populateSpan(
202198

203199
func setStatus(ref ptrace.Span, dbRes *spanDBResult) error {
204200
if dbRes.statusCode != "" {
205-
code, err := makeStatusCode(dbRes.statusCode)
206-
if err != nil {
207-
return err
208-
}
209-
ref.Status().SetCode(code)
201+
ref.Status().SetCode(internalToStatusCode(dbRes.statusCode))
210202
}
211203
if dbRes.statusMessage.Status != pgtype.Null {
212204
message := dbRes.statusMessage.String
@@ -300,54 +292,3 @@ func makeSpanId(s *int64) pdata.SpanID {
300292
b8 := trace.Int64ToByteArray(*s)
301293
return pdata.NewSpanID(b8)
302294
}
303-
304-
func getPGKindEnum(jaegerKind string) (string, error) {
305-
switch jaegerKind {
306-
case ptrace.SpanKindClient.String():
307-
return "client", nil
308-
case ptrace.SpanKindServer.String():
309-
return "server", nil
310-
case ptrace.SpanKindInternal.String():
311-
return "internal", nil
312-
case ptrace.SpanKindConsumer.String():
313-
return "consumer", nil
314-
case ptrace.SpanKindProducer.String():
315-
return "producer", nil
316-
case ptrace.SpanKindUnspecified.String():
317-
return "unspecified", nil
318-
default:
319-
return "", fmt.Errorf("unknown span kind: %v", jaegerKind)
320-
}
321-
}
322-
323-
func makeKind(s string) (pdata.SpanKind, error) {
324-
switch s {
325-
case "client":
326-
return ptrace.SpanKindClient, nil
327-
case "server":
328-
return ptrace.SpanKindServer, nil
329-
case "internal":
330-
return ptrace.SpanKindInternal, nil
331-
case "consumer":
332-
return ptrace.SpanKindConsumer, nil
333-
case "producer":
334-
return ptrace.SpanKindProducer, nil
335-
case "unspecified":
336-
return ptrace.SpanKindUnspecified, nil
337-
default:
338-
return ptrace.SpanKindUnspecified, fmt.Errorf("unknown span kind: %s", s)
339-
}
340-
}
341-
342-
func makeStatusCode(s string) (pdata.StatusCode, error) {
343-
switch s {
344-
case "ok":
345-
return ptrace.StatusCodeOk, nil
346-
case "error":
347-
return ptrace.StatusCodeError, nil
348-
case "unset":
349-
return ptrace.StatusCodeUnset, nil
350-
default:
351-
return ptrace.StatusCodeUnset, fmt.Errorf("unknown status-code kind: %s", s)
352-
}
353-
}

pkg/jaeger/query/translation.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// This file and its contents are licensed under the Apache License 2.0.
2+
// Please see the included NOTICE for copyright information and
3+
// LICENSE for a copy of the license.
4+
package query
5+
6+
import (
7+
"fmt"
8+
9+
"go.opentelemetry.io/collector/model/pdata"
10+
"go.opentelemetry.io/collector/pdata/ptrace"
11+
)
12+
13+
var (
14+
// Map of status codes to internal DB values.
15+
statusCodeInternalValue = map[string]pdata.StatusCode{
16+
"ok": ptrace.StatusCodeOk,
17+
"error": ptrace.StatusCodeError,
18+
"unset": ptrace.StatusCodeUnset,
19+
}
20+
// Map of span kinds to internal DB values.
21+
spanKindInternalValue = map[string]pdata.SpanKind{
22+
"client": ptrace.SpanKindClient,
23+
"server": ptrace.SpanKindServer,
24+
"internal": ptrace.SpanKindInternal,
25+
"consumer": ptrace.SpanKindConsumer,
26+
"producer": ptrace.SpanKindProducer,
27+
"unspecified": ptrace.SpanKindUnspecified,
28+
}
29+
// Map of Jaeger span kind tag strings to internal DB values.
30+
// This is kept explicit even though the values are identical.
31+
jSpanKindToInternalValue = map[string]string{
32+
"client": "client",
33+
"server": "server",
34+
"internal": "internal",
35+
"consumer": "consumer",
36+
"producer": "producer",
37+
}
38+
)
39+
40+
func internalToStatusCode(s string) pdata.StatusCode {
41+
v, ok := statusCodeInternalValue[s]
42+
if !ok {
43+
panic(fmt.Sprintf("status code %s does not have internal representation", s))
44+
}
45+
return v
46+
}
47+
48+
func statusCodeToInternal(sc pdata.StatusCode) string {
49+
for k, v := range statusCodeInternalValue {
50+
if sc == v {
51+
return k
52+
}
53+
}
54+
panic(fmt.Sprintf("status code %s does not have internal representation", sc.String()))
55+
}
56+
57+
func internalToSpanKind(s string) pdata.SpanKind {
58+
v, ok := spanKindInternalValue[s]
59+
if !ok {
60+
panic(fmt.Sprintf("span kind %s does not have internal representation", s))
61+
}
62+
return v
63+
}
64+
65+
func spanKindStringToInternal(kind string) (string, error) {
66+
for k, v := range spanKindInternalValue {
67+
if v.String() == kind {
68+
return k, nil
69+
}
70+
}
71+
return "", fmt.Errorf("unknown span kind: %s", kind)
72+
}
73+
74+
// JSpanKindToInternal translates Jaeger span kind tag value to internal enum value used for storage.
75+
// Corresponds to:
76+
// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/a13030ca6208fa7d4477f0805f3b95e271f32b24/pkg/translator/jaeger/jaegerproto_to_traces.go#L303
77+
func jSpanKindToInternal(spanKind string) string {
78+
if v, ok := jSpanKindToInternalValue[spanKind]; ok {
79+
return v
80+
}
81+
return "unspecified"
82+
}

pkg/tests/end_to_end_tests/dataset_traces_test.go

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,13 @@ var (
3535

3636
service0 = "service-name-0"
3737

38-
spanAttributes = pdata.NewAttributeMapFromMap(map[string]pcommon.Value{"span-attr": pcommon.NewValueString("span-attr-val")})
38+
spanAttributes = pdata.NewAttributeMapFromMap(
39+
map[string]pcommon.Value{
40+
"span-attr": pcommon.NewValueString("span-attr-val"),
41+
"host.name": pcommon.NewValueString("hostname1"),
42+
"opencensus.exporterversion": pcommon.NewValueString("Jaeger-1.0.0"),
43+
},
44+
)
3945
spanEventAttributes = pdata.NewAttributeMapFromMap(map[string]pcommon.Value{"span-event-attr": pcommon.NewValueString("span-event-attr-val")})
4046
spanLinkAttributes = pdata.NewAttributeMapFromMap(map[string]pcommon.Value{"span-link-attr": pcommon.NewValueString("span-link-attr-val")})
4147
)
@@ -44,6 +50,18 @@ func getTraceId(bSlice [16]byte) string {
4450
return hex.EncodeToString(bSlice[:])
4551
}
4652

53+
// generateBrokenTestTraces switches start and end times for every span to check if
54+
// we handle this broken situation correctly and reverse the times while ingesting.
55+
func generateBrokenTestTraces() pdata.Traces {
56+
data := generateTestTrace()
57+
startTime := data.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0).StartTimestamp()
58+
endTime := data.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0).EndTimestamp()
59+
60+
data.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0).SetStartTimestamp(endTime)
61+
data.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0).SetEndTimestamp(startTime)
62+
return data
63+
}
64+
4765
func generateTestTrace() pdata.Traces {
4866
rand.Seed(1)
4967
spanCount := 4
@@ -139,11 +157,11 @@ func fillSpanOne(span pdata.Span) {
139157
span.SetTraceID(pdata.NewTraceID(traceID1))
140158
span.SetSpanID(pdata.NewSpanID(generateRandSpanID()))
141159
span.SetName("operationA")
142-
// test the logic that detects and fixes end < start
143-
span.SetStartTimestamp(testSpanEndTimestamp)
144-
span.SetEndTimestamp(testSpanStartTimestamp)
160+
span.SetStartTimestamp(testSpanStartTimestamp)
161+
span.SetEndTimestamp(testSpanEndTimestamp)
145162
span.SetDroppedAttributesCount(1)
146163
span.SetTraceState("span-trace-state1")
164+
span.SetKind(ptrace.SpanKindClient)
147165
initSpanAttributes(span.Attributes())
148166
evs := span.Events()
149167
ev0 := evs.AppendEmpty()

pkg/tests/end_to_end_tests/generate_jaeger_response_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func insertDataIntoJaeger(endpoint string, data pdata.Traces) error {
7070

7171
client := jaegerproto.NewCollectorServiceClient(conn)
7272

73-
batches, err := jaegertranslator.InternalTracesToJaegerProto(data)
73+
batches, err := jaegertranslator.ProtoFromTraces(data)
7474
if err != nil {
7575
panic(err)
7676
}

0 commit comments

Comments
 (0)