Skip to content
This repository was archived by the owner on Apr 2, 2024. It is now read-only.

Commit 3b9edc6

Browse files
committed
Aggregate metrics at global level to avoid legend pollution in dashboards
Signed-off-by: Arunprasad Rajkumar <[email protected]>
1 parent 19dc42d commit 3b9edc6

File tree

2 files changed

+33
-32
lines changed

2 files changed

+33
-32
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ We use the following categories for changes:
3131
- In order to reduce the overall load on the system, some internal database
3232
metrics won't be collected as often as they used to. None of the affected
3333
metrics is expected to change faster than its new collection interval [#1793]
34+
- Aggregate metrics at global level to avoid legend pollution in dashboards [#1800]
3435

3536
### Fixed
3637

docs/mixin/dashboards/promscale.json

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,7 @@
657657
"uid": "${datasource}"
658658
},
659659
"exemplar": true,
660-
"expr": "histogram_quantile(0.5, rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval]))",
660+
"expr": "histogram_quantile(0.5, sum(rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
661661
"interval": "2m",
662662
"legendFormat": "p50",
663663
"range": true,
@@ -669,7 +669,7 @@
669669
"uid": "${datasource}"
670670
},
671671
"exemplar": true,
672-
"expr": "histogram_quantile(0.90, rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval]))",
672+
"expr": "histogram_quantile(0.9, sum(rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
673673
"hide": false,
674674
"interval": "2m",
675675
"legendFormat": "p90",
@@ -682,7 +682,7 @@
682682
"uid": "${datasource}"
683683
},
684684
"exemplar": true,
685-
"expr": "histogram_quantile(0.95, rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval]))",
685+
"expr": "histogram_quantile(0.95, sum(rate(promscale_ingest_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
686686
"hide": false,
687687
"interval": "2m",
688688
"legendFormat": "p95",
@@ -794,7 +794,7 @@
794794
},
795795
"editorMode": "code",
796796
"exemplar": true,
797-
"expr": "rate(grpc_server_msg_received_total{grpc_method=~\"(WriteSpan|WriteSpanStream|Export)\",namespace=~\"$namespace\"}[$__rate_interval])",
797+
"expr": "sum(rate(grpc_server_msg_received_total{grpc_method=~\"(WriteSpan|WriteSpanStream|Export)\",namespace=~\"$namespace\"}[$__rate_interval])) by(grpc_service)",
798798
"interval": "2m",
799799
"legendFormat": "{{ grpc_service }}",
800800
"range": true,
@@ -890,7 +890,7 @@
890890
"uid": "${datasource}"
891891
},
892892
"exemplar": true,
893-
"expr": "rate(grpc_server_handled_total{grpc_service=~\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_code=~\"Aborted|Unavailable|Internal|Unknown|Unimplemented|DataLoss\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])",
893+
"expr": "sum(rate(grpc_server_handled_total{grpc_service=~\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_code=~\"Aborted|Unavailable|Internal|Unknown|Unimplemented|DataLoss\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])) by(grpc_code)",
894894
"interval": "2m",
895895
"legendFormat": "{{ grpc_code }}",
896896
"range": true,
@@ -986,7 +986,7 @@
986986
"uid": "${datasource}"
987987
},
988988
"exemplar": true,
989-
"expr": "histogram_quantile(0.5, rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval]))",
989+
"expr": "histogram_quantile(0.5, sum(rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
990990
"interval": "2m",
991991
"legendFormat": "p50",
992992
"range": true,
@@ -998,7 +998,7 @@
998998
"uid": "${datasource}"
999999
},
10001000
"exemplar": true,
1001-
"expr": "histogram_quantile(0.9, rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval]))",
1001+
"expr": "histogram_quantile(0.9, sum(rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
10021002
"hide": false,
10031003
"interval": "2m",
10041004
"legendFormat": "p90",
@@ -1011,7 +1011,7 @@
10111011
"uid": "${datasource}"
10121012
},
10131013
"exemplar": true,
1014-
"expr": "histogram_quantile(0.95, rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval]))",
1014+
"expr": "histogram_quantile(0.95, sum(rate(grpc_server_handling_seconds_bucket{grpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\",grpc_method=~\"Export\",namespace=~\"$namespace\"}[$__rate_interval])) by(le))",
10151015
"hide": false,
10161016
"interval": "2m",
10171017
"legendFormat": "p95",
@@ -1640,7 +1640,7 @@
16401640
},
16411641
"editorMode": "code",
16421642
"exemplar": true,
1643-
"expr": "max by (job, instance) (promscale_sql_database_chunks_metrics_expired_count{namespace=~\"$namespace\"})",
1643+
"expr": "max(promscale_sql_database_chunks_metrics_expired_count{namespace=~\"$namespace\"})",
16441644
"interval": "",
16451645
"legendFormat": "metrics-expired",
16461646
"range": true,
@@ -1653,7 +1653,7 @@
16531653
},
16541654
"editorMode": "code",
16551655
"exemplar": true,
1656-
"expr": "max by (job, instance) (promscale_sql_database_chunks_metrics_uncompressed_count{namespace=~\"$namespace\"})",
1656+
"expr": "max(promscale_sql_database_chunks_metrics_uncompressed_count{namespace=~\"$namespace\"})",
16571657
"hide": false,
16581658
"interval": "",
16591659
"legendFormat": "metrics-uncompressed",
@@ -1667,7 +1667,7 @@
16671667
},
16681668
"editorMode": "code",
16691669
"exemplar": true,
1670-
"expr": "max by (job, instance) (promscale_sql_database_chunks_traces_expired_count{namespace=~\"$namespace\"})",
1670+
"expr": "max(promscale_sql_database_chunks_traces_expired_count{namespace=~\"$namespace\"})",
16711671
"hide": false,
16721672
"interval": "",
16731673
"legendFormat": "traces-expired",
@@ -1681,7 +1681,7 @@
16811681
},
16821682
"editorMode": "code",
16831683
"exemplar": true,
1684-
"expr": "max by (job, instance) (promscale_sql_database_chunks_traces_uncompressed_count{namespace=~\"$namespace\"})",
1684+
"expr": "max(promscale_sql_database_chunks_traces_uncompressed_count{namespace=~\"$namespace\"})",
16851685
"hide": false,
16861686
"interval": "",
16871687
"legendFormat": "traces-uncompressed",
@@ -1695,7 +1695,7 @@
16951695
},
16961696
"editorMode": "code",
16971697
"exemplar": true,
1698-
"expr": "max by (job, instance) (promscale_sql_database_chunks_metrics_delayed_compression_count{namespace=~\"$namespace\"})",
1698+
"expr": "max(promscale_sql_database_chunks_metrics_delayed_compression_count{namespace=~\"$namespace\"})",
16991699
"hide": false,
17001700
"interval": "",
17011701
"legendFormat": "metrics-compression-delayed",
@@ -1949,7 +1949,7 @@
19491949
"uid": "${datasource}"
19501950
},
19511951
"exemplar": true,
1952-
"expr": "histogram_quantile(0.5, rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
1952+
"expr": "histogram_quantile(0.5, sum(rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
19531953
"interval": "2m",
19541954
"legendFormat": "p50 - {{ method }}",
19551955
"range": true,
@@ -1961,7 +1961,7 @@
19611961
"uid": "${datasource}"
19621962
},
19631963
"exemplar": true,
1964-
"expr": "histogram_quantile(0.9, rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
1964+
"expr": "histogram_quantile(0.9, sum(rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
19651965
"hide": false,
19661966
"interval": "2m",
19671967
"legendFormat": "p90 - {{ method }}",
@@ -1974,7 +1974,7 @@
19741974
"uid": "${datasource}"
19751975
},
19761976
"exemplar": true,
1977-
"expr": "histogram_quantile(0.95, rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
1977+
"expr": "histogram_quantile(0.95, sum(rate(promscale_database_requests_duration_seconds_bucket{method=~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
19781978
"hide": false,
19791979
"interval": "2m",
19801980
"legendFormat": "p95 - {{ method }}",
@@ -2070,7 +2070,7 @@
20702070
"uid": "${datasource}"
20712071
},
20722072
"exemplar": true,
2073-
"expr": "histogram_quantile(0.5, rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
2073+
"expr": "histogram_quantile(0.5, sum(rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
20742074
"interval": "",
20752075
"legendFormat": "p50 - {{ method }}",
20762076
"refId": "A"
@@ -2081,7 +2081,7 @@
20812081
"uid": "${datasource}"
20822082
},
20832083
"exemplar": true,
2084-
"expr": "histogram_quantile(0.9, rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
2084+
"expr": "histogram_quantile(0.9, sum(rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
20852085
"hide": false,
20862086
"interval": "",
20872087
"legendFormat": "p90 - {{ method }}",
@@ -2093,7 +2093,7 @@
20932093
"uid": "${datasource}"
20942094
},
20952095
"exemplar": true,
2096-
"expr": "histogram_quantile(0.95, rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval]))",
2096+
"expr": "histogram_quantile(0.95, sum(rate(promscale_database_requests_duration_seconds_bucket{method!~\"query.*\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, method))",
20972097
"hide": false,
20982098
"interval": "",
20992099
"legendFormat": "p95 - {{ method }}",
@@ -2188,7 +2188,7 @@
21882188
"uid": "${datasource}"
21892189
},
21902190
"exemplar": true,
2191-
"expr": "rate(promscale_database_requests_total{namespace=~\"$namespace\"}[$__rate_interval])",
2191+
"expr": "sum(rate(promscale_database_requests_total{namespace=~\"$namespace\"}[$__rate_interval])) by(method)",
21922192
"interval": "2m",
21932193
"legendFormat": "{{ method }}",
21942194
"range": true,
@@ -2745,7 +2745,7 @@
27452745
},
27462746
"editorMode": "code",
27472747
"exemplar": false,
2748-
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_metrics_compression_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
2748+
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_metrics_compression_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
27492749
"interval": "",
27502750
"legendFormat": "metrics-compression-total",
27512751
"range": true,
@@ -2758,7 +2758,7 @@
27582758
},
27592759
"editorMode": "code",
27602760
"exemplar": false,
2761-
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_metrics_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
2761+
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_metrics_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
27622762
"hide": false,
27632763
"interval": "",
27642764
"legendFormat": "metrics-retention-total",
@@ -2772,7 +2772,7 @@
27722772
},
27732773
"editorMode": "code",
27742774
"exemplar": false,
2775-
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_traces_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
2775+
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_traces_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
27762776
"hide": false,
27772777
"interval": "",
27782778
"legendFormat": "traces-retention-total",
@@ -2786,7 +2786,7 @@
27862786
},
27872787
"editorMode": "code",
27882788
"exemplar": false,
2789-
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_metrics_compression_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
2789+
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_metrics_compression_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
27902790
"hide": false,
27912791
"interval": "",
27922792
"legendFormat": "metrics-compression-failures",
@@ -2800,7 +2800,7 @@
28002800
},
28012801
"editorMode": "code",
28022802
"exemplar": false,
2803-
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_metrics_retention_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
2803+
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_metrics_retention_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
28042804
"hide": false,
28052805
"interval": "",
28062806
"legendFormat": "metrics-retention-failures",
@@ -2814,7 +2814,7 @@
28142814
},
28152815
"editorMode": "code",
28162816
"exemplar": false,
2817-
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_traces_retention_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
2817+
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_traces_retention_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
28182818
"hide": false,
28192819
"interval": "",
28202820
"legendFormat": "traces-retention-failures",
@@ -2828,7 +2828,7 @@
28282828
},
28292829
"editorMode": "code",
28302830
"exemplar": false,
2831-
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_traces_compression_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
2831+
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_traces_compression_failures_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
28322832
"hide": false,
28332833
"interval": "",
28342834
"legendFormat": "traces-compression-failures",
@@ -2842,7 +2842,7 @@
28422842
},
28432843
"editorMode": "code",
28442844
"exemplar": false,
2845-
"expr": "max by (job, instance)(rate(promscale_sql_database_worker_maintenance_job_traces_retention_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
2845+
"expr": "max(rate(promscale_sql_database_worker_maintenance_job_traces_compression_total_runs_count{namespace=~\"$namespace\"}[$__rate_interval])) * 60",
28462846
"hide": false,
28472847
"interval": "",
28482848
"legendFormat": "traces-compression-total",
@@ -3337,7 +3337,7 @@
33373337
},
33383338
"editorMode": "code",
33393339
"exemplar": true,
3340-
"expr": "rate(promscale_cache_query_hits_total{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval]) / rate(promscale_cache_queries_total{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])",
3340+
"expr": "sum(rate(promscale_cache_query_hits_total{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])) by(name) / sum(rate(promscale_cache_queries_total{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])) by(name)",
33413341
"interval": "2m",
33423342
"legendFormat": "{{ name }}",
33433343
"range": true,
@@ -3434,7 +3434,7 @@
34343434
},
34353435
"editorMode": "code",
34363436
"exemplar": true,
3437-
"expr": "histogram_quantile(0.5, rate(promscale_cache_query_latency_microseconds_bucket{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval]))",
3437+
"expr": "histogram_quantile(0.5, sum(rate(promscale_cache_query_latency_microseconds_bucket{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, name))",
34383438
"interval": "2m",
34393439
"legendFormat": "p50 - {{ name }}",
34403440
"range": true,
@@ -3447,7 +3447,7 @@
34473447
},
34483448
"editorMode": "code",
34493449
"exemplar": true,
3450-
"expr": "histogram_quantile(0.90, rate(promscale_cache_query_latency_microseconds_bucket{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval]))",
3450+
"expr": "histogram_quantile(0.90, sum(rate(promscale_cache_query_latency_microseconds_bucket{type=~\"$datatype\",namespace=~\"$namespace\"}[$__rate_interval])) by(le, name))",
34513451
"hide": false,
34523452
"interval": "2m",
34533453
"legendFormat": "p90 - {{ name }}",
@@ -3639,7 +3639,7 @@
36393639
},
36403640
"editorMode": "code",
36413641
"exemplar": true,
3642-
"expr": "promscale_cache_elements{namespace=~\"$namespace\",type=~\"$datatype\"} / promscale_cache_capacity_elements{namespace=~\"$namespace\",type=~\"$datatype\"}",
3642+
"expr": "sum(promscale_cache_elements{namespace=~\"$namespace\",type=~\"$datatype\"}) by(name) / sum(promscale_cache_capacity_elements{namespace=~\"$namespace\",type=~\"$datatype\"}) by(name)",
36433643
"interval": "",
36443644
"legendFormat": "{{ name }}",
36453645
"range": true,

0 commit comments

Comments
 (0)