Skip to content
This repository was archived by the owner on Apr 2, 2024. It is now read-only.

Commit 840b394

Browse files
committed
docs/mixin: split query error high alert by handler and use different threshold for query_range
Signed-off-by: Paweł Krupa (paulfantom) <[email protected]>
1 parent 97a6c13 commit 840b394

File tree

1 file changed

+25
-9
lines changed

1 file changed

+25
-9
lines changed

docs/mixin/alerts/alerts.yaml

Lines changed: 25 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -104,25 +104,42 @@ groups:
104104
- alert: PromscaleQueryHighErrorRate
105105
expr: |
106106
(
107-
sum by (job, instance, namespace, type) (
108-
rate(promscale_query_requests_total{code=~"5.."}[5m])
107+
sum by (job, instance, namespace, type, handler) (
108+
rate(promscale_query_requests_total{code=~"5..",handler!="/api/v1/query_range",err!="canceled"}[5m])
109109
)
110110
/
111-
sum by (job, instance, namespace, type) (
112-
rate(promscale_query_requests_total[5m])
111+
sum by (job, instance, namespace, type, handler) (
112+
rate(promscale_query_requests_total{handler!="/api/v1/query_range",err!="canceled"}[5m])
113113
)
114114
) > 0.05
115115
labels:
116116
severity: warning
117117
annotations:
118118
summary: High error rate in querying Promscale.
119-
description: "Evaluating queries via Promscale has {{ $value | humanizePercentage }} error rate."
119+
description: "Evaluating queries via Promscale {{ $labels.handler }} endpoint has {{ $value | humanizePercentage }} error rate."
120+
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
121+
- alert: PromscaleQueryHighErrorRate
122+
expr: |
123+
(
124+
sum by (job, instance, namespace, type, handler) (
125+
rate(promscale_query_requests_total{code=~"5..",handler="/api/v1/query_range",err!="canceled"}[5m])
126+
)
127+
/
128+
sum by (job, instance, namespace, type, handler) (
129+
rate(promscale_query_requests_total{handler="/api/v1/query_range",err!="canceled"}[5m])
130+
)
131+
) > 0.1
132+
labels:
133+
severity: warning
134+
annotations:
135+
summary: High error rate in querying Promscale.
136+
description: "Evaluating queries via Promscale {{ $labels.handler }} endpoint has {{ $value | humanizePercentage }} error rate."
120137
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
121138
- alert: PromscaleQueryHighErrorRate
122139
expr: |
123140
(
124141
sum by (job, instance, namespace, type) (
125-
rate(promscale_query_requests_total{code=~"5.."}[5m])
142+
rate(promscale_query_requests_total{code=~"5..",err!="canceled"}[5m])
126143
)
127144
/
128145
sum by (job, instance, namespace, type) (
@@ -345,8 +362,8 @@ groups:
345362
- alert: PromscalePostgreSQLSharedBuffersLow
346363
expr: |
347364
(
348-
((promscale_sql_database_open_chunks_total_table_size + promscale_sql_database_open_chunks_total_index_size)
349-
/
365+
((promscale_sql_database_open_chunks_total_table_size + promscale_sql_database_open_chunks_total_index_size)
366+
/
350367
promscale_sql_database_shared_buffers_size)
351368
> 1 )
352369
for: 10m
@@ -356,4 +373,3 @@ groups:
356373
summary: Promscale database performance will be affected.
357374
description: "Currently open chunks are {{ $value | humanizePercentage }} of PostgreSQL shared_buffers. This will impact database performance."
358375
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscalePostgreSQLSharedBuffersLow.md
359-

0 commit comments

Comments
 (0)