Skip to content
This repository was archived by the owner on Apr 2, 2024. It is now read-only.

Commit 96f1626

Browse files
committed
Dashboard improvements and adding alerts.
1 parent 11e3e1f commit 96f1626

File tree

2 files changed

+55
-7
lines changed

2 files changed

+55
-7
lines changed

docs/mixin/alerts/alerts.yaml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,53 @@ groups:
261261
summary: Promscale maintenance jobs taking too long to complete.
262262
description: "Promscale Database is taking {{ $value }} seconds to respond to Promscale's requests."
263263
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
264+
- alert: PromscaleMaintenanceJobNotKeepingup
265+
expr: |
266+
(
267+
(
268+
min_over_time(promscale_sql_database_chunks_metrics_uncompressed_count[1h]) > 10
269+
)
270+
and
271+
(
272+
delta(promscale_sql_database_chunks_metrics_uncompressed_count[10m]) > 0
273+
)
274+
)
275+
or
276+
(
277+
(
278+
min_over_time(promscale_sql_database_chunks_metrics_expired_count[1h]) > 10
279+
)
280+
and
281+
(
282+
delta(promscale_sql_database_chunks_metrics_expired_count[10m]) > 0
283+
)
284+
)
285+
or
286+
(
287+
(
288+
min_over_time(promscale_sql_database_chunks_traces_uncompressed_count[1h]) > 10
289+
)
290+
and
291+
(
292+
delta(promscale_sql_database_chunks_traces_uncompressed_count[10m]) > 0
293+
)
294+
)
295+
or
296+
(
297+
(
298+
min_over_time(promscale_sql_database_chunks_traces_expired_count[1h]) > 10
299+
)
300+
and
301+
(
302+
delta(promscale_sql_database_chunks_traces_expired_count[10m]) > 0
303+
)
304+
)
305+
labels:
306+
severity: warning
307+
annotations:
308+
summary: Promscale maintenance jobs are not keeping up.
309+
description: "The amount of work for the Promscale maintenance jobs has not been decreasing for a long time."
310+
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
264311
- alert: PromscaleMaintenanceJobFailures
265312
expr: promscale_sql_database_worker_maintenance_job_failed == 1
266313
labels:

docs/mixin/dashboards/promscale.json

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@
277277
"refId": "A"
278278
}
279279
],
280-
"title": "Samples Ingest Rate",
280+
"title": "Ingest Rates",
281281
"type": "timeseries"
282282
},
283283
{
@@ -1619,13 +1619,13 @@
16191619
"exemplar": true,
16201620
"expr": "max by (job, instance) (promscale_sql_database_chunks_metrics_delayed_compression_count)",
16211621
"interval": "",
1622-
"legendFormat": "metrics-expired",
1622+
"legendFormat": "metrics-compression-delayed",
16231623
"range": true,
16241624
"refId": "E",
16251625
"hide": false
16261626
}
16271627
],
1628-
"title": "Maintenance job backlogs",
1628+
"title": "The number of chunks to be processed by maintenance jobs",
16291629
"type": "timeseries"
16301630
},
16311631
{
@@ -1648,7 +1648,7 @@
16481648
}
16491649
]
16501650
},
1651-
"unit": "dateTimeAsIso"
1651+
"unit": "s"
16521652
},
16531653
"overrides": []
16541654
},
@@ -1680,14 +1680,15 @@
16801680
"type": "prometheus",
16811681
"uid": "${DS_PROMETHEUS}"
16821682
},
1683-
"exemplar": true,
1684-
"expr": "max(promscale_sql_database_worker_maintenance_job_start_timestamp_seconds) * 1000",
1683+
"exemplar": false,
1684+
"expr": "time() - max(promscale_sql_database_worker_maintenance_job_start_timestamp_seconds)",
16851685
"interval": "",
16861686
"legendFormat": "",
16871687
"refId": "A"
16881688
}
16891689
],
1690-
"title": "Last DB maintenance job start",
1690+
"title": "Time since the last job start",
1691+
"description": "Time since the last DB maintenance job started",
16911692
"type": "stat"
16921693
},
16931694
{

0 commit comments

Comments
 (0)