This repository was archived by the owner on Apr 2, 2024. It is now read-only.
File tree: 2 files changed, +55 -7 lines changed.
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,53 @@ groups:
261
261
summary : Promscale maintenance jobs taking too long to complete.
262
262
description : " Promscale Database is taking {{ $value }} seconds to respond to Promscale's requests."
263
263
runbook_url : https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
264
+ - alert : PromscaleMaintenanceJobNotKeepingup
265
+ expr : |
266
+ (
267
+ (
268
+ min_over_time(promscale_sql_database_chunks_metrics_uncompressed_count[1h]) > 10
269
+ )
270
+ and
271
+ (
272
+ delta(promscale_sql_database_chunks_metrics_uncompressed_count[10m]) > 0
273
+ )
274
+ )
275
+ or
276
+ (
277
+ (
278
+ min_over_time(promscale_sql_database_chunks_metrics_expired_count[1h]) > 10
279
+ )
280
+ and
281
+ (
282
+ delta(promscale_sql_database_chunks_metrics_expired_count[10m]) > 0
283
+ )
284
+ )
285
+ or
286
+ (
287
+ (
288
+ min_over_time(promscale_sql_database_chunks_traces_uncompressed_count[1h]) > 10
289
+ )
290
+ and
291
+ (
292
+ delta(promscale_sql_database_chunks_traces_uncompressed_count[10m]) > 0
293
+ )
294
+ )
295
+ or
296
+ (
297
+ (
298
+ min_over_time(promscale_sql_database_chunks_traces_expired_count[1h]) > 10
299
+ )
300
+ and
301
+ (
302
+ delta(promscale_sql_database_chunks_traces_expired_count[10m]) > 0
303
+ )
304
+ )
305
+ labels :
306
+ severity : warning
307
+ annotations :
308
+ summary : Promscale maintenance jobs are not keeping up.
309
+ description : " The amount of work for the promscale maintenance jobs has not been decreasing for a long time."
310
+ runbook_url : https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
264
311
- alert : PromscaleMaintenanceJobFailures
265
312
expr : promscale_sql_database_worker_maintenance_job_failed == 1
266
313
labels :
Original file line number | Diff line number | Diff line change
277
277
"refId" : " A"
278
278
}
279
279
],
280
- "title" : " Samples Ingest Rate " ,
280
+ "title" : " Ingest Rates " ,
281
281
"type" : " timeseries"
282
282
},
283
283
{
1619
1619
"exemplar" : true ,
1620
1620
"expr" : " max by (job, instance) (promscale_sql_database_chunks_metrics_delayed_compression_count)" ,
1621
1621
"interval" : " " ,
1622
- "legendFormat" : " metrics-expired " ,
1622
+ "legendFormat" : " metrics-compression-delayed " ,
1623
1623
"range" : true ,
1624
1624
"refId" : " E" ,
1625
1625
"hide" : false
1626
1626
}
1627
1627
],
1628
- "title" : " Maintenance job backlogs " ,
1628
+ "title" : " The number of chunks to be processed by maintenance jobs " ,
1629
1629
"type" : " timeseries"
1630
1630
},
1631
1631
{
1648
1648
}
1649
1649
]
1650
1650
},
1651
- "unit" : " dateTimeAsIso "
1651
+ "unit" : " s "
1652
1652
},
1653
1653
"overrides" : []
1654
1654
},
1680
1680
"type" : " prometheus" ,
1681
1681
"uid" : " ${DS_PROMETHEUS}"
1682
1682
},
1683
- "exemplar" : true ,
1684
- "expr" : " max(promscale_sql_database_worker_maintenance_job_start_timestamp_seconds) * 1000 " ,
1683
+ "exemplar" : false ,
1684
+ "expr" : " time() - max(promscale_sql_database_worker_maintenance_job_start_timestamp_seconds) " ,
1685
1685
"interval" : " " ,
1686
1686
"legendFormat" : " " ,
1687
1687
"refId" : " A"
1688
1688
}
1689
1689
],
1690
- "title" : " Last DB maintenance job start" ,
1690
+ "title" : " Time since the last job start" ,
1691
+ "description" : " Time since the last DB maintenance job started" ,
1691
1692
"type" : " stat"
1692
1693
},
1693
1694
{
You can’t perform that action at this time.
0 commit comments