This repository was archived by the owner on Apr 2, 2024. It is now read-only.

Commit a685504

Fix double counting while populating promscale_sql_database_chunks_count metric
Problem statement: the `PromscaleCompressionLow` alert fires even when the compression ratio is good.

Root cause: the SQL query that populates `promscale_sql_database_chunks_count` sums three kinds of catalog entries:

1) uncompressed chunks
2) proxy chunks that point to compressed chunks
3) compressed chunks

The total chunk count, however, should be just (1) + (3): every entry in (2) already points to an entry in (3), so including it counts those chunks twice.

Solution: fix the SQL query to count only entries whose `compressed_chunk_id` is null, which holds for both (1) and (3).

Signed-off-by: Arunprasad Rajkumar <[email protected]>
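
To make the double counting concrete, here is a minimal sketch of how the three kinds of entries can be told apart in the TimescaleDB catalog. The table and column names (`_timescaledb_catalog.chunk`, `compressed_chunk_id`, `dropped`) come from the queries in this commit; the aggregate layout is illustrative only and is not part of the change.

```sql
-- Illustrative sketch: how the three kinds of catalog entries split up.
--   (1) uncompressed chunks            -> compressed_chunk_id IS NULL
--   (2) proxy chunks pointing at (3)   -> compressed_chunk_id IS NOT NULL
--   (3) compressed chunks              -> compressed_chunk_id IS NULL
SELECT
    count(*) FILTER (WHERE compressed_chunk_id IS NULL)     AS chunks,        -- (1) + (3): what the metric should report
    count(*) FILTER (WHERE compressed_chunk_id IS NOT NULL) AS proxy_entries  -- (2): excluded to avoid double counting
FROM _timescaledb_catalog.chunk
WHERE dropped = false;
```
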
1 parent 3723dc7 · commit a685504

File tree: 4 files changed, +71 −6 lines

CHANGELOG.md

Lines changed: 1 addition & 0 deletions

@@ -25,6 +25,7 @@ We use the following categories for changes:
 ### Fixed
 - Refine check for existence of `prom_schema_migrations` table [#1452]
 - Do not run rules-manager in `-db.read-only` mode [#1451]
+- Fix underlying metric(`promscale_sql_database_chunks_count`) which leads to false positive firing of PromscaleCompressionLow alert [#1494]

 ## [0.12.1] - 2022-06-29

docs/runbooks/PromscaleCompressionLow.md

Lines changed: 4 additions & 3 deletions

@@ -12,9 +12,10 @@ High disk usage by Promscale database
 1. Open Grafana and navigate to Promscale dashboard
 2. Go to Database section and see `Compressesd chunks ratio`. If you see a ratio of < 10% then compression is not adequate in your system
 3. Open psql
-4. Check number of uncompressed chunks: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id=null;`
-5. Check number of maintenancec jobs: `select count(*) from timescaledb_information.jobs where proc_name = 'execute_maintenance_job'`
-6. Run the following debugging query:
+4. Check total number of chunks: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id is null;`
+5. Check total number of compressed chunks: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id is not null;`
+6. Check number of maintenancec jobs: `select count(*) from timescaledb_information.jobs where proc_name = 'execute_maintenance_job'`
+7. Run the following debugging query:

 ```postgresql
 SELECT
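
Steps 4 and 5 of the updated runbook can also be combined into a single query. The following is only a sketch of one way to turn the two counts into a ratio for comparison against the compression ratio checked in step 2; it is not part of the runbook itself.

```sql
-- Sketch: compressed chunks divided by total chunks, using the same catalog
-- predicates as steps 4 and 5 above. NULLIF guards against division by zero.
SELECT
    count(*) FILTER (WHERE compressed_chunk_id IS NOT NULL)::numeric
      / NULLIF(count(*) FILTER (WHERE compressed_chunk_id IS NULL), 0)
      AS compressed_chunks_ratio
FROM _timescaledb_catalog.chunk
WHERE dropped = false;
```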

pkg/pgmodel/metrics/database/metrics.go

Lines changed: 4 additions & 2 deletions

@@ -67,7 +67,8 @@ var metrics = []metricQueryWrap{
 				Help: "Total number of chunks in TimescaleDB currently.",
 			},
 		),
-		query: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false`,
+		// Compressed_chunk_id is null for both yet to be compressed and already compressed chunks.
+		query: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id is null`,
 	},
 	{
 		metric: prometheus.NewGauge(
@@ -78,7 +79,8 @@ var metrics = []metricQueryWrap{
 				Help: "Total number of chunks created since creation of database.",
 			},
 		),
-		query: `select count(*)::bigint from _timescaledb_catalog.chunk`,
+		// Compressed_chunk_id is null for both yet to be compressed and already compressed chunks.
+		query: `select count(*)::bigint from _timescaledb_catalog.chunk where compressed_chunk_id is null`,
 	},
 	{
 		metric: prometheus.NewGauge(
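The effect of the query change can be seen on a small hypothetical catalog state. Suppose two hypertable chunks exist and one of them has been compressed; the catalog then holds three rows, and only the fixed predicate returns the real chunk count. The row layout below is an assumed example, not data from the repository.

```sql
-- Hypothetical catalog contents with 2 chunks, 1 of them compressed:
--   chunk A: compressed_chunk_id = NULL   (uncompressed)
--   chunk B: compressed_chunk_id = C.id   (proxy entry for its compressed form)
--   chunk C: compressed_chunk_id = NULL   (the compressed chunk itself)

-- Old query: counts A, B and C -> 3, although only 2 chunks exist.
select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false;

-- Fixed query: counts A and C -> 2, the actual number of chunks.
select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id is null;
```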

pkg/tests/end_to_end_tests/database_metrics_test.go

Lines changed: 62 additions & 1 deletion

@@ -34,6 +34,10 @@ func TestDatabaseMetrics(t *testing.T) {
 		require.Equal(t, float64(0), numMaintenanceJobs)
 		chunksCreated := getMetricValue(t, "chunks_created")
 		require.Equal(t, float64(0), chunksCreated)
+		chunksCount := getMetricValue(t, "chunks_count")
+		require.Equal(t, float64(0), chunksCount)
+		chunksCompressedCount := getMetricValue(t, "chunks_compressed_count")
+		require.Equal(t, float64(0), chunksCompressedCount)

 		// Update the metrics.
 		require.NoError(t, dbMetrics.Update())
@@ -45,6 +49,10 @@ func TestDatabaseMetrics(t *testing.T) {
 		require.Equal(t, float64(2), numMaintenanceJobs)
 		chunksCreated = getMetricValue(t, "chunks_created")
 		require.Equal(t, float64(0), chunksCreated)
+		chunksCount = getMetricValue(t, "chunks_count")
+		require.Equal(t, float64(0), chunksCount)
+		chunksCompressedCount = getMetricValue(t, "chunks_compressed_count")
+		require.Equal(t, float64(0), chunksCompressedCount)

 		// Ingest some data and then see check the metrics to ensure proper updating.
 		ingestor, err := ingstr.NewPgxIngestorForTests(pgxconn.NewPgxConn(db), nil)
@@ -57,7 +65,60 @@ func TestDatabaseMetrics(t *testing.T) {
 		require.NoError(t, dbMetrics.Update())

 		chunksCreated = getMetricValue(t, "chunks_created")
-		require.Equal(t, chunksCreated, float64(3))
+		require.Equal(t, float64(3), chunksCreated)
+		chunksCount = getMetricValue(t, "chunks_count")
+		require.Equal(t, float64(3), chunksCount)
+		chunksCompressedCount = getMetricValue(t, "chunks_compressed_count")
+		require.Equal(t, float64(0), chunksCompressedCount)
+	})
+}
+
+func TestDatabaseMetricsAfterCompression(t *testing.T) {
+	if !*useTimescaleDB {
+		t.Skip("test meaningless without TimescaleDB")
+	}
+	ts := generateSmallTimeseries()
+	withDB(t, *testDatabase, func(db *pgxpool.Pool, t testing.TB) {
+		ingestor, err := ingstr.NewPgxIngestorForTests(pgxconn.NewPgxConn(db), nil)
+		require.NoError(t, err)
+		defer ingestor.Close()
+		_, _, err = ingestor.Ingest(context.Background(), newWriteRequestWithTs(copyMetrics(ts)))
+		require.NoError(t, err)
+		err = ingestor.CompleteMetricCreation(context.Background())
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		ctx, cancel := context.WithCancel(context.Background())
+		defer cancel()
+
+		dbMetrics := database.NewEngine(ctx, pgxconn.NewPgxConn(db))
+
+		// Update the metrics.
+		require.NoError(t, dbMetrics.Update())
+		// Get metrics before compressing the firstMetric metric chunk.
+		compressionStatus := getMetricValue(t, "compression_status")
+		require.Equal(t, float64(1), compressionStatus)
+		numMaintenanceJobs := getMetricValue(t, "worker_maintenance_job")
+		require.Equal(t, float64(2), numMaintenanceJobs)
+		chunksCreated := getMetricValue(t, "chunks_created")
+		require.Equal(t, float64(2), chunksCreated)
+		chunksCount := getMetricValue(t, "chunks_count")
+		require.Equal(t, float64(2), chunksCount)
+		chunksCompressedCount := getMetricValue(t, "chunks_compressed_count")
+		require.Equal(t, float64(0), chunksCompressedCount)
+
+		_, err = db.Exec(context.Background(), `SELECT public.compress_chunk(i) from public.show_chunks('prom_data."firstMetric"') i;`)
+		require.NoError(t, err)
+
+		// Update the metrics after compression.
+		require.NoError(t, dbMetrics.Update())
+		chunksCreated = getMetricValue(t, "chunks_created")
+		require.Equal(t, float64(2), chunksCreated)
+		chunksCount = getMetricValue(t, "chunks_count")
+		require.Equal(t, float64(2), chunksCount)
+		chunksCompressedCount = getMetricValue(t, "chunks_compressed_count")
+		require.Equal(t, float64(1), chunksCompressedCount)
 	})
 }
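
The new test can be reproduced roughly by hand from psql. The sketch below assumes a Promscale database that already contains a metric named `firstMetric` in the `prom_data` schema, as the test fixtures do; it reuses the compression statement and the catalog predicates that appear in this commit.

```sql
-- Compress every chunk of the firstMetric hypertable (same statement the test runs).
SELECT public.compress_chunk(i) FROM public.show_chunks('prom_data."firstMetric"') i;

-- Total chunk count should be unchanged by compression ...
SELECT count(*)::bigint FROM _timescaledb_catalog.chunk
WHERE dropped = false AND compressed_chunk_id IS NULL;

-- ... while the compressed-chunk count increases by one per compressed chunk.
SELECT count(*)::bigint FROM _timescaledb_catalog.chunk
WHERE dropped = false AND compressed_chunk_id IS NOT NULL;
```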
