Skip to content

Commit 99a2f6b

Browse files
committed
Fix chunk skipping min/max calculation
When a partially compressed chunk with a segmentby column was recompressed, the new min/max range was calculated incorrectly: only the newly inserted values were taken into account, while values already in the compressed portion were ignored.
1 parent 208c306 commit 99a2f6b

File tree

5 files changed

+119
-22
lines changed

5 files changed

+119
-22
lines changed

src/ts_catalog/chunk_column_stats.c

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <access/stratnum.h>
1212
#include <access/tupdesc.h>
1313
#include <catalog/pg_collation.h>
14+
#include <executor/spi.h>
1415
#include <executor/tuptable.h>
1516
#include <funcapi.h>
1617
#include <nodes/makefuncs.h>
@@ -22,6 +23,7 @@
2223
#include <rewrite/rewriteManip.h>
2324
#include <storage/lmgr.h>
2425
#include <storage/lockdefs.h>
26+
#include <utils/datum.h>
2527
#include <utils/syscache.h>
2628

2729
#include "chunk.h"
@@ -781,6 +783,60 @@ ts_chunk_column_stats_lookup(int32 hypertable_id, int32 chunk_id, const char *co
781783
return form_range;
782784
}
783785

786+
static bool
787+
chunk_get_minmax(const Chunk *chunk, Oid col_type, const char *col_name, Datum *minmax)
788+
{
789+
StringInfoData command;
790+
int res;
791+
792+
/* Lock down search_path */
793+
int save_nestlevel = NewGUCNestLevel();
794+
RestrictSearchPath();
795+
796+
initStringInfo(&command);
797+
appendStringInfo(&command,
798+
"SELECT min(%s), max(%s) FROM %s.%s",
799+
quote_identifier(col_name),
800+
quote_identifier(col_name),
801+
quote_identifier(NameStr(chunk->fd.schema_name)),
802+
quote_identifier(NameStr(chunk->fd.table_name)));
803+
804+
if (SPI_connect() != SPI_OK_CONNECT)
805+
elog(ERROR, "could not connect to SPI");
806+
807+
res = SPI_execute(command.data, true /* read_only */, 0 /*count*/);
808+
if (res < 0)
809+
ereport(ERROR,
810+
(errcode(ERRCODE_INTERNAL_ERROR),
811+
(errmsg("could not get the min/max values for column \"%s\" of chunk \"%s.%s\"",
812+
col_name, chunk->fd.schema_name.data,chunk->fd.table_name.data))));
813+
814+
pfree(command.data);
815+
816+
bool isnull_min = false, isnull_max = false, ret = false;
817+
Datum min = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull_min);
818+
Datum max = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull_max);
819+
820+
if (!isnull_min && !isnull_max)
821+
{
822+
bool typbyval;
823+
int16 typlen;
824+
get_typlenbyval(col_type, &typlen, &typbyval);
825+
minmax[0] = datumCopy(min, typbyval, typlen);
826+
minmax[1] = datumCopy(max, typbyval, typlen);
827+
ret = true;
828+
}
829+
830+
/* Reset search path since this can be executed as part of a larger transaction */
831+
AtEOXact_GUC(false, save_nestlevel);
832+
833+
res = SPI_finish();
834+
if (res != SPI_OK_FINISH)
835+
elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res));
836+
837+
return ret;
838+
}
839+
784840
/*
785841
* Update column dimension ranges in the catalog for the
786842
* provided chunk (it's assumed that the chunk is locked
@@ -821,7 +877,7 @@ ts_chunk_column_stats_calculate(const Hypertable *ht, const Chunk *chunk)
821877
col_type = get_atttype(chunk->table_id, attno);
822878

823879
/* calculate the min/max range for this column on this chunk */
824-
if (ts_chunk_get_minmax(chunk->table_id, col_type, attno, "column range", minmax))
880+
if (chunk_get_minmax(chunk, col_type, col_name, minmax))
825881
{
826882
Form_chunk_column_stats range;
827883
int64 min = ts_time_value_to_internal(minmax[0], col_type);

tsl/src/compression/api.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -502,12 +502,13 @@ compress_chunk_impl(Oid hypertable_relid, Oid chunk_relid)
502502
* In the future, we can look at computing min/max entries in the compressed chunk
503503
* using the batch metadata and then recompute the range to handle DELETE cases.
504504
*/
505-
if (cxt.srcht->range_space)
506-
ts_chunk_column_stats_calculate(cxt.srcht, cxt.srcht_chunk);
507505

508506
cstat = compress_chunk(cxt.srcht_chunk->table_id, compress_ht_chunk->table_id, insert_options);
509507
after_size = ts_relation_size_impl(compress_ht_chunk->table_id);
510508

509+
if (cxt.srcht->range_space)
510+
ts_chunk_column_stats_calculate(cxt.srcht, cxt.srcht_chunk);
511+
511512
if (new_compressed_chunk)
512513
{
513514
compression_chunk_size_catalog_insert(cxt.srcht_chunk->fd.id,

tsl/src/compression/recompress.c

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -184,22 +184,6 @@ recompress_chunk_segmentwise_impl(Chunk *uncompressed_chunk)
184184
}
185185
}
186186

187-
/*
188-
* Calculate and add the column dimension ranges for the src chunk used by chunk skipping
189-
* feature. This has to be done before the compression. In case of recompression, the logic will
190-
* get the min/max entries for the uncompressed portion and reconcile and update the existing
191-
* entry for ht/chunk/column combination. This case handles:
192-
*
193-
* * INSERTs into uncompressed chunk
194-
* * UPDATEs into uncompressed chunk
195-
*
196-
* In case of DELETEs, the entries won't exist in the uncompressed chunk, but since
197-
* we are deleting, we will stay within the earlier computed max/min range. This
198-
* means that the chunk will not get pruned for a larger range of values. This will
199-
* work ok enough if only a few of the compressed chunks get DELETEs down the line.
200-
* In the future, we can look at computing min/max entries in the compressed chunk
201-
* using the batch metadata and then recompute the range to handle DELETE cases.
202-
*/
203187
Hypertable *ht = ts_hypertable_get_by_id(uncompressed_chunk->fd.hypertable_id);
204188
if (ht->range_space)
205189
ts_chunk_column_stats_calculate(ht, uncompressed_chunk);

tsl/test/expected/chunk_column_stats.out

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ SELECT * from _timescaledb_catalog.chunk_column_stats WHERE chunk_id = :'CHUNK_I
185185
DROP INDEX sense_idx;
186186
-- recompress the partial chunk
187187
SELECT compress_chunk(:'CH_NAME');
188-
WARNING: no index on "sensor_id" found for column range on chunk "_hyper_1_1_chunk"
189188
compress_chunk
190189
----------------------------------------
191190
_timescaledb_internal._hyper_1_1_chunk
@@ -201,6 +200,12 @@ WHERE hypertable_name = 'sample_table' AND chunk_name = :'CH_NAME';
201200
(1 row)
202201

203202
-- The chunk entry should become "valid" again
203+
SELECT min(sensor_id), max(sensor_id) FROM :CH_NAME;
204+
min | max
205+
-----+-----
206+
1 | 8
207+
(1 row)
208+
204209
SELECT * from _timescaledb_catalog.chunk_column_stats WHERE chunk_id = :'CHUNK_ID';
205210
id | hypertable_id | chunk_id | column_name | range_start | range_end | valid
206211
----+---------------+----------+-------------+-------------+-----------+-------
@@ -632,7 +637,6 @@ SELECT * from _timescaledb_catalog.chunk_column_stats;
632637

633638
-- Compressing a chunk again should calculate proper ranges
634639
SELECT compress_chunk(:'CH_NAME');
635-
WARNING: no index on "sensor_id" found for column range on chunk "_hyper_1_1_chunk"
636640
compress_chunk
637641
----------------------------------------
638642
_timescaledb_internal._hyper_1_1_chunk
@@ -665,7 +669,6 @@ SELECT * from _timescaledb_catalog.chunk_column_stats;
665669

666670
-- Check that truncate resets the entry in the catalog
667671
SELECT compress_chunk(:'CH_NAME');
668-
WARNING: no index on "sensor_id" found for column range on chunk "_hyper_1_1_chunk"
669672
compress_chunk
670673
----------------------------------------
671674
_timescaledb_internal._hyper_1_1_chunk
@@ -820,3 +823,39 @@ SELECT * FROM _timescaledb_catalog.chunk_column_stats;
820823
12 | 4 | 8 | temperature | 366 | 502 | t
821824
(2 rows)
822825

826+
-- Check that min/max ranges for partial chunks with segmentby columns get recalculated correctly by segmentwise recompression
827+
CREATE TABLE chunk_skipping(time timestamptz,device text, updated_at timestamptz)
828+
WITH (tsdb.hypertable, tsdb.partition_column='time',tsdb.segmentby='device');
829+
NOTICE: adding not-null constraint to column "time"
830+
SELECT enable_chunk_skipping('chunk_skipping', 'updated_at');
831+
enable_chunk_skipping
832+
-----------------------
833+
(13,t)
834+
(1 row)
835+
836+
INSERT INTO chunk_skipping SELECT '2025-01-01', 'd1', '2025-01-01';
837+
SELECT compress_chunk(show_chunks('chunk_skipping'));
838+
compress_chunk
839+
-----------------------------------------
840+
_timescaledb_internal._hyper_6_10_chunk
841+
(1 row)
842+
843+
SELECT * from chunk_skipping where updated_at < '2026-01-01';
844+
time | device | updated_at
845+
------------------------------+--------+------------------------------
846+
Wed Jan 01 00:00:00 2025 PST | d1 | Wed Jan 01 00:00:00 2025 PST
847+
(1 row)
848+
849+
INSERT INTO chunk_skipping SELECT '2025-01-01', 'd2', '2026-01-01';
850+
SELECT compress_chunk(show_chunks('chunk_skipping'));
851+
compress_chunk
852+
-----------------------------------------
853+
_timescaledb_internal._hyper_6_10_chunk
854+
(1 row)
855+
856+
SELECT * from chunk_skipping where updated_at < '2026-01-01';
857+
time | device | updated_at
858+
------------------------------+--------+------------------------------
859+
Wed Jan 01 00:00:00 2025 PST | d1 | Wed Jan 01 00:00:00 2025 PST
860+
(1 row)
861+

tsl/test/sql/chunk_column_stats.sql

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ FROM compressed_chunk_info_view
135135
WHERE hypertable_name = 'sample_table' AND chunk_name = :'CH_NAME';
136136

137137
-- The chunk entry should become "valid" again
138+
SELECT min(sensor_id), max(sensor_id) FROM :CH_NAME;
138139
SELECT * from _timescaledb_catalog.chunk_column_stats WHERE chunk_id = :'CHUNK_ID';
139140

140141
-- A query using a WHERE clause on "sensor_id" column will scan the proper chunk
@@ -310,3 +311,19 @@ SELECT enable_chunk_skipping('sample_table', 'temperature');
310311
SELECT show_chunks('sample_table') AS "CH_NAME" order by 1 limit 1 \gset
311312
SELECT compress_chunk(:'CH_NAME');
312313
SELECT * FROM _timescaledb_catalog.chunk_column_stats;
314+
315+
-- Check that min/max ranges for partial chunks with segmentby columns get recalculated correctly by segmentwise recompression
316+
CREATE TABLE chunk_skipping(time timestamptz,device text, updated_at timestamptz)
317+
WITH (tsdb.hypertable, tsdb.partition_column='time',tsdb.segmentby='device');
318+
319+
SELECT enable_chunk_skipping('chunk_skipping', 'updated_at');
320+
321+
INSERT INTO chunk_skipping SELECT '2025-01-01', 'd1', '2025-01-01';
322+
SELECT compress_chunk(show_chunks('chunk_skipping'));
323+
324+
SELECT * from chunk_skipping where updated_at < '2026-01-01';
325+
326+
INSERT INTO chunk_skipping SELECT '2025-01-01', 'd2', '2026-01-01';
327+
SELECT compress_chunk(show_chunks('chunk_skipping'));
328+
329+
SELECT * from chunk_skipping where updated_at < '2026-01-01';

0 commit comments

Comments
 (0)