Skip to content

Commit 6e72782

Browse files
committed
Fix chunk skipping min/max calculation
When a partially compressed chunk with a segmentby column was recompressed, the new min/max range was calculated incorrectly: only the newly inserted values were taken into account, while the values already compressed were ignored.
1 parent a10116b commit 6e72782

File tree

8 files changed

+144
-49
lines changed

8 files changed

+144
-49
lines changed

.unreleased/pr_8426

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixes: #8426 Fix chunk skipping min/max calculation

src/chunk_adaptive.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,9 @@ table_has_minmax_index(Oid relid, Oid atttype, Name attname, AttrNumber attnum)
276276
*
277277
* Returns true iff min and max is found, otherwise false.
278278
*/
279-
bool
280-
ts_chunk_get_minmax(Oid relid, Oid atttype, AttrNumber attnum, const char *call_context,
281-
Datum minmax[2])
279+
static bool
280+
chunk_get_minmax(Oid relid, Oid atttype, AttrNumber attnum, const char *call_context,
281+
Datum minmax[2])
282282
{
283283
Relation rel = table_open(relid, AccessShareLock);
284284
NameData attname;
@@ -484,11 +484,11 @@ ts_calculate_chunk_interval(PG_FUNCTION_ARGS)
484484

485485
slice_interval = slice->fd.range_end - slice->fd.range_start;
486486

487-
if (ts_chunk_get_minmax(chunk->table_id,
488-
dim->fd.column_type,
489-
attno,
490-
"adaptive chunking",
491-
minmax))
487+
if (chunk_get_minmax(chunk->table_id,
488+
dim->fd.column_type,
489+
attno,
490+
"adaptive chunking",
491+
minmax))
492492
{
493493
int64 min = ts_time_value_to_internal(minmax[0], dim->fd.column_type);
494494
int64 max = ts_time_value_to_internal(minmax[1], dim->fd.column_type);

src/chunk_adaptive.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@ typedef struct ChunkSizingInfo
2626

2727
extern void ts_chunk_adaptive_sizing_info_validate(ChunkSizingInfo *info);
2828
extern void ts_chunk_sizing_func_validate(regproc func, ChunkSizingInfo *info);
29-
extern bool ts_chunk_get_minmax(Oid relid, Oid atttype, AttrNumber attnum, const char *call_context,
30-
Datum minmax[2]);
3129
extern TSDLLEXPORT ChunkSizingInfo *ts_chunk_sizing_info_get_default_disabled(Oid table_relid);
3230

3331
extern TSDLLEXPORT int64 ts_chunk_calculate_initial_chunk_target_size(void);

src/ts_catalog/chunk_column_stats.c

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <access/stratnum.h>
1212
#include <access/tupdesc.h>
1313
#include <catalog/pg_collation.h>
14+
#include <executor/spi.h>
1415
#include <executor/tuptable.h>
1516
#include <funcapi.h>
1617
#include <nodes/makefuncs.h>
@@ -22,6 +23,7 @@
2223
#include <rewrite/rewriteManip.h>
2324
#include <storage/lmgr.h>
2425
#include <storage/lockdefs.h>
26+
#include <utils/datum.h>
2527
#include <utils/syscache.h>
2628

2729
#include "chunk.h"
@@ -781,6 +783,76 @@ ts_chunk_column_stats_lookup(int32 hypertable_id, int32 chunk_id, const char *co
781783
return form_range;
782784
}
783785

786+
static bool
787+
chunk_get_minmax(const Chunk *chunk, Oid col_type, const char *col_name, Datum *minmax)
788+
{
789+
StringInfoData command;
790+
int res;
791+
792+
/* Lock down search_path */
793+
int save_nestlevel = NewGUCNestLevel();
794+
RestrictSearchPath();
795+
796+
initStringInfo(&command);
797+
appendStringInfo(&command,
798+
"SELECT pg_catalog.min(%s), pg_catalog.max(%s) FROM %s.%s",
799+
quote_identifier(col_name),
800+
quote_identifier(col_name),
801+
quote_identifier(NameStr(chunk->fd.schema_name)),
802+
quote_identifier(NameStr(chunk->fd.table_name)));
803+
804+
/*
805+
* SPI_connect will switch MemoryContext so we need to keep track
806+
* of caller context as we need to copy the values into caller
807+
* context.
808+
*/
809+
MemoryContext caller = CurrentMemoryContext;
810+
811+
if (SPI_connect() != SPI_OK_CONNECT)
812+
elog(ERROR, "could not connect to SPI");
813+
814+
res = SPI_execute(command.data, true /* read_only */, 0 /*count*/);
815+
if (res < 0)
816+
ereport(ERROR,
817+
(errcode(ERRCODE_INTERNAL_ERROR),
818+
(errmsg("could not get the min/max values for column \"%s\" of chunk \"%s.%s\"",
819+
col_name,
820+
chunk->fd.schema_name.data,
821+
chunk->fd.table_name.data))));
822+
823+
pfree(command.data);
824+
825+
Datum min, max;
826+
bool isnull_min = false, isnull_max = false;
827+
min = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull_min);
828+
max = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull_max);
829+
Assert(SPI_gettypeid(SPI_tuptable->tupdesc, 1) == col_type);
830+
Assert(SPI_gettypeid(SPI_tuptable->tupdesc, 2) == col_type);
831+
832+
bool found = !isnull_min && !isnull_max;
833+
if (found)
834+
{
835+
bool typbyval;
836+
int16 typlen;
837+
get_typlenbyval(col_type, &typlen, &typbyval);
838+
839+
/* Copy the values into caller context */
840+
MemoryContext spi = MemoryContextSwitchTo(caller);
841+
minmax[0] = datumCopy(min, typbyval, typlen);
842+
minmax[1] = datumCopy(max, typbyval, typlen);
843+
MemoryContextSwitchTo(spi);
844+
}
845+
846+
/* Restore search_path */
847+
AtEOXact_GUC(false, save_nestlevel);
848+
849+
res = SPI_finish();
850+
if (res != SPI_OK_FINISH)
851+
elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res));
852+
853+
return found;
854+
}
855+
784856
/*
785857
* Update column dimension ranges in the catalog for the
786858
* provided chunk (it's assumed that the chunk is locked
@@ -821,7 +893,7 @@ ts_chunk_column_stats_calculate(const Hypertable *ht, const Chunk *chunk)
821893
col_type = get_atttype(chunk->table_id, attno);
822894

823895
/* calculate the min/max range for this column on this chunk */
824-
if (ts_chunk_get_minmax(chunk->table_id, col_type, attno, "column range", minmax))
896+
if (chunk_get_minmax(chunk, col_type, col_name, minmax))
825897
{
826898
Form_chunk_column_stats range;
827899
int64 min = ts_time_value_to_internal(minmax[0], col_type);

tsl/src/compression/api.c

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -486,28 +486,12 @@ compress_chunk_impl(Oid hypertable_relid, Oid chunk_relid)
486486

487487
before_size = ts_relation_size_impl(cxt.srcht_chunk->table_id);
488488

489-
/*
490-
* Calculate and add the column dimension ranges for the src chunk. This has to
491-
* be done before the compression. In case of recompression, the logic will get the
492-
* min/max entries for the uncompressed portion and reconcile and update the existing
493-
* entry for ht/chunk/column combination. This case handles:
494-
*
495-
* * INSERTs into uncompressed chunk
496-
* * UPDATEs into uncompressed chunk
497-
*
498-
* In case of DELETEs, the entries won't exist in the uncompressed chunk, but since
499-
* we are deleting, we will stay within the earlier computed max/min range. This
500-
* means that the chunk will not get pruned for a larger range of values. This will
501-
* work ok enough if only a few of the compressed chunks get DELETEs down the line.
502-
* In the future, we can look at computing min/max entries in the compressed chunk
503-
* using the batch metadata and then recompute the range to handle DELETE cases.
504-
*/
505-
if (cxt.srcht->range_space)
506-
ts_chunk_column_stats_calculate(cxt.srcht, cxt.srcht_chunk);
507-
508489
cstat = compress_chunk(cxt.srcht_chunk->table_id, compress_ht_chunk->table_id, insert_options);
509490
after_size = ts_relation_size_impl(compress_ht_chunk->table_id);
510491

492+
if (cxt.srcht->range_space)
493+
ts_chunk_column_stats_calculate(cxt.srcht, cxt.srcht_chunk);
494+
511495
if (new_compressed_chunk)
512496
{
513497
compression_chunk_size_catalog_insert(cxt.srcht_chunk->fd.id,

tsl/src/compression/recompress.c

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -184,22 +184,6 @@ recompress_chunk_segmentwise_impl(Chunk *uncompressed_chunk)
184184
}
185185
}
186186

187-
/*
188-
* Calculate and add the column dimension ranges for the src chunk used by chunk skipping
189-
* feature. This has to be done before the compression. In case of recompression, the logic will
190-
* get the min/max entries for the uncompressed portion and reconcile and update the existing
191-
* entry for ht/chunk/column combination. This case handles:
192-
*
193-
* * INSERTs into uncompressed chunk
194-
* * UPDATEs into uncompressed chunk
195-
*
196-
* In case of DELETEs, the entries won't exist in the uncompressed chunk, but since
197-
* we are deleting, we will stay within the earlier computed max/min range. This
198-
* means that the chunk will not get pruned for a larger range of values. This will
199-
* work ok enough if only a few of the compressed chunks get DELETEs down the line.
200-
* In the future, we can look at computing min/max entries in the compressed chunk
201-
* using the batch metadata and then recompute the range to handle DELETE cases.
202-
*/
203187
Hypertable *ht = ts_hypertable_get_by_id(uncompressed_chunk->fd.hypertable_id);
204188
if (ht->range_space)
205189
ts_chunk_column_stats_calculate(ht, uncompressed_chunk);

tsl/test/expected/chunk_column_stats.out

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ SELECT * from _timescaledb_catalog.chunk_column_stats WHERE chunk_id = :'CHUNK_I
185185
DROP INDEX sense_idx;
186186
-- recompress the partial chunk
187187
SELECT compress_chunk(:'CH_NAME');
188-
WARNING: no index on "sensor_id" found for column range on chunk "_hyper_1_1_chunk"
189188
compress_chunk
190189
----------------------------------------
191190
_timescaledb_internal._hyper_1_1_chunk
@@ -201,6 +200,12 @@ WHERE hypertable_name = 'sample_table' AND chunk_name = :'CH_NAME';
201200
(1 row)
202201

203202
-- The chunk entry should become "valid" again
203+
SELECT min(sensor_id), max(sensor_id) FROM :CH_NAME;
204+
min | max
205+
-----+-----
206+
1 | 8
207+
(1 row)
208+
204209
SELECT * from _timescaledb_catalog.chunk_column_stats WHERE chunk_id = :'CHUNK_ID';
205210
id | hypertable_id | chunk_id | column_name | range_start | range_end | valid
206211
----+---------------+----------+-------------+-------------+-----------+-------
@@ -632,7 +637,6 @@ SELECT * from _timescaledb_catalog.chunk_column_stats;
632637

633638
-- Compressing a chunk again should calculate proper ranges
634639
SELECT compress_chunk(:'CH_NAME');
635-
WARNING: no index on "sensor_id" found for column range on chunk "_hyper_1_1_chunk"
636640
compress_chunk
637641
----------------------------------------
638642
_timescaledb_internal._hyper_1_1_chunk
@@ -665,7 +669,6 @@ SELECT * from _timescaledb_catalog.chunk_column_stats;
665669

666670
-- Check that truncate resets the entry in the catalog
667671
SELECT compress_chunk(:'CH_NAME');
668-
WARNING: no index on "sensor_id" found for column range on chunk "_hyper_1_1_chunk"
669672
compress_chunk
670673
----------------------------------------
671674
_timescaledb_internal._hyper_1_1_chunk
@@ -820,3 +823,39 @@ SELECT * FROM _timescaledb_catalog.chunk_column_stats;
820823
12 | 4 | 8 | temperature | 366 | 502 | t
821824
(2 rows)
822825

826+
-- Check min/max ranges for partial chunks with segmentby columns get recalculated correctly by seementwise recompression
827+
CREATE TABLE chunk_skipping(time timestamptz,device text, updated_at timestamptz)
828+
WITH (tsdb.hypertable, tsdb.partition_column='time',tsdb.segmentby='device');
829+
NOTICE: adding not-null constraint to column "time"
830+
SELECT enable_chunk_skipping('chunk_skipping', 'updated_at');
831+
enable_chunk_skipping
832+
-----------------------
833+
(13,t)
834+
(1 row)
835+
836+
INSERT INTO chunk_skipping SELECT '2025-01-01', 'd1', '2025-01-01';
837+
SELECT compress_chunk(show_chunks('chunk_skipping'));
838+
compress_chunk
839+
-----------------------------------------
840+
_timescaledb_internal._hyper_6_10_chunk
841+
(1 row)
842+
843+
SELECT * from chunk_skipping where updated_at < '2026-01-01';
844+
time | device | updated_at
845+
------------------------------+--------+------------------------------
846+
Wed Jan 01 00:00:00 2025 PST | d1 | Wed Jan 01 00:00:00 2025 PST
847+
(1 row)
848+
849+
INSERT INTO chunk_skipping SELECT '2025-01-01', 'd2', '2026-01-01';
850+
SELECT compress_chunk(show_chunks('chunk_skipping'));
851+
compress_chunk
852+
-----------------------------------------
853+
_timescaledb_internal._hyper_6_10_chunk
854+
(1 row)
855+
856+
SELECT * from chunk_skipping where updated_at < '2026-01-01';
857+
time | device | updated_at
858+
------------------------------+--------+------------------------------
859+
Wed Jan 01 00:00:00 2025 PST | d1 | Wed Jan 01 00:00:00 2025 PST
860+
(1 row)
861+

tsl/test/sql/chunk_column_stats.sql

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ FROM compressed_chunk_info_view
135135
WHERE hypertable_name = 'sample_table' AND chunk_name = :'CH_NAME';
136136

137137
-- The chunk entry should become "valid" again
138+
SELECT min(sensor_id), max(sensor_id) FROM :CH_NAME;
138139
SELECT * from _timescaledb_catalog.chunk_column_stats WHERE chunk_id = :'CHUNK_ID';
139140

140141
-- A query using a WHERE clause on "sensor_id" column will scan the proper chunk
@@ -310,3 +311,19 @@ SELECT enable_chunk_skipping('sample_table', 'temperature');
310311
SELECT show_chunks('sample_table') AS "CH_NAME" order by 1 limit 1 \gset
311312
SELECT compress_chunk(:'CH_NAME');
312313
SELECT * FROM _timescaledb_catalog.chunk_column_stats;
314+
315+
-- Check min/max ranges for partial chunks with segmentby columns get recalculated correctly by seementwise recompression
316+
CREATE TABLE chunk_skipping(time timestamptz,device text, updated_at timestamptz)
317+
WITH (tsdb.hypertable, tsdb.partition_column='time',tsdb.segmentby='device');
318+
319+
SELECT enable_chunk_skipping('chunk_skipping', 'updated_at');
320+
321+
INSERT INTO chunk_skipping SELECT '2025-01-01', 'd1', '2025-01-01';
322+
SELECT compress_chunk(show_chunks('chunk_skipping'));
323+
324+
SELECT * from chunk_skipping where updated_at < '2026-01-01';
325+
326+
INSERT INTO chunk_skipping SELECT '2025-01-01', 'd2', '2026-01-01';
327+
SELECT compress_chunk(show_chunks('chunk_skipping'));
328+
329+
SELECT * from chunk_skipping where updated_at < '2026-01-01';

0 commit comments

Comments
 (0)