Skip to content

Commit 7247bce

Browse files
committed
Optimize direct compress status handling
Only mark the status as partial if we insert into an existing uncompressed chunk but skip marking as partial when it is a newly created chunk so the resulting status is fully compressed instead.
1 parent 106b2f7 commit 7247bce

File tree

7 files changed

+233
-45
lines changed

7 files changed

+233
-45
lines changed

.unreleased/pr_8529

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implements: #8529 Optimize direct compress status handling

src/chunk_tuple_routing.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ ts_chunk_tuple_routing_find_chunk(ChunkTupleRouting *ctr, Point *point)
6666
{
6767
Chunk *chunk = NULL;
6868
ChunkInsertState *cis = NULL;
69-
7069
cis = ts_subspace_store_get(ctr->subspace, point);
7170

7271
/*
@@ -77,6 +76,9 @@ ts_chunk_tuple_routing_find_chunk(ChunkTupleRouting *ctr, Point *point)
7776

7877
if (!cis)
7978
{
79+
bool chunk_created = false;
80+
bool needs_partial = false;
81+
8082
/*
8183
* Normally, for every row of the chunk except the first one, we expect
8284
* the chunk to exist already. The "create" function would take a lock
@@ -116,7 +118,10 @@ ts_chunk_tuple_routing_find_chunk(ChunkTupleRouting *ctr, Point *point)
116118
}
117119

118120
if (!chunk)
121+
{
119122
chunk = ts_hypertable_create_chunk_for_point(ctr->hypertable, point);
123+
chunk_created = true;
124+
}
120125

121126
Ensure(chunk, "no chunk found or created");
122127

@@ -137,10 +142,15 @@ ts_chunk_tuple_routing_find_chunk(ChunkTupleRouting *ctr, Point *point)
137142
Chunk *compressed_chunk =
138143
ts_cm_functions->compression_chunk_create(compressed_ht, chunk);
139144
ts_chunk_set_compressed_chunk(chunk, compressed_chunk->fd.id);
145+
146+
/* mark chunk as partial unless completely new chunk */
147+
if (!chunk_created)
148+
needs_partial = true;
140149
}
141150
}
142151

143152
cis = chunk_insert_state_create(chunk->table_id, ctr);
153+
cis->needs_partial = needs_partial;
144154
ts_subspace_store_add(ctr->subspace, chunk->cube, cis, destroy_chunk_insert_state);
145155
}
146156

src/copy.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ TSCopyMultiInsertBufferInit(TSCopyMultiInsertInfo *miinfo, ChunkInsertState *cis
289289
if (!ts_chunk_is_unordered(chunk))
290290
ts_chunk_set_unordered(chunk);
291291
}
292+
cis->columnstore_insert = true;
292293
break;
293294
}
294295
}

src/nodes/chunk_dispatch/chunk_insert_state.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,15 @@ ts_chunk_insert_state_destroy(ChunkInsertState *state)
582582
{
583583
ResultRelInfo *rri = state->result_relation_info;
584584

585-
if (state->chunk_compressed && !state->chunk_partial)
585+
/*
586+
* Check if we need to mark the chunk as partial.
587+
* We need to change chunk status to partial in the following cases:
588+
* - rowstore insert into compressed chunk
589+
* - columnstore insert into uncompressed chunk that is not a new chunk (flagged as
590+
* needs_partial in chunk_tuple_routing.c)
591+
*/
592+
if (state->chunk_compressed && !state->chunk_partial &&
593+
(!state->columnstore_insert || state->needs_partial))
586594
{
587595
Oid chunk_relid = RelationGetRelid(state->result_relation_info->ri_RelationDesc);
588596
Chunk *chunk = ts_chunk_get_by_relid(chunk_relid, true);

src/nodes/chunk_dispatch/chunk_insert_state.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ typedef struct ChunkInsertState
9898
/* for tracking compressed chunks */
9999
bool chunk_compressed;
100100
bool chunk_partial;
101+
bool columnstore_insert;
102+
bool needs_partial;
101103

102104
/* To speedup repeated calls of `decompress_batches_for_insert` */
103105
CachedDecompressionState *cached_decompression_state;

tsl/test/expected/compressed_copy.out

Lines changed: 133 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,9 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM metrics;
2828
Append (actual rows=3000 loops=1)
2929
-> Custom Scan (ColumnarScan) on _hyper_1_3_chunk (actual rows=959 loops=1)
3030
-> Seq Scan on compress_hyper_2_4_chunk (actual rows=1 loops=1)
31-
-> Seq Scan on _hyper_1_3_chunk (actual rows=0 loops=1)
3231
-> Custom Scan (ColumnarScan) on _hyper_1_5_chunk (actual rows=2041 loops=1)
3332
-> Seq Scan on compress_hyper_2_6_chunk (actual rows=3 loops=1)
34-
-> Seq Scan on _hyper_1_5_chunk (actual rows=0 loops=1)
35-
(7 rows)
33+
(5 rows)
3634

3735
SELECT first(time,rn), last(time,rn) FROM (SELECT ROW_NUMBER() OVER () as rn, time FROM metrics) sub;
3836
first | last
@@ -45,13 +43,11 @@ ROLLBACK;
4543
BEGIN;
4644
COPY metrics FROM PROGRAM 'seq 3000 | xargs -II date -d "2025-01-01 - I minute" +"%Y-%m-%d %H:%M:%S,d1,0.I"' WITH (FORMAT CSV);
4745
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM metrics;
48-
QUERY PLAN
49-
---------------------------------------------------------------------------------
50-
Append (actual rows=3000 loops=1)
51-
-> Custom Scan (ColumnarScan) on _hyper_1_7_chunk (actual rows=3000 loops=1)
52-
-> Seq Scan on compress_hyper_2_8_chunk (actual rows=3 loops=1)
53-
-> Seq Scan on _hyper_1_7_chunk (actual rows=0 loops=1)
54-
(4 rows)
46+
QUERY PLAN
47+
---------------------------------------------------------------------------
48+
Custom Scan (ColumnarScan) on _hyper_1_7_chunk (actual rows=3000 loops=1)
49+
-> Seq Scan on compress_hyper_2_8_chunk (actual rows=3 loops=1)
50+
(2 rows)
5551

5652
SELECT first(time,rn), last(time,rn) FROM (SELECT ROW_NUMBER() OVER () as rn, time FROM metrics) sub;
5753
first | last
@@ -70,11 +66,9 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM metrics;
7066
Append (actual rows=3000 loops=1)
7167
-> Custom Scan (ColumnarScan) on _hyper_1_9_chunk (actual rows=959 loops=1)
7268
-> Seq Scan on compress_hyper_2_10_chunk (actual rows=1 loops=1)
73-
-> Seq Scan on _hyper_1_9_chunk (actual rows=0 loops=1)
7469
-> Custom Scan (ColumnarScan) on _hyper_1_11_chunk (actual rows=2041 loops=1)
7570
-> Seq Scan on compress_hyper_2_12_chunk (actual rows=3 loops=1)
76-
-> Seq Scan on _hyper_1_11_chunk (actual rows=0 loops=1)
77-
(7 rows)
71+
(5 rows)
7872

7973
SELECT first(time,rn), last(time,rn) FROM (SELECT ROW_NUMBER() OVER () as rn, time FROM metrics) sub;
8074
first | last
@@ -87,13 +81,11 @@ ROLLBACK;
8781
BEGIN;
8882
COPY metrics FROM PROGRAM 'seq 3000 | xargs -II date -d "2025-01-01 - I minute" +"%Y-%m-%d %H:%M:%S,d1,0.I"' WITH (FORMAT CSV);
8983
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM metrics;
90-
QUERY PLAN
91-
----------------------------------------------------------------------------------
92-
Append (actual rows=3000 loops=1)
93-
-> Custom Scan (ColumnarScan) on _hyper_1_13_chunk (actual rows=3000 loops=1)
94-
-> Seq Scan on compress_hyper_2_14_chunk (actual rows=3 loops=1)
95-
-> Seq Scan on _hyper_1_13_chunk (actual rows=0 loops=1)
96-
(4 rows)
84+
QUERY PLAN
85+
----------------------------------------------------------------------------
86+
Custom Scan (ColumnarScan) on _hyper_1_13_chunk (actual rows=3000 loops=1)
87+
-> Seq Scan on compress_hyper_2_14_chunk (actual rows=3 loops=1)
88+
(2 rows)
9789

9890
SELECT first(time,rn), last(time,rn) FROM (SELECT ROW_NUMBER() OVER () as rn, time FROM metrics) sub;
9991
first | last
@@ -136,19 +128,17 @@ SET timescaledb.enable_direct_compress_copy = true;
136128
SET timescaledb.enable_direct_compress_copy_client_sorted = true;
137129
COPY metrics FROM PROGRAM 'seq 3000 | xargs -II date -d "2025-01-01 - I minute" +"%Y-%m-%d %H:%M:%S,d1,0.I"' WITH (FORMAT CSV);
138130
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM metrics;
139-
QUERY PLAN
140-
----------------------------------------------------------------------------------
141-
Append (actual rows=3000 loops=1)
142-
-> Custom Scan (ColumnarScan) on _hyper_1_19_chunk (actual rows=3000 loops=1)
143-
-> Seq Scan on compress_hyper_2_20_chunk (actual rows=3 loops=1)
144-
-> Seq Scan on _hyper_1_19_chunk (actual rows=0 loops=1)
145-
(4 rows)
131+
QUERY PLAN
132+
----------------------------------------------------------------------------
133+
Custom Scan (ColumnarScan) on _hyper_1_19_chunk (actual rows=3000 loops=1)
134+
-> Seq Scan on compress_hyper_2_20_chunk (actual rows=3 loops=1)
135+
(2 rows)
146136

147137
-- status should be 9
148138
SELECT DISTINCT status FROM _timescaledb_catalog.chunk WHERE compressed_chunk_id IS NOT NULL;
149139
status
150140
--------
151-
9
141+
1
152142
(1 row)
153143

154144
ROLLBACK;
@@ -158,19 +148,17 @@ SET timescaledb.enable_direct_compress_copy = true;
158148
SET timescaledb.enable_direct_compress_copy_client_sorted = false;
159149
COPY metrics FROM PROGRAM 'seq 3000 | xargs -II date -d "2025-01-01 - I minute" +"%Y-%m-%d %H:%M:%S,d1,0.I"' WITH (FORMAT CSV);
160150
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM metrics;
161-
QUERY PLAN
162-
----------------------------------------------------------------------------------
163-
Append (actual rows=3000 loops=1)
164-
-> Custom Scan (ColumnarScan) on _hyper_1_21_chunk (actual rows=3000 loops=1)
165-
-> Seq Scan on compress_hyper_2_22_chunk (actual rows=3 loops=1)
166-
-> Seq Scan on _hyper_1_21_chunk (actual rows=0 loops=1)
167-
(4 rows)
151+
QUERY PLAN
152+
----------------------------------------------------------------------------
153+
Custom Scan (ColumnarScan) on _hyper_1_21_chunk (actual rows=3000 loops=1)
154+
-> Seq Scan on compress_hyper_2_22_chunk (actual rows=3 loops=1)
155+
(2 rows)
168156

169157
-- status should be 11
170158
SELECT DISTINCT status FROM _timescaledb_catalog.chunk WHERE compressed_chunk_id IS NOT NULL;
171159
status
172160
--------
173-
11
161+
3
174162
(1 row)
175163

176164
ROLLBACK;
@@ -181,13 +169,11 @@ SET timescaledb.enable_direct_compress_copy = true;
181169
SET timescaledb.enable_direct_compress_copy_client_sorted = true;
182170
COPY metrics FROM PROGRAM 'seq 0 0.2 9.8 | sed -e ''s!.[0-9]$!!'' | xargs -II date -d "2025-01-01 - I minute" +"%Y-%m-%d %H:%M:%S,dI,0.I"' WITH (FORMAT CSV);
183171
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM metrics;
184-
QUERY PLAN
185-
--------------------------------------------------------------------------------
186-
Append (actual rows=50 loops=1)
187-
-> Custom Scan (ColumnarScan) on _hyper_1_23_chunk (actual rows=50 loops=1)
188-
-> Seq Scan on compress_hyper_2_24_chunk (actual rows=10 loops=1)
189-
-> Seq Scan on _hyper_1_23_chunk (actual rows=0 loops=1)
190-
(4 rows)
172+
QUERY PLAN
173+
--------------------------------------------------------------------------
174+
Custom Scan (ColumnarScan) on _hyper_1_23_chunk (actual rows=50 loops=1)
175+
-> Seq Scan on compress_hyper_2_24_chunk (actual rows=10 loops=1)
176+
(2 rows)
191177

192178
SELECT format('%I.%I',schema_name,table_name) AS "COMPRESSED_CHUNK" FROM _timescaledb_catalog.chunk where compressed_chunk_id IS NULL \gset
193179
-- should have 10 batches
@@ -215,3 +201,107 @@ SELECT DISTINCT status FROM _timescaledb_catalog.chunk WHERE compressed_chunk_id
215201
(0 rows)
216202

217203
ROLLBACK;
204+
-- test chunk status handling
205+
CREATE TABLE metrics_status(time timestamptz) WITH (tsdb.hypertable,tsdb.partition_column='time');
206+
-- normal insert should result in chunk status 0
207+
INSERT INTO metrics_status SELECT '2025-01-01';
208+
SELECT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_status') chunk;
209+
chunk_status_text
210+
-------------------
211+
{}
212+
(1 row)
213+
214+
BEGIN;
215+
-- compressed copy into uncompressed chunk should result in chunk status 11 (compressed,partial,unordered)
216+
SET timescaledb.enable_direct_compress_copy = true;
217+
SET timescaledb.enable_direct_compress_copy_client_sorted = false;
218+
COPY metrics_status FROM STDIN;
219+
SELECT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_status') chunk;
220+
chunk_status_text
221+
--------------------------------
222+
{COMPRESSED,UNORDERED,PARTIAL}
223+
(1 row)
224+
225+
ROLLBACK;
226+
BEGIN;
227+
-- compressed sorted copy into uncompressed chunk should result in chunk status 9 (compressed,partial)
228+
SET timescaledb.enable_direct_compress_copy = true;
229+
SET timescaledb.enable_direct_compress_copy_client_sorted = true;
230+
COPY metrics_status FROM STDIN;
231+
SELECT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_status') chunk;
232+
chunk_status_text
233+
----------------------
234+
{COMPRESSED,PARTIAL}
235+
(1 row)
236+
237+
ROLLBACK;
238+
TRUNCATE metrics_status;
239+
BEGIN;
240+
-- compressed copy into new chunk should result in chunk status 3 (compressed,unordered)
241+
SET timescaledb.enable_direct_compress_copy = true;
242+
SET timescaledb.enable_direct_compress_copy_client_sorted = false;
243+
COPY metrics_status FROM STDIN;
244+
SELECT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_status') chunk;
245+
chunk_status_text
246+
------------------------
247+
{COMPRESSED,UNORDERED}
248+
(1 row)
249+
250+
ROLLBACK;
251+
BEGIN;
252+
-- compressed sorted copy into new chunk should result in chunk status 1 (compressed)
253+
SET timescaledb.enable_direct_compress_copy = true;
254+
SET timescaledb.enable_direct_compress_copy_client_sorted = true;
255+
COPY metrics_status FROM STDIN;
256+
SELECT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_status') chunk;
257+
chunk_status_text
258+
-------------------
259+
{COMPRESSED}
260+
(1 row)
261+
262+
ROLLBACK;
263+
SET timescaledb.enable_direct_compress_copy = false;
264+
SET timescaledb.enable_direct_compress_copy_client_sorted = false;
265+
INSERT INTO metrics_status SELECT '2025-01-01';
266+
SELECT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_status') chunk;
267+
chunk_status_text
268+
-------------------
269+
{}
270+
(1 row)
271+
272+
SELECT compress_chunk(show_chunks('metrics_status'));
273+
compress_chunk
274+
-----------------------------------------
275+
_timescaledb_internal._hyper_3_33_chunk
276+
(1 row)
277+
278+
SELECT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_status') chunk;
279+
chunk_status_text
280+
-------------------
281+
{COMPRESSED}
282+
(1 row)
283+
284+
BEGIN;
285+
-- compressed copy into fully compressed chunk should result in chunk status 3 (compressed,unordered)
286+
SET timescaledb.enable_direct_compress_copy = true;
287+
SET timescaledb.enable_direct_compress_copy_client_sorted = false;
288+
COPY metrics_status FROM STDIN;
289+
SELECT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_status') chunk;
290+
chunk_status_text
291+
------------------------
292+
{COMPRESSED,UNORDERED}
293+
(1 row)
294+
295+
ROLLBACK;
296+
BEGIN;
297+
-- compressed copy new chunk should result in chunk status 1 (compressed)
298+
SET timescaledb.enable_direct_compress_copy = true;
299+
SET timescaledb.enable_direct_compress_copy_client_sorted = true;
300+
COPY metrics_status FROM STDIN;
301+
SELECT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_status') chunk;
302+
chunk_status_text
303+
-------------------
304+
{COMPRESSED}
305+
(1 row)
306+
307+
ROLLBACK;

0 commit comments

Comments
 (0)