Skip to content

Commit 345a0f0

Browse files
authored
Improve uuid support in the vectorized pipeline (timescale#8585)
Some places did not handle UUIDs, which led to internal program errors.
1 parent ede3c23 commit 345a0f0

File tree

10 files changed

+142
-52
lines changed

10 files changed

+142
-52
lines changed

.github/workflows/linux-32bit-build-and-test.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ jobs:
6666
telemetry
6767
transparent_decompress_chunk-*
6868
transparent_decompression-*
69+
vector_agg_filter
6970
vector_agg_groupagg
7071
vector_agg_grouping
7172
vector_agg_text

.github/workflows/windows-build-and-test.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,15 @@ jobs:
8585
merge_append_partially_compressed
8686
metadata
8787
telemetry
88-
SKIPS: >-
89-
bgw_db_scheduler
90-
bgw_db_scheduler_fixed
88+
vector_agg_filter
9189
vector_agg_groupagg
9290
vector_agg_grouping
9391
vector_agg_text
9492
vector_agg_uuid
9593
vectorized_aggregation
94+
SKIPS: >-
95+
bgw_db_scheduler
96+
bgw_db_scheduler_fixed
9697
steps:
9798
- name: Setup WSL
9899
uses: Vampire/[email protected]

tsl/src/nodes/decompress_chunk/compressed_batch.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <utils/builtins.h>
1212
#include <utils/date.h>
1313
#include <utils/timestamp.h>
14+
#include <utils/uuid.h>
1415

1516
#include "compression/arrow_c_data_interface.h"
1617
#include "compression/compression.h"
@@ -73,6 +74,7 @@ make_single_value_arrow_arithmetic(Oid arithmetic_type, Datum datum, bool isnull
7374
FOR_TYPE(TIMESTAMPTZOID, TimestampTz, DatumGetTimestampTz);
7475
FOR_TYPE(TIMESTAMPOID, Timestamp, DatumGetTimestamp);
7576
FOR_TYPE(DATEOID, DateADT, DatumGetDateADT);
77+
FOR_TYPE(UUIDOID, pg_uuid_t, *DatumGetUUIDP);
7678
default:
7779
elog(ERROR, "unexpected column type '%s'", format_type_be(arithmetic_type));
7880
pg_unreachable();

tsl/src/nodes/vector_agg/plan.c

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -366,11 +366,14 @@ get_vectorized_grouping_type(const VectorQualInfo *vqinfo, Agg *agg, List *resol
366366
}
367367
}
368368
#ifdef TS_USE_UMASH
369-
else
369+
/*
370+
* We also have the UUID type which is by-reference and has a
371+
* columnar in-memory representation, but no specialized single-column
372+
* vectorized grouping support. It can use the serialized grouping
373+
* strategy.
374+
*/
375+
else if (single_grouping_var->vartype == TEXTOID)
370376
{
371-
Ensure(single_grouping_var->vartype == TEXTOID,
372-
"invalid vector type %d for grouping",
373-
single_grouping_var->vartype);
374377
return VAGT_HashSingleText;
375378
}
376379
#endif
@@ -391,19 +394,20 @@ get_vectorized_grouping_type(const VectorQualInfo *vqinfo, Agg *agg, List *resol
391394
* aggregation node in the plan tree. This is used for testing.
392395
*/
393396
bool
394-
has_vector_agg_node(Plan *plan, bool *has_normal_agg)
397+
has_vector_agg_node(Plan *plan, bool *has_postgres_partial_agg)
395398
{
396-
if (IsA(plan, Agg))
399+
if (IsA(plan, Agg) && castNode(Agg, plan)->aggsplit == AGGSPLIT_INITIAL_SERIAL)
397400
{
398-
*has_normal_agg = true;
401+
*has_postgres_partial_agg = true;
402+
return false;
399403
}
400404

401-
if (plan->lefttree && has_vector_agg_node(plan->lefttree, has_normal_agg))
405+
if (plan->lefttree && has_vector_agg_node(plan->lefttree, has_postgres_partial_agg))
402406
{
403407
return true;
404408
}
405409

406-
if (plan->righttree && has_vector_agg_node(plan->righttree, has_normal_agg))
410+
if (plan->righttree && has_vector_agg_node(plan->righttree, has_postgres_partial_agg))
407411
{
408412
return true;
409413
}
@@ -437,7 +441,7 @@ has_vector_agg_node(Plan *plan, bool *has_normal_agg)
437441
ListCell *lc;
438442
foreach (lc, append_plans)
439443
{
440-
if (has_vector_agg_node(lfirst(lc), has_normal_agg))
444+
if (has_vector_agg_node(lfirst(lc), has_postgres_partial_agg))
441445
{
442446
return true;
443447
}

tsl/src/planner.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -219,19 +219,31 @@ tsl_postprocess_plan(PlannedStmt *stmt)
219219
#ifdef TS_DEBUG
220220
if (ts_guc_debug_require_vector_agg != DRO_Allow)
221221
{
222-
bool has_normal_agg = false;
223-
const bool has_vector_agg = has_vector_agg_node(stmt->planTree, &has_normal_agg);
224-
const bool should_have_vector_agg = (ts_guc_debug_require_vector_agg == DRO_Require);
222+
bool has_postgres_partial_agg = false;
223+
const bool has_vector_partial_agg =
224+
has_vector_agg_node(stmt->planTree, &has_postgres_partial_agg);
225225

226226
/*
227227
* For convenience, we don't complain about queries that don't have
228228
* aggregation at all.
229229
*/
230-
if ((has_normal_agg || has_vector_agg) && (has_vector_agg != should_have_vector_agg))
230+
if (has_postgres_partial_agg || has_vector_partial_agg)
231231
{
232-
ereport(ERROR,
233-
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
234-
errmsg("vector aggregation inconsistent with debug_require_vector_agg GUC")));
232+
if (has_postgres_partial_agg && ts_guc_debug_require_vector_agg == DRO_Require)
233+
{
234+
ereport(ERROR,
235+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
236+
errmsg("postgres partial aggregation nodes inconsistent with "
237+
"debug_require_vector_agg GUC")));
238+
}
239+
240+
if (has_vector_partial_agg && ts_guc_debug_require_vector_agg == DRO_Forbid)
241+
{
242+
ereport(ERROR,
243+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
244+
errmsg("vectorized partial aggregation nodes inconsistent with "
245+
"debug_require_vector_agg GUC")));
246+
}
235247
}
236248
}
237249
#endif

tsl/test/expected/vector_agg_filter.out

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3033,5 +3033,47 @@ order by 2, 3;
30333033
| 20019 | 10125
30343034
(10 rows)
30353035

3036+
reset timescaledb.debug_require_vector_agg;
3037+
-- Grouping with a scalar UUID column (segmentby or default).
3038+
create table uuid_default(ts int, value int4)
3039+
with (tsdb.hypertable, tsdb.partition_column = 'ts', tsdb.compress,
3040+
tsdb.chunk_interval = 1000);
3041+
insert into uuid_default select generate_series(0, 999), 1;
3042+
select count(compress_chunk(x)) from show_chunks('uuid_default') x;
3043+
count
3044+
-------
3045+
1
3046+
(1 row)
3047+
3048+
alter table uuid_default add column id uuid default '842ab294-923a-4f50-be7d-af6c51903a5f';
3049+
alter table uuid_default set (tsdb.compress_segmentby = 'id');
3050+
insert into uuid_default select generate_series(1000, 1999), 2, '5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993';
3051+
select count(compress_chunk(x)) from show_chunks('uuid_default') x;
3052+
NOTICE: chunk "_hyper_3_5_chunk" is already converted to columnstore
3053+
count
3054+
-------
3055+
2
3056+
(1 row)
3057+
3058+
set timescaledb.debug_require_vector_agg = 'require';
3059+
select id, sum(value) from uuid_default group by id;
3060+
id | sum
3061+
--------------------------------------+------
3062+
5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993 | 2000
3063+
842ab294-923a-4f50-be7d-af6c51903a5f | 1000
3064+
(2 rows)
3065+
3066+
select sum(value) filter (where id = '5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993') from uuid_default;
3067+
sum
3068+
------
3069+
2000
3070+
(1 row)
3071+
3072+
select sum(value) filter (where id = '842ab294-923a-4f50-be7d-af6c51903a5f') from uuid_default;
3073+
sum
3074+
------
3075+
1000
3076+
(1 row)
3077+
30363078
reset timescaledb.debug_require_vector_agg;
30373079
reset max_parallel_workers_per_gather;

tsl/test/expected/vector_agg_groupagg.out

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -101,56 +101,56 @@ select s, sum(value) from groupagg group by s order by s nulls first limit 10;
101101

102102
reset timescaledb.debug_require_vector_agg;
103103
-- More tests for dictionary encoding.
104-
create table text_table(ts int);
104+
create table text_table(ts int, a text);
105105
select create_hypertable('text_table', 'ts', chunk_time_interval => 3);
106106
create_hypertable
107107
-------------------------
108108
(3,public,text_table,t)
109109
(1 row)
110110

111-
alter table text_table set (timescaledb.compress);
112-
insert into text_table select 0 /*, default */ from generate_series(1, 1000) x;
111+
alter table text_table set (timescaledb.compress, timescaledb.compress_segmentby = 'a');
112+
insert into text_table select -1, 'scalar' from generate_series(1, 1000) x;
113113
select count(compress_chunk(x)) from show_chunks('text_table') x;
114114
count
115115
-------
116116
1
117117
(1 row)
118118

119-
alter table text_table add column a text collate "POSIX" default 'default';
120-
alter table text_table set (timescaledb.compress,
121-
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a');
122119
insert into text_table select 1, '' from generate_series(1, 1000) x;
123120
insert into text_table select 2, 'same' from generate_series(1, 1000) x;
124121
insert into text_table select 3, 'different' || x from generate_series(1, 1000) x;
125122
insert into text_table select 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x;
126123
insert into text_table select 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x;
124+
alter table text_table set (timescaledb.compress,
125+
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a');
127126
select count(compress_chunk(x)) from show_chunks('text_table') x;
127+
NOTICE: chunk "_hyper_3_7_chunk" is already converted to columnstore
128128
count
129129
-------
130-
2
130+
3
131131
(1 row)
132132

133133
vacuum analyze text_table;
134134
set timescaledb.debug_require_vector_agg to 'require';
135-
select a, count(*) from text_table group by a order by a limit 10;
135+
select a, count(*) from text_table group by a order by count(*) desc, a limit 10;
136136
a | count
137137
-------------------------+-------
138138
| 1000
139-
default | 1000
139+
same | 1000
140+
scalar | 1000
141+
$ | 1000
142+
same-with-nulls | 500
140143
different-with-nulls1 | 1
141144
different-with-nulls101 | 1
142145
different-with-nulls103 | 1
143146
different-with-nulls105 | 1
144147
different-with-nulls107 | 1
145-
different-with-nulls109 | 1
146-
different-with-nulls11 | 1
147-
different-with-nulls111 | 1
148148
(10 rows)
149149

150150
-- The hash grouping policies do not support the GroupAggregate mode in the
151-
-- reverse order.
151+
-- reverse order. We have to filter out the chunk where 'a' is segmentby.
152152
set timescaledb.debug_require_vector_agg to 'forbid';
153-
select a, count(*) from text_table group by a order by a desc limit 10;
153+
select a, count(*) from text_table where ts >= 0 group by a order by a desc limit 10;
154154
a | count
155155
-----------------+-------
156156
$ | 1000
@@ -166,19 +166,20 @@ select a, count(*) from text_table group by a order by a desc limit 10;
166166
(10 rows)
167167

168168
reset timescaledb.debug_require_vector_agg;
169+
reset enable_sort;
169170
-- with NULLS FIRST
170171
select count(decompress_chunk(x)) from show_chunks('text_table') x;
171172
count
172173
-------
173-
2
174+
3
174175
(1 row)
175176

176177
alter table text_table set (timescaledb.compress,
177178
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a nulls first');
178179
select count(compress_chunk(x)) from show_chunks('text_table') x;
179180
count
180181
-------
181-
2
182+
3
182183
(1 row)
183184

184185
set timescaledb.debug_require_vector_agg to 'require';
@@ -187,14 +188,14 @@ select a, count(*) from text_table group by a order by a nulls first limit 10;
187188
-------------------------+-------
188189
$ | 1000
189190
| 1000
190-
default | 1000
191191
different-with-nulls1 | 1
192192
different-with-nulls101 | 1
193193
different-with-nulls103 | 1
194194
different-with-nulls105 | 1
195195
different-with-nulls107 | 1
196196
different-with-nulls109 | 1
197197
different-with-nulls11 | 1
198+
different-with-nulls111 | 1
198199
(10 rows)
199200

200201
reset timescaledb.debug_require_vector_agg;
@@ -203,7 +204,7 @@ set timescaledb.debug_require_vector_agg to 'forbid';
203204
select ts, a, count(*) from text_table group by ts, a order by ts, a limit 10;
204205
ts | a | count
205206
----+---------------+-------
206-
0 | default | 1000
207+
-1 | scalar | 1000
207208
1 | | 1000
208209
2 | same | 1000
209210
3 | different1 | 1
@@ -220,14 +221,14 @@ select a, ts, count(*) from text_table group by a, ts order by a desc, ts desc l
220221
-----------------+----+-------
221222
$ | 5 | 500
222223
$ | 4 | 500
224+
scalar | -1 | 1000
223225
same-with-nulls | 4 | 500
224226
same | 2 | 1000
225227
different999 | 3 | 1
226228
different998 | 3 | 1
227229
different997 | 3 | 1
228230
different996 | 3 | 1
229231
different995 | 3 | 1
230-
different994 | 3 | 1
231232
(10 rows)
232233

233234
reset timescaledb.debug_require_vector_agg;

tsl/test/expected/vector_agg_segmentby.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ set max_parallel_workers_per_gather = 0;
3131
set timescaledb.debug_require_vector_agg = 'require';
3232
set timescaledb.enable_vectorized_aggregation to off;
3333
select sum(t) from svagg;
34-
ERROR: vector aggregation inconsistent with debug_require_vector_agg GUC
34+
ERROR: postgres partial aggregation nodes inconsistent with debug_require_vector_agg GUC
3535
set timescaledb.debug_require_vector_agg = 'forbid';
3636
set timescaledb.enable_vectorized_aggregation to off;
3737
select sum(t) from svagg;
@@ -43,7 +43,7 @@ select sum(t) from svagg;
4343
set timescaledb.debug_require_vector_agg = 'forbid';
4444
set timescaledb.enable_vectorized_aggregation to on;
4545
select sum(t) from svagg;
46-
ERROR: vector aggregation inconsistent with debug_require_vector_agg GUC
46+
ERROR: vectorized partial aggregation nodes inconsistent with debug_require_vector_agg GUC
4747
set timescaledb.debug_require_vector_agg = 'require';
4848
set timescaledb.enable_vectorized_aggregation to on;
4949
select sum(t) from svagg;

tsl/test/sql/vector_agg_filter.sql

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,33 @@ group by ss
142142
order by 2, 3;
143143

144144
reset timescaledb.debug_require_vector_agg;
145+
146+
147+
-- Grouping with a scalar UUID column (segmentby or default).
148+
create table uuid_default(ts int, value int4)
149+
with (tsdb.hypertable, tsdb.partition_column = 'ts', tsdb.compress,
150+
tsdb.chunk_interval = 1000);
151+
152+
insert into uuid_default select generate_series(0, 999), 1;
153+
154+
select count(compress_chunk(x)) from show_chunks('uuid_default') x;
155+
156+
alter table uuid_default add column id uuid default '842ab294-923a-4f50-be7d-af6c51903a5f';
157+
158+
alter table uuid_default set (tsdb.compress_segmentby = 'id');
159+
160+
insert into uuid_default select generate_series(1000, 1999), 2, '5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993';
161+
162+
select count(compress_chunk(x)) from show_chunks('uuid_default') x;
163+
164+
set timescaledb.debug_require_vector_agg = 'require';
165+
166+
select id, sum(value) from uuid_default group by id;
167+
168+
select sum(value) filter (where id = '5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993') from uuid_default;
169+
170+
select sum(value) filter (where id = '842ab294-923a-4f50-be7d-af6c51903a5f') from uuid_default;
171+
172+
reset timescaledb.debug_require_vector_agg;
173+
145174
reset max_parallel_workers_per_gather;

0 commit comments

Comments
 (0)