Skip to content

Commit 345a0f0

Browse files
authored
Improve uuid support in the vectorized pipeline (timescale#8585)
Some places did not handle UUIDs, which led to internal program errors.
1 parent ede3c23 commit 345a0f0

File tree

10 files changed

+142
-52
lines changed

10 files changed

+142
-52
lines changed

.github/workflows/linux-32bit-build-and-test.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ jobs:
6666
telemetry
6767
transparent_decompress_chunk-*
6868
transparent_decompression-*
69+
vector_agg_filter
6970
vector_agg_groupagg
7071
vector_agg_grouping
7172
vector_agg_text

.github/workflows/windows-build-and-test.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,15 @@ jobs:
8585
merge_append_partially_compressed
8686
metadata
8787
telemetry
88-
SKIPS: >-
89-
bgw_db_scheduler
90-
bgw_db_scheduler_fixed
88+
vector_agg_filter
9189
vector_agg_groupagg
9290
vector_agg_grouping
9391
vector_agg_text
9492
vector_agg_uuid
9593
vectorized_aggregation
94+
SKIPS: >-
95+
bgw_db_scheduler
96+
bgw_db_scheduler_fixed
9697
steps:
9798
- name: Setup WSL
9899
uses: Vampire/[email protected]

tsl/src/nodes/decompress_chunk/compressed_batch.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <utils/builtins.h>
1212
#include <utils/date.h>
1313
#include <utils/timestamp.h>
14+
#include <utils/uuid.h>
1415

1516
#include "compression/arrow_c_data_interface.h"
1617
#include "compression/compression.h"
@@ -73,6 +74,7 @@ make_single_value_arrow_arithmetic(Oid arithmetic_type, Datum datum, bool isnull
7374
FOR_TYPE(TIMESTAMPTZOID, TimestampTz, DatumGetTimestampTz);
7475
FOR_TYPE(TIMESTAMPOID, Timestamp, DatumGetTimestamp);
7576
FOR_TYPE(DATEOID, DateADT, DatumGetDateADT);
77+
FOR_TYPE(UUIDOID, pg_uuid_t, *DatumGetUUIDP);
7678
default:
7779
elog(ERROR, "unexpected column type '%s'", format_type_be(arithmetic_type));
7880
pg_unreachable();

tsl/src/nodes/vector_agg/plan.c

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -366,11 +366,14 @@ get_vectorized_grouping_type(const VectorQualInfo *vqinfo, Agg *agg, List *resol
366366
}
367367
}
368368
#ifdef TS_USE_UMASH
369-
else
369+
/*
370+
* We also have the UUID type which is by-reference and has a
371+
* columnar in-memory representation, but no specialized single-column
372+
* vectorized grouping support. It can use the serialized grouping
373+
* strategy.
374+
*/
375+
else if (single_grouping_var->vartype == TEXTOID)
370376
{
371-
Ensure(single_grouping_var->vartype == TEXTOID,
372-
"invalid vector type %d for grouping",
373-
single_grouping_var->vartype);
374377
return VAGT_HashSingleText;
375378
}
376379
#endif
@@ -391,19 +394,20 @@ get_vectorized_grouping_type(const VectorQualInfo *vqinfo, Agg *agg, List *resol
391394
* aggregation node in the plan tree. This is used for testing.
392395
*/
393396
bool
394-
has_vector_agg_node(Plan *plan, bool *has_normal_agg)
397+
has_vector_agg_node(Plan *plan, bool *has_postgres_partial_agg)
395398
{
396-
if (IsA(plan, Agg))
399+
if (IsA(plan, Agg) && castNode(Agg, plan)->aggsplit == AGGSPLIT_INITIAL_SERIAL)
397400
{
398-
*has_normal_agg = true;
401+
*has_postgres_partial_agg = true;
402+
return false;
399403
}
400404

401-
if (plan->lefttree && has_vector_agg_node(plan->lefttree, has_normal_agg))
405+
if (plan->lefttree && has_vector_agg_node(plan->lefttree, has_postgres_partial_agg))
402406
{
403407
return true;
404408
}
405409

406-
if (plan->righttree && has_vector_agg_node(plan->righttree, has_normal_agg))
410+
if (plan->righttree && has_vector_agg_node(plan->righttree, has_postgres_partial_agg))
407411
{
408412
return true;
409413
}
@@ -437,7 +441,7 @@ has_vector_agg_node(Plan *plan, bool *has_normal_agg)
437441
ListCell *lc;
438442
foreach (lc, append_plans)
439443
{
440-
if (has_vector_agg_node(lfirst(lc), has_normal_agg))
444+
if (has_vector_agg_node(lfirst(lc), has_postgres_partial_agg))
441445
{
442446
return true;
443447
}

tsl/src/planner.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -219,19 +219,31 @@ tsl_postprocess_plan(PlannedStmt *stmt)
219219
#ifdef TS_DEBUG
220220
if (ts_guc_debug_require_vector_agg != DRO_Allow)
221221
{
222-
bool has_normal_agg = false;
223-
const bool has_vector_agg = has_vector_agg_node(stmt->planTree, &has_normal_agg);
224-
const bool should_have_vector_agg = (ts_guc_debug_require_vector_agg == DRO_Require);
222+
bool has_postgres_partial_agg = false;
223+
const bool has_vector_partial_agg =
224+
has_vector_agg_node(stmt->planTree, &has_postgres_partial_agg);
225225

226226
/*
227227
* For convenience, we don't complain about queries that don't have
228228
* aggregation at all.
229229
*/
230-
if ((has_normal_agg || has_vector_agg) && (has_vector_agg != should_have_vector_agg))
230+
if (has_postgres_partial_agg || has_vector_partial_agg)
231231
{
232-
ereport(ERROR,
233-
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
234-
errmsg("vector aggregation inconsistent with debug_require_vector_agg GUC")));
232+
if (has_postgres_partial_agg && ts_guc_debug_require_vector_agg == DRO_Require)
233+
{
234+
ereport(ERROR,
235+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
236+
errmsg("postgres partial aggregation nodes inconsistent with "
237+
"debug_require_vector_agg GUC")));
238+
}
239+
240+
if (has_vector_partial_agg && ts_guc_debug_require_vector_agg == DRO_Forbid)
241+
{
242+
ereport(ERROR,
243+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
244+
errmsg("vectorized partial aggregation nodes inconsistent with "
245+
"debug_require_vector_agg GUC")));
246+
}
235247
}
236248
}
237249
#endif

tsl/test/expected/vector_agg_filter.out

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3033,5 +3033,47 @@ order by 2, 3;
30333033
| 20019 | 10125
30343034
(10 rows)
30353035

3036+
reset timescaledb.debug_require_vector_agg;
3037+
-- Grouping with a scalar UUID column (segmentby or default).
3038+
create table uuid_default(ts int, value int4)
3039+
with (tsdb.hypertable, tsdb.partition_column = 'ts', tsdb.compress,
3040+
tsdb.chunk_interval = 1000);
3041+
insert into uuid_default select generate_series(0, 999), 1;
3042+
select count(compress_chunk(x)) from show_chunks('uuid_default') x;
3043+
count
3044+
-------
3045+
1
3046+
(1 row)
3047+
3048+
alter table uuid_default add column id uuid default '842ab294-923a-4f50-be7d-af6c51903a5f';
3049+
alter table uuid_default set (tsdb.compress_segmentby = 'id');
3050+
insert into uuid_default select generate_series(1000, 1999), 2, '5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993';
3051+
select count(compress_chunk(x)) from show_chunks('uuid_default') x;
3052+
NOTICE: chunk "_hyper_3_5_chunk" is already converted to columnstore
3053+
count
3054+
-------
3055+
2
3056+
(1 row)
3057+
3058+
set timescaledb.debug_require_vector_agg = 'require';
3059+
select id, sum(value) from uuid_default group by id;
3060+
id | sum
3061+
--------------------------------------+------
3062+
5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993 | 2000
3063+
842ab294-923a-4f50-be7d-af6c51903a5f | 1000
3064+
(2 rows)
3065+
3066+
select sum(value) filter (where id = '5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993') from uuid_default;
3067+
sum
3068+
------
3069+
2000
3070+
(1 row)
3071+
3072+
select sum(value) filter (where id = '842ab294-923a-4f50-be7d-af6c51903a5f') from uuid_default;
3073+
sum
3074+
------
3075+
1000
3076+
(1 row)
3077+
30363078
reset timescaledb.debug_require_vector_agg;
30373079
reset max_parallel_workers_per_gather;

tsl/test/expected/vector_agg_groupagg.out

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -101,56 +101,56 @@ select s, sum(value) from groupagg group by s order by s nulls first limit 10;
101101

102102
reset timescaledb.debug_require_vector_agg;
103103
-- More tests for dictionary encoding.
104-
create table text_table(ts int);
104+
create table text_table(ts int, a text);
105105
select create_hypertable('text_table', 'ts', chunk_time_interval => 3);
106106
create_hypertable
107107
-------------------------
108108
(3,public,text_table,t)
109109
(1 row)
110110

111-
alter table text_table set (timescaledb.compress);
112-
insert into text_table select 0 /*, default */ from generate_series(1, 1000) x;
111+
alter table text_table set (timescaledb.compress, timescaledb.compress_segmentby = 'a');
112+
insert into text_table select -1, 'scalar' from generate_series(1, 1000) x;
113113
select count(compress_chunk(x)) from show_chunks('text_table') x;
114114
count
115115
-------
116116
1
117117
(1 row)
118118

119-
alter table text_table add column a text collate "POSIX" default 'default';
120-
alter table text_table set (timescaledb.compress,
121-
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a');
122119
insert into text_table select 1, '' from generate_series(1, 1000) x;
123120
insert into text_table select 2, 'same' from generate_series(1, 1000) x;
124121
insert into text_table select 3, 'different' || x from generate_series(1, 1000) x;
125122
insert into text_table select 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x;
126123
insert into text_table select 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x;
124+
alter table text_table set (timescaledb.compress,
125+
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a');
127126
select count(compress_chunk(x)) from show_chunks('text_table') x;
127+
NOTICE: chunk "_hyper_3_7_chunk" is already converted to columnstore
128128
count
129129
-------
130-
2
130+
3
131131
(1 row)
132132

133133
vacuum analyze text_table;
134134
set timescaledb.debug_require_vector_agg to 'require';
135-
select a, count(*) from text_table group by a order by a limit 10;
135+
select a, count(*) from text_table group by a order by count(*) desc, a limit 10;
136136
a | count
137137
-------------------------+-------
138138
| 1000
139-
default | 1000
139+
same | 1000
140+
scalar | 1000
141+
$ | 1000
142+
same-with-nulls | 500
140143
different-with-nulls1 | 1
141144
different-with-nulls101 | 1
142145
different-with-nulls103 | 1
143146
different-with-nulls105 | 1
144147
different-with-nulls107 | 1
145-
different-with-nulls109 | 1
146-
different-with-nulls11 | 1
147-
different-with-nulls111 | 1
148148
(10 rows)
149149

150150
-- The hash grouping policies do not support the GroupAggregate mode in the
151-
-- reverse order.
151+
-- reverse order. We have to filter out the chunk where 'a' is segmentby.
152152
set timescaledb.debug_require_vector_agg to 'forbid';
153-
select a, count(*) from text_table group by a order by a desc limit 10;
153+
select a, count(*) from text_table where ts >= 0 group by a order by a desc limit 10;
154154
a | count
155155
-----------------+-------
156156
$ | 1000
@@ -166,19 +166,20 @@ select a, count(*) from text_table group by a order by a desc limit 10;
166166
(10 rows)
167167

168168
reset timescaledb.debug_require_vector_agg;
169+
reset enable_sort;
169170
-- with NULLS FIRST
170171
select count(decompress_chunk(x)) from show_chunks('text_table') x;
171172
count
172173
-------
173-
2
174+
3
174175
(1 row)
175176

176177
alter table text_table set (timescaledb.compress,
177178
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a nulls first');
178179
select count(compress_chunk(x)) from show_chunks('text_table') x;
179180
count
180181
-------
181-
2
182+
3
182183
(1 row)
183184

184185
set timescaledb.debug_require_vector_agg to 'require';
@@ -187,14 +188,14 @@ select a, count(*) from text_table group by a order by a nulls first limit 10;
187188
-------------------------+-------
188189
$ | 1000
189190
| 1000
190-
default | 1000
191191
different-with-nulls1 | 1
192192
different-with-nulls101 | 1
193193
different-with-nulls103 | 1
194194
different-with-nulls105 | 1
195195
different-with-nulls107 | 1
196196
different-with-nulls109 | 1
197197
different-with-nulls11 | 1
198+
different-with-nulls111 | 1
198199
(10 rows)
199200

200201
reset timescaledb.debug_require_vector_agg;
@@ -203,7 +204,7 @@ set timescaledb.debug_require_vector_agg to 'forbid';
203204
select ts, a, count(*) from text_table group by ts, a order by ts, a limit 10;
204205
ts | a | count
205206
----+---------------+-------
206-
0 | default | 1000
207+
-1 | scalar | 1000
207208
1 | | 1000
208209
2 | same | 1000
209210
3 | different1 | 1
@@ -220,14 +221,14 @@ select a, ts, count(*) from text_table group by a, ts order by a desc, ts desc l
220221
-----------------+----+-------
221222
$ | 5 | 500
222223
$ | 4 | 500
224+
scalar | -1 | 1000
223225
same-with-nulls | 4 | 500
224226
same | 2 | 1000
225227
different999 | 3 | 1
226228
different998 | 3 | 1
227229
different997 | 3 | 1
228230
different996 | 3 | 1
229231
different995 | 3 | 1
230-
different994 | 3 | 1
231232
(10 rows)
232233

233234
reset timescaledb.debug_require_vector_agg;

tsl/test/expected/vector_agg_segmentby.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ set max_parallel_workers_per_gather = 0;
3131
set timescaledb.debug_require_vector_agg = 'require';
3232
set timescaledb.enable_vectorized_aggregation to off;
3333
select sum(t) from svagg;
34-
ERROR: vector aggregation inconsistent with debug_require_vector_agg GUC
34+
ERROR: postgres partial aggregation nodes inconsistent with debug_require_vector_agg GUC
3535
set timescaledb.debug_require_vector_agg = 'forbid';
3636
set timescaledb.enable_vectorized_aggregation to off;
3737
select sum(t) from svagg;
@@ -43,7 +43,7 @@ select sum(t) from svagg;
4343
set timescaledb.debug_require_vector_agg = 'forbid';
4444
set timescaledb.enable_vectorized_aggregation to on;
4545
select sum(t) from svagg;
46-
ERROR: vector aggregation inconsistent with debug_require_vector_agg GUC
46+
ERROR: vectorized partial aggregation nodes inconsistent with debug_require_vector_agg GUC
4747
set timescaledb.debug_require_vector_agg = 'require';
4848
set timescaledb.enable_vectorized_aggregation to on;
4949
select sum(t) from svagg;

tsl/test/sql/vector_agg_filter.sql

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,33 @@ group by ss
142142
order by 2, 3;
143143

144144
reset timescaledb.debug_require_vector_agg;
145+
146+
147+
-- Grouping with a scalar UUID column (segmentby or default).
148+
create table uuid_default(ts int, value int4)
149+
with (tsdb.hypertable, tsdb.partition_column = 'ts', tsdb.compress,
150+
tsdb.chunk_interval = 1000);
151+
152+
insert into uuid_default select generate_series(0, 999), 1;
153+
154+
select count(compress_chunk(x)) from show_chunks('uuid_default') x;
155+
156+
alter table uuid_default add column id uuid default '842ab294-923a-4f50-be7d-af6c51903a5f';
157+
158+
alter table uuid_default set (tsdb.compress_segmentby = 'id');
159+
160+
insert into uuid_default select generate_series(1000, 1999), 2, '5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993';
161+
162+
select count(compress_chunk(x)) from show_chunks('uuid_default') x;
163+
164+
set timescaledb.debug_require_vector_agg = 'require';
165+
166+
select id, sum(value) from uuid_default group by id;
167+
168+
select sum(value) filter (where id = '5dd0565f-1ddf-4a6c-9e96-9b2b8c8c3993') from uuid_default;
169+
170+
select sum(value) filter (where id = '842ab294-923a-4f50-be7d-af6c51903a5f') from uuid_default;
171+
172+
reset timescaledb.debug_require_vector_agg;
173+
145174
reset max_parallel_workers_per_gather;

0 commit comments

Comments
 (0)