Skip to content

Commit 6e0e00d

Browse files
Fix gapfill bug when aggregates are in expressions with groupby columns (#8550)
Fixes #4894 Gapfill used to error out when aggregates were used in expressions with group-by columns like `(agg + gby_var)` and when gapfill target entries were not matching the table column order. It is fixed by fixing group-by vars to match execution group-by vars.
1 parent 0843d8b commit 6e0e00d

File tree

5 files changed

+317
-33
lines changed

5 files changed

+317
-33
lines changed

.unreleased/pr_8550

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fixes: #8550 Error in Gapfill with expressions over aggregates and groupby columns and out-of-order columns
2+
Thanks: @MKrkkl for reporting a bug in Gapfill queries with expressions over aggregates and groupby columns

tsl/src/nodes/gapfill/gapfill_exec.c

Lines changed: 59 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@ static bool gapfill_state_is_new_group(GapFillState *state, TupleTableSlot *slot
6666
static void gapfill_state_set_next(GapFillState *state, TupleTableSlot *subslot);
6767
static TupleTableSlot *gapfill_state_return_subplan_slot(GapFillState *state);
6868
static TupleTableSlot *gapfill_fetch_next_tuple(GapFillState *state);
69-
static void gapfill_state_initialize_columns(GapFillState *state);
69+
static void gapfill_state_initialize_columns(GapFillState *state, List *exec_tlist);
7070
static GapFillColumnState *gapfill_column_state_create(GapFillColumnType ctype, Oid typeid);
7171
static bool gapfill_is_group_column(GapFillState *state, TargetEntry *tle);
72-
static Node *gapfill_aggref_mutator(Node *node, void *context);
72+
static TargetEntry *gapfill_get_fixed_agg_expr_column(GapFillState *state, TargetEntry *tle);
7373

7474
static CustomExecMethods gapfill_state_methods = {
7575
.BeginCustomScan = gapfill_begin,
@@ -717,10 +717,8 @@ gapfill_begin(CustomScanState *node, EState *estate, int eflags)
717717
FuncExpr *func = linitial(cscan->custom_private);
718718
TupleDesc tupledesc = state->csstate.ss.ps.ps_ResultTupleSlot->tts_tupleDescriptor;
719719
List *targetlist = copyObject(state->csstate.ss.ps.plan->targetlist);
720-
Node *entry;
721720
bool isnull;
722721
Datum arg_value;
723-
int i;
724722

725723
state->gapfill_typid = func->funcresulttype;
726724
state->state = FETCHED_NONE;
@@ -802,27 +800,11 @@ gapfill_begin(CustomScanState *node, EState *estate, int eflags)
802800
state->gapfill_end = gapfill_datum_get_internal(arg_value, func->funcresulttype);
803801
}
804802

805-
gapfill_state_initialize_columns(state);
803+
gapfill_state_initialize_columns(state, targetlist);
806804

807805
/*
808806
* Build ProjectionInfo that will be used for gap filled tuples only.
809-
*
810-
* For every NULL_COLUMN we take the original expression tree from the
811-
* subplan and replace Aggref nodes with Const NULL nodes. This is
812-
* necessary because the expression might be evaluated below the
813-
* aggregation so we need to pull up expression from subplan into
814-
* projection for gapfilled tuples so expressions like COALESCE work
815-
* correctly for gapfilled tuples.
816807
*/
817-
for (i = 0; i < state->ncolumns; i++)
818-
{
819-
if (state->columns[i]->ctype == NULL_COLUMN)
820-
{
821-
entry = copyObject(list_nth(cscan->custom_scan_tlist, i));
822-
entry = gapfill_aggref_mutator(entry, NULL);
823-
lfirst(list_nth_cell(targetlist, i)) = entry;
824-
}
825-
}
826808
state->pi = ExecBuildProjectionInfo(targetlist,
827809
state->csstate.ss.ps.ps_ExprContext,
828810
MakeSingleTupleTableSlot(tupledesc, &TTSOpsVirtual),
@@ -1205,7 +1187,7 @@ gapfill_fetch_next_tuple(GapFillState *state)
12051187
* Initialize column meta data
12061188
*/
12071189
static void
1208-
gapfill_state_initialize_columns(GapFillState *state)
1190+
gapfill_state_initialize_columns(GapFillState *state, List *exec_tlist)
12091191
{
12101192
TupleDesc tupledesc = state->csstate.ss.ps.ps_ResultTupleSlot->tts_tupleDescriptor;
12111193
CustomScan *cscan = castNode(CustomScan, state->csstate.ss.ps.plan);
@@ -1278,7 +1260,8 @@ gapfill_state_initialize_columns(GapFillState *state)
12781260
* column so we treat those similar to GROUP BY column for gapfill
12791261
* purposes.
12801262
*/
1281-
if (!contain_agg_clause((Node *) expr) && contain_var_clause((Node *) expr))
1263+
bool column_contains_aggs = contain_agg_clause((Node *) expr);
1264+
if (!column_contains_aggs && contain_var_clause((Node *) expr))
12821265
{
12831266
state->columns[i] =
12841267
gapfill_column_state_create(DERIVED_COLUMN, TupleDescAttr(tupledesc, i)->atttypid);
@@ -1287,6 +1270,41 @@ gapfill_state_initialize_columns(GapFillState *state)
12871270
continue;
12881271
}
12891272

1273+
/*
1274+
* For every column with Aggrefs we take the original expression tree from the
1275+
* subplan and replace Aggref nodes with Const NULL nodes. This is
1276+
* necessary because the expression might be evaluated below the
1277+
* aggregation so we need to pull up expression from subplan into
1278+
* projection for gapfilled tuples so expressions like COALESCE work
1279+
* correctly for gapfilled tuples.
1280+
*/
1281+
if (column_contains_aggs)
1282+
{
1283+
TargetEntry *agg_expr_tle = gapfill_get_fixed_agg_expr_column(state, tle);
1284+
Assert(agg_expr_tle);
1285+
Node *entry = copyObject((Node *) agg_expr_tle);
1286+
1287+
/* Fix for #4894 when we have expressions like (agg + group_expr):
1288+
* after getting fixed entry where aggs are replaced with NULLs
1289+
* and group expressions are replaced with exec group columns,
1290+
* check whether this column contains group columns and needs to be DERIVED or NULL.
1291+
*/
1292+
if (contain_var_clause(entry))
1293+
{
1294+
state->columns[i] =
1295+
gapfill_column_state_create(DERIVED_COLUMN,
1296+
TupleDescAttr(tupledesc, i)->atttypid);
1297+
state->multigroup = true;
1298+
state->groups_initialized = false;
1299+
}
1300+
else
1301+
state->columns[i] =
1302+
gapfill_column_state_create(NULL_COLUMN, TupleDescAttr(tupledesc, i)->atttypid);
1303+
1304+
lfirst(list_nth_cell(exec_tlist, i)) = entry;
1305+
continue;
1306+
}
1307+
12901308
/* column with no special action from gap fill node */
12911309
state->columns[i] =
12921310
gapfill_column_state_create(NULL_COLUMN, TupleDescAttr(tupledesc, i)->atttypid);
@@ -1364,19 +1382,27 @@ gapfill_is_group_column(GapFillState *state, TargetEntry *tle)
13641382
}
13651383

13661384
/*
1367-
* Replace Aggref with const NULL
1385+
* If the target entry contains an aggregate, it has been fixed in "custom_exprs"
1386+
* so that the aggregate is replaced with NULL
1387+
* and any group expressions are replaced with exec group vars.
1388+
* We will get the fixed aggregate expression here and use it in exec tlist.
13681389
*/
1369-
static Node *
1370-
gapfill_aggref_mutator(Node *node, void *context)
1390+
static TargetEntry *
1391+
gapfill_get_fixed_agg_expr_column(GapFillState *state, TargetEntry *tle)
13711392
{
1372-
if (node == NULL)
1373-
return NULL;
1374-
1375-
if (IsA(node, Aggref))
1376-
return (Node *)
1377-
makeConst(((Aggref *) node)->aggtype, -1, InvalidOid, -2, (Datum) 0, true, false);
1393+
ListCell *lc;
1394+
CustomScan *cscan = castNode(CustomScan, state->csstate.ss.ps.plan);
1395+
List *mutated_agg_exprs_list = castNode(List, cscan->custom_exprs);
1396+
Assert(list_length(mutated_agg_exprs_list) == 1);
1397+
List *mutated_agg_exprs = castNode(List, linitial(mutated_agg_exprs_list));
13781398

1379-
return expression_tree_mutator(node, gapfill_aggref_mutator, context);
1399+
foreach (lc, mutated_agg_exprs)
1400+
{
1401+
TargetEntry *mutated_agg_expr_tle = castNode(TargetEntry, lfirst(lc));
1402+
if (tle->resno == mutated_agg_expr_tle->resno)
1403+
return mutated_agg_expr_tle;
1404+
}
1405+
return NULL;
13801406
}
13811407

13821408
/*

tsl/src/nodes/gapfill/gapfill_plan.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <postgres.h>
88
#include <nodes/execnodes.h>
99
#include <nodes/extensible.h>
10+
#include <nodes/makefuncs.h>
1011
#include <nodes/nodeFuncs.h>
1112
#include <optimizer/clauses.h>
1213
#include <optimizer/optimizer.h>
@@ -38,6 +39,22 @@ typedef struct gapfill_walker_context
3839
int count;
3940
} gapfill_walker_context;
4041

42+
/*
43+
* Replace Aggref with const NULL
44+
*/
45+
static Node *
46+
gapfill_aggref_mutator(Node *node, void *context)
47+
{
48+
if (node == NULL)
49+
return NULL;
50+
51+
if (IsA(node, Aggref))
52+
return (Node *)
53+
makeConst(((Aggref *) node)->aggtype, -1, InvalidOid, -2, (Datum) 0, true, false);
54+
55+
return expression_tree_mutator(node, gapfill_aggref_mutator, context);
56+
}
57+
4158
/*
4259
* FuncExpr is time_bucket_gapfill function call
4360
*/
@@ -181,6 +198,30 @@ gapfill_plan_create(PlannerInfo *root, RelOptInfo *rel, CustomPath *path, List *
181198
cscan->scan.plan.targetlist = tlist;
182199
cscan->custom_plans = custom_plans;
183200
cscan->custom_scan_tlist = tlist;
201+
202+
/* When we have original target entries like (agg + group_expr)
203+
* we will replace agg with NULL and put resulting expression into exec-fixed "targetlist",
204+
* but we need to fix "group_expr" to refer to exec targetlist group column.
205+
* Only then we can safely put (NULL + group_column_exec) entry into exec-fixed targetlist.
206+
*/
207+
List *mutated_agg_exprs = NIL;
208+
if (contain_agg_clause((Node *) tlist))
209+
{
210+
TargetEntry *tle;
211+
ListCell *lc;
212+
foreach (lc, tlist)
213+
{
214+
tle = lfirst(lc);
215+
if (contain_agg_clause((Node *) tle))
216+
{
217+
Node *entry = copyObject((Node *) tle);
218+
entry = gapfill_aggref_mutator(entry, NULL);
219+
mutated_agg_exprs = lappend(mutated_agg_exprs, entry);
220+
}
221+
}
222+
}
223+
cscan->custom_exprs = list_make1(mutated_agg_exprs);
224+
184225
cscan->flags = path->flags;
185226
cscan->methods = &gapfill_plan_methods;
186227

tsl/test/shared/expected/gapfill_bug.out

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,154 @@ ORDER BY
226226
Sat Jul 05 17:25:00 2025 PDT | e0145b95-6faa-4eb3-815d-5d0a91ba909c | FFFF000D6F4DA7B2 | 3 | temperature_ambient | 3 | 24
227227
(56 rows)
228228

229+
-- Fix for #4894: gapfill error over aggregates in expressions with groupby columns and columns out of order
230+
CREATE TABLE hourly (
231+
time timestamptz NOT NULL,
232+
signal smallint NOT NULL,
233+
value real,
234+
level_a integer,
235+
level_b smallint,
236+
level_c smallint,
237+
agg smallint
238+
);
239+
INSERT into hourly(time, signal,value, level_a, level_b, level_c, agg) values
240+
('2022-10-01T00:00:00Z', 2, 685, 1, -1, -1, 2 ),
241+
('2022-10-01T00:00:00Z', 2, 686, 1, -1, -1, 3 ),
242+
('2022-10-01T02:00:00Z', 2, 686, 1, -1, -3, 2 ),
243+
('2022-10-01T02:00:00Z', 2, 687, 1, -1, -1, 3 ),
244+
('2022-10-01T03:00:00Z', 2, 687, 1, -1, -3, 2 ),
245+
('2022-10-01T03:00:00Z', 2, 688, 1, -1, -1, 3 );
246+
-- Expression over 1 aggregate and 1 groupby column
247+
SELECT
248+
time_bucket_gapfill('1 hour', time) as time,
249+
CASE WHEN agg in (0,3) THEN max(value) ELSE null END as max,
250+
CASE WHEN agg in (0,2) THEN min(value) ELSE null END as min
251+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b >= -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
252+
GROUP BY 1, agg order by 1,2,3;
253+
time | max | min
254+
------------------------------+-----+-----
255+
Fri Sep 30 17:00:00 2022 PDT | 686 |
256+
Fri Sep 30 17:00:00 2022 PDT | | 685
257+
Fri Sep 30 18:00:00 2022 PDT | |
258+
Fri Sep 30 18:00:00 2022 PDT | |
259+
Fri Sep 30 19:00:00 2022 PDT | 687 |
260+
Fri Sep 30 19:00:00 2022 PDT | | 686
261+
Fri Sep 30 20:00:00 2022 PDT | 688 |
262+
Fri Sep 30 20:00:00 2022 PDT | | 687
263+
Fri Sep 30 21:00:00 2022 PDT | |
264+
Fri Sep 30 21:00:00 2022 PDT | |
265+
Fri Sep 30 22:00:00 2022 PDT | |
266+
Fri Sep 30 22:00:00 2022 PDT | |
267+
(12 rows)
268+
269+
-- Expression over 2 aggregates and 1 groupby column
270+
SELECT
271+
time_bucket_gapfill('1 hour', time) as time,
272+
CASE WHEN agg in (0,3) THEN max(value) ELSE min(level_c) END as maxmin
273+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
274+
GROUP BY 1, agg order by 1,2;
275+
time | maxmin
276+
------------------------------+--------
277+
Fri Sep 30 17:00:00 2022 PDT | -1
278+
Fri Sep 30 17:00:00 2022 PDT | 686
279+
Fri Sep 30 18:00:00 2022 PDT |
280+
Fri Sep 30 18:00:00 2022 PDT |
281+
Fri Sep 30 19:00:00 2022 PDT | -3
282+
Fri Sep 30 19:00:00 2022 PDT | 687
283+
Fri Sep 30 20:00:00 2022 PDT | -3
284+
Fri Sep 30 20:00:00 2022 PDT | 688
285+
Fri Sep 30 21:00:00 2022 PDT |
286+
Fri Sep 30 21:00:00 2022 PDT |
287+
Fri Sep 30 22:00:00 2022 PDT |
288+
Fri Sep 30 22:00:00 2022 PDT |
289+
(12 rows)
290+
291+
-- Expression over 2 aggregates and 2 groupby columns
292+
SELECT
293+
time_bucket_gapfill('1 hour', time) as time,
294+
CASE WHEN agg in (0,3) THEN max(value) ELSE min(level_c)+signal END as maxmin
295+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
296+
GROUP BY 1, agg, signal order by 1,2;
297+
time | maxmin
298+
------------------------------+--------
299+
Fri Sep 30 17:00:00 2022 PDT | 1
300+
Fri Sep 30 17:00:00 2022 PDT | 686
301+
Fri Sep 30 18:00:00 2022 PDT |
302+
Fri Sep 30 18:00:00 2022 PDT |
303+
Fri Sep 30 19:00:00 2022 PDT | -1
304+
Fri Sep 30 19:00:00 2022 PDT | 687
305+
Fri Sep 30 20:00:00 2022 PDT | -1
306+
Fri Sep 30 20:00:00 2022 PDT | 688
307+
Fri Sep 30 21:00:00 2022 PDT |
308+
Fri Sep 30 21:00:00 2022 PDT |
309+
Fri Sep 30 22:00:00 2022 PDT |
310+
Fri Sep 30 22:00:00 2022 PDT |
311+
(12 rows)
312+
313+
-- Expressions over aggregates and complex groupby expressions
314+
SELECT
315+
time_bucket_gapfill('1 hour', time) as time,
316+
max(value)+(agg+1)
317+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
318+
GROUP BY 1, agg+1 order by 1,2;
319+
time | ?column?
320+
------------------------------+----------
321+
Fri Sep 30 17:00:00 2022 PDT | 688
322+
Fri Sep 30 17:00:00 2022 PDT | 690
323+
Fri Sep 30 18:00:00 2022 PDT |
324+
Fri Sep 30 18:00:00 2022 PDT |
325+
Fri Sep 30 19:00:00 2022 PDT | 689
326+
Fri Sep 30 19:00:00 2022 PDT | 691
327+
Fri Sep 30 20:00:00 2022 PDT | 690
328+
Fri Sep 30 20:00:00 2022 PDT | 692
329+
Fri Sep 30 21:00:00 2022 PDT |
330+
Fri Sep 30 21:00:00 2022 PDT |
331+
Fri Sep 30 22:00:00 2022 PDT |
332+
Fri Sep 30 22:00:00 2022 PDT |
333+
(12 rows)
334+
335+
SELECT
336+
time_bucket_gapfill('1 hour', time) as time,
337+
max(value)+(agg+1)+(agg+1+1)
338+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
339+
GROUP BY 1, agg+1, agg+1+1 order by 1,2;
340+
time | ?column?
341+
------------------------------+----------
342+
Fri Sep 30 17:00:00 2022 PDT | 692
343+
Fri Sep 30 17:00:00 2022 PDT | 695
344+
Fri Sep 30 18:00:00 2022 PDT |
345+
Fri Sep 30 18:00:00 2022 PDT |
346+
Fri Sep 30 19:00:00 2022 PDT | 693
347+
Fri Sep 30 19:00:00 2022 PDT | 696
348+
Fri Sep 30 20:00:00 2022 PDT | 694
349+
Fri Sep 30 20:00:00 2022 PDT | 697
350+
Fri Sep 30 21:00:00 2022 PDT |
351+
Fri Sep 30 21:00:00 2022 PDT |
352+
Fri Sep 30 22:00:00 2022 PDT |
353+
Fri Sep 30 22:00:00 2022 PDT |
354+
(12 rows)
355+
356+
SELECT
357+
time_bucket_gapfill('1 hour', time) as time,
358+
max(value) + (agg+signal) maxv,
359+
min(value) - (agg+signal) minv,
360+
agg+signal
361+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
362+
GROUP BY 1, agg+signal order by 1,2,3;
363+
time | maxv | minv | ?column?
364+
------------------------------+------+------+----------
365+
Fri Sep 30 17:00:00 2022 PDT | 689 | 681 | 4
366+
Fri Sep 30 17:00:00 2022 PDT | 691 | 681 | 5
367+
Fri Sep 30 18:00:00 2022 PDT | | | 4
368+
Fri Sep 30 18:00:00 2022 PDT | | | 5
369+
Fri Sep 30 19:00:00 2022 PDT | 690 | 682 | 4
370+
Fri Sep 30 19:00:00 2022 PDT | 692 | 682 | 5
371+
Fri Sep 30 20:00:00 2022 PDT | 691 | 683 | 4
372+
Fri Sep 30 20:00:00 2022 PDT | 693 | 683 | 5
373+
Fri Sep 30 21:00:00 2022 PDT | | | 4
374+
Fri Sep 30 21:00:00 2022 PDT | | | 5
375+
Fri Sep 30 22:00:00 2022 PDT | | | 4
376+
Fri Sep 30 22:00:00 2022 PDT | | | 5
377+
(12 rows)
378+
379+
drop table hourly cascade;

0 commit comments

Comments
 (0)