Skip to content

Commit 554622c

Browse files
Fix gapfill bug when aggregates are in expressions with groupby columns
Fix gapfill bug when aggregates are in expressions with groupby columns, use custom_exprs
1 parent 0843d8b commit 554622c

File tree

5 files changed

+317
-33
lines changed

5 files changed

+317
-33
lines changed

.unreleased/pr_8550

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fixes: #8550 Error in Gapfill with expressions over aggregates and groupby columns and out-of-order columns
2+
Thanks: @MKrkkl for reporting a bug in Gapfill queries with expressions over aggregates and groupby columns

tsl/src/nodes/gapfill/gapfill_exec.c

Lines changed: 59 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@ static bool gapfill_state_is_new_group(GapFillState *state, TupleTableSlot *slot
6666
static void gapfill_state_set_next(GapFillState *state, TupleTableSlot *subslot);
6767
static TupleTableSlot *gapfill_state_return_subplan_slot(GapFillState *state);
6868
static TupleTableSlot *gapfill_fetch_next_tuple(GapFillState *state);
69-
static void gapfill_state_initialize_columns(GapFillState *state);
69+
static void gapfill_state_initialize_columns(GapFillState *state, List *exec_tlist);
7070
static GapFillColumnState *gapfill_column_state_create(GapFillColumnType ctype, Oid typeid);
7171
static bool gapfill_is_group_column(GapFillState *state, TargetEntry *tle);
72-
static Node *gapfill_aggref_mutator(Node *node, void *context);
72+
static TargetEntry *gapfill_get_fixed_agg_expr_column(GapFillState *state, TargetEntry *tle);
7373

7474
static CustomExecMethods gapfill_state_methods = {
7575
.BeginCustomScan = gapfill_begin,
@@ -717,10 +717,8 @@ gapfill_begin(CustomScanState *node, EState *estate, int eflags)
717717
FuncExpr *func = linitial(cscan->custom_private);
718718
TupleDesc tupledesc = state->csstate.ss.ps.ps_ResultTupleSlot->tts_tupleDescriptor;
719719
List *targetlist = copyObject(state->csstate.ss.ps.plan->targetlist);
720-
Node *entry;
721720
bool isnull;
722721
Datum arg_value;
723-
int i;
724722

725723
state->gapfill_typid = func->funcresulttype;
726724
state->state = FETCHED_NONE;
@@ -802,27 +800,11 @@ gapfill_begin(CustomScanState *node, EState *estate, int eflags)
802800
state->gapfill_end = gapfill_datum_get_internal(arg_value, func->funcresulttype);
803801
}
804802

805-
gapfill_state_initialize_columns(state);
803+
gapfill_state_initialize_columns(state, targetlist);
806804

807805
/*
808806
* Build ProjectionInfo that will be used for gap filled tuples only.
809-
*
810-
* For every NULL_COLUMN we take the original expression tree from the
811-
* subplan and replace Aggref nodes with Const NULL nodes. This is
812-
* necessary because the expression might be evaluated below the
813-
* aggregation so we need to pull up expression from subplan into
814-
* projection for gapfilled tuples so expressions like COALESCE work
815-
* correctly for gapfilled tuples.
816807
*/
817-
for (i = 0; i < state->ncolumns; i++)
818-
{
819-
if (state->columns[i]->ctype == NULL_COLUMN)
820-
{
821-
entry = copyObject(list_nth(cscan->custom_scan_tlist, i));
822-
entry = gapfill_aggref_mutator(entry, NULL);
823-
lfirst(list_nth_cell(targetlist, i)) = entry;
824-
}
825-
}
826808
state->pi = ExecBuildProjectionInfo(targetlist,
827809
state->csstate.ss.ps.ps_ExprContext,
828810
MakeSingleTupleTableSlot(tupledesc, &TTSOpsVirtual),
@@ -1205,7 +1187,7 @@ gapfill_fetch_next_tuple(GapFillState *state)
12051187
* Initialize column meta data
12061188
*/
12071189
static void
1208-
gapfill_state_initialize_columns(GapFillState *state)
1190+
gapfill_state_initialize_columns(GapFillState *state, List *exec_tlist)
12091191
{
12101192
TupleDesc tupledesc = state->csstate.ss.ps.ps_ResultTupleSlot->tts_tupleDescriptor;
12111193
CustomScan *cscan = castNode(CustomScan, state->csstate.ss.ps.plan);
@@ -1278,7 +1260,8 @@ gapfill_state_initialize_columns(GapFillState *state)
12781260
* column so we treat those similar to GROUP BY column for gapfill
12791261
* purposes.
12801262
*/
1281-
if (!contain_agg_clause((Node *) expr) && contain_var_clause((Node *) expr))
1263+
bool column_contains_aggs = contain_agg_clause((Node *) expr);
1264+
if (!column_contains_aggs && contain_var_clause((Node *) expr))
12821265
{
12831266
state->columns[i] =
12841267
gapfill_column_state_create(DERIVED_COLUMN, TupleDescAttr(tupledesc, i)->atttypid);
@@ -1287,6 +1270,41 @@ gapfill_state_initialize_columns(GapFillState *state)
12871270
continue;
12881271
}
12891272

1273+
/*
1274+
* For every column with Aggrefs we take the original expression tree from the
1275+
* subplan and replace Aggref nodes with Const NULL nodes. This is
1276+
* necessary because the expression might be evaluated below the
1277+
* aggregation so we need to pull up expression from subplan into
1278+
* projection for gapfilled tuples so expressions like COALESCE work
1279+
* correctly for gapfilled tuples.
1280+
*/
1281+
if (column_contains_aggs)
1282+
{
1283+
TargetEntry *agg_expr_tle = gapfill_get_fixed_agg_expr_column(state, tle);
1284+
Assert(agg_expr_tle);
1285+
Node *entry = copyObject((Node *) agg_expr_tle);
1286+
1287+
/* Fix for #4894 when we have expressions like (agg + group_expr):
1288+
* after getting fixed entry where aggs are replaced with NULLs
1289+
* and group expressions are replaced with exec group columns,
1290+
* check whether this column contains group columns and needs to be DERIVED or NULL.
1291+
*/
1292+
if (contain_var_clause(entry))
1293+
{
1294+
state->columns[i] =
1295+
gapfill_column_state_create(DERIVED_COLUMN,
1296+
TupleDescAttr(tupledesc, i)->atttypid);
1297+
state->multigroup = true;
1298+
state->groups_initialized = false;
1299+
}
1300+
else
1301+
state->columns[i] =
1302+
gapfill_column_state_create(NULL_COLUMN, TupleDescAttr(tupledesc, i)->atttypid);
1303+
1304+
lfirst(list_nth_cell(exec_tlist, i)) = entry;
1305+
continue;
1306+
}
1307+
12901308
/* column with no special action from gap fill node */
12911309
state->columns[i] =
12921310
gapfill_column_state_create(NULL_COLUMN, TupleDescAttr(tupledesc, i)->atttypid);
@@ -1364,19 +1382,27 @@ gapfill_is_group_column(GapFillState *state, TargetEntry *tle)
13641382
}
13651383

13661384
/*
1367-
* Replace Aggref with const NULL
1385+
* If the target entry contains an aggregate, it has been fixed in "custom_exprs"
1386+
* so that the aggregate is replaced with NULL
1387+
* and any group expressions are replaced with exec group vars.
1388+
* We will get the fixed aggregate expression here and use it in exec tlist.
13681389
*/
1369-
static Node *
1370-
gapfill_aggref_mutator(Node *node, void *context)
1390+
static TargetEntry *
1391+
gapfill_get_fixed_agg_expr_column(GapFillState *state, TargetEntry *tle)
13711392
{
1372-
if (node == NULL)
1373-
return NULL;
1374-
1375-
if (IsA(node, Aggref))
1376-
return (Node *)
1377-
makeConst(((Aggref *) node)->aggtype, -1, InvalidOid, -2, (Datum) 0, true, false);
1393+
ListCell *lc;
1394+
CustomScan *cscan = castNode(CustomScan, state->csstate.ss.ps.plan);
1395+
List *mutated_agg_exprs_list = castNode(List, cscan->custom_exprs);
1396+
Assert(list_length(mutated_agg_exprs_list) == 1);
1397+
List *mutated_agg_exprs = castNode(List, linitial(mutated_agg_exprs_list));
13781398

1379-
return expression_tree_mutator(node, gapfill_aggref_mutator, context);
1399+
foreach (lc, mutated_agg_exprs)
1400+
{
1401+
TargetEntry *mutated_agg_expr_tle = castNode(TargetEntry, lfirst(lc));
1402+
if (tle->resno == mutated_agg_expr_tle->resno)
1403+
return mutated_agg_expr_tle;
1404+
}
1405+
return NULL;
13801406
}
13811407

13821408
/*

tsl/src/nodes/gapfill/gapfill_plan.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <postgres.h>
88
#include <nodes/execnodes.h>
99
#include <nodes/extensible.h>
10+
#include <nodes/makefuncs.h>
1011
#include <nodes/nodeFuncs.h>
1112
#include <optimizer/clauses.h>
1213
#include <optimizer/optimizer.h>
@@ -38,6 +39,22 @@ typedef struct gapfill_walker_context
3839
int count;
3940
} gapfill_walker_context;
4041

42+
/*
43+
* Replace Aggref with const NULL
44+
*/
45+
static Node *
46+
gapfill_aggref_mutator(Node *node, void *context)
47+
{
48+
if (node == NULL)
49+
return NULL;
50+
51+
if (IsA(node, Aggref))
52+
return (Node *)
53+
makeConst(((Aggref *) node)->aggtype, -1, InvalidOid, -2, (Datum) 0, true, false);
54+
55+
return expression_tree_mutator(node, gapfill_aggref_mutator, context);
56+
}
57+
4158
/*
4259
* FuncExpr is time_bucket_gapfill function call
4360
*/
@@ -181,6 +198,30 @@ gapfill_plan_create(PlannerInfo *root, RelOptInfo *rel, CustomPath *path, List *
181198
cscan->scan.plan.targetlist = tlist;
182199
cscan->custom_plans = custom_plans;
183200
cscan->custom_scan_tlist = tlist;
201+
202+
/* When we have original target entries like (agg + group_expr)
203+
* we will replace agg with NULL and put resulting expression into exec-fixed "targetlist",
204+
* but we need to fix "group_expr" to refer to exec targetlist group column.
205+
* Only then we can safely put (NULL + group_column_exec) entry into exec-fixed targetlist.
206+
*/
207+
List *mutated_agg_exprs = NIL;
208+
if (contain_agg_clause((Node *) tlist))
209+
{
210+
TargetEntry *tle;
211+
ListCell *lc;
212+
foreach (lc, tlist)
213+
{
214+
tle = lfirst(lc);
215+
if (contain_agg_clause((Node *) tle))
216+
{
217+
Node *entry = copyObject((Node *) tle);
218+
entry = gapfill_aggref_mutator(entry, NULL);
219+
mutated_agg_exprs = lappend(mutated_agg_exprs, entry);
220+
}
221+
}
222+
}
223+
cscan->custom_exprs = list_make1(mutated_agg_exprs);
224+
184225
cscan->flags = path->flags;
185226
cscan->methods = &gapfill_plan_methods;
186227

tsl/test/shared/expected/gapfill_bug.out

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,154 @@ ORDER BY
226226
Sat Jul 05 17:25:00 2025 PDT | e0145b95-6faa-4eb3-815d-5d0a91ba909c | FFFF000D6F4DA7B2 | 3 | temperature_ambient | 3 | 24
227227
(56 rows)
228228

229+
-- Fix for #4894: gapfill error over aggregates in expressions with groupby columns and columns out of order
230+
CREATE TABLE hourly (
231+
time timestamptz NOT NULL,
232+
signal smallint NOT NULL,
233+
value real,
234+
level_a integer,
235+
level_b smallint,
236+
level_c smallint,
237+
agg smallint
238+
);
239+
INSERT into hourly(time, signal,value, level_a, level_b, level_c, agg) values
240+
('2022-10-01T00:00:00Z', 2, 685, 1, -1, -1, 2 ),
241+
('2022-10-01T00:00:00Z', 2, 686, 1, -1, -1, 3 ),
242+
('2022-10-01T02:00:00Z', 2, 686, 1, -1, -3, 2 ),
243+
('2022-10-01T02:00:00Z', 2, 687, 1, -1, -1, 3 ),
244+
('2022-10-01T03:00:00Z', 2, 687, 1, -1, -3, 2 ),
245+
('2022-10-01T03:00:00Z', 2, 688, 1, -1, -1, 3 );
246+
-- Expression over 1 aggregate and 1 groupby column
247+
SELECT
248+
time_bucket_gapfill('1 hour', time) as time,
249+
CASE WHEN agg in (0,3) THEN max(value) ELSE null END as max,
250+
CASE WHEN agg in (0,2) THEN min(value) ELSE null END as min
251+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b >= -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
252+
GROUP BY 1, agg order by 1,2,3;
253+
time | max | min
254+
------------------------------+-----+-----
255+
Fri Sep 30 17:00:00 2022 PDT | 686 |
256+
Fri Sep 30 17:00:00 2022 PDT | | 685
257+
Fri Sep 30 18:00:00 2022 PDT | |
258+
Fri Sep 30 18:00:00 2022 PDT | |
259+
Fri Sep 30 19:00:00 2022 PDT | 687 |
260+
Fri Sep 30 19:00:00 2022 PDT | | 686
261+
Fri Sep 30 20:00:00 2022 PDT | 688 |
262+
Fri Sep 30 20:00:00 2022 PDT | | 687
263+
Fri Sep 30 21:00:00 2022 PDT | |
264+
Fri Sep 30 21:00:00 2022 PDT | |
265+
Fri Sep 30 22:00:00 2022 PDT | |
266+
Fri Sep 30 22:00:00 2022 PDT | |
267+
(12 rows)
268+
269+
-- Expression over 2 aggregates and 1 groupby column
270+
SELECT
271+
time_bucket_gapfill('1 hour', time) as time,
272+
CASE WHEN agg in (0,3) THEN max(value) ELSE min(level_c) END as maxmin
273+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
274+
GROUP BY 1, agg order by 1,2;
275+
time | maxmin
276+
------------------------------+--------
277+
Fri Sep 30 17:00:00 2022 PDT | -1
278+
Fri Sep 30 17:00:00 2022 PDT | 686
279+
Fri Sep 30 18:00:00 2022 PDT |
280+
Fri Sep 30 18:00:00 2022 PDT |
281+
Fri Sep 30 19:00:00 2022 PDT | -3
282+
Fri Sep 30 19:00:00 2022 PDT | 687
283+
Fri Sep 30 20:00:00 2022 PDT | -3
284+
Fri Sep 30 20:00:00 2022 PDT | 688
285+
Fri Sep 30 21:00:00 2022 PDT |
286+
Fri Sep 30 21:00:00 2022 PDT |
287+
Fri Sep 30 22:00:00 2022 PDT |
288+
Fri Sep 30 22:00:00 2022 PDT |
289+
(12 rows)
290+
291+
-- Expression over 2 aggregates and 2 groupby columns
292+
SELECT
293+
time_bucket_gapfill('1 hour', time) as time,
294+
CASE WHEN agg in (0,3) THEN max(value) ELSE min(level_c)+signal END as maxmin
295+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
296+
GROUP BY 1, agg, signal order by 1,2;
297+
time | maxmin
298+
------------------------------+--------
299+
Fri Sep 30 17:00:00 2022 PDT | 1
300+
Fri Sep 30 17:00:00 2022 PDT | 686
301+
Fri Sep 30 18:00:00 2022 PDT |
302+
Fri Sep 30 18:00:00 2022 PDT |
303+
Fri Sep 30 19:00:00 2022 PDT | -1
304+
Fri Sep 30 19:00:00 2022 PDT | 687
305+
Fri Sep 30 20:00:00 2022 PDT | -1
306+
Fri Sep 30 20:00:00 2022 PDT | 688
307+
Fri Sep 30 21:00:00 2022 PDT |
308+
Fri Sep 30 21:00:00 2022 PDT |
309+
Fri Sep 30 22:00:00 2022 PDT |
310+
Fri Sep 30 22:00:00 2022 PDT |
311+
(12 rows)
312+
313+
-- Expressions over aggregates and complex groupby expressions
314+
SELECT
315+
time_bucket_gapfill('1 hour', time) as time,
316+
max(value)+(agg+1)
317+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
318+
GROUP BY 1, agg+1 order by 1,2;
319+
time | ?column?
320+
------------------------------+----------
321+
Fri Sep 30 17:00:00 2022 PDT | 688
322+
Fri Sep 30 17:00:00 2022 PDT | 690
323+
Fri Sep 30 18:00:00 2022 PDT |
324+
Fri Sep 30 18:00:00 2022 PDT |
325+
Fri Sep 30 19:00:00 2022 PDT | 689
326+
Fri Sep 30 19:00:00 2022 PDT | 691
327+
Fri Sep 30 20:00:00 2022 PDT | 690
328+
Fri Sep 30 20:00:00 2022 PDT | 692
329+
Fri Sep 30 21:00:00 2022 PDT |
330+
Fri Sep 30 21:00:00 2022 PDT |
331+
Fri Sep 30 22:00:00 2022 PDT |
332+
Fri Sep 30 22:00:00 2022 PDT |
333+
(12 rows)
334+
335+
SELECT
336+
time_bucket_gapfill('1 hour', time) as time,
337+
max(value)+(agg+1)+(agg+1+1)
338+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
339+
GROUP BY 1, agg+1, agg+1+1 order by 1,2;
340+
time | ?column?
341+
------------------------------+----------
342+
Fri Sep 30 17:00:00 2022 PDT | 692
343+
Fri Sep 30 17:00:00 2022 PDT | 695
344+
Fri Sep 30 18:00:00 2022 PDT |
345+
Fri Sep 30 18:00:00 2022 PDT |
346+
Fri Sep 30 19:00:00 2022 PDT | 693
347+
Fri Sep 30 19:00:00 2022 PDT | 696
348+
Fri Sep 30 20:00:00 2022 PDT | 694
349+
Fri Sep 30 20:00:00 2022 PDT | 697
350+
Fri Sep 30 21:00:00 2022 PDT |
351+
Fri Sep 30 21:00:00 2022 PDT |
352+
Fri Sep 30 22:00:00 2022 PDT |
353+
Fri Sep 30 22:00:00 2022 PDT |
354+
(12 rows)
355+
356+
SELECT
357+
time_bucket_gapfill('1 hour', time) as time,
358+
max(value) + (agg+signal) maxv,
359+
min(value) - (agg+signal) minv,
360+
agg+signal
361+
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
362+
GROUP BY 1, agg+signal order by 1,2,3;
363+
time | maxv | minv | ?column?
364+
------------------------------+------+------+----------
365+
Fri Sep 30 17:00:00 2022 PDT | 689 | 681 | 4
366+
Fri Sep 30 17:00:00 2022 PDT | 691 | 681 | 5
367+
Fri Sep 30 18:00:00 2022 PDT | | | 4
368+
Fri Sep 30 18:00:00 2022 PDT | | | 5
369+
Fri Sep 30 19:00:00 2022 PDT | 690 | 682 | 4
370+
Fri Sep 30 19:00:00 2022 PDT | 692 | 682 | 5
371+
Fri Sep 30 20:00:00 2022 PDT | 691 | 683 | 4
372+
Fri Sep 30 20:00:00 2022 PDT | 693 | 683 | 5
373+
Fri Sep 30 21:00:00 2022 PDT | | | 4
374+
Fri Sep 30 21:00:00 2022 PDT | | | 5
375+
Fri Sep 30 22:00:00 2022 PDT | | | 4
376+
Fri Sep 30 22:00:00 2022 PDT | | | 5
377+
(12 rows)
378+
379+
drop table hourly cascade;

0 commit comments

Comments
 (0)