2 changes: 2 additions & 0 deletions .unreleased/pr_8550
@@ -0,0 +1,2 @@
Fixes: #8550 Error in Gapfill with expressions over aggregates and groupby columns and out-of-order columns
Thanks: @MKrkkl for reporting a bug in Gapfill queries with expressions over aggregates and groupby columns
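For illustration, this is the query shape the entry refers to: an expression that combines an aggregate with a GROUP BY column under time_bucket_gapfill, with the columns not in plan order. The sketch below uses a hypothetical table metrics(time, grp, value) that is not part of this PR; the regression tests further down exercise the same shape against an hourly table.

-- Hypothetical illustration only; metrics(time, grp, value) is assumed, not part of the PR.
SELECT
    time_bucket_gapfill('1 hour', time) AS bucket,
    CASE WHEN grp = 1 THEN max(value) ELSE min(value) END AS agg_expr
FROM metrics
WHERE time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T06:00:00Z'
GROUP BY 1, grp
ORDER BY 1, 2;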
92 changes: 59 additions & 33 deletions tsl/src/nodes/gapfill/gapfill_exec.c
@@ -66,10 +66,10 @@ static bool gapfill_state_is_new_group(GapFillState *state, TupleTableSlot *slot
static void gapfill_state_set_next(GapFillState *state, TupleTableSlot *subslot);
static TupleTableSlot *gapfill_state_return_subplan_slot(GapFillState *state);
static TupleTableSlot *gapfill_fetch_next_tuple(GapFillState *state);
static void gapfill_state_initialize_columns(GapFillState *state);
static void gapfill_state_initialize_columns(GapFillState *state, List *exec_tlist);
static GapFillColumnState *gapfill_column_state_create(GapFillColumnType ctype, Oid typeid);
static bool gapfill_is_group_column(GapFillState *state, TargetEntry *tle);
static Node *gapfill_aggref_mutator(Node *node, void *context);
static TargetEntry *gapfill_get_fixed_agg_expr_column(GapFillState *state, TargetEntry *tle);

static CustomExecMethods gapfill_state_methods = {
.BeginCustomScan = gapfill_begin,
@@ -717,10 +717,8 @@ gapfill_begin(CustomScanState *node, EState *estate, int eflags)
FuncExpr *func = linitial(cscan->custom_private);
TupleDesc tupledesc = state->csstate.ss.ps.ps_ResultTupleSlot->tts_tupleDescriptor;
List *targetlist = copyObject(state->csstate.ss.ps.plan->targetlist);
Node *entry;
bool isnull;
Datum arg_value;
int i;

state->gapfill_typid = func->funcresulttype;
state->state = FETCHED_NONE;
@@ -802,27 +800,11 @@ gapfill_begin(CustomScanState *node, EState *estate, int eflags)
state->gapfill_end = gapfill_datum_get_internal(arg_value, func->funcresulttype);
}

gapfill_state_initialize_columns(state);
gapfill_state_initialize_columns(state, targetlist);

/*
* Build ProjectionInfo that will be used for gap filled tuples only.
*
* For every NULL_COLUMN we take the original expression tree from the
* subplan and replace Aggref nodes with Const NULL nodes. This is
* necessary because the expression might be evaluated below the
* aggregation so we need to pull up expression from subplan into
* projection for gapfilled tuples so expressions like COALESCE work
* correctly for gapfilled tuples.
*/
for (i = 0; i < state->ncolumns; i++)
{
if (state->columns[i]->ctype == NULL_COLUMN)
{
entry = copyObject(list_nth(cscan->custom_scan_tlist, i));
entry = gapfill_aggref_mutator(entry, NULL);
lfirst(list_nth_cell(targetlist, i)) = entry;
}
}
state->pi = ExecBuildProjectionInfo(targetlist,
state->csstate.ss.ps.ps_ExprContext,
MakeSingleTupleTableSlot(tupledesc, &TTSOpsVirtual),
@@ -1205,7 +1187,7 @@ gapfill_fetch_next_tuple(GapFillState *state)
* Initialize column meta data
*/
static void
gapfill_state_initialize_columns(GapFillState *state)
gapfill_state_initialize_columns(GapFillState *state, List *exec_tlist)
{
TupleDesc tupledesc = state->csstate.ss.ps.ps_ResultTupleSlot->tts_tupleDescriptor;
CustomScan *cscan = castNode(CustomScan, state->csstate.ss.ps.plan);
@@ -1278,7 +1260,8 @@ gapfill_state_initialize_columns(GapFillState *state)
* column so we treat those similar to GROUP BY column for gapfill
* purposes.
*/
if (!contain_agg_clause((Node *) expr) && contain_var_clause((Node *) expr))
bool column_contains_aggs = contain_agg_clause((Node *) expr);
if (!column_contains_aggs && contain_var_clause((Node *) expr))
{
state->columns[i] =
gapfill_column_state_create(DERIVED_COLUMN, TupleDescAttr(tupledesc, i)->atttypid);
@@ -1287,6 +1270,41 @@ gapfill_state_initialize_columns(GapFillState *state)
continue;
}

/*
* For every column containing Aggrefs we take the original expression tree
* from the subplan and replace the Aggref nodes with Const NULL nodes. This is
* necessary because the expression might be evaluated below the aggregation,
* so we need to pull the expression from the subplan up into the projection
* for gapfilled tuples; that way expressions like COALESCE are evaluated
* correctly for gapfilled tuples.
*/
if (column_contains_aggs)
{
TargetEntry *agg_expr_tle = gapfill_get_fixed_agg_expr_column(state, tle);
Assert(agg_expr_tle);
Node *entry = copyObject((Node *) agg_expr_tle);

/* Fix for #4894, which involves expressions like (agg + group_expr):
* after fetching the fixed entry, in which aggregates are replaced with NULLs
* and group expressions are replaced with exec group columns, check whether
* this column still references group columns and therefore needs to be
* DERIVED rather than NULL.
*/
if (contain_var_clause(entry))
{
state->columns[i] =
gapfill_column_state_create(DERIVED_COLUMN,
TupleDescAttr(tupledesc, i)->atttypid);
state->multigroup = true;
state->groups_initialized = false;
}
else
state->columns[i] =
gapfill_column_state_create(NULL_COLUMN, TupleDescAttr(tupledesc, i)->atttypid);

lfirst(list_nth_cell(exec_tlist, i)) = entry;
continue;
}

/* column with no special action from gap fill node */
state->columns[i] =
gapfill_column_state_create(NULL_COLUMN, TupleDescAttr(tupledesc, i)->atttypid);
@@ -1364,19 +1382,27 @@ gapfill_is_group_column(GapFillState *state, TargetEntry *tle)
}

/*
* Replace Aggref with const NULL
* If the target entry contains an aggregate, the planner has stored a fixed
* copy of it in "custom_exprs", with the aggregate replaced by NULL and any
* group expressions replaced with exec group Vars.
* Fetch that fixed expression here so it can be used in the exec targetlist.
*/
static Node *
gapfill_aggref_mutator(Node *node, void *context)
static TargetEntry *
gapfill_get_fixed_agg_expr_column(GapFillState *state, TargetEntry *tle)
{
if (node == NULL)
return NULL;

if (IsA(node, Aggref))
return (Node *)
makeConst(((Aggref *) node)->aggtype, -1, InvalidOid, -2, (Datum) 0, true, false);
ListCell *lc;
CustomScan *cscan = castNode(CustomScan, state->csstate.ss.ps.plan);
List *mutated_agg_exprs_list = castNode(List, cscan->custom_exprs);
Assert(list_length(mutated_agg_exprs_list) == 1);
List *mutated_agg_exprs = castNode(List, linitial(mutated_agg_exprs_list));

return expression_tree_mutator(node, gapfill_aggref_mutator, context);
foreach (lc, mutated_agg_exprs)
{
TargetEntry *mutated_agg_expr_tle = castNode(TargetEntry, lfirst(lc));
if (tle->resno == mutated_agg_expr_tle->resno)
return mutated_agg_expr_tle;
}
return NULL;
}

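To make the projection comments above concrete: in gap-filled rows the aggregates are NULL, so an expression over an aggregate (for example COALESCE) must still be evaluated in the gapfill projection for the fallback value to appear in the filled rows. A sketch of that shape, assuming the hourly table created in the gapfill_bug.out test below:

-- Sketch only; relies on the hourly table defined in the test file below.
SELECT
    time_bucket_gapfill('1 hour', time) AS bucket,
    coalesce(max(value), -1) AS max_or_default
FROM hourly
WHERE time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T06:00:00Z'
GROUP BY 1
ORDER BY 1;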
41 changes: 41 additions & 0 deletions tsl/src/nodes/gapfill/gapfill_plan.c
@@ -7,6 +7,7 @@
#include <postgres.h>
#include <nodes/execnodes.h>
#include <nodes/extensible.h>
#include <nodes/makefuncs.h>
#include <nodes/nodeFuncs.h>
#include <optimizer/clauses.h>
#include <optimizer/optimizer.h>
@@ -38,6 +39,22 @@ typedef struct gapfill_walker_context
int count;
} gapfill_walker_context;

/*
* Replace Aggref with const NULL
*/
static Node *
gapfill_aggref_mutator(Node *node, void *context)
{
if (node == NULL)
return NULL;

if (IsA(node, Aggref))
return (Node *)
makeConst(((Aggref *) node)->aggtype, -1, InvalidOid, -2, (Datum) 0, true, false);

return expression_tree_mutator(node, gapfill_aggref_mutator, context);
}

/*
* FuncExpr is time_bucket_gapfill function call
*/
@@ -181,6 +198,30 @@ gapfill_plan_create(PlannerInfo *root, RelOptInfo *rel, CustomPath *path, List *
cscan->scan.plan.targetlist = tlist;
cscan->custom_plans = custom_plans;
cscan->custom_scan_tlist = tlist;

/* When an original target entry looks like (agg + group_expr), we replace the
* agg with NULL and put the resulting expression into the exec-fixed targetlist.
* The "group_expr" part must also be fixed to refer to the corresponding exec
* targetlist group column; only then can the (NULL + group_column_exec) entry
* be placed safely into the exec-fixed targetlist.
*/
List *mutated_agg_exprs = NIL;
if (contain_agg_clause((Node *) tlist))
{
TargetEntry *tle;
ListCell *lc;
foreach (lc, tlist)
{
tle = lfirst(lc);
if (contain_agg_clause((Node *) tle))
{
Node *entry = copyObject((Node *) tle);
entry = gapfill_aggref_mutator(entry, NULL);
mutated_agg_exprs = lappend(mutated_agg_exprs, entry);
}
}
}
cscan->custom_exprs = list_make1(mutated_agg_exprs);

cscan->flags = path->flags;
cscan->methods = &gapfill_plan_methods;

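To connect the planner change above to the tests below: the problematic target entries have the form max(value) + (agg + 1), where the stored copy in custom_exprs carries NULL in place of the aggregate while the (agg + 1) part ends up referring to the exec targetlist group column. A sketch of that query shape, again assuming the hourly table from the test file:

-- Sketch only; same shape as the regression tests below, using the hourly table.
SELECT
    time_bucket_gapfill('1 hour', time) AS bucket,
    max(value) + (agg + 1) AS shifted_max
FROM hourly
WHERE time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T06:00:00Z'
GROUP BY 1, agg + 1
ORDER BY 1, 2;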
151 changes: 151 additions & 0 deletions tsl/test/shared/expected/gapfill_bug.out
@@ -226,3 +226,154 @@ ORDER BY
Sat Jul 05 17:25:00 2025 PDT | e0145b95-6faa-4eb3-815d-5d0a91ba909c | FFFF000D6F4DA7B2 | 3 | temperature_ambient | 3 | 24
(56 rows)

-- Fix for #4894: gapfill error over aggregates in expressions with groupby columns and columns out of order
CREATE TABLE hourly (
time timestamptz NOT NULL,
signal smallint NOT NULL,
value real,
level_a integer,
level_b smallint,
level_c smallint,
agg smallint
);
INSERT into hourly(time, signal,value, level_a, level_b, level_c, agg) values
('2022-10-01T00:00:00Z', 2, 685, 1, -1, -1, 2 ),
('2022-10-01T00:00:00Z', 2, 686, 1, -1, -1, 3 ),
('2022-10-01T02:00:00Z', 2, 686, 1, -1, -3, 2 ),
('2022-10-01T02:00:00Z', 2, 687, 1, -1, -1, 3 ),
('2022-10-01T03:00:00Z', 2, 687, 1, -1, -3, 2 ),
('2022-10-01T03:00:00Z', 2, 688, 1, -1, -1, 3 );
-- Expression over 1 aggregate and 1 groupby column
SELECT
time_bucket_gapfill('1 hour', time) as time,
CASE WHEN agg in (0,3) THEN max(value) ELSE null END as max,
CASE WHEN agg in (0,2) THEN min(value) ELSE null END as min
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b >= -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
GROUP BY 1, agg order by 1,2,3;
time | max | min
------------------------------+-----+-----
Fri Sep 30 17:00:00 2022 PDT | 686 |
Fri Sep 30 17:00:00 2022 PDT | | 685
Fri Sep 30 18:00:00 2022 PDT | |
Fri Sep 30 18:00:00 2022 PDT | |
Fri Sep 30 19:00:00 2022 PDT | 687 |
Fri Sep 30 19:00:00 2022 PDT | | 686
Fri Sep 30 20:00:00 2022 PDT | 688 |
Fri Sep 30 20:00:00 2022 PDT | | 687
Fri Sep 30 21:00:00 2022 PDT | |
Fri Sep 30 21:00:00 2022 PDT | |
Fri Sep 30 22:00:00 2022 PDT | |
Fri Sep 30 22:00:00 2022 PDT | |
(12 rows)

-- Expression over 2 aggregates and 1 groupby column
SELECT
time_bucket_gapfill('1 hour', time) as time,
CASE WHEN agg in (0,3) THEN max(value) ELSE min(level_c) END as maxmin
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
GROUP BY 1, agg order by 1,2;
time | maxmin
------------------------------+--------
Fri Sep 30 17:00:00 2022 PDT | -1
Fri Sep 30 17:00:00 2022 PDT | 686
Fri Sep 30 18:00:00 2022 PDT |
Fri Sep 30 18:00:00 2022 PDT |
Fri Sep 30 19:00:00 2022 PDT | -3
Fri Sep 30 19:00:00 2022 PDT | 687
Fri Sep 30 20:00:00 2022 PDT | -3
Fri Sep 30 20:00:00 2022 PDT | 688
Fri Sep 30 21:00:00 2022 PDT |
Fri Sep 30 21:00:00 2022 PDT |
Fri Sep 30 22:00:00 2022 PDT |
Fri Sep 30 22:00:00 2022 PDT |
(12 rows)

-- Expression over 2 aggregates and 2 groupby columns
SELECT
time_bucket_gapfill('1 hour', time) as time,
CASE WHEN agg in (0,3) THEN max(value) ELSE min(level_c)+signal END as maxmin
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
GROUP BY 1, agg, signal order by 1,2;
time | maxmin
------------------------------+--------
Fri Sep 30 17:00:00 2022 PDT | 1
Fri Sep 30 17:00:00 2022 PDT | 686
Fri Sep 30 18:00:00 2022 PDT |
Fri Sep 30 18:00:00 2022 PDT |
Fri Sep 30 19:00:00 2022 PDT | -1
Fri Sep 30 19:00:00 2022 PDT | 687
Fri Sep 30 20:00:00 2022 PDT | -1
Fri Sep 30 20:00:00 2022 PDT | 688
Fri Sep 30 21:00:00 2022 PDT |
Fri Sep 30 21:00:00 2022 PDT |
Fri Sep 30 22:00:00 2022 PDT |
Fri Sep 30 22:00:00 2022 PDT |
(12 rows)

-- Expressions over aggregates and complex groupby expressions
SELECT
time_bucket_gapfill('1 hour', time) as time,
max(value)+(agg+1)
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
GROUP BY 1, agg+1 order by 1,2;
time | ?column?
------------------------------+----------
Fri Sep 30 17:00:00 2022 PDT | 688
Fri Sep 30 17:00:00 2022 PDT | 690
Fri Sep 30 18:00:00 2022 PDT |
Fri Sep 30 18:00:00 2022 PDT |
Fri Sep 30 19:00:00 2022 PDT | 689
Fri Sep 30 19:00:00 2022 PDT | 691
Fri Sep 30 20:00:00 2022 PDT | 690
Fri Sep 30 20:00:00 2022 PDT | 692
Fri Sep 30 21:00:00 2022 PDT |
Fri Sep 30 21:00:00 2022 PDT |
Fri Sep 30 22:00:00 2022 PDT |
Fri Sep 30 22:00:00 2022 PDT |
(12 rows)

SELECT
time_bucket_gapfill('1 hour', time) as time,
max(value)+(agg+1)+(agg+1+1)
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
GROUP BY 1, agg+1, agg+1+1 order by 1,2;
time | ?column?
------------------------------+----------
Fri Sep 30 17:00:00 2022 PDT | 692
Fri Sep 30 17:00:00 2022 PDT | 695
Fri Sep 30 18:00:00 2022 PDT |
Fri Sep 30 18:00:00 2022 PDT |
Fri Sep 30 19:00:00 2022 PDT | 693
Fri Sep 30 19:00:00 2022 PDT | 696
Fri Sep 30 20:00:00 2022 PDT | 694
Fri Sep 30 20:00:00 2022 PDT | 697
Fri Sep 30 21:00:00 2022 PDT |
Fri Sep 30 21:00:00 2022 PDT |
Fri Sep 30 22:00:00 2022 PDT |
Fri Sep 30 22:00:00 2022 PDT |
(12 rows)

SELECT
time_bucket_gapfill('1 hour', time) as time,
max(value) + (agg+signal) maxv,
min(value) - (agg+signal) minv,
agg+signal
FROM hourly WHERE agg in (0,2,3) and signal in (2) AND level_a = 1 AND level_b = -1 AND time >= '2022-10-01T00:00:00Z' AND time < '2022-10-01T05:59:59Z'
GROUP BY 1, agg+signal order by 1,2,3;
time | maxv | minv | ?column?
------------------------------+------+------+----------
Fri Sep 30 17:00:00 2022 PDT | 689 | 681 | 4
Fri Sep 30 17:00:00 2022 PDT | 691 | 681 | 5
Fri Sep 30 18:00:00 2022 PDT | | | 4
Fri Sep 30 18:00:00 2022 PDT | | | 5
Fri Sep 30 19:00:00 2022 PDT | 690 | 682 | 4
Fri Sep 30 19:00:00 2022 PDT | 692 | 682 | 5
Fri Sep 30 20:00:00 2022 PDT | 691 | 683 | 4
Fri Sep 30 20:00:00 2022 PDT | 693 | 683 | 5
Fri Sep 30 21:00:00 2022 PDT | | | 4
Fri Sep 30 21:00:00 2022 PDT | | | 5
Fri Sep 30 22:00:00 2022 PDT | | | 4
Fri Sep 30 22:00:00 2022 PDT | | | 5
(12 rows)

drop table hourly cascade;