Skip to content

Commit c356b61

Browse files
Implement multikey skipscan in not-null mode
1 parent 7247bce commit c356b61

File tree

15 files changed

+1906
-382
lines changed

15 files changed

+1906
-382
lines changed

.unreleased/pr_8513

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implements: #8513 Support multikey SkipScan when all keys are guaranteed to be non-null

src/guc.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ TSDLLEXPORT bool ts_guc_enable_skip_scan = true;
132132
TSDLLEXPORT bool ts_guc_enable_skip_scan_for_distinct_aggregates = true;
133133
#endif
134134
TSDLLEXPORT bool ts_guc_enable_compressed_skip_scan = true;
135+
TSDLLEXPORT bool ts_guc_enable_multikey_skip_scan = true;
135136
TSDLLEXPORT double ts_guc_skip_scan_run_cost_multiplier = 1.0;
136137
static char *ts_guc_default_segmentby_fn = NULL;
137138
static char *ts_guc_default_orderby_fn = NULL;
@@ -679,6 +680,17 @@ _guc_init(void)
679680
NULL,
680681
NULL);
681682

683+
DefineCustomBoolVariable(MAKE_EXTOPTION("enable_multikey_skipscan"),
684+
"Enable SkipScan for multiple distinct keys",
685+
"Enable SkipScan for multiple distinct inputs",
686+
&ts_guc_enable_multikey_skip_scan,
687+
true,
688+
PGC_USERSET,
689+
0,
690+
NULL,
691+
NULL,
692+
NULL);
693+
682694
DefineCustomRealVariable(MAKE_EXTOPTION("skip_scan_run_cost_multiplier"),
683695
"Multiplier for SkipScan run cost as an option to make the cost "
684696
"smaller so that SkipScan can be chosen",

src/guc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ extern TSDLLEXPORT bool ts_guc_enable_skip_scan_for_distinct_aggregates;
7171
#endif
7272
extern bool ts_guc_enable_event_triggers;
7373
extern TSDLLEXPORT bool ts_guc_enable_compressed_skip_scan;
74+
extern TSDLLEXPORT bool ts_guc_enable_multikey_skip_scan;
7475
extern TSDLLEXPORT double ts_guc_skip_scan_run_cost_multiplier;
7576
extern TSDLLEXPORT bool ts_guc_debug_skip_scan_info;
7677

tsl/src/nodes/skip_scan/exec.c

Lines changed: 185 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,53 @@
3838
* | DONE |
3939
* \===========/
4040
*
41+
*
42+
*
43+
* N-key SkipScan needs to do 2^N null check stages when using the above scheme,
44+
* made even more complicated with having to change searches for previous keys.
45+
*
46+
* So we made a decision to support multikey SkipScan in NOT NULL mode only.
47+
*
48+
* For N-key SkipScan we search with these predicates when current key = K:
49+
* (key_1 = prev_1),...,(key_K > prev_K),(key_K+1 IS NOT NULL)...(key_N IS NOT NULL)
50+
*
51+
* As all skip keys are NOT NULL, an "IS NOT NULL" predicate matches any value, so it is used for keys that have no previous value yet.
52+
*
53+
* We start the search with K=1 i.e. with these predicates:
54+
* (key_1 IS NOT NULL),...,(key_N IS NOT NULL).
55+
*
56+
* When a tuple is fetched we set K=N as we can fill all previous values, search is now:
57+
* (key_1 = prev_1),...,(key_N > prev_N)
58+
*
59+
* When no tuple is fetched and K>1 we can relax the search and move to previous key (K-1):
60+
* (key_1 = prev_1),...,(key_K-1 > prev_K-1),(key_K IS NOT NULL)...(key_N IS NOT NULL)
61+
*
62+
* When no tuple is fetched and K=1, we are done.
63+
*
64+
* Multikey SkipScan flowchart:
65+
* start (K=1)
66+
* | +---------+
67+
* | | |
68+
* v v |
69+
* +=================================+ |
70+
* | search for NOT NULL after K | |
71+
* +=================================+ |
72+
* | | |
73+
* | found value | |
74+
* v | |
75+
* +==============================+ | |
76+
* | search for values after prev | | |
77+
* +==============================+ | |
78+
* | | |
79+
* | no value | |
80+
* v v |
81+
* +======================+ |
82+
* | K=1 | K>1 |
83+
* v v |
84+
* /===========\ +=========+ |
85+
* | DONE | | K = K-1 |---+
86+
* \===========/ +=========+
87+
*
4188
*/
4289

4390
#include <postgres.h>
@@ -58,6 +105,7 @@ typedef enum SkipScanStage
58105
SS_NOT_NULL,
59106
SS_VALUES,
60107
SS_NULLS_LAST,
108+
SS_PREV_KEY,
61109
SS_END,
62110
} SkipScanStage;
63111

@@ -93,6 +141,17 @@ typedef struct SkipScanState
93141
int num_skip_keys;
94142
SkipKeyData *skip_keys;
95143

144+
/* Skip key with ">" qual, coming after "=" skip quals for multikey SkipScan */
145+
int current_key;
146+
147+
/* For Multikey SkipScan we keep copies of "sk_func" for "=" and ">" for keys 1..N-1
148+
* to be swapped during execution.
149+
*/
150+
FmgrInfo *eq_funcs;
151+
/* Will be filled after IndexScan scankeys have been initialized */
152+
FmgrInfo *comp_funcs;
153+
StrategyNumber *comp_strategies;
154+
96155
SkipScanStage stage;
97156

98157
/* rescan required before getting next tuple */
@@ -157,31 +216,44 @@ skip_scan_begin(CustomScanState *node, EState *estate, int eflags)
157216
/* find position of our skip key
158217
* skip key is put as first key for the respective column in sort_indexquals
159218
*/
160-
ScanKey data = *state->scan_keys;
219+
ScanKey scankeydata = *state->scan_keys;
161220
int j = 0;
162221
for (int i = 0; i < *state->num_scan_keys; i++)
163222
{
164-
if (data[i].sk_flags == SK_ISNULL && data[i].sk_attno == state->skip_keys[j].sk_attno)
223+
if (scankeydata[i].sk_flags == SK_ISNULL &&
224+
scankeydata[i].sk_attno == state->skip_keys[j].sk_attno)
165225
{
166-
state->skip_keys[j++].skip_key = &data[i];
226+
SkipKeyData *skipkeydata = &state->skip_keys[j++];
227+
skipkeydata->skip_key = &scankeydata[i];
228+
/* Set up ">" sk_func swaps for skip keys 1..N-1 */
229+
if (j < state->num_skip_keys)
230+
{
231+
state->comp_strategies[j - 1] = scankeydata[i].sk_strategy;
232+
fmgr_info_copy(&state->comp_funcs[j - 1],
233+
&scankeydata[i].sk_func,
234+
CurrentMemoryContext);
235+
}
167236
if (j == state->num_skip_keys)
168237
break;
169238
}
170239
}
171240
if (j < state->num_skip_keys)
172241
elog(ERROR, "ScanKey for skip qual not found");
242+
243+
/* when we fetch the 1st tuple we update all skip keys from 0 to N-1 */
244+
state->current_key = 0;
173245
}
174246

175247
static bool
176248
has_nulls_first(SkipScanState *state)
177249
{
178-
return state->skip_keys[0].nulls == SKIPKEY_NULLS_FIRST;
250+
return state->skip_keys[0].nulls == SK_NULLS_FIRST;
179251
}
180252

181253
static bool
182254
has_nulls_last(SkipScanState *state)
183255
{
184-
return state->skip_keys[0].nulls == SKIPKEY_NULLS_LAST;
256+
return state->skip_keys[0].nulls == SK_NULLS_LAST;
185257
}
186258

187259
static void
@@ -223,18 +295,48 @@ skip_scan_rescan_index(SkipScanState *state)
223295
static void
224296
skip_scan_switch_stage(SkipScanState *state, SkipScanStage new_stage)
225297
{
226-
Assert(new_stage > state->stage);
298+
Assert(new_stage > state->stage || state->num_skip_keys > 1);
227299

228300
switch (new_stage)
229301
{
230302
case SS_NOT_NULL:
231-
state->skip_keys[0].skip_key->sk_flags = SK_ISNULL | SK_SEARCHNOTNULL;
232-
state->skip_keys[0].skip_key->sk_argument = 0;
303+
for (int i = 0; i < state->num_skip_keys; i++)
304+
{
305+
state->skip_keys[i].skip_key->sk_flags = SK_ISNULL | SK_SEARCHNOTNULL;
306+
state->skip_keys[i].skip_key->sk_argument = 0;
307+
}
308+
state->needs_rescan = true;
309+
break;
310+
311+
case SS_PREV_KEY:
312+
/* Done searching with ">" for this key: set this key to NOT NULL i.e. any value,
313+
* set previous "=" key to search with ">".
314+
*/
315+
state->skip_keys[state->current_key].skip_key->sk_flags = SK_ISNULL | SK_SEARCHNOTNULL;
316+
state->current_key--;
317+
state->skip_keys[state->current_key].skip_key->sk_flags = 0;
318+
fmgr_info_copy(&state->skip_keys[state->current_key].skip_key->sk_func,
319+
&state->comp_funcs[state->current_key],
320+
CurrentMemoryContext);
321+
state->skip_keys[state->current_key].skip_key->sk_strategy =
322+
state->comp_strategies[state->current_key];
233323
state->needs_rescan = true;
234324
break;
235325

236326
case SS_VALUES:
237-
state->skip_keys[0].skip_key->sk_flags = 0;
327+
for (int i = 0; i < state->num_skip_keys; i++)
328+
{
329+
state->skip_keys[i].skip_key->sk_flags = 0;
330+
/* reset all ">" back to "=" from the current key to N-1 */
331+
if (i >= state->current_key && i < state->num_skip_keys - 1)
332+
{
333+
fmgr_info_copy(&state->skip_keys[i].skip_key->sk_func,
334+
&state->eq_funcs[i],
335+
CurrentMemoryContext);
336+
state->skip_keys[i].skip_key->sk_strategy = BTEqualStrategyNumber;
337+
}
338+
}
339+
state->current_key = state->num_skip_keys - 1;
238340
state->needs_rescan = true;
239341
break;
240342

@@ -256,31 +358,32 @@ skip_scan_switch_stage(SkipScanState *state, SkipScanStage new_stage)
256358
static void
257359
skip_scan_update_key(SkipScanState *state, TupleTableSlot *slot)
258360
{
259-
if (!state->skip_keys[0].prev_is_null && !state->skip_keys[0].distinct_by_val)
361+
for (int i = state->current_key; i < state->num_skip_keys; i++)
260362
{
261-
Assert(state->stage == SS_VALUES);
262-
pfree(DatumGetPointer(state->skip_keys[0].prev_datum));
263-
}
363+
if (!state->skip_keys[i].prev_is_null && !state->skip_keys[i].distinct_by_val)
364+
{
365+
Assert(state->stage == SS_VALUES || state->num_skip_keys > 1);
366+
pfree(DatumGetPointer(state->skip_keys[i].prev_datum));
367+
}
264368

265-
MemoryContext old_ctx = MemoryContextSwitchTo(state->ctx);
266-
state->skip_keys[0].prev_datum = slot_getattr(slot,
267-
state->skip_keys[0].distinct_col_attnum,
268-
&state->skip_keys[0].prev_is_null);
269-
if (state->skip_keys[0].prev_is_null)
270-
{
271-
state->skip_keys[0].skip_key->sk_flags = SK_ISNULL;
272-
state->skip_keys[0].skip_key->sk_argument = 0;
273-
}
274-
else
275-
{
276-
state->skip_keys[0].prev_datum = datumCopy(state->skip_keys[0].prev_datum,
277-
state->skip_keys[0].distinct_by_val,
278-
state->skip_keys[0].distinct_typ_len);
279-
state->skip_keys[0].skip_key->sk_argument = state->skip_keys[0].prev_datum;
369+
MemoryContext old_ctx = MemoryContextSwitchTo(state->ctx);
370+
state->skip_keys[i].prev_datum = slot_getattr(slot,
371+
state->skip_keys[i].distinct_col_attnum,
372+
&state->skip_keys[i].prev_is_null);
373+
if (state->skip_keys[i].prev_is_null)
374+
{
375+
state->skip_keys[i].skip_key->sk_flags = SK_ISNULL;
376+
state->skip_keys[i].skip_key->sk_argument = 0;
377+
}
378+
else
379+
{
380+
state->skip_keys[i].prev_datum = datumCopy(state->skip_keys[i].prev_datum,
381+
state->skip_keys[i].distinct_by_val,
382+
state->skip_keys[i].distinct_typ_len);
383+
state->skip_keys[i].skip_key->sk_argument = state->skip_keys[i].prev_datum;
384+
}
385+
MemoryContextSwitchTo(old_ctx);
280386
}
281-
282-
MemoryContextSwitchTo(old_ctx);
283-
284387
/* we need to do a rescan whenever we modify the ScanKey */
285388
state->needs_rescan = true;
286389
}
@@ -330,6 +433,7 @@ skip_scan_exec(CustomScanState *node)
330433
break;
331434

332435
case SS_NOT_NULL:
436+
case SS_PREV_KEY:
333437
case SS_VALUES:
334438
child_state = linitial(state->cscan_state.custom_ps);
335439
result = child_state->ps.ExecProcNode(&child_state->ps);
@@ -343,10 +447,10 @@ skip_scan_exec(CustomScanState *node)
343447
* also switch stage to look for values greater than
344448
* that in subsequent calls.
345449
*/
346-
if (state->stage == SS_NOT_NULL)
450+
skip_scan_update_key(state, result);
451+
if (state->stage == SS_NOT_NULL || state->stage == SS_PREV_KEY)
347452
skip_scan_switch_stage(state, SS_VALUES);
348453

349-
skip_scan_update_key(state, result);
350454
return result;
351455
}
352456
else
@@ -356,9 +460,15 @@ skip_scan_exec(CustomScanState *node)
356460
* the skip constraint we are either done
357461
* for NULLS FIRST ordering or need to check
358462
* for NULLs if we have NULLS LAST ordering
463+
*
464+
* Or we can move back one key for multikey SkipScan to relax the search,
465+
* i.e. make current key NOT NULL (any value) and change previous search from
466+
* "=" to ">"
359467
*/
360468
if (has_nulls_last(state))
361469
skip_scan_switch_stage(state, SS_NULLS_LAST);
470+
else if (state->current_key > 0)
471+
skip_scan_switch_stage(state, SS_PREV_KEY);
362472
else
363473
skip_scan_switch_stage(state, SS_END);
364474
}
@@ -401,8 +511,11 @@ skip_scan_rescan(CustomScanState *node)
401511
else
402512
skip_scan_switch_stage(state, SS_NOT_NULL);
403513

404-
state->skip_keys[0].prev_is_null = true;
405-
state->skip_keys[0].prev_datum = 0;
514+
for (int i = 0; i < state->num_skip_keys; i++)
515+
{
516+
state->skip_keys[i].prev_is_null = true;
517+
state->skip_keys[i].prev_datum = 0;
518+
}
406519

407520
state->needs_rescan = false;
408521
ScanState *child_state = linitial(state->cscan_state.custom_ps);
@@ -435,24 +548,54 @@ tsl_skip_scan_state_create(CustomScan *cscan)
435548
}
436549
state->stage = SS_BEGIN;
437550

438-
state->num_skip_keys = list_length(cscan->custom_private);
551+
/* set up N skipkeyinfos for N skip keys */
552+
List *skinfos = (List *) linitial(cscan->custom_private);
553+
state->num_skip_keys = list_length(skinfos);
439554
state->skip_keys = palloc(sizeof(SkipKeyData) * state->num_skip_keys);
440555

441556
ListCell *lc;
442557
int i = 0;
443-
foreach (lc, cscan->custom_private)
558+
foreach (lc, skinfos)
444559
{
445560
List *skipkeyinfo = (List *) lfirst(lc);
446561

447-
state->skip_keys[i].distinct_col_attnum = linitial_int(skipkeyinfo);
448-
state->skip_keys[i].distinct_by_val = lsecond_int(skipkeyinfo);
449-
state->skip_keys[i].distinct_typ_len = lthird_int(skipkeyinfo);
450-
state->skip_keys[i].nulls = lfourth_int(skipkeyinfo);
451-
state->skip_keys[i].sk_attno = list_nth_int(skipkeyinfo, 4);
562+
state->skip_keys[i].distinct_col_attnum = list_nth_int(skipkeyinfo, SK_DistinctColAttno);
563+
state->skip_keys[i].distinct_by_val = list_nth_int(skipkeyinfo, SK_DistinctByVal);
564+
state->skip_keys[i].distinct_typ_len = list_nth_int(skipkeyinfo, SK_DistinctTypeLen);
565+
state->skip_keys[i].nulls = list_nth_int(skipkeyinfo, SK_NullStatus);
566+
Assert(state->num_skip_keys == 1 || state->skip_keys[i].nulls == SK_NOT_NULL);
567+
state->skip_keys[i].sk_attno = list_nth_int(skipkeyinfo, SK_IndexKeyAttno);
452568

453569
state->skip_keys[i].prev_is_null = true;
454570
i++;
455571
}
572+
573+
state->eq_funcs = NULL;
574+
state->comp_funcs = NULL;
575+
state->comp_strategies = NULL;
576+
577+
/* set up N-1 equality ops for N skip keys if N>1 */
578+
if (state->num_skip_keys > 1)
579+
{
580+
/* Should have a list of N-1 equality op Oids for N skip keys if N>1 */
581+
Assert(list_length(cscan->custom_private) == 2);
582+
List *eqoids = (List *) lsecond(cscan->custom_private);
583+
584+
state->eq_funcs = palloc(sizeof(FmgrInfo) * (state->num_skip_keys - 1));
585+
state->comp_funcs = palloc(sizeof(FmgrInfo) * (state->num_skip_keys - 1));
586+
state->comp_strategies = palloc(sizeof(StrategyNumber) * (state->num_skip_keys - 1));
587+
588+
int i = 0;
589+
/* Set up "=" sk_funcs for keys 1..N-1 */
590+
foreach (lc, eqoids)
591+
{
592+
Oid eqoid = lfirst_oid(lc);
593+
Assert(OidIsValid(eqoid));
594+
fmgr_info(eqoid, &state->eq_funcs[i++]);
595+
}
596+
Assert(i == state->num_skip_keys - 1);
597+
}
598+
456599
state->cscan_state.methods = &skip_scan_state_methods;
457600
return (Node *) state;
458601
}

0 commit comments

Comments
 (0)