38
38
* | DONE |
39
39
* \===========/
40
40
*
41
+ *
42
+ *
43
+ * N-key SkipScan needs to do 2^N null check stages when using the above scheme,
44
+ * made even more complicated with having to change searches for previous keys.
45
+ *
46
+ * So we made a decision to support multikey SkipScan in NOT NULL mode only.
47
+ *
48
+ * For N-key SkipScan we search with these predicates when current key = K:
49
+ * (key_1 = prev_1),...,(key_K > prev_K),(key_K+1 IS NOT NULL)...(key_N IS NOT NULL)
50
+ *
51
+ * As all skip keys are NOT NULL, "IS NOT NULL" matches any value, so it fetches the next tuple for a key that has no previous value yet.
52
+ *
53
+ * We start the search with K=1 i.e. with these predicates:
54
+ * (key_1 IS NOT NULL),...,(key_N IS NOT NULL).
55
+ *
56
+ * When a tuple is fetched we set K=N as we can fill all previous values, search is now:
57
+ * (key_1 = prev_1),...,(key_N > prev_N)
58
+ *
59
+ * When no tuple is fetched and K>1 we can relax the search and move to the previous key (K-1):
60
+ * (key_1 = prev_1),...,(key_K-1 > prev_K-1),(key_K IS NOT NULL)...(key_N IS NOT NULL)
61
+ *
62
+ * When no tuple is fetched and K=1, we are done.
63
+ *
64
+ * Multikey SkipScan flowchart:
65
+ * start (K=1)
66
+ * | +---------+
67
+ * | | |
68
+ * v v |
69
+ * +=================================+ |
70
+ * | search for NOT NULL after K | |
71
+ * +=================================+ |
72
+ * | | |
73
+ * | found value | |
74
+ * v | |
75
+ * +==============================+ | |
76
+ * | search for values after prev | | |
77
+ * +==============================+ | |
78
+ * | | |
79
+ * | no value | |
80
+ * v v |
81
+ * +======================+ |
82
+ * | K=1 | K>1 |
83
+ * v v |
84
+ * /===========\ +=========+ |
85
+ * | DONE | | K = K-1 |---+
86
+ * \===========/ +=========+
87
+ *
41
88
*/
42
89
43
90
#include <postgres.h>
@@ -58,6 +105,7 @@ typedef enum SkipScanStage
58
105
SS_NOT_NULL ,
59
106
SS_VALUES ,
60
107
SS_NULLS_LAST ,
108
+ SS_PREV_KEY ,
61
109
SS_END ,
62
110
} SkipScanStage ;
63
111
@@ -93,6 +141,17 @@ typedef struct SkipScanState
93
141
int num_skip_keys ;
94
142
SkipKeyData * skip_keys ;
95
143
144
+ /* Skip key with ">" qual, coming after "=" skip quals for multikey SkipScan */
145
+ int current_key ;
146
+
147
+ /* For Multikey SkipScan we keep copies of "sk_func" for "=" and ">" for keys 1..N-1
148
+ * to be swapped during execution.
149
+ */
150
+ FmgrInfo * eq_funcs ;
151
+ /* Will be filled after IndexScan scankeys have been initialized */
152
+ FmgrInfo * comp_funcs ;
153
+ StrategyNumber * comp_strategies ;
154
+
96
155
SkipScanStage stage ;
97
156
98
157
/* rescan required before getting next tuple */
@@ -157,31 +216,44 @@ skip_scan_begin(CustomScanState *node, EState *estate, int eflags)
157
216
/* find position of our skip key
158
217
* skip key is put as first key for the respective column in sort_indexquals
159
218
*/
160
- ScanKey data = * state -> scan_keys ;
219
+ ScanKey scankeydata = * state -> scan_keys ;
161
220
int j = 0 ;
162
221
for (int i = 0 ; i < * state -> num_scan_keys ; i ++ )
163
222
{
164
- if (data [i ].sk_flags == SK_ISNULL && data [i ].sk_attno == state -> skip_keys [j ].sk_attno )
223
+ if (scankeydata [i ].sk_flags == SK_ISNULL &&
224
+ scankeydata [i ].sk_attno == state -> skip_keys [j ].sk_attno )
165
225
{
166
- state -> skip_keys [j ++ ].skip_key = & data [i ];
226
+ SkipKeyData * skipkeydata = & state -> skip_keys [j ++ ];
227
+ skipkeydata -> skip_key = & scankeydata [i ];
228
+ /* Set up ">" sk_func swaps for skip keys 1..N-1 */
229
+ if (j < state -> num_skip_keys )
230
+ {
231
+ state -> comp_strategies [j - 1 ] = scankeydata [i ].sk_strategy ;
232
+ fmgr_info_copy (& state -> comp_funcs [j - 1 ],
233
+ & scankeydata [i ].sk_func ,
234
+ CurrentMemoryContext );
235
+ }
167
236
if (j == state -> num_skip_keys )
168
237
break ;
169
238
}
170
239
}
171
240
if (j < state -> num_skip_keys )
172
241
elog (ERROR , "ScanKey for skip qual not found" );
242
+
243
+ /* when we fetch the 1st tuple we update all skip keys from 0 to N-1 */
244
+ state -> current_key = 0 ;
173
245
}
174
246
175
247
static bool
176
248
has_nulls_first (SkipScanState * state )
177
249
{
178
- return state -> skip_keys [0 ].nulls == SKIPKEY_NULLS_FIRST ;
250
+ return state -> skip_keys [0 ].nulls == SK_NULLS_FIRST ;
179
251
}
180
252
181
253
static bool
182
254
has_nulls_last (SkipScanState * state )
183
255
{
184
- return state -> skip_keys [0 ].nulls == SKIPKEY_NULLS_LAST ;
256
+ return state -> skip_keys [0 ].nulls == SK_NULLS_LAST ;
185
257
}
186
258
187
259
static void
@@ -223,18 +295,48 @@ skip_scan_rescan_index(SkipScanState *state)
223
295
static void
224
296
skip_scan_switch_stage (SkipScanState * state , SkipScanStage new_stage )
225
297
{
226
- Assert (new_stage > state -> stage );
298
+ Assert (new_stage > state -> stage || state -> num_skip_keys > 1 );
227
299
228
300
switch (new_stage )
229
301
{
230
302
case SS_NOT_NULL :
231
- state -> skip_keys [0 ].skip_key -> sk_flags = SK_ISNULL | SK_SEARCHNOTNULL ;
232
- state -> skip_keys [0 ].skip_key -> sk_argument = 0 ;
303
+ for (int i = 0 ; i < state -> num_skip_keys ; i ++ )
304
+ {
305
+ state -> skip_keys [i ].skip_key -> sk_flags = SK_ISNULL | SK_SEARCHNOTNULL ;
306
+ state -> skip_keys [i ].skip_key -> sk_argument = 0 ;
307
+ }
308
+ state -> needs_rescan = true;
309
+ break ;
310
+
311
+ case SS_PREV_KEY :
312
+ /* Done searching with ">" for this key: set this key to NOT NULL i.e. any value,
313
+ * set previous "=" key to search with ">".
314
+ */
315
+ state -> skip_keys [state -> current_key ].skip_key -> sk_flags = SK_ISNULL | SK_SEARCHNOTNULL ;
316
+ state -> current_key -- ;
317
+ state -> skip_keys [state -> current_key ].skip_key -> sk_flags = 0 ;
318
+ fmgr_info_copy (& state -> skip_keys [state -> current_key ].skip_key -> sk_func ,
319
+ & state -> comp_funcs [state -> current_key ],
320
+ CurrentMemoryContext );
321
+ state -> skip_keys [state -> current_key ].skip_key -> sk_strategy =
322
+ state -> comp_strategies [state -> current_key ];
233
323
state -> needs_rescan = true;
234
324
break ;
235
325
236
326
case SS_VALUES :
237
- state -> skip_keys [0 ].skip_key -> sk_flags = 0 ;
327
+ for (int i = 0 ; i < state -> num_skip_keys ; i ++ )
328
+ {
329
+ state -> skip_keys [i ].skip_key -> sk_flags = 0 ;
330
+ /* reset all ">" back to "=" from the current key to N-1 */
331
+ if (i >= state -> current_key && i < state -> num_skip_keys - 1 )
332
+ {
333
+ fmgr_info_copy (& state -> skip_keys [i ].skip_key -> sk_func ,
334
+ & state -> eq_funcs [i ],
335
+ CurrentMemoryContext );
336
+ state -> skip_keys [i ].skip_key -> sk_strategy = BTEqualStrategyNumber ;
337
+ }
338
+ }
339
+ state -> current_key = state -> num_skip_keys - 1 ;
238
340
state -> needs_rescan = true;
239
341
break ;
240
342
@@ -256,31 +358,32 @@ skip_scan_switch_stage(SkipScanState *state, SkipScanStage new_stage)
256
358
static void
257
359
skip_scan_update_key (SkipScanState * state , TupleTableSlot * slot )
258
360
{
259
- if (! state -> skip_keys [ 0 ]. prev_is_null && ! state -> skip_keys [ 0 ]. distinct_by_val )
361
+ for ( int i = state -> current_key ; i < state -> num_skip_keys ; i ++ )
260
362
{
261
- Assert (state -> stage == SS_VALUES );
262
- pfree (DatumGetPointer (state -> skip_keys [0 ].prev_datum ));
263
- }
363
+ if (!state -> skip_keys [i ].prev_is_null && !state -> skip_keys [i ].distinct_by_val )
364
+ {
365
+ Assert (state -> stage == SS_VALUES || state -> num_skip_keys > 1 );
366
+ pfree (DatumGetPointer (state -> skip_keys [i ].prev_datum ));
367
+ }
264
368
265
- MemoryContext old_ctx = MemoryContextSwitchTo (state -> ctx );
266
- state -> skip_keys [0 ].prev_datum = slot_getattr (slot ,
267
- state -> skip_keys [0 ].distinct_col_attnum ,
268
- & state -> skip_keys [0 ].prev_is_null );
269
- if (state -> skip_keys [0 ].prev_is_null )
270
- {
271
- state -> skip_keys [0 ].skip_key -> sk_flags = SK_ISNULL ;
272
- state -> skip_keys [0 ].skip_key -> sk_argument = 0 ;
273
- }
274
- else
275
- {
276
- state -> skip_keys [0 ].prev_datum = datumCopy (state -> skip_keys [0 ].prev_datum ,
277
- state -> skip_keys [0 ].distinct_by_val ,
278
- state -> skip_keys [0 ].distinct_typ_len );
279
- state -> skip_keys [0 ].skip_key -> sk_argument = state -> skip_keys [0 ].prev_datum ;
369
+ MemoryContext old_ctx = MemoryContextSwitchTo (state -> ctx );
370
+ state -> skip_keys [i ].prev_datum = slot_getattr (slot ,
371
+ state -> skip_keys [i ].distinct_col_attnum ,
372
+ & state -> skip_keys [i ].prev_is_null );
373
+ if (state -> skip_keys [i ].prev_is_null )
374
+ {
375
+ state -> skip_keys [i ].skip_key -> sk_flags = SK_ISNULL ;
376
+ state -> skip_keys [i ].skip_key -> sk_argument = 0 ;
377
+ }
378
+ else
379
+ {
380
+ state -> skip_keys [i ].prev_datum = datumCopy (state -> skip_keys [i ].prev_datum ,
381
+ state -> skip_keys [i ].distinct_by_val ,
382
+ state -> skip_keys [i ].distinct_typ_len );
383
+ state -> skip_keys [i ].skip_key -> sk_argument = state -> skip_keys [i ].prev_datum ;
384
+ }
385
+ MemoryContextSwitchTo (old_ctx );
280
386
}
281
-
282
- MemoryContextSwitchTo (old_ctx );
283
-
284
387
/* we need to do a rescan whenever we modify the ScanKey */
285
388
state -> needs_rescan = true;
286
389
}
@@ -330,6 +433,7 @@ skip_scan_exec(CustomScanState *node)
330
433
break ;
331
434
332
435
case SS_NOT_NULL :
436
+ case SS_PREV_KEY :
333
437
case SS_VALUES :
334
438
child_state = linitial (state -> cscan_state .custom_ps );
335
439
result = child_state -> ps .ExecProcNode (& child_state -> ps );
@@ -343,10 +447,10 @@ skip_scan_exec(CustomScanState *node)
343
447
* also switch stage to look for values greater than
344
448
* that in subsequent calls.
345
449
*/
346
- if (state -> stage == SS_NOT_NULL )
450
+ skip_scan_update_key (state , result );
451
+ if (state -> stage == SS_NOT_NULL || state -> stage == SS_PREV_KEY )
347
452
skip_scan_switch_stage (state , SS_VALUES );
348
453
349
- skip_scan_update_key (state , result );
350
454
return result ;
351
455
}
352
456
else
@@ -356,9 +460,15 @@ skip_scan_exec(CustomScanState *node)
356
460
* the skip constraint we are either done
357
461
* for NULLS FIRST ordering or need to check
358
462
* for NULLs if we have NULLS LAST ordering
463
+ *
464
+ * Or we can move back one key for multikey SkipScan to relax the search,
465
+ * i.e. make current key NOT NULL (any value) and change previous search from
466
+ * "=" to ">"
359
467
*/
360
468
if (has_nulls_last (state ))
361
469
skip_scan_switch_stage (state , SS_NULLS_LAST );
470
+ else if (state -> current_key > 0 )
471
+ skip_scan_switch_stage (state , SS_PREV_KEY );
362
472
else
363
473
skip_scan_switch_stage (state , SS_END );
364
474
}
@@ -401,8 +511,11 @@ skip_scan_rescan(CustomScanState *node)
401
511
else
402
512
skip_scan_switch_stage (state , SS_NOT_NULL );
403
513
404
- state -> skip_keys [0 ].prev_is_null = true;
405
- state -> skip_keys [0 ].prev_datum = 0 ;
514
+ for (int i = 0 ; i < state -> num_skip_keys ; i ++ )
515
+ {
516
+ state -> skip_keys [i ].prev_is_null = true;
517
+ state -> skip_keys [i ].prev_datum = 0 ;
518
+ }
406
519
407
520
state -> needs_rescan = false;
408
521
ScanState * child_state = linitial (state -> cscan_state .custom_ps );
@@ -435,24 +548,54 @@ tsl_skip_scan_state_create(CustomScan *cscan)
435
548
}
436
549
state -> stage = SS_BEGIN ;
437
550
438
- state -> num_skip_keys = list_length (cscan -> custom_private );
551
+ /* set up N skipkeyinfos for N skip keys */
552
+ List * skinfos = (List * ) linitial (cscan -> custom_private );
553
+ state -> num_skip_keys = list_length (skinfos );
439
554
state -> skip_keys = palloc (sizeof (SkipKeyData ) * state -> num_skip_keys );
440
555
441
556
ListCell * lc ;
442
557
int i = 0 ;
443
- foreach (lc , cscan -> custom_private )
558
+ foreach (lc , skinfos )
444
559
{
445
560
List * skipkeyinfo = (List * ) lfirst (lc );
446
561
447
- state -> skip_keys [i ].distinct_col_attnum = linitial_int (skipkeyinfo );
448
- state -> skip_keys [i ].distinct_by_val = lsecond_int (skipkeyinfo );
449
- state -> skip_keys [i ].distinct_typ_len = lthird_int (skipkeyinfo );
450
- state -> skip_keys [i ].nulls = lfourth_int (skipkeyinfo );
451
- state -> skip_keys [i ].sk_attno = list_nth_int (skipkeyinfo , 4 );
562
+ state -> skip_keys [i ].distinct_col_attnum = list_nth_int (skipkeyinfo , SK_DistinctColAttno );
563
+ state -> skip_keys [i ].distinct_by_val = list_nth_int (skipkeyinfo , SK_DistinctByVal );
564
+ state -> skip_keys [i ].distinct_typ_len = list_nth_int (skipkeyinfo , SK_DistinctTypeLen );
565
+ state -> skip_keys [i ].nulls = list_nth_int (skipkeyinfo , SK_NullStatus );
566
+ Assert (state -> num_skip_keys == 1 || state -> skip_keys [i ].nulls == SK_NOT_NULL );
567
+ state -> skip_keys [i ].sk_attno = list_nth_int (skipkeyinfo , SK_IndexKeyAttno );
452
568
453
569
state -> skip_keys [i ].prev_is_null = true;
454
570
i ++ ;
455
571
}
572
+
573
+ state -> eq_funcs = NULL ;
574
+ state -> comp_funcs = NULL ;
575
+ state -> comp_strategies = NULL ;
576
+
577
+ /* set up N-1 equality ops for N skip keys if N>1 */
578
+ if (state -> num_skip_keys > 1 )
579
+ {
580
+ /* Should have a list of N-1 equality op Oids for N skip keys if N>1 */
581
+ Assert (list_length (cscan -> custom_private ) == 2 );
582
+ List * eqoids = (List * ) lsecond (cscan -> custom_private );
583
+
584
+ state -> eq_funcs = palloc (sizeof (FmgrInfo ) * (state -> num_skip_keys - 1 ));
585
+ state -> comp_funcs = palloc (sizeof (FmgrInfo ) * (state -> num_skip_keys - 1 ));
586
+ state -> comp_strategies = palloc (sizeof (StrategyNumber ) * (state -> num_skip_keys - 1 ));
587
+
588
+ int i = 0 ;
589
+ /* Set up "=" sk_funcs for keys 1..N-1 */
590
+ foreach (lc , eqoids )
591
+ {
592
+ Oid eqoid = lfirst_oid (lc );
593
+ Assert (OidIsValid (eqoid ));
594
+ fmgr_info (eqoid , & state -> eq_funcs [i ++ ]);
595
+ }
596
+ Assert (i == state -> num_skip_keys - 1 );
597
+ }
598
+
456
599
state -> cscan_state .methods = & skip_scan_state_methods ;
457
600
return (Node * ) state ;
458
601
}
0 commit comments