diff options
Diffstat (limited to 'src')
29 files changed, 2889 insertions, 361 deletions
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 55ec4c10352..219df1971da 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -489,7 +489,8 @@ index_parallelscan_estimate(Relation indexRelation, int nkeys, int norderbys, if (parallel_aware && indexRelation->rd_indam->amestimateparallelscan != NULL) nbytes = add_size(nbytes, - indexRelation->rd_indam->amestimateparallelscan(nkeys, + indexRelation->rd_indam->amestimateparallelscan(indexRelation, + nkeys, norderbys)); return nbytes; diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c index 291cb8fc15d..4da5a3c1d16 100644 --- a/src/backend/access/nbtree/nbtcompare.c +++ b/src/backend/access/nbtree/nbtcompare.c @@ -58,6 +58,7 @@ #include <limits.h> #include "utils/fmgrprotos.h" +#include "utils/skipsupport.h" #include "utils/sortsupport.h" #ifdef STRESS_SORT_INT_MIN @@ -78,6 +79,51 @@ btboolcmp(PG_FUNCTION_ARGS) PG_RETURN_INT32((int32) a - (int32) b); } +static Datum +bool_decrement(Relation rel, Datum existing, bool *underflow) +{ + bool bexisting = DatumGetBool(existing); + + if (bexisting == false) + { + /* return value is undefined */ + *underflow = true; + return (Datum) 0; + } + + *underflow = false; + return BoolGetDatum(bexisting - 1); +} + +static Datum +bool_increment(Relation rel, Datum existing, bool *overflow) +{ + bool bexisting = DatumGetBool(existing); + + if (bexisting == true) + { + /* return value is undefined */ + *overflow = true; + return (Datum) 0; + } + + *overflow = false; + return BoolGetDatum(bexisting + 1); +} + +Datum +btboolskipsupport(PG_FUNCTION_ARGS) +{ + SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0); + + sksup->decrement = bool_decrement; + sksup->increment = bool_increment; + sksup->low_elem = BoolGetDatum(false); + sksup->high_elem = BoolGetDatum(true); + + PG_RETURN_VOID(); +} + Datum btint2cmp(PG_FUNCTION_ARGS) { @@ -105,6 +151,51 @@ btint2sortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +static Datum +int2_decrement(Relation rel, Datum existing, bool *underflow) +{ + int16 iexisting = DatumGetInt16(existing); + + if (iexisting == PG_INT16_MIN) + { + /* return value is undefined */ + *underflow = true; + return (Datum) 0; + } + + *underflow = false; + return Int16GetDatum(iexisting - 1); +} + +static Datum +int2_increment(Relation rel, Datum existing, bool *overflow) +{ + int16 iexisting = DatumGetInt16(existing); + + if (iexisting == PG_INT16_MAX) + { + /* return value is undefined */ + *overflow = true; + return (Datum) 0; + } + + *overflow = false; + return Int16GetDatum(iexisting + 1); +} + +Datum +btint2skipsupport(PG_FUNCTION_ARGS) +{ + SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0); + + sksup->decrement = int2_decrement; + sksup->increment = int2_increment; + sksup->low_elem = Int16GetDatum(PG_INT16_MIN); + sksup->high_elem = Int16GetDatum(PG_INT16_MAX); + + PG_RETURN_VOID(); +} + Datum btint4cmp(PG_FUNCTION_ARGS) { @@ -128,6 +219,51 @@ btint4sortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +static Datum +int4_decrement(Relation rel, Datum existing, bool *underflow) +{ + int32 iexisting = DatumGetInt32(existing); + + if (iexisting == PG_INT32_MIN) + { + /* return value is undefined */ + *underflow = true; + return (Datum) 0; + } + + *underflow = false; + return Int32GetDatum(iexisting - 1); +} + +static Datum +int4_increment(Relation rel, Datum existing, bool *overflow) +{ + int32 iexisting = DatumGetInt32(existing); + + if (iexisting == PG_INT32_MAX) + { + /* return value is undefined */ + *overflow = true; + return (Datum) 0; + } + + *overflow = false; + return Int32GetDatum(iexisting + 1); +} + +Datum +btint4skipsupport(PG_FUNCTION_ARGS) +{ + SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0); + + sksup->decrement = int4_decrement; + sksup->increment = int4_increment; + sksup->low_elem = Int32GetDatum(PG_INT32_MIN); + sksup->high_elem = Int32GetDatum(PG_INT32_MAX); + + PG_RETURN_VOID(); +} + Datum btint8cmp(PG_FUNCTION_ARGS) { @@ -171,6 +307,51 @@ btint8sortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +static Datum +int8_decrement(Relation rel, Datum existing, bool *underflow) +{ + int64 iexisting = DatumGetInt64(existing); + + if (iexisting == PG_INT64_MIN) + { + /* return value is undefined */ + *underflow = true; + return (Datum) 0; + } + + *underflow = false; + return Int64GetDatum(iexisting - 1); +} + +static Datum +int8_increment(Relation rel, Datum existing, bool *overflow) +{ + int64 iexisting = DatumGetInt64(existing); + + if (iexisting == PG_INT64_MAX) + { + /* return value is undefined */ + *overflow = true; + return (Datum) 0; + } + + *overflow = false; + return Int64GetDatum(iexisting + 1); +} + +Datum +btint8skipsupport(PG_FUNCTION_ARGS) +{ + SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0); + + sksup->decrement = int8_decrement; + sksup->increment = int8_increment; + sksup->low_elem = Int64GetDatum(PG_INT64_MIN); + sksup->high_elem = Int64GetDatum(PG_INT64_MAX); + + PG_RETURN_VOID(); +} + Datum btint48cmp(PG_FUNCTION_ARGS) { @@ -292,6 +473,51 @@ btoidsortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +static Datum +oid_decrement(Relation rel, Datum existing, bool *underflow) +{ + Oid oexisting = DatumGetObjectId(existing); + + if (oexisting == InvalidOid) + { + /* return value is undefined */ + *underflow = true; + return (Datum) 0; + } + + *underflow = false; + return ObjectIdGetDatum(oexisting - 1); +} + +static Datum +oid_increment(Relation rel, Datum existing, bool *overflow) +{ + Oid oexisting = DatumGetObjectId(existing); + + if (oexisting == OID_MAX) + { + /* return value is undefined */ + *overflow = true; + return (Datum) 0; + } + + *overflow = false; + return ObjectIdGetDatum(oexisting + 1); +} + +Datum +btoidskipsupport(PG_FUNCTION_ARGS) +{ + SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0); + + sksup->decrement = oid_decrement; + sksup->increment = oid_increment; + sksup->low_elem = ObjectIdGetDatum(InvalidOid); + sksup->high_elem = ObjectIdGetDatum(OID_MAX); + + PG_RETURN_VOID(); +} + Datum btoidvectorcmp(PG_FUNCTION_ARGS) { @@ -325,3 +551,50 @@ btcharcmp(PG_FUNCTION_ARGS) /* Be careful to compare chars as unsigned */ PG_RETURN_INT32((int32) ((uint8) a) - (int32) ((uint8) b)); } + +static Datum +char_decrement(Relation rel, Datum existing, bool *underflow) +{ + uint8 cexisting = UInt8GetDatum(existing); + + if (cexisting == 0) + { + /* return value is undefined */ + *underflow = true; + return (Datum) 0; + } + + *underflow = false; + return CharGetDatum((uint8) cexisting - 1); +} + +static Datum +char_increment(Relation rel, Datum existing, bool *overflow) +{ + uint8 cexisting = UInt8GetDatum(existing); + + if (cexisting == UCHAR_MAX) + { + /* return value is undefined */ + *overflow = true; + return (Datum) 0; + } + + *overflow = false; + return CharGetDatum((uint8) cexisting + 1); +} + +Datum +btcharskipsupport(PG_FUNCTION_ARGS) +{ + SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0); + + sksup->decrement = char_decrement; + sksup->increment = char_increment; + + /* btcharcmp compares chars as unsigned */ + sksup->low_elem = UInt8GetDatum(0); + sksup->high_elem = UInt8GetDatum(UCHAR_MAX); + + PG_RETURN_VOID(); +} diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c index 38a87af1cc8..791aa4ece40 100644 --- a/src/backend/access/nbtree/nbtpreprocesskeys.c +++ b/src/backend/access/nbtree/nbtpreprocesskeys.c @@ -45,8 +45,15 @@ static bool _bt_compare_array_scankey_args(IndexScanDesc scan, ScanKey arraysk, ScanKey skey, FmgrInfo *orderproc, BTArrayKeyInfo *array, bool *qual_ok); +static bool _bt_saoparray_shrink(IndexScanDesc scan, ScanKey arraysk, + ScanKey skey, FmgrInfo *orderproc, + BTArrayKeyInfo *array, bool *qual_ok); +static bool _bt_skiparray_shrink(IndexScanDesc scan, ScanKey skey, + BTArrayKeyInfo *array, bool *qual_ok); static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys); static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap); +static int _bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops_out, + int *numSkipArrayKeys_out); static Datum _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey, Oid elemtype, StrategyNumber strat, Datum *elems, int nelems); @@ -89,6 +96,8 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg); * within each attribute may be done as a byproduct of the processing here. * That process must leave array scan keys (within an attribute) in the same * order as corresponding entries from the scan's BTArrayKeyInfo array info. + * We might also construct skip array scan keys that weren't present in the + * original input keys; these are also output in standard attribute order. * * The output keys are marked with flags SK_BT_REQFWD and/or SK_BT_REQBKWD * if they must be satisfied in order to continue the scan forward or backward @@ -101,10 +110,16 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg); * attributes with "=" keys are marked both SK_BT_REQFWD and SK_BT_REQBKWD. * For the first attribute without an "=" key, any "<" and "<=" keys are * marked SK_BT_REQFWD while any ">" and ">=" keys are marked SK_BT_REQBKWD. - * This can be seen to be correct by considering the above example. Note - * in particular that if there are no keys for a given attribute, the keys for - * subsequent attributes can never be required; for instance "WHERE y = 4" - * requires a full-index scan. + * This can be seen to be correct by considering the above example. + * + * If we never generated skip array scan keys, it would be possible for "gaps" + * to appear that make it unsafe to mark any subsequent input scan keys + * (copied from scan->keyData[]) as required to continue the scan. Prior to + * Postgres 18, a qual like "WHERE y = 4" always resulted in a full scan. + * This qual now becomes "WHERE x = ANY('{every possible x value}') and y = 4" + * on output. In other words, preprocessing now adds a skip array on "x". + * This has the potential to be much more efficient than a full index scan + * (though it behaves like a full scan when there's many distinct "x" values). * * If possible, redundant keys are eliminated: we keep only the tightest * >/>= bound and the tightest </<= bound, and if there's an = key then @@ -137,11 +152,20 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg); * Again, missing cross-type operators might cause us to fail to prove the * quals contradictory when they really are, but the scan will work correctly. * - * Row comparison keys are currently also treated without any smarts: - * we just transfer them into the preprocessed array without any + * Skip array = keys will even be generated in the presence of "contradictory" + * inequality quals when it'll enable marking later input quals as required. + * We'll merge any such inequalities into the generated skip array by setting + * its array.low_compare or array.high_compare key field. The resulting skip + * array will generate its array elements from a range that's constrained by + * any merged input inequalities (which won't get output in so->keyData[]). + * + * Row comparison keys currently have a couple of notable limitations. + * Right now we just transfer them into the preprocessed array without any * editorialization. We can treat them the same as an ordinary inequality * comparison on the row's first index column, for the purposes of the logic - * about required keys. + * about required keys. Also, we are unable to merge a row comparison key + * into a skip array (only ordinary inequalities are merged). A key that + * comes after a Row comparison key is therefore never marked as required. * * Note: the reason we have to copy the preprocessed scan keys into private * storage is that we are modifying the array based on comparisons of the @@ -200,6 +224,14 @@ _bt_preprocess_keys(IndexScanDesc scan) /* Also maintain keyDataMap for remapping so->orderProcs[] later */ keyDataMap = MemoryContextAlloc(so->arrayContext, numberOfKeys * sizeof(int)); + + /* + * Also enlarge output array when it might otherwise not have room for + * a skip array's scan key + */ + if (numberOfKeys > scan->numberOfKeys) + so->keyData = repalloc(so->keyData, + numberOfKeys * sizeof(ScanKeyData)); } else inkeys = scan->keyData; @@ -229,6 +261,7 @@ _bt_preprocess_keys(IndexScanDesc scan) Assert(so->keyData[0].sk_flags & SK_SEARCHARRAY); Assert(so->keyData[0].sk_strategy != BTEqualStrategyNumber || (so->arrayKeys[0].scan_key == 0 && + !(so->keyData[0].sk_flags & SK_BT_SKIP) && OidIsValid(so->orderProcs[0].fn_oid))); } @@ -288,7 +321,8 @@ _bt_preprocess_keys(IndexScanDesc scan) * redundant. Note that this is no less true if the = key is * SEARCHARRAY; the only real difference is that the inequality * key _becomes_ redundant by making _bt_compare_scankey_args - * eliminate the subset of elements that won't need to be matched. + * eliminate the subset of elements that won't need to be matched + * (with SAOP arrays and skip arrays alike). * * If we have a case like "key = 1 AND key > 2", we set qual_ok to * false and abandon further processing. We'll do the same thing @@ -345,7 +379,6 @@ _bt_preprocess_keys(IndexScanDesc scan) return; } /* else discard the redundant non-equality key */ - Assert(!array || array->num_elems > 0); xform[j].inkey = NULL; xform[j].inkeyi = -1; } @@ -393,6 +426,11 @@ _bt_preprocess_keys(IndexScanDesc scan) * Emit the cleaned-up keys into the so->keyData[] array, and then * mark them if they are required. They are required (possibly * only in one direction) if all attrs before this one had "=". + * + * In practice we'll rarely output non-required scan keys here; + * typically, _bt_preprocess_array_keys has already added "=" keys + * sufficient to form an unbroken series of "=" constraints on all + * attrs prior to the attr from the final scan->keyData[] key. */ for (j = BTMaxStrategyNumber; --j >= 0;) { @@ -481,6 +519,7 @@ _bt_preprocess_keys(IndexScanDesc scan) Assert(array->scan_key == i); Assert(OidIsValid(orderproc->fn_oid)); + Assert(!(inkey->sk_flags & SK_BT_SKIP)); } else if (xform[j].inkey->sk_flags & SK_SEARCHARRAY) { @@ -489,6 +528,7 @@ _bt_preprocess_keys(IndexScanDesc scan) Assert(array->scan_key == xform[j].inkeyi); Assert(OidIsValid(orderproc->fn_oid)); + Assert(!(xform[j].inkey->sk_flags & SK_BT_SKIP)); } /* @@ -508,8 +548,6 @@ _bt_preprocess_keys(IndexScanDesc scan) /* Have all we need to determine redundancy */ if (test_result) { - Assert(!array || array->num_elems > 0); - /* * New key is more restrictive, and so replaces old key... */ @@ -803,6 +841,9 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, cmp_op; StrategyNumber strat; + Assert(!((leftarg->sk_flags | rightarg->sk_flags) & + (SK_ROW_HEADER | SK_ROW_MEMBER))); + /* * First, deal with cases where one or both args are NULL. This should * only happen when the scankeys represent IS NULL/NOT NULL conditions. @@ -812,6 +853,22 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, bool leftnull, rightnull; + /* Handle skip array comparison with IS NOT NULL scan key */ + if ((leftarg->sk_flags | rightarg->sk_flags) & SK_BT_SKIP) + { + /* Shouldn't generate skip array in presence of IS NULL key */ + Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_SEARCHNULL)); + Assert((leftarg->sk_flags | rightarg->sk_flags) & SK_SEARCHNOTNULL); + + /* Skip array will have no NULL element/IS NULL scan key */ + Assert(array->num_elems == -1); + array->null_elem = false; + + /* IS NOT NULL key (could be leftarg or rightarg) now redundant */ + *result = true; + return true; + } + if (leftarg->sk_flags & SK_ISNULL) { Assert(leftarg->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL)); @@ -885,6 +942,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, { /* Can't make the comparison */ *result = false; /* suppress compiler warnings */ + Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_BT_SKIP)); return false; } @@ -978,24 +1036,56 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, * Compare an array scan key to a scalar scan key, eliminating contradictory * array elements such that the scalar scan key becomes redundant. * + * If the opfamily is incomplete we may not be able to determine which + * elements are contradictory. When we return true we'll have validly set + * *qual_ok, guaranteeing that at least the scalar scan key can be considered + * redundant. We return false if the comparison could not be made (caller + * must keep both scan keys when this happens). + * + * Note: it's up to caller to deal with IS [NOT] NULL scan keys, as well as + * row comparison scan keys. We only deal with scalar scan keys. + */ +static bool +_bt_compare_array_scankey_args(IndexScanDesc scan, ScanKey arraysk, ScanKey skey, + FmgrInfo *orderproc, BTArrayKeyInfo *array, + bool *qual_ok) +{ + Assert(arraysk->sk_attno == skey->sk_attno); + Assert(!(arraysk->sk_flags & (SK_ISNULL | SK_ROW_HEADER | SK_ROW_MEMBER))); + Assert((arraysk->sk_flags & SK_SEARCHARRAY) && + arraysk->sk_strategy == BTEqualStrategyNumber); + /* don't expect to have to deal with NULLs/row comparison scan keys */ + Assert(!(skey->sk_flags & (SK_ISNULL | SK_ROW_HEADER | SK_ROW_MEMBER))); + Assert(!(skey->sk_flags & SK_SEARCHARRAY) || + skey->sk_strategy != BTEqualStrategyNumber); + + /* + * Just call the appropriate helper function based on whether it's a SAOP + * array or a skip array. Both helpers will set *qual_ok in passing. + */ + if (array->num_elems != -1) + return _bt_saoparray_shrink(scan, arraysk, skey, orderproc, array, + qual_ok); + else + return _bt_skiparray_shrink(scan, skey, array, qual_ok); +} + +/* + * Preprocessing of SAOP array scan key, used to determine which array + * elements are eliminated as contradictory by a non-array scalar key. + * + * _bt_compare_array_scankey_args helper function. + * * Array elements can be eliminated as contradictory when excluded by some * other operator on the same attribute. For example, with an index scan qual * "WHERE a IN (1, 2, 3) AND a < 2", all array elements except the value "1" * are eliminated, and the < scan key is eliminated as redundant. Cases where * every array element is eliminated by a redundant scalar scan key have an * unsatisfiable qual, which we handle by setting *qual_ok=false for caller. - * - * If the opfamily doesn't supply a complete set of cross-type ORDER procs we - * may not be able to determine which elements are contradictory. If we have - * the required ORDER proc then we return true (and validly set *qual_ok), - * guaranteeing that at least the scalar scan key can be considered redundant. - * We return false if the comparison could not be made (caller must keep both - * scan keys when this happens). */ static bool -_bt_compare_array_scankey_args(IndexScanDesc scan, ScanKey arraysk, ScanKey skey, - FmgrInfo *orderproc, BTArrayKeyInfo *array, - bool *qual_ok) +_bt_saoparray_shrink(IndexScanDesc scan, ScanKey arraysk, ScanKey skey, + FmgrInfo *orderproc, BTArrayKeyInfo *array, bool *qual_ok) { Relation rel = scan->indexRelation; Oid opcintype = rel->rd_opcintype[arraysk->sk_attno - 1]; @@ -1006,14 +1096,8 @@ _bt_compare_array_scankey_args(IndexScanDesc scan, ScanKey arraysk, ScanKey skey FmgrInfo crosstypeproc; FmgrInfo *orderprocp = orderproc; - Assert(arraysk->sk_attno == skey->sk_attno); Assert(array->num_elems > 0); - Assert(!(arraysk->sk_flags & (SK_ISNULL | SK_ROW_HEADER | SK_ROW_MEMBER))); - Assert((arraysk->sk_flags & SK_SEARCHARRAY) && - arraysk->sk_strategy == BTEqualStrategyNumber); - Assert(!(skey->sk_flags & (SK_ISNULL | SK_ROW_HEADER | SK_ROW_MEMBER))); - Assert(!(skey->sk_flags & SK_SEARCHARRAY) || - skey->sk_strategy != BTEqualStrategyNumber); + Assert(!(arraysk->sk_flags & SK_BT_SKIP)); /* * _bt_binsrch_array_skey searches an array for the entry best matching a @@ -1113,6 +1197,106 @@ _bt_compare_array_scankey_args(IndexScanDesc scan, ScanKey arraysk, ScanKey skey } /* + * Preprocessing of skip array scan key, used to determine redundancy against + * a non-array scalar scan key (must be an inequality). + * + * _bt_compare_array_scankey_args helper function. + * + * Skip arrays work by procedurally generating their elements as needed, so we + * just store the inequality as the skip array's low_compare or high_compare + * (except when there's already a more restrictive low_compare/high_compare). + * The array's final elements are the range of values that still satisfy the + * array's final low_compare and high_compare. + */ +static bool +_bt_skiparray_shrink(IndexScanDesc scan, ScanKey skey, BTArrayKeyInfo *array, + bool *qual_ok) +{ + bool test_result; + + Assert(array->num_elems == -1); + + /* + * Array's index attribute will be constrained by a strict operator/key. + * Array must not "contain a NULL element" (i.e. the scan must not apply + * "IS NULL" qual when it reaches the end of the index that stores NULLs). + */ + array->null_elem = false; + *qual_ok = true; + + /* + * Consider if we should treat caller's scalar scan key as the skip + * array's high_compare or low_compare. + * + * In general the current array element must either be a copy of a value + * taken from an index tuple, or a derivative value generated by opclass's + * skip support function. That way the scan can always safely assume that + * it's okay to use the only-input-opclass-type proc from so->orderProcs[] + * (they can be cross-type with SAOP arrays, but never with skip arrays). + * + * This approach is enabled by MINVAL/MAXVAL sentinel key markings, which + * can be thought of as representing either the lowest or highest matching + * array element (excluding the NULL element, where applicable, though as + * just discussed it isn't applicable to this range skip array anyway). + * Array keys marked MINVAL/MAXVAL never have a valid datum in their + * sk_argument field. The scan directly applies the array's low_compare + * key when it encounters MINVAL in the array key proper (just as it + * applies high_compare when it sees MAXVAL set in the array key proper). + * The scan must never use the array's so->orderProcs[] proc against + * low_compare's/high_compare's sk_argument, either (so->orderProcs[] is + * only intended to be used with rhs datums from the array proper/index). + */ + switch (skey->sk_strategy) + { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: + if (array->high_compare) + { + /* replace existing high_compare with caller's key? */ + if (!_bt_compare_scankey_args(scan, array->high_compare, skey, + array->high_compare, NULL, NULL, + &test_result)) + return false; /* can't determine more restrictive key */ + + if (!test_result) + return true; /* no, just discard caller's key */ + + /* yes, replace existing high_compare with caller's key */ + } + + /* caller's key becomes skip array's high_compare */ + array->high_compare = skey; + break; + case BTGreaterEqualStrategyNumber: + case BTGreaterStrategyNumber: + if (array->low_compare) + { + /* replace existing low_compare with caller's key? */ + if (!_bt_compare_scankey_args(scan, array->low_compare, skey, + array->low_compare, NULL, NULL, + &test_result)) + return false; /* can't determine more restrictive key */ + + if (!test_result) + return true; /* no, just discard caller's key */ + + /* yes, replace existing low_compare with caller's key */ + } + + /* caller's key becomes skip array's low_compare */ + array->low_compare = skey; + break; + case BTEqualStrategyNumber: + default: + elog(ERROR, "unrecognized StrategyNumber: %d", + (int) skey->sk_strategy); + break; + } + + return true; +} + +/* * _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys * * If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and @@ -1137,6 +1321,12 @@ _bt_compare_array_scankey_args(IndexScanDesc scan, ScanKey arraysk, ScanKey skey * one equality strategy array scan key per index attribute. We'll always be * able to set things up that way when complete opfamilies are used. * + * We're also responsible for generating skip arrays (and their associated + * scan keys) here. This enables skip scan. We do this for index attributes + * that initially lacked an equality condition within scan->keyData[], iff + * doing so allows a later scan key (that was passed to us in scan->keyData[]) + * to be marked required by our _bt_preprocess_keys caller. + * * We set the scan key references from the scan's BTArrayKeyInfo info array to * offsets into the temp modified input array returned to caller. Scans that * have array keys should call _bt_preprocess_array_keys_final when standard @@ -1144,50 +1334,46 @@ _bt_compare_array_scankey_args(IndexScanDesc scan, ScanKey arraysk, ScanKey skey * references into references to the scan's so->keyData[] output scan keys. * * Note: the reason we need to return a temp scan key array, rather than just - * scribbling on scan->keyData, is that callers are permitted to call btrescan - * without supplying a new set of scankey data. + * modifying scan->keyData[], is that callers are permitted to call btrescan + * without supplying a new set of scankey data. Certain other preprocessing + * routines (e.g., _bt_fix_scankey_strategy) _can_ modify scan->keyData[], but + * we can't make that work here because our modifications are non-idempotent. */ static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys) { BTScanOpaque so = (BTScanOpaque) scan->opaque; Relation rel = scan->indexRelation; - int numberOfKeys = scan->numberOfKeys; int16 *indoption = rel->rd_indoption; + Oid skip_eq_ops[INDEX_MAX_KEYS]; int numArrayKeys, - output_ikey = 0; + numSkipArrayKeys, + numArrayKeyData; + AttrNumber attno_skip = 1; int origarrayatt = InvalidAttrNumber, origarraykey = -1; Oid origelemtype = InvalidOid; - ScanKey cur; MemoryContext oldContext; ScanKey arrayKeyData; /* modified copy of scan->keyData */ - Assert(numberOfKeys); - - /* Quick check to see if there are any array keys */ - numArrayKeys = 0; - for (int i = 0; i < numberOfKeys; i++) - { - cur = &scan->keyData[i]; - if (cur->sk_flags & SK_SEARCHARRAY) - { - numArrayKeys++; - Assert(!(cur->sk_flags & (SK_ROW_HEADER | SK_SEARCHNULL | SK_SEARCHNOTNULL))); - /* If any arrays are null as a whole, we can quit right now. */ - if (cur->sk_flags & SK_ISNULL) - { - so->qual_ok = false; - return NULL; - } - } - } + /* + * Check the number of input array keys within scan->keyData[] input keys + * (also checks if we should add extra skip arrays based on input keys) + */ + numArrayKeys = _bt_num_array_keys(scan, skip_eq_ops, &numSkipArrayKeys); /* Quit if nothing to do. */ if (numArrayKeys == 0) return NULL; /* + * Estimated final size of arrayKeyData[] array we'll return to our caller + * is the size of the original scan->keyData[] input array, plus space for + * any additional skip array scan keys we'll need to generate below + */ + numArrayKeyData = scan->numberOfKeys + numSkipArrayKeys; + + /* * Make a scan-lifespan context to hold array-associated data, or reset it * if we already have one from a previous rescan cycle. */ @@ -1201,18 +1387,20 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys) oldContext = MemoryContextSwitchTo(so->arrayContext); /* Create output scan keys in the workspace context */ - arrayKeyData = (ScanKey) palloc(numberOfKeys * sizeof(ScanKeyData)); + arrayKeyData = (ScanKey) palloc(numArrayKeyData * sizeof(ScanKeyData)); /* Allocate space for per-array data in the workspace context */ so->arrayKeys = (BTArrayKeyInfo *) palloc(numArrayKeys * sizeof(BTArrayKeyInfo)); /* Allocate space for ORDER procs used to help _bt_checkkeys */ - so->orderProcs = (FmgrInfo *) palloc(numberOfKeys * sizeof(FmgrInfo)); + so->orderProcs = (FmgrInfo *) palloc(numArrayKeyData * sizeof(FmgrInfo)); - /* Now process each array key */ numArrayKeys = 0; - for (int input_ikey = 0; input_ikey < numberOfKeys; input_ikey++) + numArrayKeyData = 0; + for (int input_ikey = 0; input_ikey < scan->numberOfKeys; input_ikey++) { + ScanKey inkey = scan->keyData + input_ikey, + cur; FmgrInfo sortproc; FmgrInfo *sortprocp = &sortproc; Oid elemtype; @@ -1225,22 +1413,114 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys) Datum *elem_values; bool *elem_nulls; int num_nonnulls; - int j; + + /* set up next output scan key */ + cur = &arrayKeyData[numArrayKeyData]; + + /* Backfill skip arrays for attrs < or <= input key's attr? */ + while (numSkipArrayKeys && attno_skip <= inkey->sk_attno) + { + Oid opfamily = rel->rd_opfamily[attno_skip - 1]; + Oid opcintype = rel->rd_opcintype[attno_skip - 1]; + Oid collation = rel->rd_indcollation[attno_skip - 1]; + Oid eq_op = skip_eq_ops[attno_skip - 1]; + CompactAttribute *attr; + RegProcedure cmp_proc; + + if (!OidIsValid(eq_op)) + { + /* + * Attribute already has an = input key, so don't output a + * skip array for attno_skip. Just copy attribute's = input + * key into arrayKeyData[] once outside this inner loop. + * + * Note: When we get here there must be a later attribute that + * lacks an equality input key, and still needs a skip array + * (if there wasn't then numSkipArrayKeys would be 0 by now). + */ + Assert(attno_skip == inkey->sk_attno); + /* inkey can't be last input key to be marked required: */ + Assert(input_ikey < scan->numberOfKeys - 1); +#if 0 + /* Could be a redundant input scan key, so can't do this: */ + Assert(inkey->sk_strategy == BTEqualStrategyNumber || + (inkey->sk_flags & SK_SEARCHNULL)); +#endif + + attno_skip++; + break; + } + + cmp_proc = get_opcode(eq_op); + if (!RegProcedureIsValid(cmp_proc)) + elog(ERROR, "missing oprcode for skipping equals operator %u", eq_op); + + ScanKeyEntryInitialize(cur, + SK_SEARCHARRAY | SK_BT_SKIP, /* flags */ + attno_skip, /* skipped att number */ + BTEqualStrategyNumber, /* equality strategy */ + InvalidOid, /* opclass input subtype */ + collation, /* index column's collation */ + cmp_proc, /* equality operator's proc */ + (Datum) 0); /* constant */ + + /* Initialize generic BTArrayKeyInfo fields */ + so->arrayKeys[numArrayKeys].scan_key = numArrayKeyData; + so->arrayKeys[numArrayKeys].num_elems = -1; + + /* Initialize skip array specific BTArrayKeyInfo fields */ + attr = TupleDescCompactAttr(RelationGetDescr(rel), attno_skip - 1); + reverse = (indoption[attno_skip - 1] & INDOPTION_DESC) != 0; + so->arrayKeys[numArrayKeys].attlen = attr->attlen; + so->arrayKeys[numArrayKeys].attbyval = attr->attbyval; + so->arrayKeys[numArrayKeys].null_elem = true; /* for now */ + so->arrayKeys[numArrayKeys].sksup = + PrepareSkipSupportFromOpclass(opfamily, opcintype, reverse); + so->arrayKeys[numArrayKeys].low_compare = NULL; /* for now */ + so->arrayKeys[numArrayKeys].high_compare = NULL; /* for now */ + + /* + * We'll need a 3-way ORDER proc. Set that up now. + */ + _bt_setup_array_cmp(scan, cur, opcintype, + &so->orderProcs[numArrayKeyData], NULL); + + numArrayKeys++; + numArrayKeyData++; /* keep this scan key/array */ + + /* set up next output scan key */ + cur = &arrayKeyData[numArrayKeyData]; + + /* remember having output this skip array and scan key */ + numSkipArrayKeys--; + attno_skip++; + } /* * Provisionally copy scan key into arrayKeyData[] array we'll return * to _bt_preprocess_keys caller */ - cur = &arrayKeyData[output_ikey]; - *cur = scan->keyData[input_ikey]; + *cur = *inkey; if (!(cur->sk_flags & SK_SEARCHARRAY)) { - output_ikey++; /* keep this non-array scan key */ + numArrayKeyData++; /* keep this non-array scan key */ continue; } /* + * Process SAOP array scan key + */ + Assert(!(cur->sk_flags & (SK_ROW_HEADER | SK_SEARCHNULL | SK_SEARCHNOTNULL))); + + /* If array is null as a whole, the scan qual is unsatisfiable */ + if (cur->sk_flags & SK_ISNULL) + { + so->qual_ok = false; + break; + } + + /* * Deconstruct the array into elements */ arrayval = DatumGetArrayTypeP(cur->sk_argument); @@ -1257,7 +1537,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys) * all btree operators are strict. */ num_nonnulls = 0; - for (j = 0; j < num_elems; j++) + for (int j = 0; j < num_elems; j++) { if (!elem_nulls[j]) elem_values[num_nonnulls++] = elem_values[j]; @@ -1295,7 +1575,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys) _bt_find_extreme_element(scan, cur, elemtype, BTGreaterStrategyNumber, elem_values, num_nonnulls); - output_ikey++; /* keep this transformed scan key */ + numArrayKeyData++; /* keep this transformed scan key */ continue; case BTEqualStrategyNumber: /* proceed with rest of loop */ @@ -1306,7 +1586,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys) _bt_find_extreme_element(scan, cur, elemtype, BTLessStrategyNumber, elem_values, num_nonnulls); - output_ikey++; /* keep this transformed scan key */ + numArrayKeyData++; /* keep this transformed scan key */ continue; default: elog(ERROR, "unrecognized StrategyNumber: %d", @@ -1323,7 +1603,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys) * sortproc just points to the same proc used during binary searches. */ _bt_setup_array_cmp(scan, cur, elemtype, - &so->orderProcs[output_ikey], &sortprocp); + &so->orderProcs[numArrayKeyData], &sortprocp); /* * Sort the non-null elements and eliminate any duplicates. We must @@ -1392,23 +1672,24 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys) origelemtype = elemtype; } - /* - * And set up the BTArrayKeyInfo data. - * - * Note: _bt_preprocess_array_keys_final will fix-up each array's - * scan_key field later on, after so->keyData[] has been finalized. - */ - so->arrayKeys[numArrayKeys].scan_key = output_ikey; + /* Initialize generic BTArrayKeyInfo fields */ + so->arrayKeys[numArrayKeys].scan_key = numArrayKeyData; so->arrayKeys[numArrayKeys].num_elems = num_elems; + + /* Initialize SAOP array specific BTArrayKeyInfo fields */ so->arrayKeys[numArrayKeys].elem_values = elem_values; + so->arrayKeys[numArrayKeys].cur_elem = -1; /* i.e. invalid */ + numArrayKeys++; - output_ikey++; /* keep this scan key/array */ + numArrayKeyData++; /* keep this scan key/array */ } + Assert(numSkipArrayKeys == 0); + /* Set final number of equality-type array keys */ so->numArrayKeys = numArrayKeys; - /* Set number of scan keys remaining in arrayKeyData[] */ - *new_numberOfKeys = output_ikey; + /* Set number of scan keys in arrayKeyData[] */ + *new_numberOfKeys = numArrayKeyData; MemoryContextSwitchTo(oldContext); @@ -1514,7 +1795,14 @@ _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap) { BTArrayKeyInfo *array = &so->arrayKeys[arrayidx]; - Assert(array->num_elems > 0); + /* + * All skip arrays must be marked required, and final column can + * never have a skip array + */ + Assert(array->num_elems > 0 || array->num_elems == -1); + Assert(array->num_elems != -1 || outkey->sk_flags & SK_BT_REQFWD); + Assert(array->num_elems != -1 || + outkey->sk_attno < IndexRelationGetNumberOfKeyAttributes(rel)); if (array->scan_key == input_ikey) { @@ -1576,6 +1864,199 @@ _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap) } /* + * _bt_num_array_keys() -- determine # of BTArrayKeyInfo entries + * + * _bt_preprocess_array_keys helper function. Returns the estimated size of + * the scan's BTArrayKeyInfo array, which is guaranteed to be large enough to + * fit every so->arrayKeys[] entry. + * + * Also sets *numSkipArrayKeys_out to the number of of skip arrays caller must + * add to the scan keys it'll output. Caller must add this many skip arrays: + * one array for each of the most significant attributes that lack a = input + * key (IS NULL keys count as = input keys here). The specific attributes + * that need skip arrays are indicated by initializing skip_eq_ops_out[] arg + * 0-based attribute offset to a valid = op strategy Oid. We'll only ever set + * skip_eq_ops_out[] entries to InvalidOid for attributes that already have an + * equality key in scan->keyData[] input keys -- and only when there's some + * later "attribute gap" for us to "fill-in" with a skip array. + * + * We're optimistic about skipping working out: we always add exactly the skip + * arrays needed to maximize the number of input scan keys that can ultimately + * be marked as required to continue the scan (but no more). Given a + * multi-column index on (a, b, c, d), we add skip arrays as follows: + * + * Input keys Output keys (after all preprocessing) + * ---------- ------------------------------------- + * a = 1 a = 1 (no skip arrays) + * b = 42 skip a AND b = 42 + * a = 1 AND b = 42 a = 1 AND b = 42 (no skip arrays) + * a >= 1 AND b = 42 range skip a AND b = 42 + * a = 1 AND b > 42 a = 1 AND b > 42 (no skip arrays) + * a >= 1 AND a <= 3 AND b = 42 range skip a AND b = 42 + * a = 1 AND c <= 27 a = 1 AND skip b AND c <= 27 + * a = 1 AND d >= 1 a = 1 AND skip b AND skip c AND d >= 1 + * a = 1 AND b >= 42 AND d > 1 a = 1 AND range skip b AND skip c AND d > 1 + */ +static int +_bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops_out, + int *numSkipArrayKeys_out) +{ + Relation rel = scan->indexRelation; + AttrNumber attno_skip = 1, + attno_inkey = 1; + bool attno_has_equal = false, + attno_has_rowcompare = false; + int numSAOPArrayKeys, + numSkipArrayKeys, + prev_numSkipArrayKeys; + + Assert(scan->numberOfKeys); + + /* Initial pass over input scan keys counts the number of SAOP arrays */ + numSAOPArrayKeys = 0; + *numSkipArrayKeys_out = prev_numSkipArrayKeys = numSkipArrayKeys = 0; + for (int i = 0; i < scan->numberOfKeys; i++) + { + ScanKey inkey = scan->keyData + i; + + if (inkey->sk_flags & SK_SEARCHARRAY) + numSAOPArrayKeys++; + } + +#ifdef DEBUG_DISABLE_SKIP_SCAN + /* don't attempt to add skip arrays */ + return numSAOPArrayKeys; +#endif + + for (int i = 0;; i++) + { + ScanKey inkey = scan->keyData + i; + + /* + * Backfill skip arrays for any wholly omitted attributes prior to + * attno_inkey + */ + while (attno_skip < attno_inkey) + { + Oid opfamily = rel->rd_opfamily[attno_skip - 1]; + Oid opcintype = rel->rd_opcintype[attno_skip - 1]; + + /* Look up input opclass's equality operator (might fail) */ + skip_eq_ops_out[attno_skip - 1] = + get_opfamily_member(opfamily, opcintype, opcintype, + BTEqualStrategyNumber); + if (!OidIsValid(skip_eq_ops_out[attno_skip - 1])) + { + /* + * Cannot generate a skip array for this or later attributes + * (input opclass lacks an equality strategy operator) + */ + *numSkipArrayKeys_out = prev_numSkipArrayKeys; + return numSAOPArrayKeys + prev_numSkipArrayKeys; + } + + /* plan on adding a backfill skip array for this attribute */ + numSkipArrayKeys++; + attno_skip++; + } + + prev_numSkipArrayKeys = numSkipArrayKeys; + + /* + * Stop once past the final input scan key. We deliberately never add + * a skip array for the last input scan key's attribute -- even when + * there are only inequality keys on that attribute. + */ + if (i == scan->numberOfKeys) + break; + + /* + * Later preprocessing steps cannot merge a RowCompare into a skip + * array, so stop adding skip arrays once we see one. (Note that we + * can backfill skip arrays before a RowCompare, which will allow keys + * up to and including the RowCompare to be marked required.) + * + * Skip arrays work by maintaining a current array element value, + * which anchors lower-order keys via an implied equality constraint. + * This is incompatible with the current nbtree row comparison design, + * which compares all columns together, as an indivisible group. + * Alternative designs that can be used alongside skip arrays are + * possible, but it's not clear that they're really worth pursuing. + * + * A RowCompare qual "(a, b, c) > (10, 'foo', 42)" is equivalent to + * "(a=10 AND b='foo' AND c>42) OR (a=10 AND b>'foo') OR (a>10)". + * Decomposing this RowCompare into these 3 disjuncts allows each + * disjunct to be executed as a separate "single value" index scan. + * That'll give all 3 scans the ability to add skip arrays in the + * usual way (when there are any scalar keys after the RowCompare). + * Under this scheme, a qual "(a, b, c) > (10, 'foo', 42) AND d = 99" + * performs 3 separate scans, each of which can mark keys up to and + * including its "d = 99" key as required to continue the scan. + */ + if (attno_has_rowcompare) + break; + + /* + * Now consider next attno_inkey (or keep going if this is an + * additional scan key against the same attribute) + */ + if (attno_inkey < inkey->sk_attno) + { + /* + * Now add skip array for previous scan key's attribute, though + * only if the attribute has no equality strategy scan keys + */ + if (attno_has_equal) + { + /* Attributes with an = key must have InvalidOid eq_op set */ + skip_eq_ops_out[attno_skip - 1] = InvalidOid; + } + else + { + Oid opfamily = rel->rd_opfamily[attno_skip - 1]; + Oid opcintype = rel->rd_opcintype[attno_skip - 1]; + + /* Look up input opclass's equality operator (might fail) */ + skip_eq_ops_out[attno_skip - 1] = + get_opfamily_member(opfamily, opcintype, opcintype, + BTEqualStrategyNumber); + + if (!OidIsValid(skip_eq_ops_out[attno_skip - 1])) + { + /* + * Input opclass lacks an equality strategy operator, so + * don't generate a skip array that definitely won't work + */ + break; + } + + /* plan on adding a backfill skip array for this attribute */ + numSkipArrayKeys++; + } + + /* Set things up for this new attribute */ + attno_skip++; + attno_inkey = inkey->sk_attno; + attno_has_equal = false; + } + + /* + * Track if this attribute's scan keys include any equality strategy + * scan keys (IS NULL keys count as equality keys here). Also track + * if it has any RowCompare keys. + */ + if (inkey->sk_strategy == BTEqualStrategyNumber || + (inkey->sk_flags & SK_SEARCHNULL)) + attno_has_equal = true; + if (inkey->sk_flags & SK_ROW_HEADER) + attno_has_rowcompare = true; + } + + *numSkipArrayKeys_out = numSkipArrayKeys; + return numSAOPArrayKeys + numSkipArrayKeys; +} + +/* * _bt_find_extreme_element() -- get least or greatest array element * * scan and skey identify the index column, whose opfamily determines the diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index a99667eb2bd..77781cb9002 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -31,6 +31,7 @@ #include "storage/ipc.h" #include "storage/lmgr.h" #include "storage/read_stream.h" +#include "utils/datum.h" #include "utils/fmgrprotos.h" #include "utils/index_selfuncs.h" #include "utils/memutils.h" @@ -76,14 +77,26 @@ typedef struct BTParallelScanDescData /* * btps_arrElems is used when scans need to schedule another primitive - * index scan. Holds BTArrayKeyInfo.cur_elem offsets for scan keys. + * index scan with one or more SAOP arrays. Holds BTArrayKeyInfo.cur_elem + * offsets for each = scan key associated with a ScalarArrayOp array. */ int btps_arrElems[FLEXIBLE_ARRAY_MEMBER]; + + /* + * Additional space (at the end of the struct) is used when scans need to + * schedule another primitive index scan with one or more skip arrays. + * Holds a flattened datum representation for each = scan key associated + * with a skip array. + */ } BTParallelScanDescData; typedef struct BTParallelScanDescData *BTParallelScanDesc; +static void _bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btscan, + BTScanOpaque so); +static void _bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan, + BTScanOpaque so); static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid); @@ -541,10 +554,167 @@ btrestrpos(IndexScanDesc scan) * btestimateparallelscan -- estimate storage for BTParallelScanDescData */ Size -btestimateparallelscan(int nkeys, int norderbys) +btestimateparallelscan(Relation rel, int nkeys, int norderbys) +{ + int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); + Size estnbtreeshared, + genericattrspace; + + /* + * Pessimistically assume that every input scan key will be output with + * its own SAOP array + */ + estnbtreeshared = offsetof(BTParallelScanDescData, btps_arrElems) + + sizeof(int) * nkeys; + + /* Single column indexes cannot possibly use a skip array */ + if (nkeyatts == 1) + return estnbtreeshared; + + /* + * Pessimistically assume that all attributes prior to the least + * significant attribute require a skip array (and an associated key) + */ + genericattrspace = datumEstimateSpace((Datum) 0, false, true, + sizeof(Datum)); + for (int attnum = 1; attnum < nkeyatts; attnum++) + { + CompactAttribute *attr; + + /* + * We make the conservative assumption that every index column will + * also require a skip array. + * + * Every skip array must have space to store its scan key's sk_flags. + */ + estnbtreeshared = add_size(estnbtreeshared, sizeof(int)); + + /* Consider space required to store a datum of opclass input type */ + attr = TupleDescCompactAttr(rel->rd_att, attnum - 1); + if (attr->attbyval) + { + /* This index attribute stores pass-by-value datums */ + Size estfixed = datumEstimateSpace((Datum) 0, false, + true, attr->attlen); + + estnbtreeshared = add_size(estnbtreeshared, estfixed); + continue; + } + + /* + * This index attribute stores pass-by-reference datums. + * + * Assume that serializing this array will use just as much space as a + * pass-by-value datum, in addition to space for the largest possible + * whole index tuple (this is not just a per-datum portion of the + * largest possible tuple because that'd be almost as large anyway). + * + * This is quite conservative, but it's not clear how we could do much + * better. The executor requires an up-front storage request size + * that reliably covers the scan's high watermark memory usage. We + * can't be sure of the real high watermark until the scan is over. + */ + estnbtreeshared = add_size(estnbtreeshared, genericattrspace); + estnbtreeshared = add_size(estnbtreeshared, BTMaxItemSize); + } + + return estnbtreeshared; +} + +/* + * _bt_parallel_serialize_arrays() -- Serialize parallel array state. + * + * Caller must have exclusively locked btscan->btps_lock when called. + */ +static void +_bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btscan, + BTScanOpaque so) +{ + char *datumshared; + + /* Space for serialized datums begins immediately after btps_arrElems[] */ + datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]); + for (int i = 0; i < so->numArrayKeys; i++) + { + BTArrayKeyInfo *array = &so->arrayKeys[i]; + ScanKey skey = &so->keyData[array->scan_key]; + + if (array->num_elems != -1) + { + /* Save SAOP array's cur_elem (no need to copy key/datum) */ + Assert(!(skey->sk_flags & SK_BT_SKIP)); + btscan->btps_arrElems[i] = array->cur_elem; + continue; + } + + /* Save all mutable state associated with skip array's key */ + Assert(skey->sk_flags & SK_BT_SKIP); + memcpy(datumshared, &skey->sk_flags, sizeof(int)); + datumshared += sizeof(int); + + if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)) + { + /* No sk_argument datum to serialize */ + Assert(skey->sk_argument == 0); + continue; + } + + datumSerialize(skey->sk_argument, (skey->sk_flags & SK_ISNULL) != 0, + array->attbyval, array->attlen, &datumshared); + } +} + +/* + * _bt_parallel_restore_arrays() -- Restore serialized parallel array state. + * + * Caller must have exclusively locked btscan->btps_lock when called. + */ +static void +_bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan, + BTScanOpaque so) { - /* Pessimistically assume all input scankeys will be output with arrays */ - return offsetof(BTParallelScanDescData, btps_arrElems) + sizeof(int) * nkeys; + char *datumshared; + + /* Space for serialized datums begins immediately after btps_arrElems[] */ + datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]); + for (int i = 0; i < so->numArrayKeys; i++) + { + BTArrayKeyInfo *array = &so->arrayKeys[i]; + ScanKey skey = &so->keyData[array->scan_key]; + bool isnull; + + if (array->num_elems != -1) + { + /* Restore SAOP array using its saved cur_elem */ + Assert(!(skey->sk_flags & SK_BT_SKIP)); + array->cur_elem = btscan->btps_arrElems[i]; + skey->sk_argument = array->elem_values[array->cur_elem]; + continue; + } + + /* Restore skip array by restoring its key directly */ + if (!array->attbyval && skey->sk_argument) + pfree(DatumGetPointer(skey->sk_argument)); + skey->sk_argument = (Datum) 0; + memcpy(&skey->sk_flags, datumshared, sizeof(int)); + datumshared += sizeof(int); + + Assert(skey->sk_flags & SK_BT_SKIP); + + if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)) + { + /* No sk_argument datum to restore */ + continue; + } + + skey->sk_argument = datumRestore(&datumshared, &isnull); + if (isnull) + { + Assert(skey->sk_argument == 0); + Assert(skey->sk_flags & SK_SEARCHNULL); + Assert(skey->sk_flags & SK_ISNULL); + } + } } /* @@ -613,6 +783,7 @@ bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page, BlockNumber *last_curr_page, bool first) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; bool exit_loop = false, status = true, @@ -679,14 +850,9 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *next_scan_page, { /* Can start scheduled primitive scan right away, so do so */ btscan->btps_pageStatus = BTPARALLEL_ADVANCING; - for (int i = 0; i < so->numArrayKeys; i++) - { - BTArrayKeyInfo *array = &so->arrayKeys[i]; - ScanKey skey = &so->keyData[array->scan_key]; - array->cur_elem = btscan->btps_arrElems[i]; - skey->sk_argument = array->elem_values[array->cur_elem]; - } + /* Restore scan's array keys from serialized values */ + _bt_parallel_restore_arrays(rel, btscan, so); exit_loop = true; } else @@ -831,6 +997,7 @@ _bt_parallel_done(IndexScanDesc scan) void _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; @@ -849,12 +1016,7 @@ _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber curr_page) btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN; /* Serialize scan's current array keys */ - for (int i = 0; i < so->numArrayKeys; i++) - { - BTArrayKeyInfo *array = &so->arrayKeys[i]; - - btscan->btps_arrElems[i] = array->cur_elem; - } + _bt_parallel_serialize_arrays(rel, btscan, so); } LWLockRelease(&btscan->btps_lock); } diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 3d46fb5df78..1ef2cb2b55e 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -983,7 +983,21 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * one we use --- by definition, they are either redundant or * contradictory. * - * Any regular (not SK_SEARCHNULL) key implies a NOT NULL qualifier. + * In practice we rarely see any "attribute boundary key gaps" here. + * Preprocessing can usually backfill skip array keys for any attributes + * that were omitted from the original scan->keyData[] input keys. All + * array keys are always considered = keys, but we'll sometimes need to + * treat the current key value as if we were using an inequality strategy. + * This happens with range skip arrays, which store inequality keys in the + * array's low_compare/high_compare fields (used to find the first/last + * set of matches, when = key will lack a usable sk_argument value). + * These are always preferred over any redundant "standard" inequality + * keys on the same column (per the usual rule about preferring = keys). + * Note also that any column with an = skip array key can never have an + * additional, contradictory = key. + * + * All keys (with the exception of SK_SEARCHNULL keys and SK_BT_SKIP + * array keys whose array is "null_elem=true") imply a NOT NULL qualifier. * If the index stores nulls at the end of the index we'll be starting * from, and we have no boundary key for the column (which means the key * we deduced NOT NULL from is an inequality key that constrains the other @@ -1040,8 +1054,54 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (i >= so->numberOfKeys || cur->sk_attno != curattr) { /* - * Done looking at keys for curattr. If we didn't find a - * usable boundary key, see if we can deduce a NOT NULL key. + * Done looking at keys for curattr. + * + * If this is a scan key for a skip array whose current + * element is MINVAL, choose low_compare (when scanning + * backwards it'll be MAXVAL, and we'll choose high_compare). + * + * Note: if the array's low_compare key makes 'chosen' NULL, + * then we behave as if the array's first element is -inf, + * except when !array->null_elem implies a usable NOT NULL + * constraint. + */ + if (chosen != NULL && + (chosen->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))) + { + int ikey = chosen - so->keyData; + ScanKey skipequalitykey = chosen; + BTArrayKeyInfo *array = NULL; + + for (int arridx = 0; arridx < so->numArrayKeys; arridx++) + { + array = &so->arrayKeys[arridx]; + if (array->scan_key == ikey) + break; + } + + if (ScanDirectionIsForward(dir)) + { + Assert(!(skipequalitykey->sk_flags & SK_BT_MAXVAL)); + chosen = array->low_compare; + } + else + { + Assert(!(skipequalitykey->sk_flags & SK_BT_MINVAL)); + chosen = array->high_compare; + } + + Assert(chosen == NULL || + chosen->sk_attno == skipequalitykey->sk_attno); + + if (!array->null_elem) + impliesNN = skipequalitykey; + else + Assert(chosen == NULL && impliesNN == NULL); + } + + /* + * If we didn't find a usable boundary key, see if we can + * deduce a NOT NULL key */ if (chosen == NULL && impliesNN != NULL && ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ? @@ -1084,9 +1144,40 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) break; /* - * Done if that was the last attribute, or if next key is not - * in sequence (implying no boundary key is available for the - * next attribute). + * If the key that we just added to startKeys[] is a skip + * array = key whose current element is marked NEXT or PRIOR, + * make strat_total > or < (and stop adding boundary keys). + * This can only happen with opclasses that lack skip support. + */ + if (chosen->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR)) + { + Assert(chosen->sk_flags & SK_BT_SKIP); + Assert(strat_total == BTEqualStrategyNumber); + + if (ScanDirectionIsForward(dir)) + { + Assert(!(chosen->sk_flags & SK_BT_PRIOR)); + strat_total = BTGreaterStrategyNumber; + } + else + { + Assert(!(chosen->sk_flags & SK_BT_NEXT)); + strat_total = BTLessStrategyNumber; + } + + /* + * We're done. We'll never find an exact = match for a + * NEXT or PRIOR sentinel sk_argument value. There's no + * sense in trying to add more keys to startKeys[]. + */ + break; + } + + /* + * Done if that was the last scan key output by preprocessing. + * Also done if there is a gap index attribute that lacks a + * usable key (only possible when preprocessing was unable to + * generate a skip array key to "fill in the gap"). */ if (i >= so->numberOfKeys || cur->sk_attno != curattr + 1) @@ -1581,31 +1672,10 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, * We skip this for the first page read by each (primitive) scan, to avoid * slowing down point queries. They typically don't stand to gain much * when the optimization can be applied, and are more likely to notice the - * overhead of the precheck. - * - * The optimization is unsafe and must be avoided whenever _bt_checkkeys - * just set a low-order required array's key to the best available match - * for a truncated -inf attribute value from the prior page's high key - * (array element 0 is always the best available match in this scenario). - * It's quite likely that matches for array element 0 begin on this page, - * but the start of matches won't necessarily align with page boundaries. - * When the start of matches is somewhere in the middle of this page, it - * would be wrong to treat page's final non-pivot tuple as representative. - * Doing so might lead us to treat some of the page's earlier tuples as - * being part of a group of tuples thought to satisfy the required keys. - * - * Note: Conversely, in the case where the scan's arrays just advanced - * using the prior page's HIKEY _without_ advancement setting scanBehind, - * the start of matches must be aligned with page boundaries, which makes - * it safe to attempt the optimization here now. It's also safe when the - * prior page's HIKEY simply didn't need to advance any required array. In - * both cases we can safely assume that the _first_ tuple from this page - * must be >= the current set of array keys/equality constraints. And so - * if the final tuple is == those same keys (and also satisfies any - * required < or <= strategy scan keys) during the precheck, we can safely - * assume that this must also be true of all earlier tuples from the page. + * overhead of the precheck. Also avoid it during scans with array keys, + * which might be using skip scan (XXX fixed in next commit). */ - if (!pstate.firstpage && !so->scanBehind && minoff < maxoff) + if (!pstate.firstpage && !arrayKeys && minoff < maxoff) { ItemId iid; IndexTuple itup; diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 2aee9bbf67d..108030a8ee7 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -30,6 +30,17 @@ static inline int32 _bt_compare_array_skey(FmgrInfo *orderproc, Datum tupdatum, bool tupnull, Datum arrdatum, ScanKey cur); +static void _bt_binsrch_skiparray_skey(bool cur_elem_trig, ScanDirection dir, + Datum tupdatum, bool tupnull, + BTArrayKeyInfo *array, ScanKey cur, + int32 *set_elem_result); +static void _bt_skiparray_set_element(Relation rel, ScanKey skey, BTArrayKeyInfo *array, + int32 set_elem_result, Datum tupdatum, bool tupnull); +static void _bt_skiparray_set_isnull(Relation rel, ScanKey skey, BTArrayKeyInfo *array); +static void _bt_array_set_low_or_high(Relation rel, ScanKey skey, + BTArrayKeyInfo *array, bool low_not_high); +static bool _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *array); +static bool _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array); static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir); static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir); static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir, @@ -207,6 +218,7 @@ _bt_compare_array_skey(FmgrInfo *orderproc, int32 result = 0; Assert(cur->sk_strategy == BTEqualStrategyNumber); + Assert(!(cur->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))); if (tupnull) /* NULL tupdatum */ { @@ -283,6 +295,8 @@ _bt_binsrch_array_skey(FmgrInfo *orderproc, Datum arrdatum; Assert(cur->sk_flags & SK_SEARCHARRAY); + Assert(!(cur->sk_flags & SK_BT_SKIP)); + Assert(!(cur->sk_flags & SK_ISNULL)); /* SAOP arrays never have NULLs */ Assert(cur->sk_strategy == BTEqualStrategyNumber); if (cur_elem_trig) @@ -406,6 +420,186 @@ _bt_binsrch_array_skey(FmgrInfo *orderproc, } /* + * _bt_binsrch_skiparray_skey() -- "Binary search" within a skip array + * + * Does not return an index into the array, since skip arrays don't really + * contain elements (they generate their array elements procedurally instead). + * Our interface matches that of _bt_binsrch_array_skey in every other way. + * + * Sets *set_elem_result just like _bt_binsrch_array_skey would with a true + * array. The value 0 indicates that tupdatum/tupnull is within the range of + * the skip array. We return -1 when tupdatum/tupnull is lower that any value + * within the range of the array, and 1 when it is higher than every value. + * Caller should pass *set_elem_result to _bt_skiparray_set_element to advance + * the array. + * + * cur_elem_trig indicates if array advancement was triggered by this array's + * scan key. We use this to optimize-away comparisons that are known by our + * caller to be unnecessary from context, just like _bt_binsrch_array_skey. + */ +static void +_bt_binsrch_skiparray_skey(bool cur_elem_trig, ScanDirection dir, + Datum tupdatum, bool tupnull, + BTArrayKeyInfo *array, ScanKey cur, + int32 *set_elem_result) +{ + Assert(cur->sk_flags & SK_BT_SKIP); + Assert(cur->sk_flags & SK_SEARCHARRAY); + Assert(cur->sk_flags & SK_BT_REQFWD); + Assert(array->num_elems == -1); + Assert(!ScanDirectionIsNoMovement(dir)); + + if (array->null_elem) + { + Assert(!array->low_compare && !array->high_compare); + + *set_elem_result = 0; + return; + } + + if (tupnull) /* NULL tupdatum */ + { + if (cur->sk_flags & SK_BT_NULLS_FIRST) + *set_elem_result = -1; /* NULL "<" NOT_NULL */ + else + *set_elem_result = 1; /* NULL ">" NOT_NULL */ + return; + } + + /* + * Array inequalities determine whether tupdatum is within the range of + * caller's skip array + */ + *set_elem_result = 0; + if (ScanDirectionIsForward(dir)) + { + /* + * Evaluate low_compare first (unless cur_elem_trig tells us that it + * cannot possibly fail to be satisfied), then evaluate high_compare + */ + if (!cur_elem_trig && array->low_compare && + !DatumGetBool(FunctionCall2Coll(&array->low_compare->sk_func, + array->low_compare->sk_collation, + tupdatum, + array->low_compare->sk_argument))) + *set_elem_result = -1; + else if (array->high_compare && + !DatumGetBool(FunctionCall2Coll(&array->high_compare->sk_func, + array->high_compare->sk_collation, + tupdatum, + array->high_compare->sk_argument))) + *set_elem_result = 1; + } + else + { + /* + * Evaluate high_compare first (unless cur_elem_trig tells us that it + * cannot possibly fail to be satisfied), then evaluate low_compare + */ + if (!cur_elem_trig && array->high_compare && + !DatumGetBool(FunctionCall2Coll(&array->high_compare->sk_func, + array->high_compare->sk_collation, + tupdatum, + array->high_compare->sk_argument))) + *set_elem_result = 1; + else if (array->low_compare && + !DatumGetBool(FunctionCall2Coll(&array->low_compare->sk_func, + array->low_compare->sk_collation, + tupdatum, + array->low_compare->sk_argument))) + *set_elem_result = -1; + } + + /* + * Assert that any keys that were assumed to be satisfied already (due to + * caller passing cur_elem_trig=true) really are satisfied as expected + */ +#ifdef USE_ASSERT_CHECKING + if (cur_elem_trig) + { + if (ScanDirectionIsForward(dir) && array->low_compare) + Assert(DatumGetBool(FunctionCall2Coll(&array->low_compare->sk_func, + array->low_compare->sk_collation, + tupdatum, + array->low_compare->sk_argument))); + + if (ScanDirectionIsBackward(dir) && array->high_compare) + Assert(DatumGetBool(FunctionCall2Coll(&array->high_compare->sk_func, + array->high_compare->sk_collation, + tupdatum, + array->high_compare->sk_argument))); + } +#endif +} + +/* + * _bt_skiparray_set_element() -- Set skip array scan key's sk_argument + * + * Caller passes set_elem_result returned by _bt_binsrch_skiparray_skey for + * caller's tupdatum/tupnull. + * + * We copy tupdatum/tupnull into skey's sk_argument iff set_elem_result == 0. + * Otherwise, we set skey to either the lowest or highest value that's within + * the range of caller's skip array (whichever is the best available match to + * tupdatum/tupnull that is still within the range of the skip array according + * to _bt_binsrch_skiparray_skey/set_elem_result). + */ +static void +_bt_skiparray_set_element(Relation rel, ScanKey skey, BTArrayKeyInfo *array, + int32 set_elem_result, Datum tupdatum, bool tupnull) +{ + Assert(skey->sk_flags & SK_BT_SKIP); + Assert(skey->sk_flags & SK_SEARCHARRAY); + + if (set_elem_result) + { + /* tupdatum/tupnull is out of the range of the skip array */ + Assert(!array->null_elem); + + _bt_array_set_low_or_high(rel, skey, array, set_elem_result < 0); + return; + } + + /* Advance skip array to tupdatum (or tupnull) value */ + if (unlikely(tupnull)) + { + _bt_skiparray_set_isnull(rel, skey, array); + return; + } + + /* Free memory previously allocated for sk_argument if needed */ + if (!array->attbyval && skey->sk_argument) + pfree(DatumGetPointer(skey->sk_argument)); + + /* tupdatum becomes new sk_argument/new current element */ + skey->sk_flags &= ~(SK_SEARCHNULL | SK_ISNULL | + SK_BT_MINVAL | SK_BT_MAXVAL | + SK_BT_NEXT | SK_BT_PRIOR); + skey->sk_argument = datumCopy(tupdatum, array->attbyval, array->attlen); +} + +/* + * _bt_skiparray_set_isnull() -- set skip array scan key to NULL + */ +static void +_bt_skiparray_set_isnull(Relation rel, ScanKey skey, BTArrayKeyInfo *array) +{ + Assert(skey->sk_flags & SK_BT_SKIP); + Assert(skey->sk_flags & SK_SEARCHARRAY); + Assert(array->null_elem && !array->low_compare && !array->high_compare); + + /* Free memory previously allocated for sk_argument if needed */ + if (!array->attbyval && skey->sk_argument) + pfree(DatumGetPointer(skey->sk_argument)); + + /* NULL becomes new sk_argument/new current element */ + skey->sk_argument = (Datum) 0; + skey->sk_flags &= ~(SK_BT_MINVAL | SK_BT_MAXVAL | + SK_BT_NEXT | SK_BT_PRIOR); + skey->sk_flags |= (SK_SEARCHNULL | SK_ISNULL); +} + +/* * _bt_start_array_keys() -- Initialize array keys at start of a scan * * Set up the cur_elem counters and fill in the first sk_argument value for @@ -414,30 +608,356 @@ _bt_binsrch_array_skey(FmgrInfo *orderproc, void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; - int i; Assert(so->numArrayKeys); Assert(so->qual_ok); - for (i = 0; i < so->numArrayKeys; i++) + for (int i = 0; i < so->numArrayKeys; i++) { - BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i]; - ScanKey skey = &so->keyData[curArrayKey->scan_key]; + BTArrayKeyInfo *array = &so->arrayKeys[i]; + ScanKey skey = &so->keyData[array->scan_key]; - Assert(curArrayKey->num_elems > 0); Assert(skey->sk_flags & SK_SEARCHARRAY); - if (ScanDirectionIsBackward(dir)) - curArrayKey->cur_elem = curArrayKey->num_elems - 1; - else - curArrayKey->cur_elem = 0; - skey->sk_argument = curArrayKey->elem_values[curArrayKey->cur_elem]; + _bt_array_set_low_or_high(rel, skey, array, + ScanDirectionIsForward(dir)); } so->scanBehind = so->oppositeDirCheck = false; /* reset */ } /* + * _bt_array_set_low_or_high() -- Set array scan key to lowest/highest element + * + * Caller also passes associated scan key, which will have its argument set to + * the lowest/highest array value in passing. + */ +static void +_bt_array_set_low_or_high(Relation rel, ScanKey skey, BTArrayKeyInfo *array, + bool low_not_high) +{ + Assert(skey->sk_flags & SK_SEARCHARRAY); + + if (array->num_elems != -1) + { + /* set low or high element for SAOP array */ + int set_elem = 0; + + Assert(!(skey->sk_flags & SK_BT_SKIP)); + + if (!low_not_high) + set_elem = array->num_elems - 1; + + /* + * Just copy over array datum (only skip arrays require freeing and + * allocating memory for sk_argument) + */ + array->cur_elem = set_elem; + skey->sk_argument = array->elem_values[set_elem]; + + return; + } + + /* set low or high element for skip array */ + Assert(skey->sk_flags & SK_BT_SKIP); + Assert(array->num_elems == -1); + + /* Free memory previously allocated for sk_argument if needed */ + if (!array->attbyval && skey->sk_argument) + pfree(DatumGetPointer(skey->sk_argument)); + + /* Reset flags */ + skey->sk_argument = (Datum) 0; + skey->sk_flags &= ~(SK_SEARCHNULL | SK_ISNULL | + SK_BT_MINVAL | SK_BT_MAXVAL | + SK_BT_NEXT | SK_BT_PRIOR); + + if (array->null_elem && + (low_not_high == ((skey->sk_flags & SK_BT_NULLS_FIRST) != 0))) + { + /* Requested element (either lowest or highest) has the value NULL */ + skey->sk_flags |= (SK_SEARCHNULL | SK_ISNULL); + } + else if (low_not_high) + { + /* Setting array to lowest element (according to low_compare) */ + skey->sk_flags |= SK_BT_MINVAL; + } + else + { + /* Setting array to highest element (according to high_compare) */ + skey->sk_flags |= SK_BT_MAXVAL; + } +} + +/* + * _bt_array_decrement() -- decrement array scan key's sk_argument + * + * Return value indicates whether caller's array was successfully decremented. + * Cannot decrement an array whose current element is already the first one. + */ +static bool +_bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *array) +{ + bool uflow = false; + Datum dec_sk_argument; + + Assert(skey->sk_flags & SK_SEARCHARRAY); + Assert(!(skey->sk_flags & (SK_BT_MAXVAL | SK_BT_NEXT | SK_BT_PRIOR))); + + /* SAOP array? */ + if (array->num_elems != -1) + { + Assert(!(skey->sk_flags & (SK_BT_SKIP | SK_BT_MINVAL | SK_BT_MAXVAL))); + if (array->cur_elem > 0) + { + /* + * Just decrement current element, and assign its datum to skey + * (only skip arrays need us to free existing sk_argument memory) + */ + array->cur_elem--; + skey->sk_argument = array->elem_values[array->cur_elem]; + + /* Successfully decremented array */ + return true; + } + + /* Cannot decrement to before first array element */ + return false; + } + + /* Nope, this is a skip array */ + Assert(skey->sk_flags & SK_BT_SKIP); + + /* + * The sentinel value that represents the minimum value within the range + * of a skip array (often just -inf) is never decrementable + */ + if (skey->sk_flags & SK_BT_MINVAL) + return false; + + /* + * When the current array element is NULL, and the lowest sorting value in + * the index is also NULL, we cannot decrement before first array element + */ + if ((skey->sk_flags & SK_ISNULL) && (skey->sk_flags & SK_BT_NULLS_FIRST)) + return false; + + /* + * Opclasses without skip support "decrement" the scan key's current + * element by setting the PRIOR flag. The true prior value is determined + * by repositioning to the last index tuple < existing sk_argument/current + * array element. Note that this works in the usual way when the scan key + * is already marked ISNULL (i.e. when the current element is NULL). + */ + if (!array->sksup) + { + /* Successfully "decremented" array */ + skey->sk_flags |= SK_BT_PRIOR; + return true; + } + + /* + * Opclasses with skip support directly decrement sk_argument + */ + if (skey->sk_flags & SK_ISNULL) + { + Assert(!(skey->sk_flags & SK_BT_NULLS_FIRST)); + + /* + * Existing sk_argument/array element is NULL (for an IS NULL qual). + * + * "Decrement" from NULL to the high_elem value provided by opclass + * skip support routine. + */ + skey->sk_flags &= ~(SK_SEARCHNULL | SK_ISNULL); + skey->sk_argument = datumCopy(array->sksup->high_elem, + array->attbyval, array->attlen); + return true; + } + + /* + * Ask opclass support routine to provide decremented copy of existing + * non-NULL sk_argument + */ + dec_sk_argument = array->sksup->decrement(rel, skey->sk_argument, &uflow); + if (unlikely(uflow)) + { + /* dec_sk_argument has undefined value (so no pfree) */ + if (array->null_elem && (skey->sk_flags & SK_BT_NULLS_FIRST)) + { + _bt_skiparray_set_isnull(rel, skey, array); + + /* Successfully "decremented" array to NULL */ + return true; + } + + /* Cannot decrement to before first array element */ + return false; + } + + /* + * Successfully decremented sk_argument to a non-NULL value. Make sure + * that the decremented value is still within the range of the array. + */ + if (array->low_compare && + !DatumGetBool(FunctionCall2Coll(&array->low_compare->sk_func, + array->low_compare->sk_collation, + dec_sk_argument, + array->low_compare->sk_argument))) + { + /* Keep existing sk_argument after all */ + if (!array->attbyval) + pfree(DatumGetPointer(dec_sk_argument)); + + /* Cannot decrement to before first array element */ + return false; + } + + /* Accept value returned by opclass decrement callback */ + if (!array->attbyval && skey->sk_argument) + pfree(DatumGetPointer(skey->sk_argument)); + skey->sk_argument = dec_sk_argument; + + /* Successfully decremented array */ + return true; +} + +/* + * _bt_array_increment() -- increment array scan key's sk_argument + * + * Return value indicates whether caller's array was successfully incremented. + * Cannot increment an array whose current element is already the final one. + */ +static bool +_bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array) +{ + bool oflow = false; + Datum inc_sk_argument; + + Assert(skey->sk_flags & SK_SEARCHARRAY); + Assert(!(skey->sk_flags & (SK_BT_MINVAL | SK_BT_NEXT | SK_BT_PRIOR))); + + /* SAOP array? */ + if (array->num_elems != -1) + { + Assert(!(skey->sk_flags & (SK_BT_SKIP | SK_BT_MINVAL | SK_BT_MAXVAL))); + if (array->cur_elem < array->num_elems - 1) + { + /* + * Just increment current element, and assign its datum to skey + * (only skip arrays need us to free existing sk_argument memory) + */ + array->cur_elem++; + skey->sk_argument = array->elem_values[array->cur_elem]; + + /* Successfully incremented array */ + return true; + } + + /* Cannot increment past final array element */ + return false; + } + + /* Nope, this is a skip array */ + Assert(skey->sk_flags & SK_BT_SKIP); + + /* + * The sentinel value that represents the maximum value within the range + * of a skip array (often just +inf) is never incrementable + */ + if (skey->sk_flags & SK_BT_MAXVAL) + return false; + + /* + * When the current array element is NULL, and the highest sorting value + * in the index is also NULL, we cannot increment past the final element + */ + if ((skey->sk_flags & SK_ISNULL) && !(skey->sk_flags & SK_BT_NULLS_FIRST)) + return false; + + /* + * Opclasses without skip support "increment" the scan key's current + * element by setting the NEXT flag. The true next value is determined by + * repositioning to the first index tuple > existing sk_argument/current + * array element. Note that this works in the usual way when the scan key + * is already marked ISNULL (i.e. when the current element is NULL). + */ + if (!array->sksup) + { + /* Successfully "incremented" array */ + skey->sk_flags |= SK_BT_NEXT; + return true; + } + + /* + * Opclasses with skip support directly increment sk_argument + */ + if (skey->sk_flags & SK_ISNULL) + { + Assert(skey->sk_flags & SK_BT_NULLS_FIRST); + + /* + * Existing sk_argument/array element is NULL (for an IS NULL qual). + * + * "Increment" from NULL to the low_elem value provided by opclass + * skip support routine. + */ + skey->sk_flags &= ~(SK_SEARCHNULL | SK_ISNULL); + skey->sk_argument = datumCopy(array->sksup->low_elem, + array->attbyval, array->attlen); + return true; + } + + /* + * Ask opclass support routine to provide incremented copy of existing + * non-NULL sk_argument + */ + inc_sk_argument = array->sksup->increment(rel, skey->sk_argument, &oflow); + if (unlikely(oflow)) + { + /* inc_sk_argument has undefined value (so no pfree) */ + if (array->null_elem && !(skey->sk_flags & SK_BT_NULLS_FIRST)) + { + _bt_skiparray_set_isnull(rel, skey, array); + + /* Successfully "incremented" array to NULL */ + return true; + } + + /* Cannot increment past final array element */ + return false; + } + + /* + * Successfully incremented sk_argument to a non-NULL value. Make sure + * that the incremented value is still within the range of the array. + */ + if (array->high_compare && + !DatumGetBool(FunctionCall2Coll(&array->high_compare->sk_func, + array->high_compare->sk_collation, + inc_sk_argument, + array->high_compare->sk_argument))) + { + /* Keep existing sk_argument after all */ + if (!array->attbyval) + pfree(DatumGetPointer(inc_sk_argument)); + + /* Cannot increment past final array element */ + return false; + } + + /* Accept value returned by opclass increment callback */ + if (!array->attbyval && skey->sk_argument) + pfree(DatumGetPointer(skey->sk_argument)); + skey->sk_argument = inc_sk_argument; + + /* Successfully incremented array */ + return true; +} + +/* * _bt_advance_array_keys_increment() -- Advance to next set of array elements * * Advances the array keys by a single increment in the current scan @@ -452,6 +972,7 @@ _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir) static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; /* @@ -461,29 +982,30 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir) */ for (int i = so->numArrayKeys - 1; i >= 0; i--) { - BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i]; - ScanKey skey = &so->keyData[curArrayKey->scan_key]; - int cur_elem = curArrayKey->cur_elem; - int num_elems = curArrayKey->num_elems; - bool rolled = false; + BTArrayKeyInfo *array = &so->arrayKeys[i]; + ScanKey skey = &so->keyData[array->scan_key]; - if (ScanDirectionIsForward(dir) && ++cur_elem >= num_elems) + if (ScanDirectionIsForward(dir)) { - cur_elem = 0; - rolled = true; + if (_bt_array_increment(rel, skey, array)) + return true; } - else if (ScanDirectionIsBackward(dir) && --cur_elem < 0) + else { - cur_elem = num_elems - 1; - rolled = true; + if (_bt_array_decrement(rel, skey, array)) + return true; } - curArrayKey->cur_elem = cur_elem; - skey->sk_argument = curArrayKey->elem_values[cur_elem]; - if (!rolled) - return true; + /* + * Couldn't increment (or decrement) array. Handle array roll over. + * + * Start over at the array's lowest sorting value (or its highest + * value, for backward scans)... + */ + _bt_array_set_low_or_high(rel, skey, array, + ScanDirectionIsForward(dir)); - /* Need to advance next array key, if any */ + /* ...then increment (or decrement) next most significant array */ } /* @@ -507,7 +1029,7 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir) } /* - * _bt_rewind_nonrequired_arrays() -- Rewind non-required arrays + * _bt_rewind_nonrequired_arrays() -- Rewind SAOP arrays not marked required * * Called when _bt_advance_array_keys decides to start a new primitive index * scan on the basis of the current scan position being before the position @@ -539,10 +1061,15 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir) * * Note: _bt_verify_arrays_bt_first is called by an assertion to enforce that * everybody got this right. + * + * Note: In practice almost all SAOP arrays are marked required during + * preprocessing (if necessary by generating skip arrays). It is hardly ever + * truly necessary to call here, but consistently doing so is simpler. */ static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; int arrayidx = 0; @@ -550,7 +1077,6 @@ _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir) { ScanKey cur = so->keyData + ikey; BTArrayKeyInfo *array = NULL; - int first_elem_dir; if (!(cur->sk_flags & SK_SEARCHARRAY) || cur->sk_strategy != BTEqualStrategyNumber) @@ -562,16 +1088,10 @@ _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir) if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) continue; - if (ScanDirectionIsForward(dir)) - first_elem_dir = 0; - else - first_elem_dir = array->num_elems - 1; + Assert(array->num_elems != -1); /* No non-required skip arrays */ - if (array->cur_elem != first_elem_dir) - { - array->cur_elem = first_elem_dir; - cur->sk_argument = array->elem_values[first_elem_dir]; - } + _bt_array_set_low_or_high(rel, cur, array, + ScanDirectionIsForward(dir)); } } @@ -696,9 +1216,77 @@ _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir, tupdatum = index_getattr(tuple, cur->sk_attno, tupdesc, &tupnull); - result = _bt_compare_array_skey(&so->orderProcs[ikey], - tupdatum, tupnull, - cur->sk_argument, cur); + if (likely(!(cur->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))) + { + /* Scankey has a valid/comparable sk_argument value */ + result = _bt_compare_array_skey(&so->orderProcs[ikey], + tupdatum, tupnull, + cur->sk_argument, cur); + + if (result == 0) + { + /* + * Interpret result in a way that takes NEXT/PRIOR into + * account + */ + if (cur->sk_flags & SK_BT_NEXT) + result = -1; + else if (cur->sk_flags & SK_BT_PRIOR) + result = 1; + + Assert(result == 0 || (cur->sk_flags & SK_BT_SKIP)); + } + } + else + { + BTArrayKeyInfo *array = NULL; + + /* + * Current array element/array = scan key value is a sentinel + * value that represents the lowest (or highest) possible value + * that's still within the range of the array. + * + * Like _bt_first, we only see MINVAL keys during forwards scans + * (and similarly only see MAXVAL keys during backwards scans). + * Even if the scan's direction changes, we'll stop at some higher + * order key before we can ever reach any MAXVAL (or MINVAL) keys. + * (However, unlike _bt_first we _can_ get to keys marked either + * NEXT or PRIOR, regardless of the scan's current direction.) + */ + Assert(ScanDirectionIsForward(dir) ? + !(cur->sk_flags & SK_BT_MAXVAL) : + !(cur->sk_flags & SK_BT_MINVAL)); + + /* + * There are no valid sk_argument values in MINVAL/MAXVAL keys. + * Check if tupdatum is within the range of skip array instead. + */ + for (int arrayidx = 0; arrayidx < so->numArrayKeys; arrayidx++) + { + array = &so->arrayKeys[arrayidx]; + if (array->scan_key == ikey) + break; + } + + _bt_binsrch_skiparray_skey(false, dir, tupdatum, tupnull, + array, cur, &result); + + if (result == 0) + { + /* + * tupdatum satisfies both low_compare and high_compare, so + * it's time to advance the array keys. + * + * Note: It's possible that the skip array will "advance" from + * its MINVAL (or MAXVAL) representation to an alternative, + * logically equivalent representation of the same value: a + * representation where the = key gets a valid datum in its + * sk_argument. This is only possible when low_compare uses + * the >= strategy (or high_compare uses the <= strategy). + */ + return false; + } + } /* * Does this comparison indicate that caller must _not_ advance the @@ -1017,18 +1605,9 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, */ if (beyond_end_advance) { - int final_elem_dir; - - if (ScanDirectionIsBackward(dir) || !array) - final_elem_dir = 0; - else - final_elem_dir = array->num_elems - 1; - - if (array && array->cur_elem != final_elem_dir) - { - array->cur_elem = final_elem_dir; - cur->sk_argument = array->elem_values[final_elem_dir]; - } + if (array) + _bt_array_set_low_or_high(rel, cur, array, + ScanDirectionIsBackward(dir)); continue; } @@ -1053,18 +1632,9 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, */ if (!all_required_satisfied || cur->sk_attno > tupnatts) { - int first_elem_dir; - - if (ScanDirectionIsForward(dir) || !array) - first_elem_dir = 0; - else - first_elem_dir = array->num_elems - 1; - - if (array && array->cur_elem != first_elem_dir) - { - array->cur_elem = first_elem_dir; - cur->sk_argument = array->elem_values[first_elem_dir]; - } + if (array) + _bt_array_set_low_or_high(rel, cur, array, + ScanDirectionIsForward(dir)); continue; } @@ -1080,14 +1650,22 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, bool cur_elem_trig = (sktrig_required && ikey == sktrig); /* - * Binary search for closest match that's available from the array + * "Binary search" by checking if tupdatum/tupnull are within the + * range of the skip array */ - set_elem = _bt_binsrch_array_skey(&so->orderProcs[ikey], - cur_elem_trig, dir, - tupdatum, tupnull, array, cur, - &result); + if (array->num_elems == -1) + _bt_binsrch_skiparray_skey(cur_elem_trig, dir, + tupdatum, tupnull, array, cur, + &result); - Assert(set_elem >= 0 && set_elem < array->num_elems); + /* + * Binary search for the closest match from the SAOP array + */ + else + set_elem = _bt_binsrch_array_skey(&so->orderProcs[ikey], + cur_elem_trig, dir, + tupdatum, tupnull, array, cur, + &result); } else { @@ -1163,11 +1741,21 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, } } - /* Advance array keys, even when set_elem isn't an exact match */ - if (array && array->cur_elem != set_elem) + /* Advance array keys, even when we don't have an exact match */ + if (array) { - array->cur_elem = set_elem; - cur->sk_argument = array->elem_values[set_elem]; + if (array->num_elems == -1) + { + /* Skip array's new element is tupdatum (or MINVAL/MAXVAL) */ + _bt_skiparray_set_element(rel, cur, array, result, + tupdatum, tupnull); + } + else if (array->cur_elem != set_elem) + { + /* SAOP array's new element is set_elem datum */ + array->cur_elem = set_elem; + cur->sk_argument = array->elem_values[set_elem]; + } } } @@ -1581,10 +2169,11 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan) if (array->scan_key != ikey) return false; - if (array->num_elems <= 0) + if (array->num_elems == 0 || array->num_elems < -1) return false; - if (cur->sk_argument != array->elem_values[array->cur_elem]) + if (array->num_elems != -1 && + cur->sk_argument != array->elem_values[array->cur_elem]) return false; if (last_sk_attno > cur->sk_attno) return false; @@ -1914,6 +2503,20 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, continue; } + /* + * A skip array scan key uses one of several sentinel values. We just + * fall back on _bt_tuple_before_array_skeys when we see such a value. + */ + if (key->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL | + SK_BT_NEXT | SK_BT_PRIOR)) + { + Assert(key->sk_flags & SK_SEARCHARRAY); + Assert(key->sk_flags & SK_BT_SKIP); + + *continuescan = false; + return false; + } + /* row-comparison keys need special processing */ if (key->sk_flags & SK_ROW_HEADER) { @@ -1939,6 +2542,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, else { Assert(key->sk_flags & SK_SEARCHNOTNULL); + Assert(!(key->sk_flags & SK_BT_SKIP)); if (!isNull) continue; /* tuple satisfies this qual */ } diff --git a/src/backend/access/nbtree/nbtvalidate.c b/src/backend/access/nbtree/nbtvalidate.c index dd6f5a15c65..817ad358f0c 100644 --- a/src/backend/access/nbtree/nbtvalidate.c +++ b/src/backend/access/nbtree/nbtvalidate.c @@ -106,6 +106,10 @@ btvalidate(Oid opclassoid) case BTOPTIONS_PROC: ok = check_amoptsproc_signature(procform->amproc); break; + case BTSKIPSUPPORT_PROC: + ok = check_amproc_signature(procform->amproc, VOIDOID, true, + 1, 1, INTERNALOID); + break; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c index 8546366ee06..a6dd8eab518 100644 --- a/src/backend/commands/opclasscmds.c +++ b/src/backend/commands/opclasscmds.c @@ -1331,6 +1331,31 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("ordering equal image functions must not be cross-type"))); } + else if (member->number == BTSKIPSUPPORT_PROC) + { + if (procform->pronargs != 1 || + procform->proargtypes.values[0] != INTERNALOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree skip support functions must accept type \"internal\""))); + if (procform->prorettype != VOIDOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree skip support functions must return void"))); + + /* + * pg_amproc functions are indexed by (lefttype, righttype), but a + * skip support function doesn't make sense in cross-type + * scenarios. The same opclass opcintype OID is always used for + * lefttype and righttype. Providing a cross-type routine isn't + * sensible. Reject cross-type ALTER OPERATOR FAMILY ... ADD + * FUNCTION 6 statements here. + */ + if (member->lefttype != member->righttype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree skip support functions must not be cross-type"))); + } } else if (GetIndexAmRoutineByAmId(amoid, false)->amcanhash) { diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 35e8c01aab9..4a233b63c32 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -99,6 +99,7 @@ OBJS = \ rowtypes.o \ ruleutils.o \ selfuncs.o \ + skipsupport.o \ tid.o \ timestamp.o \ trigfuncs.o \ diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index f279853deb8..4227ab1a72b 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -34,6 +34,7 @@ #include "utils/date.h" #include "utils/datetime.h" #include "utils/numeric.h" +#include "utils/skipsupport.h" #include "utils/sortsupport.h" /* @@ -462,6 +463,51 @@ date_sortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +static Datum +date_decrement(Relation rel, Datum existing, bool *underflow) +{ + DateADT dexisting = DatumGetDateADT(existing); + + if (dexisting == DATEVAL_NOBEGIN) + { + /* return value is undefined */ + *underflow = true; + return (Datum) 0; + } + + *underflow = false; + return DateADTGetDatum(dexisting - 1); +} + +static Datum +date_increment(Relation rel, Datum existing, bool *overflow) +{ + DateADT dexisting = DatumGetDateADT(existing); + + if (dexisting == DATEVAL_NOEND) + { + /* return value is undefined */ + *overflow = true; + return (Datum) 0; + } + + *overflow = false; + return DateADTGetDatum(dexisting + 1); +} + +Datum +date_skipsupport(PG_FUNCTION_ARGS) +{ + SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0); + + sksup->decrement = date_decrement; + sksup->increment = date_increment; + sksup->low_elem = DateADTGetDatum(DATEVAL_NOBEGIN); + sksup->high_elem = DateADTGetDatum(DATEVAL_NOEND); + + PG_RETURN_VOID(); +} + Datum hashdate(PG_FUNCTION_ARGS) { diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index f23cfad7182..244f48f4fd7 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -86,6 +86,7 @@ backend_sources += files( 'rowtypes.c', 'ruleutils.c', 'selfuncs.c', + 'skipsupport.c', 'tid.c', 'timestamp.c', 'trigfuncs.c', diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 462308807ef..021ea7517d7 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -193,6 +193,8 @@ static double convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure); static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata); +static void examine_indexcol_variable(PlannerInfo *root, IndexOptInfo *index, + int indexcol, VariableStatData *vardata); static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max); @@ -214,6 +216,8 @@ static bool get_actual_variable_endpoint(Relation heapRel, MemoryContext outercontext, Datum *endpointDatum); static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); +static double btcost_correlation(IndexOptInfo *index, + VariableStatData *vardata); /* @@ -5944,6 +5948,92 @@ examine_simple_variable(PlannerInfo *root, Var *var, } /* + * examine_indexcol_variable + * Try to look up statistical data about an index column/expression. + * Fill in a VariableStatData struct to describe the column. + * + * Inputs: + * root: the planner info + * index: the index whose column we're interested in + * indexcol: 0-based index column number (subscripts index->indexkeys[]) + * + * Outputs: *vardata is filled as follows: + * var: the input expression (with any binary relabeling stripped, if + * it is or contains a variable; but otherwise the type is preserved) + * rel: RelOptInfo for table relation containing variable. + * statsTuple: the pg_statistic entry for the variable, if one exists; + * otherwise NULL. + * freefunc: pointer to a function to release statsTuple with. + * + * Caller is responsible for doing ReleaseVariableStats() before exiting. + */ +static void +examine_indexcol_variable(PlannerInfo *root, IndexOptInfo *index, + int indexcol, VariableStatData *vardata) +{ + AttrNumber colnum; + Oid relid; + + if (index->indexkeys[indexcol] != 0) + { + /* Simple variable --- look to stats for the underlying table */ + RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root); + + Assert(rte->rtekind == RTE_RELATION); + relid = rte->relid; + Assert(relid != InvalidOid); + colnum = index->indexkeys[indexcol]; + vardata->rel = index->rel; + + if (get_relation_stats_hook && + (*get_relation_stats_hook) (root, rte, colnum, vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it did + * supply a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata->statsTuple) && + !vardata->freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata->statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(relid), + Int16GetDatum(colnum), + BoolGetDatum(rte->inh)); + vardata->freefunc = ReleaseSysCache; + } + } + else + { + /* Expression --- maybe there are stats for the index itself */ + relid = index->indexoid; + colnum = indexcol + 1; + + if (get_index_stats_hook && + (*get_index_stats_hook) (root, relid, colnum, vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it did + * supply a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata->statsTuple) && + !vardata->freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata->statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(relid), + Int16GetDatum(colnum), + BoolGetDatum(false)); + vardata->freefunc = ReleaseSysCache; + } + } +} + +/* * Check whether it is permitted to call func_oid passing some of the * pg_statistic data in vardata. We allow this either if the user has SELECT * privileges on the table or column underlying the pg_statistic data or if @@ -7007,6 +7097,53 @@ add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals) return list_concat(predExtraQuals, indexQuals); } +/* + * Estimate correlation of btree index's first column. + * + * If we can get an estimate of the first column's ordering correlation C + * from pg_statistic, estimate the index correlation as C for a single-column + * index, or C * 0.75 for multiple columns. The idea here is that multiple + * columns dilute the importance of the first column's ordering, but don't + * negate it entirely. + * + * We already filled in the stats tuple for *vardata when called. + */ +static double +btcost_correlation(IndexOptInfo *index, VariableStatData *vardata) +{ + Oid sortop; + AttStatsSlot sslot; + double indexCorrelation = 0; + + Assert(HeapTupleIsValid(vardata->statsTuple)); + + sortop = get_opfamily_member(index->opfamily[0], + index->opcintype[0], + index->opcintype[0], + BTLessStrategyNumber); + if (OidIsValid(sortop) && + get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_CORRELATION, sortop, + ATTSTATSSLOT_NUMBERS)) + { + double varCorrelation; + + Assert(sslot.nnumbers == 1); + varCorrelation = sslot.numbers[0]; + + if (index->reverse_sort[0]) + varCorrelation = -varCorrelation; + + if (index->nkeycolumns > 1) + indexCorrelation = varCorrelation * 0.75; + else + indexCorrelation = varCorrelation; + + free_attstatsslot(&sslot); + } + + return indexCorrelation; +} void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, @@ -7016,17 +7153,19 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, { IndexOptInfo *index = path->indexinfo; GenericCosts costs = {0}; - Oid relid; - AttrNumber colnum; VariableStatData vardata = {0}; double numIndexTuples; Cost descentCost; List *indexBoundQuals; + List *indexSkipQuals; int indexcol; bool eqQualHere; - bool found_saop; + bool found_row_compare; + bool found_array; bool found_is_null_op; + bool have_correlation = false; double num_sa_scans; + double correlation = 0.0; ListCell *lc; /* @@ -7037,19 +7176,24 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * it's OK to count them in indexSelectivity, but they should not count * for estimating numIndexTuples. So we must examine the given indexquals * to find out which ones count as boundary quals. We rely on the - * knowledge that they are given in index column order. + * knowledge that they are given in index column order. Note that nbtree + * preprocessing can add skip arrays that act as leading '=' quals in the + * absence of ordinary input '=' quals, so in practice _most_ input quals + * are able to act as index bound quals (which we take into account here). * * For a RowCompareExpr, we consider only the first column, just as * rowcomparesel() does. * - * If there's a ScalarArrayOpExpr in the quals, we'll actually perform up - * to N index descents (not just one), but the ScalarArrayOpExpr's + * If there's a SAOP or skip array in the quals, we'll actually perform up + * to N index descents (not just one), but the underlying array key's * operator can be considered to act the same as it normally does. */ indexBoundQuals = NIL; + indexSkipQuals = NIL; indexcol = 0; eqQualHere = false; - found_saop = false; + found_row_compare = false; + found_array = false; found_is_null_op = false; num_sa_scans = 1; foreach(lc, path->indexclauses) @@ -7057,17 +7201,203 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, IndexClause *iclause = lfirst_node(IndexClause, lc); ListCell *lc2; - if (indexcol != iclause->indexcol) + if (indexcol < iclause->indexcol) { - /* Beginning of a new column's quals */ - if (!eqQualHere) - break; /* done if no '=' qual for indexcol */ + double num_sa_scans_prev_cols = num_sa_scans; + + /* + * Beginning of a new column's quals. + * + * Skip scans use skip arrays, which are ScalarArrayOp style + * arrays that generate their elements procedurally and on demand. + * Given a multi-column index on "(a, b)", and an SQL WHERE clause + * "WHERE b = 42", a skip scan will effectively use an indexqual + * "WHERE a = ANY('{every col a value}') AND b = 42". (Obviously, + * the array on "a" must also return "IS NULL" matches, since our + * WHERE clause used no strict operator on "a"). + * + * Here we consider how nbtree will backfill skip arrays for any + * index columns that lacked an '=' qual. This maintains our + * num_sa_scans estimate, and determines if this new column (the + * "iclause->indexcol" column, not the prior "indexcol" column) + * can have its RestrictInfos/quals added to indexBoundQuals. + * + * We'll need to handle columns that have inequality quals, where + * the skip array generates values from a range constrained by the + * quals (not every possible value). We've been maintaining + * indexSkipQuals to help with this; it will now contain all of + * the prior column's quals (that is, indexcol's quals) when they + * might be used for this. + */ + if (found_row_compare) + { + /* + * Skip arrays can't be added after a RowCompare input qual + * due to limitations in nbtree + */ + break; + } + if (eqQualHere) + { + /* + * Don't need to add a skip array for an indexcol that already + * has an '=' qual/equality constraint + */ + indexcol++; + indexSkipQuals = NIL; + } eqQualHere = false; - indexcol++; + + while (indexcol < iclause->indexcol) + { + double ndistinct; + bool isdefault = true; + + found_array = true; + + /* + * A skipped attribute's ndistinct forms the basis of our + * estimate of the total number of "array elements" used by + * its skip array at runtime. Look that up first. + */ + examine_indexcol_variable(root, index, indexcol, &vardata); + ndistinct = get_variable_numdistinct(&vardata, &isdefault); + + if (indexcol == 0) + { + /* + * Get an estimate of the leading column's correlation in + * passing (avoids rereading variable stats below) + */ + if (HeapTupleIsValid(vardata.statsTuple)) + correlation = btcost_correlation(index, &vardata); + have_correlation = true; + } + + ReleaseVariableStats(vardata); + + /* + * If ndistinct is a default estimate, conservatively assume + * that no skipping will happen at runtime + */ + if (isdefault) + { + num_sa_scans = num_sa_scans_prev_cols; + break; /* done building indexBoundQuals */ + } + + /* + * Apply indexcol's indexSkipQuals selectivity to ndistinct + */ + if (indexSkipQuals != NIL) + { + List *partialSkipQuals; + Selectivity ndistinctfrac; + + /* + * If the index is partial, AND the index predicate with + * the index-bound quals to produce a more accurate idea + * of the number of distinct values for prior indexcol + */ + partialSkipQuals = add_predicate_to_index_quals(index, + indexSkipQuals); + + ndistinctfrac = clauselist_selectivity(root, partialSkipQuals, + index->rel->relid, + JOIN_INNER, + NULL); + + /* + * If ndistinctfrac is selective (on its own), the scan is + * unlikely to benefit from repositioning itself using + * later quals. Do not allow iclause->indexcol's quals to + * be added to indexBoundQuals (it would increase descent + * costs, without lowering numIndexTuples costs by much). + */ + if (ndistinctfrac < DEFAULT_RANGE_INEQ_SEL) + { + num_sa_scans = num_sa_scans_prev_cols; + break; /* done building indexBoundQuals */ + } + + /* Adjust ndistinct downward */ + ndistinct = rint(ndistinct * ndistinctfrac); + ndistinct = Max(ndistinct, 1); + } + + /* + * When there's no inequality quals, account for the need to + * find an initial value by counting -inf/+inf as a value. + * + * We don't charge anything extra for possible next/prior key + * index probes, which are sometimes used to find the next + * valid skip array element (ahead of using the located + * element value to relocate the scan to the next position + * that might contain matching tuples). It seems hard to do + * better here. Use of the skip support infrastructure often + * avoids most next/prior key probes. But even when it can't, + * there's a decent chance that most individual next/prior key + * probes will locate a leaf page whose key space overlaps all + * of the scan's keys (even the lower-order keys) -- which + * also avoids the need for a separate, extra index descent. + * Note also that these probes are much cheaper than non-probe + * primitive index scans: they're reliably very selective. + */ + if (indexSkipQuals == NIL) + ndistinct += 1; + + /* + * Update num_sa_scans estimate by multiplying by ndistinct. + * + * We make the pessimistic assumption that there is no + * naturally occurring cross-column correlation. This is + * often wrong, but it seems best to err on the side of not + * expecting skipping to be helpful... + */ + num_sa_scans *= ndistinct; + + /* + * ...but back out of adding this latest group of 1 or more + * skip arrays when num_sa_scans exceeds the total number of + * index pages (revert to num_sa_scans from before indexcol). + * This causes a sharp discontinuity in cost (as a function of + * the indexcol's ndistinct), but that is representative of + * actual runtime costs. + * + * Note that skipping is helpful when each primitive index + * scan only manages to skip over 1 or 2 irrelevant leaf pages + * on average. Skip arrays bring savings in CPU costs due to + * the scan not needing to evaluate indexquals against every + * tuple, which can greatly exceed any savings in I/O costs. + * This test is a test of whether num_sa_scans implies that + * we're past the point where the ability to skip ceases to + * lower the scan's costs (even qual evaluation CPU costs). + */ + if (index->pages < num_sa_scans) + { + num_sa_scans = num_sa_scans_prev_cols; + break; /* done building indexBoundQuals */ + } + + indexcol++; + indexSkipQuals = NIL; + } + + /* + * Finished considering the need to add skip arrays to bridge an + * initial eqQualHere gap between the old and new index columns + * (or there was no initial eqQualHere gap in the first place). + * + * If an initial gap could not be bridged, then new column's quals + * (i.e. iclause->indexcol's quals) won't go into indexBoundQuals, + * and so won't affect our final numIndexTuples estimate. + */ if (indexcol != iclause->indexcol) - break; /* no quals at all for indexcol */ + break; /* done building indexBoundQuals */ } + Assert(indexcol == iclause->indexcol); + /* Examine each indexqual associated with this index clause */ foreach(lc2, iclause->indexquals) { @@ -7087,6 +7417,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, RowCompareExpr *rc = (RowCompareExpr *) clause; clause_op = linitial_oid(rc->opnos); + found_row_compare = true; } else if (IsA(clause, ScalarArrayOpExpr)) { @@ -7095,7 +7426,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, double alength = estimate_array_length(root, other_operand); clause_op = saop->opno; - found_saop = true; + found_array = true; /* estimate SA descents by indexBoundQuals only */ if (alength > 1) num_sa_scans *= alength; @@ -7107,7 +7438,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, if (nt->nulltesttype == IS_NULL) { found_is_null_op = true; - /* IS NULL is like = for selectivity purposes */ + /* IS NULL is like = for selectivity/skip scan purposes */ eqQualHere = true; } } @@ -7126,19 +7457,28 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, } indexBoundQuals = lappend(indexBoundQuals, rinfo); + + /* + * We apply inequality selectivities to estimate index descent + * costs with scans that use skip arrays. Save this indexcol's + * RestrictInfos if it looks like they'll be needed for that. + */ + if (!eqQualHere && !found_row_compare && + indexcol < index->nkeycolumns - 1) + indexSkipQuals = lappend(indexSkipQuals, rinfo); } } /* * If index is unique and we found an '=' clause for each column, we can * just assume numIndexTuples = 1 and skip the expensive - * clauselist_selectivity calculations. However, a ScalarArrayOp or - * NullTest invalidates that theory, even though it sets eqQualHere. + * clauselist_selectivity calculations. However, an array or NullTest + * always invalidates that theory (even when eqQualHere has been set). */ if (index->unique && indexcol == index->nkeycolumns - 1 && eqQualHere && - !found_saop && + !found_array && !found_is_null_op) numIndexTuples = 1.0; else @@ -7160,7 +7500,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, numIndexTuples = btreeSelectivity * index->rel->tuples; /* - * btree automatically combines individual ScalarArrayOpExpr primitive + * btree automatically combines individual array element primitive * index scans whenever the tuples covered by the next set of array * keys are close to tuples covered by the current set. That puts a * natural ceiling on the worst case number of descents -- there @@ -7178,16 +7518,18 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * of leaf pages (we make it 1/3 the total number of pages instead) to * give the btree code credit for its ability to continue on the leaf * level with low selectivity scans. + * + * Note: num_sa_scans includes both ScalarArrayOp array elements and + * skip array elements whose qual affects our numIndexTuples estimate. */ num_sa_scans = Min(num_sa_scans, ceil(index->pages * 0.3333333)); num_sa_scans = Max(num_sa_scans, 1); /* - * As in genericcostestimate(), we have to adjust for any - * ScalarArrayOpExpr quals included in indexBoundQuals, and then round - * to integer. + * As in genericcostestimate(), we have to adjust for any array quals + * included in indexBoundQuals, and then round to integer. * - * It is tempting to make genericcostestimate behave as if SAOP + * It is tempting to make genericcostestimate behave as if array * clauses work in almost the same way as scalar operators during * btree scans, making the top-level scan look like a continuous scan * (as opposed to num_sa_scans-many primitive index scans). After @@ -7220,7 +7562,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * comparisons to descend a btree of N leaf tuples. We charge one * cpu_operator_cost per comparison. * - * If there are ScalarArrayOpExprs, charge this once per estimated SA + * If there are SAOP or skip array keys, charge this once per estimated * index descent. The ones after the first one are not startup cost so * far as the overall plan goes, so just add them to "total" cost. */ @@ -7240,110 +7582,25 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page * touched. The number of such pages is btree tree height plus one (ie, * we charge for the leaf page too). As above, charge once per estimated - * SA index descent. + * SAOP/skip array descent. */ descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost; costs.indexStartupCost += descentCost; costs.indexTotalCost += costs.num_sa_scans * descentCost; - /* - * If we can get an estimate of the first column's ordering correlation C - * from pg_statistic, estimate the index correlation as C for a - * single-column index, or C * 0.75 for multiple columns. (The idea here - * is that multiple columns dilute the importance of the first column's - * ordering, but don't negate it entirely. Before 8.0 we divided the - * correlation by the number of columns, but that seems too strong.) - */ - if (index->indexkeys[0] != 0) + if (!have_correlation) { - /* Simple variable --- look to stats for the underlying table */ - RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root); - - Assert(rte->rtekind == RTE_RELATION); - relid = rte->relid; - Assert(relid != InvalidOid); - colnum = index->indexkeys[0]; - - if (get_relation_stats_hook && - (*get_relation_stats_hook) (root, rte, colnum, &vardata)) - { - /* - * The hook took control of acquiring a stats tuple. If it did - * supply a tuple, it'd better have supplied a freefunc. - */ - if (HeapTupleIsValid(vardata.statsTuple) && - !vardata.freefunc) - elog(ERROR, "no function provided to release variable stats with"); - } - else - { - vardata.statsTuple = SearchSysCache3(STATRELATTINH, - ObjectIdGetDatum(relid), - Int16GetDatum(colnum), - BoolGetDatum(rte->inh)); - vardata.freefunc = ReleaseSysCache; - } + examine_indexcol_variable(root, index, 0, &vardata); + if (HeapTupleIsValid(vardata.statsTuple)) + costs.indexCorrelation = btcost_correlation(index, &vardata); + ReleaseVariableStats(vardata); } else { - /* Expression --- maybe there are stats for the index itself */ - relid = index->indexoid; - colnum = 1; - - if (get_index_stats_hook && - (*get_index_stats_hook) (root, relid, colnum, &vardata)) - { - /* - * The hook took control of acquiring a stats tuple. If it did - * supply a tuple, it'd better have supplied a freefunc. - */ - if (HeapTupleIsValid(vardata.statsTuple) && - !vardata.freefunc) - elog(ERROR, "no function provided to release variable stats with"); - } - else - { - vardata.statsTuple = SearchSysCache3(STATRELATTINH, - ObjectIdGetDatum(relid), - Int16GetDatum(colnum), - BoolGetDatum(false)); - vardata.freefunc = ReleaseSysCache; - } + /* btcost_correlation already called earlier on */ + costs.indexCorrelation = correlation; } - if (HeapTupleIsValid(vardata.statsTuple)) - { - Oid sortop; - AttStatsSlot sslot; - - sortop = get_opfamily_member(index->opfamily[0], - index->opcintype[0], - index->opcintype[0], - BTLessStrategyNumber); - if (OidIsValid(sortop) && - get_attstatsslot(&sslot, vardata.statsTuple, - STATISTIC_KIND_CORRELATION, sortop, - ATTSTATSSLOT_NUMBERS)) - { - double varCorrelation; - - Assert(sslot.nnumbers == 1); - varCorrelation = sslot.numbers[0]; - - if (index->reverse_sort[0]) - varCorrelation = -varCorrelation; - - if (index->nkeycolumns > 1) - costs.indexCorrelation = varCorrelation * 0.75; - else - costs.indexCorrelation = varCorrelation; - - free_attstatsslot(&sslot); - } - } - - ReleaseVariableStats(vardata); - *indexStartupCost = costs.indexStartupCost; *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; diff --git a/src/backend/utils/adt/skipsupport.c b/src/backend/utils/adt/skipsupport.c new file mode 100644 index 00000000000..2bd35d2d272 --- /dev/null +++ b/src/backend/utils/adt/skipsupport.c @@ -0,0 +1,61 @@ +/*------------------------------------------------------------------------- + * + * skipsupport.c + * Support routines for B-Tree skip scan. + * + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/skipsupport.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/nbtree.h" +#include "utils/lsyscache.h" +#include "utils/skipsupport.h" + +/* + * Fill in SkipSupport given an operator class (opfamily + opcintype). + * + * On success, returns skip support struct, allocating in caller's memory + * context. Otherwise returns NULL, indicating that operator class has no + * skip support function. + */ +SkipSupport +PrepareSkipSupportFromOpclass(Oid opfamily, Oid opcintype, bool reverse) +{ + Oid skipSupportFunction; + SkipSupport sksup; + + /* Look for a skip support function */ + skipSupportFunction = get_opfamily_proc(opfamily, opcintype, opcintype, + BTSKIPSUPPORT_PROC); + if (!OidIsValid(skipSupportFunction)) + return NULL; + + sksup = palloc(sizeof(SkipSupportData)); + OidFunctionCall1(skipSupportFunction, PointerGetDatum(sksup)); + + if (reverse) + { + /* + * DESC/reverse case: swap low_elem with high_elem, and swap decrement + * with increment + */ + Datum low_elem = sksup->low_elem; + SkipSupportIncDec decrement = sksup->decrement; + + sksup->low_elem = sksup->high_elem; + sksup->decrement = sksup->increment; + + sksup->high_elem = low_elem; + sksup->increment = decrement; + } + + return sksup; +} diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 9682f9dbdca..347089b7626 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -37,6 +37,7 @@ #include "utils/datetime.h" #include "utils/float.h" #include "utils/numeric.h" +#include "utils/skipsupport.h" #include "utils/sortsupport.h" /* @@ -2304,6 +2305,53 @@ timestamp_sortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +/* note: this is used for timestamptz also */ +static Datum +timestamp_decrement(Relation rel, Datum existing, bool *underflow) +{ + Timestamp texisting = DatumGetTimestamp(existing); + + if (texisting == PG_INT64_MIN) + { + /* return value is undefined */ + *underflow = true; + return (Datum) 0; + } + + *underflow = false; + return TimestampGetDatum(texisting - 1); +} + +/* note: this is used for timestamptz also */ +static Datum +timestamp_increment(Relation rel, Datum existing, bool *overflow) +{ + Timestamp texisting = DatumGetTimestamp(existing); + + if (texisting == PG_INT64_MAX) + { + /* return value is undefined */ + *overflow = true; + return (Datum) 0; + } + + *overflow = false; + return TimestampGetDatum(texisting + 1); +} + +Datum +timestamp_skipsupport(PG_FUNCTION_ARGS) +{ + SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0); + + sksup->decrement = timestamp_decrement; + sksup->increment = timestamp_increment; + sksup->low_elem = TimestampGetDatum(PG_INT64_MIN); + sksup->high_elem = TimestampGetDatum(PG_INT64_MAX); + + PG_RETURN_VOID(); +} + Datum timestamp_hash(PG_FUNCTION_ARGS) { diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c index be0f0f9f1ce..bce7309c183 100644 --- a/src/backend/utils/adt/uuid.c +++ b/src/backend/utils/adt/uuid.c @@ -13,6 +13,7 @@ #include "postgres.h" +#include <limits.h> #include <time.h> /* for clock_gettime() */ #include "common/hashfn.h" @@ -21,6 +22,7 @@ #include "port/pg_bswap.h" #include "utils/fmgrprotos.h" #include "utils/guc.h" +#include "utils/skipsupport.h" #include "utils/sortsupport.h" #include "utils/timestamp.h" #include "utils/uuid.h" @@ -418,6 +420,74 @@ uuid_abbrev_convert(Datum original, SortSupport ssup) return res; } +static Datum +uuid_decrement(Relation rel, Datum existing, bool *underflow) +{ + pg_uuid_t *uuid; + + uuid = (pg_uuid_t *) palloc(UUID_LEN); + memcpy(uuid, DatumGetUUIDP(existing), UUID_LEN); + for (int i = UUID_LEN - 1; i >= 0; i--) + { + if (uuid->data[i] > 0) + { + uuid->data[i]--; + *underflow = false; + return UUIDPGetDatum(uuid); + } + uuid->data[i] = UCHAR_MAX; + } + + pfree(uuid); /* cannot leak memory */ + + /* return value is undefined */ + *underflow = true; + return (Datum) 0; +} + +static Datum +uuid_increment(Relation rel, Datum existing, bool *overflow) +{ + pg_uuid_t *uuid; + + uuid = (pg_uuid_t *) palloc(UUID_LEN); + memcpy(uuid, DatumGetUUIDP(existing), UUID_LEN); + for (int i = UUID_LEN - 1; i >= 0; i--) + { + if (uuid->data[i] < UCHAR_MAX) + { + uuid->data[i]++; + *overflow = false; + return UUIDPGetDatum(uuid); + } + uuid->data[i] = 0; + } + + pfree(uuid); /* cannot leak memory */ + + /* return value is undefined */ + *overflow = true; + return (Datum) 0; +} + +Datum +uuid_skipsupport(PG_FUNCTION_ARGS) +{ + SkipSupport sksup = (SkipSupport) PG_GETARG_POINTER(0); + pg_uuid_t *uuid_min = palloc(UUID_LEN); + pg_uuid_t *uuid_max = palloc(UUID_LEN); + + memset(uuid_min->data, 0x00, UUID_LEN); + memset(uuid_max->data, 0xFF, UUID_LEN); + + sksup->decrement = uuid_decrement; + sksup->increment = uuid_increment; + sksup->low_elem = UUIDPGetDatum(uuid_min); + sksup->high_elem = UUIDPGetDatum(uuid_max); + + PG_RETURN_VOID(); +} + /* hash index support */ Datum uuid_hash(PG_FUNCTION_ARGS) diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index c4a0737731f..52916bab7a3 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -214,7 +214,8 @@ typedef void (*amrestrpos_function) (IndexScanDesc scan); */ /* estimate size of parallel scan descriptor */ -typedef Size (*amestimateparallelscan_function) (int nkeys, int norderbys); +typedef Size (*amestimateparallelscan_function) (Relation indexRelation, + int nkeys, int norderbys); /* prepare for parallel index scan */ typedef void (*aminitparallelscan_function) (void *target); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index faabcb78e7b..9d9b76d5b48 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -24,6 +24,7 @@ #include "lib/stringinfo.h" #include "storage/bufmgr.h" #include "storage/shm_toc.h" +#include "utils/skipsupport.h" /* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */ typedef uint16 BTCycleId; @@ -707,6 +708,10 @@ BTreeTupleGetMaxHeapTID(IndexTuple itup) * (BTOPTIONS_PROC). These procedures define a set of user-visible * parameters that can be used to control operator class behavior. None of * the built-in B-Tree operator classes currently register an "options" proc. + * + * To facilitate more efficient B-Tree skip scans, an operator class may + * choose to offer a sixth amproc procedure (BTSKIPSUPPORT_PROC). For full + * details, see src/include/utils/skipsupport.h. */ #define BTORDER_PROC 1 @@ -714,7 +719,8 @@ BTreeTupleGetMaxHeapTID(IndexTuple itup) #define BTINRANGE_PROC 3 #define BTEQUALIMAGE_PROC 4 #define BTOPTIONS_PROC 5 -#define BTNProcs 5 +#define BTSKIPSUPPORT_PROC 6 +#define BTNProcs 6 /* * We need to be able to tell the difference between read and write @@ -1027,10 +1033,21 @@ typedef BTScanPosData *BTScanPos; /* We need one of these for each equality-type SK_SEARCHARRAY scan key */ typedef struct BTArrayKeyInfo { + /* fields set for both kinds of array (SAOP arrays and skip arrays) */ int scan_key; /* index of associated key in keyData */ - int cur_elem; /* index of current element in elem_values */ - int num_elems; /* number of elems in current array value */ + int num_elems; /* number of elems (-1 means skip array) */ + + /* fields set for ScalarArrayOpExpr arrays only */ Datum *elem_values; /* array of num_elems Datums */ + int cur_elem; /* index of current element in elem_values */ + + /* fields set for skip arrays only */ + int16 attlen; /* attr's length, in bytes */ + bool attbyval; /* attr's FormData_pg_attribute.attbyval */ + bool null_elem; /* NULL is lowest/highest element? */ + SkipSupport sksup; /* skip support (NULL if opclass lacks it) */ + ScanKey low_compare; /* array's > or >= lower bound */ + ScanKey high_compare; /* array's < or <= upper bound */ } BTArrayKeyInfo; typedef struct BTScanOpaqueData @@ -1119,6 +1136,15 @@ typedef struct BTReadPageState */ #define SK_BT_REQFWD 0x00010000 /* required to continue forward scan */ #define SK_BT_REQBKWD 0x00020000 /* required to continue backward scan */ +#define SK_BT_SKIP 0x00040000 /* skip array on column without input = */ + +/* SK_BT_SKIP-only flags (set and unset by array advancement) */ +#define SK_BT_MINVAL 0x00080000 /* invalid sk_argument, use low_compare */ +#define SK_BT_MAXVAL 0x00100000 /* invalid sk_argument, use high_compare */ +#define SK_BT_NEXT 0x00200000 /* positions the scan > sk_argument */ +#define SK_BT_PRIOR 0x00400000 /* positions the scan < sk_argument */ + +/* Remaps pg_index flag bits to uppermost SK_BT_* byte */ #define SK_BT_INDOPTION_SHIFT 24 /* must clear the above bits */ #define SK_BT_DESC (INDOPTION_DESC << SK_BT_INDOPTION_SHIFT) #define SK_BT_NULLS_FIRST (INDOPTION_NULLS_FIRST << SK_BT_INDOPTION_SHIFT) @@ -1165,7 +1191,7 @@ extern bool btinsert(Relation rel, Datum *values, bool *isnull, bool indexUnchanged, struct IndexInfo *indexInfo); extern IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys); -extern Size btestimateparallelscan(int nkeys, int norderbys); +extern Size btestimateparallelscan(Relation rel, int nkeys, int norderbys); extern void btinitparallelscan(void *target); extern bool btgettuple(IndexScanDesc scan, ScanDirection dir); extern int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 950fda777fb..208936962ef 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202504031 +#define CATALOG_VERSION_NO 202504041 #endif diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index 41056171059..92505148998 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -22,6 +22,8 @@ { amprocfamily => 'btree/bool_ops', amproclefttype => 'bool', amprocrighttype => 'bool', amprocnum => '1', amproc => 'btboolcmp' }, { amprocfamily => 'btree/bool_ops', amproclefttype => 'bool', + amprocrighttype => 'bool', amprocnum => '6', amproc => 'btboolskipsupport' }, +{ amprocfamily => 'btree/bool_ops', amproclefttype => 'bool', amprocrighttype => 'bool', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/bpchar_ops', amproclefttype => 'bpchar', amprocrighttype => 'bpchar', amprocnum => '1', amproc => 'bpcharcmp' }, @@ -41,6 +43,8 @@ amprocrighttype => 'char', amprocnum => '1', amproc => 'btcharcmp' }, { amprocfamily => 'btree/char_ops', amproclefttype => 'char', amprocrighttype => 'char', amprocnum => '4', amproc => 'btequalimage' }, +{ amprocfamily => 'btree/char_ops', amproclefttype => 'char', + amprocrighttype => 'char', amprocnum => '6', amproc => 'btcharskipsupport' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', amprocrighttype => 'date', amprocnum => '1', amproc => 'date_cmp' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', @@ -48,6 +52,8 @@ { amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', amprocrighttype => 'date', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', + amprocrighttype => 'date', amprocnum => '6', amproc => 'date_skipsupport' }, +{ amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', amprocrighttype => 'timestamp', amprocnum => '1', amproc => 'date_cmp_timestamp' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', @@ -61,6 +67,9 @@ { amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamp', amprocrighttype => 'timestamp', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamp', + amprocrighttype => 'timestamp', amprocnum => '6', + amproc => 'timestamp_skipsupport' }, +{ amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamp', amprocrighttype => 'date', amprocnum => '1', amproc => 'timestamp_cmp_date' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamp', amprocrighttype => 'timestamptz', amprocnum => '1', @@ -75,6 +84,9 @@ amprocrighttype => 'timestamptz', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamptz', + amprocrighttype => 'timestamptz', amprocnum => '6', + amproc => 'timestamp_skipsupport' }, +{ amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamptz', amprocrighttype => 'date', amprocnum => '1', amproc => 'timestamptz_cmp_date' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamptz', @@ -123,6 +135,8 @@ { amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', amprocrighttype => 'int2', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', + amprocrighttype => 'int2', amprocnum => '6', amproc => 'btint2skipsupport' }, +{ amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', amprocrighttype => 'int4', amprocnum => '1', amproc => 'btint24cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', amprocrighttype => 'int8', amprocnum => '1', amproc => 'btint28cmp' }, @@ -142,6 +156,8 @@ { amprocfamily => 'btree/integer_ops', amproclefttype => 'int4', amprocrighttype => 'int4', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int4', + amprocrighttype => 'int4', amprocnum => '6', amproc => 'btint4skipsupport' }, +{ amprocfamily => 'btree/integer_ops', amproclefttype => 'int4', amprocrighttype => 'int8', amprocnum => '1', amproc => 'btint48cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int4', amprocrighttype => 'int2', amprocnum => '1', amproc => 'btint42cmp' }, @@ -161,6 +177,8 @@ { amprocfamily => 'btree/integer_ops', amproclefttype => 'int8', amprocrighttype => 'int8', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int8', + amprocrighttype => 'int8', amprocnum => '6', amproc => 'btint8skipsupport' }, +{ amprocfamily => 'btree/integer_ops', amproclefttype => 'int8', amprocrighttype => 'int4', amprocnum => '1', amproc => 'btint84cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int8', amprocrighttype => 'int2', amprocnum => '1', amproc => 'btint82cmp' }, @@ -193,6 +211,8 @@ amprocrighttype => 'oid', amprocnum => '2', amproc => 'btoidsortsupport' }, { amprocfamily => 'btree/oid_ops', amproclefttype => 'oid', amprocrighttype => 'oid', amprocnum => '4', amproc => 'btequalimage' }, +{ amprocfamily => 'btree/oid_ops', amproclefttype => 'oid', + amprocrighttype => 'oid', amprocnum => '6', amproc => 'btoidskipsupport' }, { amprocfamily => 'btree/oidvector_ops', amproclefttype => 'oidvector', amprocrighttype => 'oidvector', amprocnum => '1', amproc => 'btoidvectorcmp' }, @@ -261,6 +281,8 @@ amprocrighttype => 'uuid', amprocnum => '2', amproc => 'uuid_sortsupport' }, { amprocfamily => 'btree/uuid_ops', amproclefttype => 'uuid', amprocrighttype => 'uuid', amprocnum => '4', amproc => 'btequalimage' }, +{ amprocfamily => 'btree/uuid_ops', amproclefttype => 'uuid', + amprocrighttype => 'uuid', amprocnum => '6', amproc => 'uuid_skipsupport' }, { amprocfamily => 'btree/record_ops', amproclefttype => 'record', amprocrighttype => 'record', amprocnum => '1', amproc => 'btrecordcmp' }, { amprocfamily => 'btree/record_image_ops', amproclefttype => 'record', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index a28a15993a2..5d5be8ba4e1 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1004,18 +1004,27 @@ { oid => '3129', descr => 'sort support', proname => 'btint2sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint2sortsupport' }, +{ oid => '9290', descr => 'skip support', + proname => 'btint2skipsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'btint2skipsupport' }, { oid => '351', descr => 'less-equal-greater', proname => 'btint4cmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'int4 int4', prosrc => 'btint4cmp' }, { oid => '3130', descr => 'sort support', proname => 'btint4sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint4sortsupport' }, +{ oid => '9291', descr => 'skip support', + proname => 'btint4skipsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'btint4skipsupport' }, { oid => '842', descr => 'less-equal-greater', proname => 'btint8cmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'int8 int8', prosrc => 'btint8cmp' }, { oid => '3131', descr => 'sort support', proname => 'btint8sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint8sortsupport' }, +{ oid => '9292', descr => 'skip support', + proname => 'btint8skipsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'btint8skipsupport' }, { oid => '354', descr => 'less-equal-greater', proname => 'btfloat4cmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'float4 float4', prosrc => 'btfloat4cmp' }, @@ -1034,12 +1043,18 @@ { oid => '3134', descr => 'sort support', proname => 'btoidsortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btoidsortsupport' }, +{ oid => '9293', descr => 'skip support', + proname => 'btoidskipsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'btoidskipsupport' }, { oid => '404', descr => 'less-equal-greater', proname => 'btoidvectorcmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'oidvector oidvector', prosrc => 'btoidvectorcmp' }, { oid => '358', descr => 'less-equal-greater', proname => 'btcharcmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'char char', prosrc => 'btcharcmp' }, +{ oid => '9294', descr => 'skip support', + proname => 'btcharskipsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'btcharskipsupport' }, { oid => '359', descr => 'less-equal-greater', proname => 'btnamecmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'name name', prosrc => 'btnamecmp' }, @@ -2300,6 +2315,9 @@ { oid => '3136', descr => 'sort support', proname => 'date_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'date_sortsupport' }, +{ oid => '9295', descr => 'skip support', + proname => 'date_skipsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'date_skipsupport' }, { oid => '4133', descr => 'window RANGE support', proname => 'in_range', prorettype => 'bool', proargtypes => 'date date interval bool bool', @@ -4497,6 +4515,9 @@ { oid => '1693', descr => 'less-equal-greater', proname => 'btboolcmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'bool bool', prosrc => 'btboolcmp' }, +{ oid => '9296', descr => 'skip support', + proname => 'btboolskipsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'btboolskipsupport' }, { oid => '1688', descr => 'hash', proname => 'time_hash', prorettype => 'int4', proargtypes => 'time', @@ -6376,6 +6397,9 @@ { oid => '3137', descr => 'sort support', proname => 'timestamp_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'timestamp_sortsupport' }, +{ oid => '9297', descr => 'skip support', + proname => 'timestamp_skipsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'timestamp_skipsupport' }, { oid => '4134', descr => 'window RANGE support', proname => 'in_range', prorettype => 'bool', @@ -9431,6 +9455,9 @@ { oid => '3300', descr => 'sort support', proname => 'uuid_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'uuid_sortsupport' }, +{ oid => '9298', descr => 'skip support', + proname => 'uuid_skipsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'uuid_skipsupport' }, { oid => '2961', descr => 'I/O', proname => 'uuid_recv', prorettype => 'uuid', proargtypes => 'internal', prosrc => 'uuid_recv' }, diff --git a/src/include/utils/skipsupport.h b/src/include/utils/skipsupport.h new file mode 100644 index 00000000000..bc51847cf61 --- /dev/null +++ b/src/include/utils/skipsupport.h @@ -0,0 +1,98 @@ +/*------------------------------------------------------------------------- + * + * skipsupport.h + * Support routines for B-Tree skip scan. + * + * B-Tree operator classes for discrete types can optionally provide a support + * function for skipping. This is used during skip scans. + * + * A B-tree operator class that implements skip support provides B-tree index + * scans with a way of enumerating and iterating through every possible value + * from the domain of indexable values. This gives scans a way to determine + * the next value in line for a given skip array/scan key/skipped attribute. + * Scans request the next (or previous) value whenever they run out of tuples + * matching the skip array's current element value. The next (or previous) + * value can be used to relocate the scan; it is applied in combination with + * at least one additional lower-order non-skip key, taken from the query. + * + * Skip support is used by discrete type (e.g., integer and date) opclasses. + * Indexes with an attribute whose input opclass is of one of these types tend + * to store adjacent values in adjoining groups of index tuples. Each time a + * skip scan with skip support successfully guesses that the next value in the + * index (for a given skipped column) is indeed the value that skip support + * just incremented its skip array to, it will have saved the scan some work. + * The scan will have avoided an index probe that directly finds the next + * value that appears in the index. (When skip support guesses wrong, then it + * won't have saved any work, but it also won't have added any useless work. + * The failed attempt to locate exactly-matching index tuples acts just like + * an explicit probe would; it'll still find the index's true next value.) + * + * It usually isn't feasible to implement skip support for an opclass whose + * input type is continuous. The B-Tree code falls back on next-key sentinel + * values for any opclass that doesn't provide its own skip support function. + * This isn't really an implementation restriction; there is no benefit to + * providing skip support for an opclass where guessing that the next indexed + * value is the next possible indexable value never (or hardly ever) works out. + * + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/utils/skipsupport.h + * + *------------------------------------------------------------------------- + */ +#ifndef SKIPSUPPORT_H +#define SKIPSUPPORT_H + +#include "utils/relcache.h" + +typedef struct SkipSupportData *SkipSupport; +typedef Datum (*SkipSupportIncDec) (Relation rel, + Datum existing, + bool *overflow); + +/* + * State/callbacks used by skip arrays to procedurally generate elements. + * + * A BTSKIPSUPPORT_PROC function must set each and every field when called + * (there are no optional fields). + */ +typedef struct SkipSupportData +{ + /* + * low_elem and high_elem must be set with the lowest and highest possible + * values from the domain of indexable values (assuming ascending order) + */ + Datum low_elem; /* lowest sorting/leftmost non-NULL value */ + Datum high_elem; /* highest sorting/rightmost non-NULL value */ + + /* + * Decrement/increment functions. + * + * Returns a decremented/incremented copy of caller's existing datum, + * allocated in caller's memory context (for pass-by-reference types). + * It's not okay for these functions to leak any memory. + * + * When the decrement function (or increment function) is called with a + * value that already matches low_elem (or high_elem), function must set + * the *overflow argument. The return value is treated as undefined by + * the B-Tree code; it shouldn't need to be (and won't be) pfree'd. + * + * The B-Tree code's "existing" datum argument is often just a straight + * copy of a value from an index tuple. Operator classes must accept + * every possible representational variation within the underlying type. + * On the other hand, opclasses are _not_ required to preserve information + * that doesn't affect how datums are sorted (e.g., skip support for a + * fixed precision numeric type needn't preserve datum display scale). + * Operator class decrement/increment functions will never be called with + * a NULL "existing" argument, either. + */ + SkipSupportIncDec decrement; + SkipSupportIncDec increment; +} SkipSupportData; + +extern SkipSupport PrepareSkipSupportFromOpclass(Oid opfamily, Oid opcintype, + bool reverse); + +#endif /* SKIPSUPPORT_H */ diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out index 0c274d56a04..23bf33f10a9 100644 --- a/src/test/regress/expected/alter_generic.out +++ b/src/test/regress/expected/alter_generic.out @@ -362,9 +362,9 @@ ERROR: invalid operator number 0, must be between 1 and 5 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 1 < ; -- operator without argument types ERROR: operator argument types must be specified in ALTER OPERATOR FAMILY ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 0 btint42cmp(int4, int2); -- invalid options parsing function -ERROR: invalid function number 0, must be between 1 and 5 -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 6 btint42cmp(int4, int2); -- function number should be between 1 and 5 -ERROR: invalid function number 6, must be between 1 and 5 +ERROR: invalid function number 0, must be between 1 and 6 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 7 btint42cmp(int4, int2); -- function number should be between 1 and 6 +ERROR: invalid function number 7, must be between 1 and 6 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD STORAGE invalid_storage; -- Ensure STORAGE is not a part of ALTER OPERATOR FAMILY ERROR: STORAGE cannot be specified in ALTER OPERATOR FAMILY DROP OPERATOR FAMILY alt_opf4 USING btree; @@ -505,6 +505,10 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD ALTER OPERATOR FAMILY alt_opf18 USING btree ADD FUNCTION 4 (int4, int2) btequalimage(oid); ERROR: ordering equal image functions must not be cross-type +-- Should fail. Not allowed to have cross-type skip support function. +ALTER OPERATOR FAMILY alt_opf18 USING btree + ADD FUNCTION 6 (int4, int2) btint4skipsupport(internal); +ERROR: btree skip support functions must not be cross-type ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); ERROR: function 2(integer,integer) does not exist in operator family "alt_opf18" DROP OPERATOR FAMILY alt_opf18 USING btree; diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out index 8879554c3f7..bfb1a286ea4 100644 --- a/src/test/regress/expected/btree_index.out +++ b/src/test/regress/expected/btree_index.out @@ -581,6 +581,47 @@ alter table btree_tall_tbl alter COLUMN t set storage plain; create index btree_tall_idx on btree_tall_tbl (t, id) with (fillfactor = 10); insert into btree_tall_tbl select g, repeat('x', 250) from generate_series(1, 130) g; +insert into btree_tall_tbl select g, NULL +from generate_series(50, 60) g; +-- +-- Test for skip scan with type that lacks skip support (text) +-- +set enable_seqscan to false; +set enable_bitmapscan to false; +-- Forwards scan +SELECT id FROM btree_tall_tbl WHERE id = 55 ORDER BY t, id; + id +---- + 55 + 55 +(2 rows) + +explain (costs off) +SELECT id FROM btree_tall_tbl WHERE id = 55 ORDER BY t, id; + QUERY PLAN +-------------------------------------------------------- + Index Only Scan using btree_tall_idx on btree_tall_tbl + Index Cond: (id = 55) +(2 rows) + +-- Backwards scan +SELECT id FROM btree_tall_tbl WHERE id = 55 ORDER BY t DESC, id DESC; + id +---- + 55 + 55 +(2 rows) + +explain (costs off) +SELECT id FROM btree_tall_tbl WHERE id = 55 ORDER BY t DESC, id DESC; + QUERY PLAN +----------------------------------------------------------------- + Index Only Scan Backward using btree_tall_idx on btree_tall_tbl + Index Cond: (id = 55) +(2 rows) + +reset enable_seqscan; +reset enable_bitmapscan; -- -- Test for multilevel page deletion -- diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 15be0043ad4..2cfb26699be 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -1637,7 +1637,9 @@ DROP TABLE syscol_table; -- Tests for IS NULL/IS NOT NULL with b-tree indexes -- CREATE TABLE onek_with_null AS SELECT unique1, unique2 FROM onek; -INSERT INTO onek_with_null (unique1,unique2) VALUES (NULL, -1), (NULL, NULL); +INSERT INTO onek_with_null(unique1, unique2) +VALUES (NULL, -1), (NULL, 2_147_483_647), (NULL, NULL), + (100, NULL), (500, NULL); CREATE UNIQUE INDEX onek_nulltest ON onek_with_null (unique2,unique1); SET enable_seqscan = OFF; SET enable_indexscan = ON; @@ -1645,7 +1647,7 @@ SET enable_bitmapscan = ON; SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL; count ------- - 2 + 3 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NULL; @@ -1657,13 +1659,13 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL; count ------- - 1000 + 1002 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NOT NULL; count ------- - 1 + 2 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL AND unique1 > 500; @@ -1678,12 +1680,18 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique1 > 500; 0 (1 row) +SELECT unique1, unique2 FROM onek_with_null WHERE unique1 = 500 ORDER BY unique2 DESC, unique1 DESC LIMIT 1; + unique1 | unique2 +---------+--------- + 500 | +(1 row) + DROP INDEX onek_nulltest; CREATE UNIQUE INDEX onek_nulltest ON onek_with_null (unique2 desc,unique1); SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL; count ------- - 2 + 3 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NULL; @@ -1695,13 +1703,13 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL; count ------- - 1000 + 1002 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NOT NULL; count ------- - 1 + 2 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL AND unique1 > 500; @@ -1722,12 +1730,18 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IN (-1, 0, 1 (1 row) +SELECT unique1, unique2 FROM onek_with_null WHERE unique1 = 500 ORDER BY unique2 DESC, unique1 DESC LIMIT 1; + unique1 | unique2 +---------+--------- + 500 | +(1 row) + DROP INDEX onek_nulltest; CREATE UNIQUE INDEX onek_nulltest ON onek_with_null (unique2 desc nulls last,unique1); SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL; count ------- - 2 + 3 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NULL; @@ -1739,13 +1753,13 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL; count ------- - 1000 + 1002 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NOT NULL; count ------- - 1 + 2 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL AND unique1 > 500; @@ -1760,12 +1774,18 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique1 > 500; 0 (1 row) +SELECT unique1, unique2 FROM onek_with_null WHERE unique1 = 500 ORDER BY unique2 DESC, unique1 DESC LIMIT 1; + unique1 | unique2 +---------+--------- + 500 | +(1 row) + DROP INDEX onek_nulltest; CREATE UNIQUE INDEX onek_nulltest ON onek_with_null (unique2 nulls first,unique1); SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL; count ------- - 2 + 3 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NULL; @@ -1777,13 +1797,13 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL; count ------- - 1000 + 1002 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NOT NULL; count ------- - 1 + 2 (1 row) SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL AND unique1 > 500; @@ -1798,6 +1818,12 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique1 > 500; 0 (1 row) +SELECT unique1, unique2 FROM onek_with_null WHERE unique1 = 500 ORDER BY unique2 DESC, unique1 DESC LIMIT 1; + unique1 | unique2 +---------+--------- + 500 | +(1 row) + DROP INDEX onek_nulltest; -- Check initial-positioning logic too CREATE UNIQUE INDEX onek_nulltest ON onek_with_null (unique2); @@ -1829,20 +1855,24 @@ SELECT unique1, unique2 FROM onek_with_null WHERE unique2 >= 0 (2 rows) SELECT unique1, unique2 FROM onek_with_null - ORDER BY unique2 DESC LIMIT 2; - unique1 | unique2 ----------+--------- - | - 278 | 999 -(2 rows) + ORDER BY unique2 DESC LIMIT 5; + unique1 | unique2 +---------+------------ + 500 | + 100 | + | + | 2147483647 + 278 | 999 +(5 rows) SELECT unique1, unique2 FROM onek_with_null WHERE unique2 >= -1 - ORDER BY unique2 DESC LIMIT 2; - unique1 | unique2 ----------+--------- - 278 | 999 - 0 | 998 -(2 rows) + ORDER BY unique2 DESC LIMIT 3; + unique1 | unique2 +---------+------------ + | 2147483647 + 278 | 999 + 0 | 998 +(3 rows) SELECT unique1, unique2 FROM onek_with_null WHERE unique2 < 999 ORDER BY unique2 DESC LIMIT 2; @@ -2247,7 +2277,8 @@ SELECT count(*) FROM dupindexcols (1 row) -- --- Check that index scans with =ANY indexquals return rows in index order +-- Check that index scans with SAOP array and/or skip array indexquals +-- return rows in index order -- explain (costs off) SELECT unique1 FROM tenk1 @@ -2269,7 +2300,7 @@ ORDER BY unique1; 42 (3 rows) --- Non-required array scan key on "tenthous": +-- Skip array on "thousand", SAOP array on "tenthous": explain (costs off) SELECT thousand, tenthous FROM tenk1 WHERE thousand < 2 AND tenthous IN (1001,3000) @@ -2289,7 +2320,7 @@ ORDER BY thousand; 1 | 1001 (2 rows) --- Non-required array scan key on "tenthous", backward scan: +-- Skip array on "thousand", SAOP array on "tenthous", backward scan: explain (costs off) SELECT thousand, tenthous FROM tenk1 WHERE thousand < 2 AND tenthous IN (1001,3000) @@ -2309,6 +2340,25 @@ ORDER BY thousand DESC, tenthous DESC; 0 | 3000 (2 rows) +explain (costs off) +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > 995 and tenthous in (998, 999) +ORDER BY thousand desc; + QUERY PLAN +-------------------------------------------------------------------------------- + Index Only Scan Backward using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand > 995) AND (tenthous = ANY ('{998,999}'::integer[]))) +(2 rows) + +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > 995 and tenthous in (998, 999) +ORDER BY thousand desc; + thousand | tenthous +----------+---------- + 999 | 999 + 998 | 998 +(2 rows) + -- -- Check elimination of redundant and contradictory index quals -- @@ -2340,6 +2390,45 @@ SELECT unique1 FROM tenk1 WHERE unique1 = ANY('{7, 14, 22}') and unique1 = ANY(' (0 rows) explain (costs off) +SELECT unique1 FROM tenk1 WHERE unique1 = ANY(NULL); + QUERY PLAN +------------------------------------------------- + Index Only Scan using tenk1_unique1 on tenk1 + Index Cond: (unique1 = ANY (NULL::integer[])) +(2 rows) + +SELECT unique1 FROM tenk1 WHERE unique1 = ANY(NULL); + unique1 +--------- +(0 rows) + +explain (costs off) +SELECT unique1 FROM tenk1 WHERE unique1 = ANY('{NULL,NULL,NULL}'); + QUERY PLAN +--------------------------------------------------------------- + Index Only Scan using tenk1_unique1 on tenk1 + Index Cond: (unique1 = ANY ('{NULL,NULL,NULL}'::integer[])) +(2 rows) + +SELECT unique1 FROM tenk1 WHERE unique1 = ANY('{NULL,NULL,NULL}'); + unique1 +--------- +(0 rows) + +explain (costs off) +SELECT unique1 FROM tenk1 WHERE unique1 IS NULL AND unique1 IS NULL; + QUERY PLAN +--------------------------------------------------------- + Index Only Scan using tenk1_unique1 on tenk1 + Index Cond: ((unique1 IS NULL) AND (unique1 IS NULL)) +(2 rows) + +SELECT unique1 FROM tenk1 WHERE unique1 IS NULL AND unique1 IS NULL; + unique1 +--------- +(0 rows) + +explain (costs off) SELECT unique1 FROM tenk1 WHERE unique1 IN (1, 42, 7) and unique1 = 1; QUERY PLAN --------------------------------------------------------------------------- @@ -2462,6 +2551,44 @@ SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5); --------- (0 rows) +-- Skip array redundancy (pair of redundant low_compare inequalities) +explain (costs off) +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > -1 and thousand >= 0 AND tenthous = 3000 +ORDER BY thousand; + QUERY PLAN +-------------------------------------------------------------------------------------- + Index Only Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand > '-1'::integer) AND (thousand >= 0) AND (tenthous = 3000)) +(2 rows) + +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > -1 and thousand >= 0 AND tenthous = 3000 +ORDER BY thousand; + thousand | tenthous +----------+---------- + 0 | 3000 +(1 row) + +-- Skip array redundancy (pair of redundant high_compare inequalities) +explain (costs off) +SELECT thousand, tenthous FROM tenk1 +WHERE thousand < 3 and thousand <= 2 AND tenthous = 1001 +ORDER BY thousand; + QUERY PLAN +-------------------------------------------------------------------------- + Index Only Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand < 3) AND (thousand <= 2) AND (tenthous = 1001)) +(2 rows) + +SELECT thousand, tenthous FROM tenk1 +WHERE thousand < 3 and thousand <= 2 AND tenthous = 1001 +ORDER BY thousand; + thousand | tenthous +----------+---------- + 1 | 1001 +(1 row) + -- -- Check elimination of constant-NULL subexpressions -- diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index b1d12585eae..cf48ae6d0c2 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -5332,9 +5332,10 @@ Function | in_range(time without time zone,time without time zone,i btree | uuid_ops | uuid | uuid | 1 | uuid_cmp btree | uuid_ops | uuid | uuid | 2 | uuid_sortsupport btree | uuid_ops | uuid | uuid | 4 | btequalimage + btree | uuid_ops | uuid | uuid | 6 | uuid_skipsupport hash | uuid_ops | uuid | uuid | 1 | uuid_hash hash | uuid_ops | uuid | uuid | 2 | uuid_hash_extended -(5 rows) +(6 rows) -- check \dconfig set work_mem = 10240; diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql index de58d268d31..5e20dc63337 100644 --- a/src/test/regress/sql/alter_generic.sql +++ b/src/test/regress/sql/alter_generic.sql @@ -310,7 +310,7 @@ ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 6 < (int4, int2); -- ope ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 5 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 1 < ; -- operator without argument types ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 0 btint42cmp(int4, int2); -- invalid options parsing function -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 6 btint42cmp(int4, int2); -- function number should be between 1 and 5 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 7 btint42cmp(int4, int2); -- function number should be between 1 and 6 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD STORAGE invalid_storage; -- Ensure STORAGE is not a part of ALTER OPERATOR FAMILY DROP OPERATOR FAMILY alt_opf4 USING btree; @@ -444,6 +444,9 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD -- Should fail. Not allowed to have cross-type equalimage function. ALTER OPERATOR FAMILY alt_opf18 USING btree ADD FUNCTION 4 (int4, int2) btequalimage(oid); +-- Should fail. Not allowed to have cross-type skip support function. +ALTER OPERATOR FAMILY alt_opf18 USING btree + ADD FUNCTION 6 (int4, int2) btint4skipsupport(internal); ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); DROP OPERATOR FAMILY alt_opf18 USING btree; diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql index 670ad5c6e6a..68c61dbc7d1 100644 --- a/src/test/regress/sql/btree_index.sql +++ b/src/test/regress/sql/btree_index.sql @@ -327,6 +327,27 @@ alter table btree_tall_tbl alter COLUMN t set storage plain; create index btree_tall_idx on btree_tall_tbl (t, id) with (fillfactor = 10); insert into btree_tall_tbl select g, repeat('x', 250) from generate_series(1, 130) g; +insert into btree_tall_tbl select g, NULL +from generate_series(50, 60) g; + +-- +-- Test for skip scan with type that lacks skip support (text) +-- +set enable_seqscan to false; +set enable_bitmapscan to false; + +-- Forwards scan +SELECT id FROM btree_tall_tbl WHERE id = 55 ORDER BY t, id; +explain (costs off) +SELECT id FROM btree_tall_tbl WHERE id = 55 ORDER BY t, id; + +-- Backwards scan +SELECT id FROM btree_tall_tbl WHERE id = 55 ORDER BY t DESC, id DESC; +explain (costs off) +SELECT id FROM btree_tall_tbl WHERE id = 55 ORDER BY t DESC, id DESC; + +reset enable_seqscan; +reset enable_bitmapscan; -- -- Test for multilevel page deletion diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 6b3852dddd8..cd90b1c3a8f 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -650,7 +650,9 @@ DROP TABLE syscol_table; -- CREATE TABLE onek_with_null AS SELECT unique1, unique2 FROM onek; -INSERT INTO onek_with_null (unique1,unique2) VALUES (NULL, -1), (NULL, NULL); +INSERT INTO onek_with_null(unique1, unique2) +VALUES (NULL, -1), (NULL, 2_147_483_647), (NULL, NULL), + (100, NULL), (500, NULL); CREATE UNIQUE INDEX onek_nulltest ON onek_with_null (unique2,unique1); SET enable_seqscan = OFF; @@ -663,6 +665,7 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NOT NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL AND unique1 > 500; SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique1 > 500; +SELECT unique1, unique2 FROM onek_with_null WHERE unique1 = 500 ORDER BY unique2 DESC, unique1 DESC LIMIT 1; DROP INDEX onek_nulltest; @@ -675,6 +678,7 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NOT NUL SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL AND unique1 > 500; SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique1 > 500; SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IN (-1, 0, 1); +SELECT unique1, unique2 FROM onek_with_null WHERE unique1 = 500 ORDER BY unique2 DESC, unique1 DESC LIMIT 1; DROP INDEX onek_nulltest; @@ -686,6 +690,7 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NOT NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL AND unique1 > 500; SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique1 > 500; +SELECT unique1, unique2 FROM onek_with_null WHERE unique1 = 500 ORDER BY unique2 DESC, unique1 DESC LIMIT 1; DROP INDEX onek_nulltest; @@ -697,6 +702,7 @@ SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique2 IS NOT NULL; SELECT count(*) FROM onek_with_null WHERE unique1 IS NOT NULL AND unique1 > 500; SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique1 > 500; +SELECT unique1, unique2 FROM onek_with_null WHERE unique1 = 500 ORDER BY unique2 DESC, unique1 DESC LIMIT 1; DROP INDEX onek_nulltest; @@ -716,9 +722,9 @@ SELECT unique1, unique2 FROM onek_with_null WHERE unique2 >= 0 ORDER BY unique2 LIMIT 2; SELECT unique1, unique2 FROM onek_with_null - ORDER BY unique2 DESC LIMIT 2; + ORDER BY unique2 DESC LIMIT 5; SELECT unique1, unique2 FROM onek_with_null WHERE unique2 >= -1 - ORDER BY unique2 DESC LIMIT 2; + ORDER BY unique2 DESC LIMIT 3; SELECT unique1, unique2 FROM onek_with_null WHERE unique2 < 999 ORDER BY unique2 DESC LIMIT 2; @@ -852,7 +858,8 @@ SELECT count(*) FROM dupindexcols WHERE f1 BETWEEN 'WA' AND 'ZZZ' and id < 1000 and f1 ~<~ 'YX'; -- --- Check that index scans with =ANY indexquals return rows in index order +-- Check that index scans with SAOP array and/or skip array indexquals +-- return rows in index order -- explain (costs off) @@ -864,7 +871,7 @@ SELECT unique1 FROM tenk1 WHERE unique1 IN (1,42,7) ORDER BY unique1; --- Non-required array scan key on "tenthous": +-- Skip array on "thousand", SAOP array on "tenthous": explain (costs off) SELECT thousand, tenthous FROM tenk1 WHERE thousand < 2 AND tenthous IN (1001,3000) @@ -874,7 +881,7 @@ SELECT thousand, tenthous FROM tenk1 WHERE thousand < 2 AND tenthous IN (1001,3000) ORDER BY thousand; --- Non-required array scan key on "tenthous", backward scan: +-- Skip array on "thousand", SAOP array on "tenthous", backward scan: explain (costs off) SELECT thousand, tenthous FROM tenk1 WHERE thousand < 2 AND tenthous IN (1001,3000) @@ -884,6 +891,15 @@ SELECT thousand, tenthous FROM tenk1 WHERE thousand < 2 AND tenthous IN (1001,3000) ORDER BY thousand DESC, tenthous DESC; +explain (costs off) +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > 995 and tenthous in (998, 999) +ORDER BY thousand desc; + +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > 995 and tenthous in (998, 999) +ORDER BY thousand desc; + -- -- Check elimination of redundant and contradictory index quals -- @@ -898,6 +914,21 @@ SELECT unique1 FROM tenk1 WHERE unique1 = ANY('{7, 14, 22}') and unique1 = ANY(' SELECT unique1 FROM tenk1 WHERE unique1 = ANY('{7, 14, 22}') and unique1 = ANY('{33, 44}'::bigint[]); explain (costs off) +SELECT unique1 FROM tenk1 WHERE unique1 = ANY(NULL); + +SELECT unique1 FROM tenk1 WHERE unique1 = ANY(NULL); + +explain (costs off) +SELECT unique1 FROM tenk1 WHERE unique1 = ANY('{NULL,NULL,NULL}'); + +SELECT unique1 FROM tenk1 WHERE unique1 = ANY('{NULL,NULL,NULL}'); + +explain (costs off) +SELECT unique1 FROM tenk1 WHERE unique1 IS NULL AND unique1 IS NULL; + +SELECT unique1 FROM tenk1 WHERE unique1 IS NULL AND unique1 IS NULL; + +explain (costs off) SELECT unique1 FROM tenk1 WHERE unique1 IN (1, 42, 7) and unique1 = 1; SELECT unique1 FROM tenk1 WHERE unique1 IN (1, 42, 7) and unique1 = 1; @@ -942,6 +973,26 @@ SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5); SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5); +-- Skip array redundancy (pair of redundant low_compare inequalities) +explain (costs off) +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > -1 and thousand >= 0 AND tenthous = 3000 +ORDER BY thousand; + +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > -1 and thousand >= 0 AND tenthous = 3000 +ORDER BY thousand; + +-- Skip array redundancy (pair of redundant high_compare inequalities) +explain (costs off) +SELECT thousand, tenthous FROM tenk1 +WHERE thousand < 3 and thousand <= 2 AND tenthous = 1001 +ORDER BY thousand; + +SELECT thousand, tenthous FROM tenk1 +WHERE thousand < 3 and thousand <= 2 AND tenthous = 1001 +ORDER BY thousand; + -- -- Check elimination of constant-NULL subexpressions -- diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index c3f05796a7c..229fbff47ae 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2754,6 +2754,8 @@ SimpleStringListCell SingleBoundSortItem Size SkipPages +SkipSupport +SkipSupportData SlabBlock SlabContext SlabSlot |