diff options
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 491 |
1 files changed, 374 insertions, 117 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 462308807ef..021ea7517d7 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -193,6 +193,8 @@ static double convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure); static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata); +static void examine_indexcol_variable(PlannerInfo *root, IndexOptInfo *index, + int indexcol, VariableStatData *vardata); static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max); @@ -214,6 +216,8 @@ static bool get_actual_variable_endpoint(Relation heapRel, MemoryContext outercontext, Datum *endpointDatum); static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); +static double btcost_correlation(IndexOptInfo *index, + VariableStatData *vardata); /* @@ -5944,6 +5948,92 @@ examine_simple_variable(PlannerInfo *root, Var *var, } /* + * examine_indexcol_variable + * Try to look up statistical data about an index column/expression. + * Fill in a VariableStatData struct to describe the column. + * + * Inputs: + * root: the planner info + * index: the index whose column we're interested in + * indexcol: 0-based index column number (subscripts index->indexkeys[]) + * + * Outputs: *vardata is filled as follows: + * var: the input expression (with any binary relabeling stripped, if + * it is or contains a variable; but otherwise the type is preserved) + * rel: RelOptInfo for table relation containing variable. + * statsTuple: the pg_statistic entry for the variable, if one exists; + * otherwise NULL. + * freefunc: pointer to a function to release statsTuple with. + * + * Caller is responsible for doing ReleaseVariableStats() before exiting. + */ +static void +examine_indexcol_variable(PlannerInfo *root, IndexOptInfo *index, + int indexcol, VariableStatData *vardata) +{ + AttrNumber colnum; + Oid relid; + + if (index->indexkeys[indexcol] != 0) + { + /* Simple variable --- look to stats for the underlying table */ + RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root); + + Assert(rte->rtekind == RTE_RELATION); + relid = rte->relid; + Assert(relid != InvalidOid); + colnum = index->indexkeys[indexcol]; + vardata->rel = index->rel; + + if (get_relation_stats_hook && + (*get_relation_stats_hook) (root, rte, colnum, vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it did + * supply a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata->statsTuple) && + !vardata->freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata->statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(relid), + Int16GetDatum(colnum), + BoolGetDatum(rte->inh)); + vardata->freefunc = ReleaseSysCache; + } + } + else + { + /* Expression --- maybe there are stats for the index itself */ + relid = index->indexoid; + colnum = indexcol + 1; + + if (get_index_stats_hook && + (*get_index_stats_hook) (root, relid, colnum, vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it did + * supply a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata->statsTuple) && + !vardata->freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata->statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(relid), + Int16GetDatum(colnum), + BoolGetDatum(false)); + vardata->freefunc = ReleaseSysCache; + } + } +} + +/* * Check whether it is permitted to call func_oid passing some of the * pg_statistic data in vardata. We allow this either if the user has SELECT * privileges on the table or column underlying the pg_statistic data or if @@ -7007,6 +7097,53 @@ add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals) return list_concat(predExtraQuals, indexQuals); } +/* + * Estimate correlation of btree index's first column. + * + * If we can get an estimate of the first column's ordering correlation C + * from pg_statistic, estimate the index correlation as C for a single-column + * index, or C * 0.75 for multiple columns. The idea here is that multiple + * columns dilute the importance of the first column's ordering, but don't + * negate it entirely. + * + * We already filled in the stats tuple for *vardata when called. + */ +static double +btcost_correlation(IndexOptInfo *index, VariableStatData *vardata) +{ + Oid sortop; + AttStatsSlot sslot; + double indexCorrelation = 0; + + Assert(HeapTupleIsValid(vardata->statsTuple)); + + sortop = get_opfamily_member(index->opfamily[0], + index->opcintype[0], + index->opcintype[0], + BTLessStrategyNumber); + if (OidIsValid(sortop) && + get_attstatsslot(&sslot, vardata->statsTuple, + STATISTIC_KIND_CORRELATION, sortop, + ATTSTATSSLOT_NUMBERS)) + { + double varCorrelation; + + Assert(sslot.nnumbers == 1); + varCorrelation = sslot.numbers[0]; + + if (index->reverse_sort[0]) + varCorrelation = -varCorrelation; + + if (index->nkeycolumns > 1) + indexCorrelation = varCorrelation * 0.75; + else + indexCorrelation = varCorrelation; + + free_attstatsslot(&sslot); + } + + return indexCorrelation; +} void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, @@ -7016,17 +7153,19 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, { IndexOptInfo *index = path->indexinfo; GenericCosts costs = {0}; - Oid relid; - AttrNumber colnum; VariableStatData vardata = {0}; double numIndexTuples; Cost descentCost; List *indexBoundQuals; + List *indexSkipQuals; int indexcol; bool eqQualHere; - bool found_saop; + bool found_row_compare; + bool found_array; bool found_is_null_op; + bool have_correlation = false; double num_sa_scans; + double correlation = 0.0; ListCell *lc; /* @@ -7037,19 +7176,24 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * it's OK to count them in indexSelectivity, but they should not count * for estimating numIndexTuples. So we must examine the given indexquals * to find out which ones count as boundary quals. We rely on the - * knowledge that they are given in index column order. + * knowledge that they are given in index column order. Note that nbtree + * preprocessing can add skip arrays that act as leading '=' quals in the + * absence of ordinary input '=' quals, so in practice _most_ input quals + * are able to act as index bound quals (which we take into account here). * * For a RowCompareExpr, we consider only the first column, just as * rowcomparesel() does. * - * If there's a ScalarArrayOpExpr in the quals, we'll actually perform up - * to N index descents (not just one), but the ScalarArrayOpExpr's + * If there's a SAOP or skip array in the quals, we'll actually perform up + * to N index descents (not just one), but the underlying array key's * operator can be considered to act the same as it normally does. */ indexBoundQuals = NIL; + indexSkipQuals = NIL; indexcol = 0; eqQualHere = false; - found_saop = false; + found_row_compare = false; + found_array = false; found_is_null_op = false; num_sa_scans = 1; foreach(lc, path->indexclauses) @@ -7057,17 +7201,203 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, IndexClause *iclause = lfirst_node(IndexClause, lc); ListCell *lc2; - if (indexcol != iclause->indexcol) + if (indexcol < iclause->indexcol) { - /* Beginning of a new column's quals */ - if (!eqQualHere) - break; /* done if no '=' qual for indexcol */ + double num_sa_scans_prev_cols = num_sa_scans; + + /* + * Beginning of a new column's quals. + * + * Skip scans use skip arrays, which are ScalarArrayOp style + * arrays that generate their elements procedurally and on demand. + * Given a multi-column index on "(a, b)", and an SQL WHERE clause + * "WHERE b = 42", a skip scan will effectively use an indexqual + * "WHERE a = ANY('{every col a value}') AND b = 42". (Obviously, + * the array on "a" must also return "IS NULL" matches, since our + * WHERE clause used no strict operator on "a"). + * + * Here we consider how nbtree will backfill skip arrays for any + * index columns that lacked an '=' qual. This maintains our + * num_sa_scans estimate, and determines if this new column (the + * "iclause->indexcol" column, not the prior "indexcol" column) + * can have its RestrictInfos/quals added to indexBoundQuals. + * + * We'll need to handle columns that have inequality quals, where + * the skip array generates values from a range constrained by the + * quals (not every possible value). We've been maintaining + * indexSkipQuals to help with this; it will now contain all of + * the prior column's quals (that is, indexcol's quals) when they + * might be used for this. + */ + if (found_row_compare) + { + /* + * Skip arrays can't be added after a RowCompare input qual + * due to limitations in nbtree + */ + break; + } + if (eqQualHere) + { + /* + * Don't need to add a skip array for an indexcol that already + * has an '=' qual/equality constraint + */ + indexcol++; + indexSkipQuals = NIL; + } eqQualHere = false; - indexcol++; + + while (indexcol < iclause->indexcol) + { + double ndistinct; + bool isdefault = true; + + found_array = true; + + /* + * A skipped attribute's ndistinct forms the basis of our + * estimate of the total number of "array elements" used by + * its skip array at runtime. Look that up first. + */ + examine_indexcol_variable(root, index, indexcol, &vardata); + ndistinct = get_variable_numdistinct(&vardata, &isdefault); + + if (indexcol == 0) + { + /* + * Get an estimate of the leading column's correlation in + * passing (avoids rereading variable stats below) + */ + if (HeapTupleIsValid(vardata.statsTuple)) + correlation = btcost_correlation(index, &vardata); + have_correlation = true; + } + + ReleaseVariableStats(vardata); + + /* + * If ndistinct is a default estimate, conservatively assume + * that no skipping will happen at runtime + */ + if (isdefault) + { + num_sa_scans = num_sa_scans_prev_cols; + break; /* done building indexBoundQuals */ + } + + /* + * Apply indexcol's indexSkipQuals selectivity to ndistinct + */ + if (indexSkipQuals != NIL) + { + List *partialSkipQuals; + Selectivity ndistinctfrac; + + /* + * If the index is partial, AND the index predicate with + * the index-bound quals to produce a more accurate idea + * of the number of distinct values for prior indexcol + */ + partialSkipQuals = add_predicate_to_index_quals(index, + indexSkipQuals); + + ndistinctfrac = clauselist_selectivity(root, partialSkipQuals, + index->rel->relid, + JOIN_INNER, + NULL); + + /* + * If ndistinctfrac is selective (on its own), the scan is + * unlikely to benefit from repositioning itself using + * later quals. Do not allow iclause->indexcol's quals to + * be added to indexBoundQuals (it would increase descent + * costs, without lowering numIndexTuples costs by much). + */ + if (ndistinctfrac < DEFAULT_RANGE_INEQ_SEL) + { + num_sa_scans = num_sa_scans_prev_cols; + break; /* done building indexBoundQuals */ + } + + /* Adjust ndistinct downward */ + ndistinct = rint(ndistinct * ndistinctfrac); + ndistinct = Max(ndistinct, 1); + } + + /* + * When there's no inequality quals, account for the need to + * find an initial value by counting -inf/+inf as a value. + * + * We don't charge anything extra for possible next/prior key + * index probes, which are sometimes used to find the next + * valid skip array element (ahead of using the located + * element value to relocate the scan to the next position + * that might contain matching tuples). It seems hard to do + * better here. Use of the skip support infrastructure often + * avoids most next/prior key probes. But even when it can't, + * there's a decent chance that most individual next/prior key + * probes will locate a leaf page whose key space overlaps all + * of the scan's keys (even the lower-order keys) -- which + * also avoids the need for a separate, extra index descent. + * Note also that these probes are much cheaper than non-probe + * primitive index scans: they're reliably very selective. + */ + if (indexSkipQuals == NIL) + ndistinct += 1; + + /* + * Update num_sa_scans estimate by multiplying by ndistinct. + * + * We make the pessimistic assumption that there is no + * naturally occurring cross-column correlation. This is + * often wrong, but it seems best to err on the side of not + * expecting skipping to be helpful... + */ + num_sa_scans *= ndistinct; + + /* + * ...but back out of adding this latest group of 1 or more + * skip arrays when num_sa_scans exceeds the total number of + * index pages (revert to num_sa_scans from before indexcol). + * This causes a sharp discontinuity in cost (as a function of + * the indexcol's ndistinct), but that is representative of + * actual runtime costs. + * + * Note that skipping is helpful when each primitive index + * scan only manages to skip over 1 or 2 irrelevant leaf pages + * on average. Skip arrays bring savings in CPU costs due to + * the scan not needing to evaluate indexquals against every + * tuple, which can greatly exceed any savings in I/O costs. + * This test is a test of whether num_sa_scans implies that + * we're past the point where the ability to skip ceases to + * lower the scan's costs (even qual evaluation CPU costs). + */ + if (index->pages < num_sa_scans) + { + num_sa_scans = num_sa_scans_prev_cols; + break; /* done building indexBoundQuals */ + } + + indexcol++; + indexSkipQuals = NIL; + } + + /* + * Finished considering the need to add skip arrays to bridge an + * initial eqQualHere gap between the old and new index columns + * (or there was no initial eqQualHere gap in the first place). + * + * If an initial gap could not be bridged, then new column's quals + * (i.e. iclause->indexcol's quals) won't go into indexBoundQuals, + * and so won't affect our final numIndexTuples estimate. + */ if (indexcol != iclause->indexcol) - break; /* no quals at all for indexcol */ + break; /* done building indexBoundQuals */ } + Assert(indexcol == iclause->indexcol); + /* Examine each indexqual associated with this index clause */ foreach(lc2, iclause->indexquals) { @@ -7087,6 +7417,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, RowCompareExpr *rc = (RowCompareExpr *) clause; clause_op = linitial_oid(rc->opnos); + found_row_compare = true; } else if (IsA(clause, ScalarArrayOpExpr)) { @@ -7095,7 +7426,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, double alength = estimate_array_length(root, other_operand); clause_op = saop->opno; - found_saop = true; + found_array = true; /* estimate SA descents by indexBoundQuals only */ if (alength > 1) num_sa_scans *= alength; @@ -7107,7 +7438,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, if (nt->nulltesttype == IS_NULL) { found_is_null_op = true; - /* IS NULL is like = for selectivity purposes */ + /* IS NULL is like = for selectivity/skip scan purposes */ eqQualHere = true; } } @@ -7126,19 +7457,28 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, } indexBoundQuals = lappend(indexBoundQuals, rinfo); + + /* + * We apply inequality selectivities to estimate index descent + * costs with scans that use skip arrays. Save this indexcol's + * RestrictInfos if it looks like they'll be needed for that. + */ + if (!eqQualHere && !found_row_compare && + indexcol < index->nkeycolumns - 1) + indexSkipQuals = lappend(indexSkipQuals, rinfo); } } /* * If index is unique and we found an '=' clause for each column, we can * just assume numIndexTuples = 1 and skip the expensive - * clauselist_selectivity calculations. However, a ScalarArrayOp or - * NullTest invalidates that theory, even though it sets eqQualHere. + * clauselist_selectivity calculations. However, an array or NullTest + * always invalidates that theory (even when eqQualHere has been set). */ if (index->unique && indexcol == index->nkeycolumns - 1 && eqQualHere && - !found_saop && + !found_array && !found_is_null_op) numIndexTuples = 1.0; else @@ -7160,7 +7500,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, numIndexTuples = btreeSelectivity * index->rel->tuples; /* - * btree automatically combines individual ScalarArrayOpExpr primitive + * btree automatically combines individual array element primitive * index scans whenever the tuples covered by the next set of array * keys are close to tuples covered by the current set. That puts a * natural ceiling on the worst case number of descents -- there @@ -7178,16 +7518,18 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * of leaf pages (we make it 1/3 the total number of pages instead) to * give the btree code credit for its ability to continue on the leaf * level with low selectivity scans. + * + * Note: num_sa_scans includes both ScalarArrayOp array elements and + * skip array elements whose qual affects our numIndexTuples estimate. */ num_sa_scans = Min(num_sa_scans, ceil(index->pages * 0.3333333)); num_sa_scans = Max(num_sa_scans, 1); /* - * As in genericcostestimate(), we have to adjust for any - * ScalarArrayOpExpr quals included in indexBoundQuals, and then round - * to integer. + * As in genericcostestimate(), we have to adjust for any array quals + * included in indexBoundQuals, and then round to integer. * - * It is tempting to make genericcostestimate behave as if SAOP + * It is tempting to make genericcostestimate behave as if array * clauses work in almost the same way as scalar operators during * btree scans, making the top-level scan look like a continuous scan * (as opposed to num_sa_scans-many primitive index scans). After @@ -7220,7 +7562,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * comparisons to descend a btree of N leaf tuples. We charge one * cpu_operator_cost per comparison. * - * If there are ScalarArrayOpExprs, charge this once per estimated SA + * If there are SAOP or skip array keys, charge this once per estimated * index descent. The ones after the first one are not startup cost so * far as the overall plan goes, so just add them to "total" cost. */ @@ -7240,110 +7582,25 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page * touched. The number of such pages is btree tree height plus one (ie, * we charge for the leaf page too). As above, charge once per estimated - * SA index descent. + * SAOP/skip array descent. */ descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost; costs.indexStartupCost += descentCost; costs.indexTotalCost += costs.num_sa_scans * descentCost; - /* - * If we can get an estimate of the first column's ordering correlation C - * from pg_statistic, estimate the index correlation as C for a - * single-column index, or C * 0.75 for multiple columns. (The idea here - * is that multiple columns dilute the importance of the first column's - * ordering, but don't negate it entirely. Before 8.0 we divided the - * correlation by the number of columns, but that seems too strong.) - */ - if (index->indexkeys[0] != 0) + if (!have_correlation) { - /* Simple variable --- look to stats for the underlying table */ - RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root); - - Assert(rte->rtekind == RTE_RELATION); - relid = rte->relid; - Assert(relid != InvalidOid); - colnum = index->indexkeys[0]; - - if (get_relation_stats_hook && - (*get_relation_stats_hook) (root, rte, colnum, &vardata)) - { - /* - * The hook took control of acquiring a stats tuple. If it did - * supply a tuple, it'd better have supplied a freefunc. - */ - if (HeapTupleIsValid(vardata.statsTuple) && - !vardata.freefunc) - elog(ERROR, "no function provided to release variable stats with"); - } - else - { - vardata.statsTuple = SearchSysCache3(STATRELATTINH, - ObjectIdGetDatum(relid), - Int16GetDatum(colnum), - BoolGetDatum(rte->inh)); - vardata.freefunc = ReleaseSysCache; - } + examine_indexcol_variable(root, index, 0, &vardata); + if (HeapTupleIsValid(vardata.statsTuple)) + costs.indexCorrelation = btcost_correlation(index, &vardata); + ReleaseVariableStats(vardata); } else { - /* Expression --- maybe there are stats for the index itself */ - relid = index->indexoid; - colnum = 1; - - if (get_index_stats_hook && - (*get_index_stats_hook) (root, relid, colnum, &vardata)) - { - /* - * The hook took control of acquiring a stats tuple. If it did - * supply a tuple, it'd better have supplied a freefunc. - */ - if (HeapTupleIsValid(vardata.statsTuple) && - !vardata.freefunc) - elog(ERROR, "no function provided to release variable stats with"); - } - else - { - vardata.statsTuple = SearchSysCache3(STATRELATTINH, - ObjectIdGetDatum(relid), - Int16GetDatum(colnum), - BoolGetDatum(false)); - vardata.freefunc = ReleaseSysCache; - } + /* btcost_correlation already called earlier on */ + costs.indexCorrelation = correlation; } - if (HeapTupleIsValid(vardata.statsTuple)) - { - Oid sortop; - AttStatsSlot sslot; - - sortop = get_opfamily_member(index->opfamily[0], - index->opcintype[0], - index->opcintype[0], - BTLessStrategyNumber); - if (OidIsValid(sortop) && - get_attstatsslot(&sslot, vardata.statsTuple, - STATISTIC_KIND_CORRELATION, sortop, - ATTSTATSSLOT_NUMBERS)) - { - double varCorrelation; - - Assert(sslot.nnumbers == 1); - varCorrelation = sslot.numbers[0]; - - if (index->reverse_sort[0]) - varCorrelation = -varCorrelation; - - if (index->nkeycolumns > 1) - costs.indexCorrelation = varCorrelation * 0.75; - else - costs.indexCorrelation = varCorrelation; - - free_attstatsslot(&sslot); - } - } - - ReleaseVariableStats(vardata); - *indexStartupCost = costs.indexStartupCost; *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; |