Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r--  src/backend/utils/adt/selfuncs.c  |  395
1 file changed, 278 insertions(+), 117 deletions(-)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bd63377ada7..72c2c30ad40 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -201,6 +201,7 @@ static Selectivity regex_selectivity(const char *patt, int pattlen,
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);
+static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);
/*
@@ -5916,76 +5917,55 @@ string_to_bytea_const(const char *str, size_t str_len)
*/
/*
- * If the index is partial, add its predicate to the given qual list.
+ * genericcostestimate is a general-purpose estimator that can be used for
+ * most index types. In some cases we use genericcostestimate as the base
+ * code and then incorporate additional index-type-specific knowledge in
+ * the type-specific calling function. To avoid code duplication, we make
+ * genericcostestimate return a number of intermediate values as well as
+ * its preliminary estimates of the output cost values. The GenericCosts
+ * struct includes all these values.
*
- * ANDing the index predicate with the explicitly given indexquals produces
- * a more accurate idea of the index's selectivity. However, we need to be
- * careful not to insert redundant clauses, because clauselist_selectivity()
- * is easily fooled into computing a too-low selectivity estimate. Our
- * approach is to add only the predicate clause(s) that cannot be proven to
- * be implied by the given indexquals. This successfully handles cases such
- * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
- * There are many other cases where we won't detect redundancy, leading to a
- * too-low selectivity estimate, which will bias the system in favor of using
- * partial indexes where possible. That is not necessarily bad though.
- *
- * Note that indexQuals contains RestrictInfo nodes while the indpred
- * does not, so the output list will be mixed. This is OK for both
- * predicate_implied_by() and clauselist_selectivity(), but might be
- * problematic if the result were passed to other things.
+ * Callers should initialize all fields of GenericCosts to zero. In addition,
+ * they can set numIndexTuples to some positive value if they have a
+ * better-than-default way of estimating the number of leaf index tuples
+ * visited.
*/
-static List *
-add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
+typedef struct
{
- List *predExtraQuals = NIL;
- ListCell *lc;
-
- if (index->indpred == NIL)
- return indexQuals;
-
- foreach(lc, index->indpred)
- {
- Node *predQual = (Node *) lfirst(lc);
- List *oneQual = list_make1(predQual);
+ /* These are the values the cost estimator must return to the planner */
+ Cost indexStartupCost; /* index-related startup cost */
+ Cost indexTotalCost; /* total index-related scan cost */
+ Selectivity indexSelectivity; /* selectivity of index */
+ double indexCorrelation; /* order correlation of index */
+
+ /* Intermediate values we obtain along the way */
+ double numIndexPages; /* number of leaf pages visited */
+ double numIndexTuples; /* number of leaf tuples visited */
+ double spc_random_page_cost; /* relevant random_page_cost value */
+ double num_sa_scans; /* # indexscans from ScalarArrayOps */
+} GenericCosts;
- if (!predicate_implied_by(oneQual, indexQuals))
- predExtraQuals = list_concat(predExtraQuals, oneQual);
- }
- /* list_concat avoids modifying the passed-in indexQuals list */
- return list_concat(predExtraQuals, indexQuals);
-}
-
-/*
- * genericcostestimate is a general-purpose estimator for use when we
- * don't have any better idea about how to estimate. Index-type-specific
- * knowledge can be incorporated in the type-specific routines.
- *
- * One bit of index-type-specific knowledge we can relatively easily use
- * in genericcostestimate is the estimate of the number of index tuples
- * visited. If numIndexTuples is not 0 then it is used as the estimate,
- * otherwise we compute a generic estimate.
- */
static void
genericcostestimate(PlannerInfo *root,
IndexPath *path,
double loop_count,
- double numIndexTuples,
- Cost *indexStartupCost,
- Cost *indexTotalCost,
- Selectivity *indexSelectivity,
- double *indexCorrelation)
+ GenericCosts *costs)
{
IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals;
List *indexOrderBys = path->indexorderbys;
+ Cost indexStartupCost;
+ Cost indexTotalCost;
+ Selectivity indexSelectivity;
+ double indexCorrelation;
double numIndexPages;
+ double numIndexTuples;
+ double spc_random_page_cost;
double num_sa_scans;
double num_outer_scans;
double num_scans;
QualCost index_qual_cost;
double qual_op_cost;
double qual_arg_cost;
- double spc_random_page_cost;
List *selectivityQuals;
ListCell *l;
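
(The calling convention the new struct implies can be seen in miniature below. This is a sketch distilled from the btcostestimate changes later in this patch, not additional patched code; it assumes the usual cost-estimator output pointers and local variables are in scope.)

    GenericCosts costs;

    /* zero all fields, then optionally seed a better-than-default estimate */
    MemSet(&costs, 0, sizeof(costs));
    costs.numIndexTuples = numIndexTuples;

    genericcostestimate(root, path, loop_count, &costs);

    /* ... add any index-type-specific charges to costs.indexStartupCost
       and costs.indexTotalCost here ... */

    *indexStartupCost = costs.indexStartupCost;
    *indexTotalCost = costs.indexTotalCost;
    *indexSelectivity = costs.indexSelectivity;
    *indexCorrelation = costs.indexCorrelation;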
@@ -6016,19 +5996,20 @@ genericcostestimate(PlannerInfo *root,
}
/* Estimate the fraction of main-table tuples that will be visited */
- *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
- index->rel->relid,
- JOIN_INNER,
- NULL);
+ indexSelectivity = clauselist_selectivity(root, selectivityQuals,
+ index->rel->relid,
+ JOIN_INNER,
+ NULL);
/*
* If caller didn't give us an estimate, estimate the number of index
* tuples that will be visited. We do it in this rather peculiar-looking
* way in order to get the right answer for partial indexes.
*/
+ numIndexTuples = costs->numIndexTuples;
if (numIndexTuples <= 0.0)
{
- numIndexTuples = *indexSelectivity * index->rel->tuples;
+ numIndexTuples = indexSelectivity * index->rel->tuples;
/*
* The above calculation counts all the tuples visited across all
@@ -6055,9 +6036,12 @@ genericcostestimate(PlannerInfo *root,
*
* We use the simplistic method of taking a pro-rata fraction of the total
* number of index pages. In effect, this counts only leaf pages and not
- * any overhead such as index metapage or upper tree levels. In practice
- * this seems a better approximation than charging for access to the upper
- * levels, perhaps because those tend to stay in cache under load.
+ * any overhead such as index metapage or upper tree levels.
+ *
+ * In practice access to upper index levels is often nearly free because
+ * those tend to stay in cache under load; moreover, the cost involved is
+ * highly dependent on index type. We therefore ignore such costs here
+ * and leave it to the caller to add a suitable charge if needed.
*/
if (index->pages > 1 && index->tuples > 1)
numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
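
(To make the pro-rata estimate concrete, here is a worked example as a standalone C program; the index figures are invented for illustration.)

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        double  index_pages = 1000.0;       /* hypothetical index pages */
        double  index_tuples = 100000.0;    /* hypothetical index tuples */
        double  numIndexTuples = 500.0;     /* tuples the scan will visit */

        /* pro-rata fraction of total index pages, as computed above */
        double  numIndexPages = ceil(numIndexTuples * index_pages / index_tuples);

        printf("leaf pages fetched: %g\n", numIndexPages);  /* prints 5 */
        return 0;
    }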
@@ -6107,7 +6091,7 @@ genericcostestimate(PlannerInfo *root,
* share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
* since that's internal to the indexscan.)
*/
- *indexTotalCost = (pages_fetched * spc_random_page_cost)
+ indexTotalCost = (pages_fetched * spc_random_page_cost)
/ num_outer_scans;
}
else
@@ -6116,30 +6100,10 @@ genericcostestimate(PlannerInfo *root,
* For a single index scan, we just charge spc_random_page_cost per
* page touched.
*/
- *indexTotalCost = numIndexPages * spc_random_page_cost;
+ indexTotalCost = numIndexPages * spc_random_page_cost;
}
/*
- * A difficulty with the leaf-pages-only cost approach is that for small
- * selectivities (eg, single index tuple fetched) all indexes will look
- * equally attractive because we will estimate exactly 1 leaf page to be
- * fetched. All else being equal, we should prefer physically smaller
- * indexes over larger ones. (An index might be smaller because it is
- * partial or because it contains fewer columns; presumably the other
- * columns in the larger index aren't useful to the query, or the larger
- * index would have better selectivity.)
- *
- * We can deal with this by adding a very small "fudge factor" that
- * depends on the index size, so that indexes of different sizes won't
- * look exactly equally attractive. To ensure the fudge factor stays
- * small even for very large indexes, use a log function. (We previously
- * used a factor of one spc_random_page_cost per 10000 index pages, which
- * grew too large for large indexes. This expression has about the same
- * growth rate for small indexes, but tails off quickly.)
- */
- *indexTotalCost += log(1.0 + index->pages / 10000.0) * spc_random_page_cost;
-
- /*
* CPU cost: any complex expressions in the indexquals will need to be
* evaluated once at the start of the scan to reduce them to runtime keys
* to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
@@ -6149,10 +6113,9 @@ genericcostestimate(PlannerInfo *root,
* for ScalarArrayOpExpr cases. Similarly add in costs for any index
* ORDER BY expressions.
*
- * Note: this neglects the possible costs of rechecking lossy operators
- * and OR-clause expressions. Detecting that that might be needed seems
- * more expensive than it's worth, though, considering all the other
- * inaccuracies here ...
+ * Note: this neglects the possible costs of rechecking lossy operators.
+ * Detecting that that might be needed seems more expensive than it's
+ * worth, though, considering all the other inaccuracies here ...
*/
cost_qual_eval(&index_qual_cost, indexQuals, root);
qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
@@ -6164,29 +6127,66 @@ genericcostestimate(PlannerInfo *root,
if (qual_arg_cost < 0) /* just in case... */
qual_arg_cost = 0;
- *indexStartupCost = qual_arg_cost;
- *indexTotalCost += qual_arg_cost;
- *indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
+ indexStartupCost = qual_arg_cost;
+ indexTotalCost += qual_arg_cost;
+ indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
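
(A worked instance of the per-tuple CPU charge just above, as a standalone C program. The constants are the default values of the cpu_index_tuple_cost and cpu_operator_cost GUCs; treating qual_op_cost as a single operator evaluation, with no ScalarArrayOpExpr arrays, is an assumption for illustration.)

    #include <stdio.h>

    int
    main(void)
    {
        double  numIndexTuples = 500.0;         /* leaf tuples visited */
        double  num_sa_scans = 1.0;             /* no ScalarArrayOpExpr arrays */
        double  cpu_index_tuple_cost = 0.005;   /* default GUC value */
        double  qual_op_cost = 0.0025;          /* one simple operator qual */

        /* same shape as the indexTotalCost increment above */
        double  cpu_charge = numIndexTuples * num_sa_scans *
                             (cpu_index_tuple_cost + qual_op_cost);

        printf("per-tuple CPU charge: %g\n", cpu_charge);   /* prints 3.75 */
        return 0;
    }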
/*
- * We also add a CPU-cost component to represent the general costs of
- * starting an indexscan, such as analysis of btree index keys and initial
- * tree descent. This is estimated at 100x cpu_operator_cost, which is a
- * bit arbitrary but seems the right order of magnitude. (As noted above,
- * we don't charge any I/O for touching upper tree levels, but charging
- * nothing at all has been found too optimistic.)
- *
- * Although this is startup cost with respect to any one scan, we add it
- * to the "total" cost component because it's only very interesting in the
- * many-ScalarArrayOpExpr-scan case, and there it will be paid over the
- * life of the scan node.
+ * Generic assumption about index correlation: there isn't any.
*/
- *indexTotalCost += num_sa_scans * 100.0 * cpu_operator_cost;
+ indexCorrelation = 0.0;
/*
- * Generic assumption about index correlation: there isn't any.
+ * Return everything to caller.
*/
- *indexCorrelation = 0.0;
+ costs->indexStartupCost = indexStartupCost;
+ costs->indexTotalCost = indexTotalCost;
+ costs->indexSelectivity = indexSelectivity;
+ costs->indexCorrelation = indexCorrelation;
+ costs->numIndexPages = numIndexPages;
+ costs->numIndexTuples = numIndexTuples;
+ costs->spc_random_page_cost = spc_random_page_cost;
+ costs->num_sa_scans = num_sa_scans;
+}
+
+/*
+ * If the index is partial, add its predicate to the given qual list.
+ *
+ * ANDing the index predicate with the explicitly given indexquals produces
+ * a more accurate idea of the index's selectivity. However, we need to be
+ * careful not to insert redundant clauses, because clauselist_selectivity()
+ * is easily fooled into computing a too-low selectivity estimate. Our
+ * approach is to add only the predicate clause(s) that cannot be proven to
+ * be implied by the given indexquals. This successfully handles cases such
+ * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
+ * There are many other cases where we won't detect redundancy, leading to a
+ * too-low selectivity estimate, which will bias the system in favor of using
+ * partial indexes where possible. That is not necessarily bad though.
+ *
+ * Note that indexQuals contains RestrictInfo nodes while the indpred
+ * does not, so the output list will be mixed. This is OK for both
+ * predicate_implied_by() and clauselist_selectivity(), but might be
+ * problematic if the result were passed to other things.
+ */
+static List *
+add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
+{
+ List *predExtraQuals = NIL;
+ ListCell *lc;
+
+ if (index->indpred == NIL)
+ return indexQuals;
+
+ foreach(lc, index->indpred)
+ {
+ Node *predQual = (Node *) lfirst(lc);
+ List *oneQual = list_make1(predQual);
+
+ if (!predicate_implied_by(oneQual, indexQuals))
+ predExtraQuals = list_concat(predExtraQuals, oneQual);
+ }
+ /* list_concat avoids modifying the passed-in indexQuals list */
+ return list_concat(predExtraQuals, indexQuals);
}
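
(A conceptual trace of the "x = 42" example from the comment above; this is commentary, not patched code.)

    /*
     * indexQuals     = [ x = 42 ]              (RestrictInfo nodes)
     * index->indpred = [ x >= 40, x < 50 ]     (bare clauses)
     *
     * predicate_implied_by([x >= 40], [x = 42])  => true, clause skipped
     * predicate_implied_by([x < 50],  [x = 42])  => true, clause skipped
     *
     * result: [ x = 42 ]      (no redundant predicate clauses added, so
     *                          clauselist_selectivity sees the qual alone)
     */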
@@ -6201,10 +6201,12 @@ btcostestimate(PG_FUNCTION_ARGS)
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
+ GenericCosts costs;
Oid relid;
AttrNumber colnum;
VariableStatData vardata;
double numIndexTuples;
+ Cost descentCost;
List *indexBoundQuals;
int indexcol;
bool eqQualHere;
@@ -6379,10 +6381,45 @@ btcostestimate(PG_FUNCTION_ARGS)
numIndexTuples = rint(numIndexTuples / num_sa_scans);
}
- genericcostestimate(root, path, loop_count,
- numIndexTuples,
- indexStartupCost, indexTotalCost,
- indexSelectivity, indexCorrelation);
+ /*
+ * Now do generic index cost estimation.
+ */
+ MemSet(&costs, 0, sizeof(costs));
+ costs.numIndexTuples = numIndexTuples;
+
+ genericcostestimate(root, path, loop_count, &costs);
+
+ /*
+ * Add a CPU-cost component to represent the costs of initial btree
+ * descent. We don't charge any I/O cost for touching upper btree levels,
+ * since they tend to stay in cache, but we still have to do about log2(N)
+ * comparisons to descend a btree of N leaf tuples. We charge one
+ * cpu_operator_cost per comparison.
+ *
+ * If there are ScalarArrayOpExprs, charge this once per SA scan. The
+ * ones after the first one are not startup cost so far as the overall
+ * plan is concerned, so add them only to "total" cost.
+ */
+ if (index->tuples > 1) /* avoid computing log(0) */
+ {
+ descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+ }
+
+ /*
+ * Even though we're not charging I/O cost for touching upper btree pages,
+ * it's still reasonable to charge some CPU cost per page descended
+ * through. Moreover, if we had no such charge at all, bloated indexes
+ * would appear to have the same search cost as unbloated ones, at least
+ * in cases where only a single leaf page is expected to be visited. This
+ * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
+ * touched. The number of such pages is btree tree height plus one (ie,
+ * we charge for the leaf page too). As above, charge once per SA scan.
+ */
+ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
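
(A worked example of the two btree descent charges, as a standalone C program. The leaf tuple count and tree height are hypothetical; 0.0025 is the default cpu_operator_cost.)

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        double  index_tuples = 1000000.0;   /* hypothetical leaf tuple count */
        int     tree_height = 2;            /* hypothetical btree height */
        double  cpu_operator_cost = 0.0025; /* default GUC value */
        double  descentCost;

        /* about log2(N) comparisons to descend to a leaf */
        descentCost = ceil(log(index_tuples) / log(2.0)) * cpu_operator_cost;
        printf("comparison charge: %g\n", descentCost);     /* 20 * 0.0025 = 0.05 */

        /* 50x cpu_operator_cost per page touched, leaf page included */
        descentCost = (tree_height + 1) * 50.0 * cpu_operator_cost;
        printf("per-page charge:   %g\n", descentCost);     /* 150 * 0.0025 = 0.375 */
        return 0;
    }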
/*
* If we can get an estimate of the first column's ordering correlation C
@@ -6478,9 +6515,9 @@ btcostestimate(PG_FUNCTION_ARGS)
varCorrelation = -varCorrelation;
if (index->ncolumns > 1)
- *indexCorrelation = varCorrelation * 0.75;
+ costs.indexCorrelation = varCorrelation * 0.75;
else
- *indexCorrelation = varCorrelation;
+ costs.indexCorrelation = varCorrelation;
free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
}
@@ -6488,6 +6525,11 @@ btcostestimate(PG_FUNCTION_ARGS)
ReleaseVariableStats(vardata);
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+
PG_RETURN_VOID();
}
@@ -6501,10 +6543,41 @@ hashcostestimate(PG_FUNCTION_ARGS)
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ GenericCosts costs;
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ genericcostestimate(root, path, loop_count, &costs);
+
+ /*
+ * A hash index has no descent costs as such, since the index AM can go
+ * directly to the target bucket after computing the hash value. There
+ * are a couple of other hash-specific costs that we could conceivably add
+ * here, though:
+ *
+ * Ideally we'd charge spc_random_page_cost for each page in the target
+ * bucket, not just the numIndexPages pages that genericcostestimate
+ * thought we'd visit. However in most cases we don't know which bucket
+ * that will be. There's no point in considering the average bucket size
+ * because the hash AM makes sure that's always one page.
+ *
+ * Likewise, we could consider charging some CPU for each index tuple in
+ * the bucket, if we knew how many there were. But the per-tuple cost is
+ * just a hash value comparison, not a general datatype-dependent
+ * comparison, so any such charge ought to be quite a bit less than
+ * cpu_operator_cost, which makes it probably not worth worrying about.
+ *
+ * A bigger issue is that chance hash-value collisions will result in
+ * wasted probes into the heap. We don't currently attempt to model this
+ * cost on the grounds that it's rare, but maybe it's not rare enough.
+ * (Any fix for this ought to consider the generic lossy-operator problem,
+ * though; it's not entirely hash-specific.)
+ */
- genericcostestimate(root, path, loop_count, 0.0,
- indexStartupCost, indexTotalCost,
- indexSelectivity, indexCorrelation);
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
PG_RETURN_VOID();
}
@@ -6519,10 +6592,54 @@ gistcostestimate(PG_FUNCTION_ARGS)
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ IndexOptInfo *index = path->indexinfo;
+ GenericCosts costs;
+ Cost descentCost;
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ genericcostestimate(root, path, loop_count, &costs);
+
+ /*
+ * We model index descent costs similarly to those for btree, but to do
+ * that we first need an idea of the tree height. We somewhat arbitrarily
+ * assume that the fanout is 100, meaning the tree height is at most
+ * log100(index->pages).
+ *
+ * Although this computation isn't really expensive enough to require
+ * caching, we might as well use index->tree_height to cache it.
+ */
+ if (index->tree_height < 0) /* unknown? */
+ {
+ if (index->pages > 1) /* avoid computing log(0) */
+ index->tree_height = (int) (log(index->pages) / log(100.0));
+ else
+ index->tree_height = 0;
+ }
+
+ /*
+ * Add a CPU-cost component to represent the costs of initial descent.
+ * We just use log(N) here not log2(N) since the branching factor isn't
+ * necessarily two anyway. As for btree, charge once per SA scan.
+ */
+ if (index->tuples > 1) /* avoid computing log(0) */
+ {
+ descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+ }
+
+ /*
+ * Likewise add a per-page charge, calculated the same as for btrees.
+ */
+ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
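
(The fanout-100 height guess is easy to sanity-check. Here is a brief standalone C sketch over some hypothetical page counts, showing for instance that a 10,000-page index gets tree_height = (int) (log(10000) / log(100)) = 2.)

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        double  pages[] = {1.0, 100.0, 10000.0, 1000000.0}; /* hypothetical */

        for (int i = 0; i < 4; i++)
        {
            int     tree_height;

            if (pages[i] > 1)   /* avoid computing log(0) */
                tree_height = (int) (log(pages[i]) / log(100.0));
            else
                tree_height = 0;
            printf("%8.0f pages -> height %d\n", pages[i], tree_height);
        }
        return 0;       /* prints heights 0, 1, 2, 3 */
    }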
- genericcostestimate(root, path, loop_count, 0.0,
- indexStartupCost, indexTotalCost,
- indexSelectivity, indexCorrelation);
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
PG_RETURN_VOID();
}
@@ -6537,10 +6654,54 @@ spgcostestimate(PG_FUNCTION_ARGS)
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ IndexOptInfo *index = path->indexinfo;
+ GenericCosts costs;
+ Cost descentCost;
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ genericcostestimate(root, path, loop_count, &costs);
- genericcostestimate(root, path, loop_count, 0.0,
- indexStartupCost, indexTotalCost,
- indexSelectivity, indexCorrelation);
+ /*
+ * We model index descent costs similarly to those for btree, but to do
+ * that we first need an idea of the tree height. We somewhat arbitrarily
+ * assume that the fanout is 100, meaning the tree height is at most
+ * log100(index->pages).
+ *
+ * Although this computation isn't really expensive enough to require
+ * caching, we might as well use index->tree_height to cache it.
+ */
+ if (index->tree_height < 0) /* unknown? */
+ {
+ if (index->pages > 1) /* avoid computing log(0) */
+ index->tree_height = (int) (log(index->pages) / log(100.0));
+ else
+ index->tree_height = 0;
+ }
+
+ /*
+ * Add a CPU-cost component to represent the costs of initial descent.
+ * We just use log(N) here not log2(N) since the branching factor isn't
+ * necessarily two anyway. As for btree, charge once per SA scan.
+ */
+ if (index->tuples > 1) /* avoid computing log(0) */
+ {
+ descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+ }
+
+ /*
+ * Likewise add a per-page charge, calculated the same as for btrees.
+ */
+ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
PG_RETURN_VOID();
}