diff options
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 121 | ||||
-rw-r--r-- | src/backend/optimizer/util/var.c | 57 |
2 files changed, 124 insertions, 54 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 7dfe834b779..dddca240e95 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -31,17 +31,18 @@ * result by interpolating between startup_cost and total_cost. In detail: * actual_cost = startup_cost + * (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows; - * Note that a relation's rows count (and, by extension, a Plan's plan_rows) - * are set without regard to any LIMIT, so that this equation works properly. - * (Also, these routines guarantee not to set the rows count to zero, so there - * will be no zero divide.) The LIMIT is applied as a separate Plan node. + * Note that a base relation's rows count (and, by extension, plan_rows for + * plan nodes below the LIMIT node) are set without regard to any LIMIT, so + * that this equation works properly. (Also, these routines guarantee not to + * set the rows count to zero, so there will be no zero divide.) The LIMIT is + * applied as a top-level plan node. * * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.72 2001/05/09 00:35:09 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.73 2001/05/09 23:13:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -205,12 +206,18 @@ cost_index(Path *path, Query *root, { Cost startup_cost = 0; Cost run_cost = 0; - Cost cpu_per_tuple; Cost indexStartupCost; Cost indexTotalCost; Selectivity indexSelectivity; + double indexCorrelation, + csquared; + Cost min_IO_cost, + max_IO_cost; + Cost cpu_per_tuple; double tuples_fetched; double pages_fetched; + double T, + b; /* Should only be applied to base relations */ Assert(IsA(baserel, RelOptInfo) &&IsA(index, IndexOptInfo)); @@ -224,38 +231,52 @@ cost_index(Path *path, Query *root, * Call index-access-method-specific code to estimate the processing * cost for scanning the index, as well as the selectivity of the * index (ie, the fraction of main-table tuples we will have to - * retrieve). + * retrieve) and its correlation to the main-table tuple order. */ - OidFunctionCall7(index->amcostestimate, + OidFunctionCall8(index->amcostestimate, PointerGetDatum(root), PointerGetDatum(baserel), PointerGetDatum(index), PointerGetDatum(indexQuals), PointerGetDatum(&indexStartupCost), PointerGetDatum(&indexTotalCost), - PointerGetDatum(&indexSelectivity)); + PointerGetDatum(&indexSelectivity), + PointerGetDatum(&indexCorrelation)); /* all costs for touching index itself included here */ startup_cost += indexStartupCost; run_cost += indexTotalCost - indexStartupCost; - /* + /*---------- * Estimate number of main-table tuples and pages fetched. * - * If the number of tuples is much smaller than the number of pages in - * the relation, each tuple will cost a separate nonsequential fetch. - * If it is comparable or larger, then probably we will be able to - * avoid some fetches. We use a growth rate of log(#tuples/#pages + - * 1) --- probably totally bogus, but intuitively it gives the right - * shape of curve at least. + * When the index ordering is uncorrelated with the table ordering, + * we use an approximation proposed by Mackert and Lohman, "Index Scans + * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions + * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424. + * The Mackert and Lohman approximation is that the number of pages + * fetched is + * PF = + * min(2TNs/(2T+Ns), T) when T <= b + * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b) + * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b) + * where + * T = # pages in table + * N = # tuples in table + * s = selectivity = fraction of table to be scanned + * b = # buffer pages available (we include kernel space here) * - * XXX if the relation has recently been "clustered" using this index, - * then in fact the target tuples will be highly nonuniformly - * distributed, and we will be seriously overestimating the scan cost! - * Currently we have no way to know whether the relation has been - * clustered, nor how much it's been modified since the last - * clustering, so we ignore this effect. Would be nice to do better - * someday. + * When the index ordering is exactly correlated with the table ordering + * (just after a CLUSTER, for example), the number of pages fetched should + * be just sT. What's more, these will be sequential fetches, not the + * random fetches that occur in the uncorrelated case. So, depending on + * the extent of correlation, we should estimate the actual I/O cost + * somewhere between s * T * 1.0 and PF * random_cost. We currently + * interpolate linearly between these two endpoints based on the + * correlation squared (XXX is that appropriate?). + * + * In any case the number of tuples fetched is Ns. + *---------- */ tuples_fetched = indexSelectivity * baserel->tuples; @@ -263,24 +284,56 @@ cost_index(Path *path, Query *root, if (tuples_fetched < 1.0) tuples_fetched = 1.0; - if (baserel->pages > 0) - pages_fetched = ceil(baserel->pages * - log(tuples_fetched / baserel->pages + 1.0)); + /* This part is the Mackert and Lohman formula */ + + T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; + b = (effective_cache_size > 1) ? effective_cache_size : 1.0; + + if (T <= b) + { + pages_fetched = + (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + if (pages_fetched > T) + pages_fetched = T; + } else - pages_fetched = tuples_fetched; + { + double lim; + + lim = (2.0 * T * b) / (2.0 * T - b); + if (tuples_fetched <= lim) + { + pages_fetched = + (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + } + else + { + pages_fetched = + b + (tuples_fetched - lim) * (T - b) / T; + } + } /* - * Now estimate one nonsequential access per page fetched, plus - * appropriate CPU costs per tuple. + * min_IO_cost corresponds to the perfectly correlated case (csquared=1), + * max_IO_cost to the perfectly uncorrelated case (csquared=0). Note + * that we just charge random_page_cost per page in the uncorrelated + * case, rather than using cost_nonsequential_access, since we've already + * accounted for caching effects by using the Mackert model. */ + min_IO_cost = ceil(indexSelectivity * T); + max_IO_cost = pages_fetched * random_page_cost; - /* disk costs for main table */ - run_cost += pages_fetched * cost_nonsequential_access(baserel->pages); + /* + * Now interpolate based on estimated index order correlation + * to get total disk I/O cost for main table accesses. + */ + csquared = indexCorrelation * indexCorrelation; - /* CPU costs */ - cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost; + run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost); /* + * Estimate CPU costs per tuple. + * * Normally the indexquals will be removed from the list of * restriction clauses that we have to evaluate as qpquals, so we * should subtract their costs from baserestrictcost. For a lossy @@ -290,6 +343,8 @@ cost_index(Path *path, Query *root, * Rather than work out exactly how much to subtract, we don't * subtract anything in that case either. */ + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost; + if (!index->lossy && !is_injoin) cpu_per_tuple -= cost_qual_eval(indexQuals); diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 30f02de5c72..9b620c80f5a 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.31 2001/04/18 20:42:55 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.32 2001/05/09 23:13:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,8 +28,9 @@ typedef struct typedef struct { int varno; + int varattno; int sublevels_up; -} contain_whole_tuple_var_context; +} contain_var_reference_context; typedef struct { @@ -39,8 +40,8 @@ typedef struct static bool pull_varnos_walker(Node *node, pull_varnos_context *context); -static bool contain_whole_tuple_var_walker(Node *node, - contain_whole_tuple_var_context *context); +static bool contain_var_reference_walker(Node *node, + contain_var_reference_context *context); static bool contain_var_clause_walker(Node *node, void *context); static bool pull_var_clause_walker(Node *node, pull_var_clause_context *context); @@ -129,10 +130,10 @@ pull_varnos_walker(Node *node, pull_varnos_context *context) /* - * contain_whole_tuple_var + * contain_var_reference * - * Detect whether a parsetree contains any references to the whole - * tuple of a given rtable entry (ie, a Var with varattno = 0). + * Detect whether a parsetree contains any references to a specified + * attribute of a specified rtable entry. * * NOTE: this is used on not-yet-planned expressions. It may therefore find * bare SubLinks, and if so it needs to recurse into them to look for uplevel @@ -140,11 +141,12 @@ pull_varnos_walker(Node *node, pull_varnos_context *context) * SubPlan, we only need to look at the parameters passed to the subplan. */ bool -contain_whole_tuple_var(Node *node, int varno, int levelsup) +contain_var_reference(Node *node, int varno, int varattno, int levelsup) { - contain_whole_tuple_var_context context; + contain_var_reference_context context; context.varno = varno; + context.varattno = varattno; context.sublevels_up = levelsup; /* @@ -154,15 +156,15 @@ contain_whole_tuple_var(Node *node, int varno, int levelsup) */ if (node && IsA(node, Query)) return query_tree_walker((Query *) node, - contain_whole_tuple_var_walker, + contain_var_reference_walker, (void *) &context, true); else - return contain_whole_tuple_var_walker(node, &context); + return contain_var_reference_walker(node, &context); } static bool -contain_whole_tuple_var_walker(Node *node, - contain_whole_tuple_var_context *context) +contain_var_reference_walker(Node *node, + contain_var_reference_context *context) { if (node == NULL) return false; @@ -171,8 +173,8 @@ contain_whole_tuple_var_walker(Node *node, Var *var = (Var *) node; if (var->varno == context->varno && - var->varlevelsup == context->sublevels_up && - var->varattno == InvalidAttrNumber) + var->varattno == context->varattno && + var->varlevelsup == context->sublevels_up) return true; return false; } @@ -187,11 +189,11 @@ contain_whole_tuple_var_walker(Node *node, */ Expr *expr = (Expr *) node; - if (contain_whole_tuple_var_walker((Node *) ((SubPlan *) expr->oper)->sublink->oper, - context)) + if (contain_var_reference_walker((Node *) ((SubPlan *) expr->oper)->sublink->oper, + context)) return true; - if (contain_whole_tuple_var_walker((Node *) expr->args, - context)) + if (contain_var_reference_walker((Node *) expr->args, + context)) return true; return false; } @@ -202,17 +204,30 @@ contain_whole_tuple_var_walker(Node *node, context->sublevels_up++; result = query_tree_walker((Query *) node, - contain_whole_tuple_var_walker, + contain_var_reference_walker, (void *) context, true); context->sublevels_up--; return result; } - return expression_tree_walker(node, contain_whole_tuple_var_walker, + return expression_tree_walker(node, contain_var_reference_walker, (void *) context); } /* + * contain_whole_tuple_var + * + * Detect whether a parsetree contains any references to the whole + * tuple of a given rtable entry (ie, a Var with varattno = 0). + */ +bool +contain_whole_tuple_var(Node *node, int varno, int levelsup) +{ + return contain_var_reference(node, varno, InvalidAttrNumber, levelsup); +} + + +/* * contain_var_clause * Recursively scan a clause to discover whether it contains any Var nodes * (of the current query level). |