diff options
Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 118 |
1 files changed, 85 insertions, 33 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d3455228660..c4404b1bd17 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1335,25 +1335,40 @@ cost_material(Path *path, * Determines and returns the cost of performing an Agg plan node, * including the cost of its input. * + * aggcosts can be NULL when there are no actual aggregate functions (i.e., + * we are using a hashed Agg node just to do grouping). + * * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs * are for appropriately-sorted input. */ void cost_agg(Path *path, PlannerInfo *root, - AggStrategy aggstrategy, int numAggs, + AggStrategy aggstrategy, const AggClauseCosts *aggcosts, int numGroupCols, double numGroups, Cost input_startup_cost, Cost input_total_cost, double input_tuples) { Cost startup_cost; Cost total_cost; + AggClauseCosts dummy_aggcosts; + + /* Use all-zero per-aggregate costs if NULL is passed */ + if (aggcosts == NULL) + { + Assert(aggstrategy == AGG_HASHED); + MemSet(&dummy_aggcosts, 0, sizeof(AggClauseCosts)); + aggcosts = &dummy_aggcosts; + } /* - * We charge one cpu_operator_cost per aggregate function per input tuple, - * and another one per output tuple (corresponding to transfn and finalfn - * calls respectively). If we are grouping, we charge an additional - * cpu_operator_cost per grouping column per input tuple for grouping - * comparisons. + * The transCost.per_tuple component of aggcosts should be charged once + * per input tuple, corresponding to the costs of evaluating the aggregate + * transfns and their input expressions (with any startup cost of course + * charged but once). The finalCost component is charged once per output + * tuple, corresponding to the costs of evaluating the finalfns. + * + * If we are grouping, we charge an additional cpu_operator_cost per + * grouping column per input tuple for grouping comparisons. * * We will produce a single output tuple if not grouping, and a tuple per * group otherwise. We charge cpu_tuple_cost for each output tuple. @@ -1366,15 +1381,13 @@ cost_agg(Path *path, PlannerInfo *root, * there's roundoff error we might do the wrong thing. So be sure that * the computations below form the same intermediate values in the same * order. - * - * Note: ideally we should use the pg_proc.procost costs of each - * aggregate's component functions, but for now that seems like an - * excessive amount of work. */ if (aggstrategy == AGG_PLAIN) { startup_cost = input_total_cost; - startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs; + startup_cost += aggcosts->transCost.startup; + startup_cost += aggcosts->transCost.per_tuple * input_tuples; + startup_cost += aggcosts->finalCost; /* we aren't grouping */ total_cost = startup_cost + cpu_tuple_cost; } @@ -1384,19 +1397,21 @@ cost_agg(Path *path, PlannerInfo *root, startup_cost = input_startup_cost; total_cost = input_total_cost; /* calcs phrased this way to match HASHED case, see note above */ - total_cost += cpu_operator_cost * input_tuples * numGroupCols; - total_cost += cpu_operator_cost * input_tuples * numAggs; - total_cost += cpu_operator_cost * numGroups * numAggs; + total_cost += aggcosts->transCost.startup; + total_cost += aggcosts->transCost.per_tuple * input_tuples; + total_cost += (cpu_operator_cost * numGroupCols) * input_tuples; + total_cost += aggcosts->finalCost * numGroups; total_cost += cpu_tuple_cost * numGroups; } else { /* must be AGG_HASHED */ startup_cost = input_total_cost; - startup_cost += cpu_operator_cost * input_tuples * numGroupCols; - startup_cost += cpu_operator_cost * input_tuples * numAggs; + startup_cost += aggcosts->transCost.startup; + startup_cost += aggcosts->transCost.per_tuple * input_tuples; + startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples; total_cost = startup_cost; - total_cost += cpu_operator_cost * numGroups * numAggs; + total_cost += aggcosts->finalCost * numGroups; total_cost += cpu_tuple_cost * numGroups; } @@ -1413,25 +1428,53 @@ cost_agg(Path *path, PlannerInfo *root, */ void cost_windowagg(Path *path, PlannerInfo *root, - int numWindowFuncs, int numPartCols, int numOrderCols, + List *windowFuncs, int numPartCols, int numOrderCols, Cost input_startup_cost, Cost input_total_cost, double input_tuples) { Cost startup_cost; Cost total_cost; + ListCell *lc; startup_cost = input_startup_cost; total_cost = input_total_cost; /* - * We charge one cpu_operator_cost per window function per tuple (often a - * drastic underestimate, but without a way to gauge how many tuples the - * window function will fetch, it's hard to do better). We also charge - * cpu_operator_cost per grouping column per tuple for grouping - * comparisons, plus cpu_tuple_cost per tuple for general overhead. - */ - total_cost += cpu_operator_cost * input_tuples * numWindowFuncs; - total_cost += cpu_operator_cost * input_tuples * (numPartCols + numOrderCols); + * Window functions are assumed to cost their stated execution cost, plus + * the cost of evaluating their input expressions, per tuple. Since they + * may in fact evaluate their inputs at multiple rows during each cycle, + * this could be a drastic underestimate; but without a way to know how + * many rows the window function will fetch, it's hard to do better. In + * any case, it's a good estimate for all the built-in window functions, + * so we'll just do this for now. + */ + foreach(lc, windowFuncs) + { + WindowFunc *wfunc = (WindowFunc *) lfirst(lc); + Cost wfunccost; + QualCost argcosts; + + Assert(IsA(wfunc, WindowFunc)); + + wfunccost = get_func_cost(wfunc->winfnoid) * cpu_operator_cost; + + /* also add the input expressions' cost to per-input-row costs */ + cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root); + startup_cost += argcosts.startup; + wfunccost += argcosts.per_tuple; + + total_cost += wfunccost * input_tuples; + } + + /* + * We also charge cpu_operator_cost per grouping column per tuple for + * grouping comparisons, plus cpu_tuple_cost per tuple for general + * overhead. + * + * XXX this neglects costs of spooling the data to disk when it overflows + * work_mem. Sooner or later that should get accounted for. + */ + total_cost += cpu_operator_cost * (numPartCols + numOrderCols) * input_tuples; total_cost += cpu_tuple_cost * input_tuples; path->startup_cost = startup_cost; @@ -2640,17 +2683,12 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) * Vars and Consts are charged zero, and so are boolean operators (AND, * OR, NOT). Simplistic, but a lot better than no model at all. * - * Note that Aggref and WindowFunc nodes are (and should be) treated like - * Vars --- whatever execution cost they have is absorbed into - * plan-node-specific costing. As far as expression evaluation is - * concerned they're just like Vars. - * * Should we try to account for the possibility of short-circuit * evaluation of AND/OR? Probably *not*, because that would make the * results depend on the clause ordering, and we are not in any position * to expect that the current ordering of the clauses is the one that's - * going to end up being used. (Is it worth applying order_qual_clauses - * much earlier in the planning process to fix this?) + * going to end up being used. The above per-RestrictInfo caching would + * not mix well with trying to re-order clauses anyway. */ if (IsA(node, FuncExpr)) { @@ -2679,6 +2717,20 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) context->total.per_tuple += get_func_cost(saop->opfuncid) * cpu_operator_cost * estimate_array_length(arraynode) * 0.5; } + else if (IsA(node, Aggref) || + IsA(node, WindowFunc)) + { + /* + * Aggref and WindowFunc nodes are (and should be) treated like Vars, + * ie, zero execution cost in the current model, because they behave + * essentially like Vars in execQual.c. We disregard the costs of + * their input expressions for the same reason. The actual execution + * costs of the aggregate/window functions and their arguments have to + * be factored into plan-node-specific costing of the Agg or WindowAgg + * plan node. + */ + return false; /* don't recurse into children */ + } else if (IsA(node, CoerceViaIO)) { CoerceViaIO *iocoerce = (CoerceViaIO *) node; |