about summary refs log tree commit diff
path: root/src/backend/optimizer/path/costsize.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r--  src/backend/optimizer/path/costsize.c  118
1 file changed, 85 insertions(+), 33 deletions(-)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index d3455228660..c4404b1bd17 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1335,25 +1335,40 @@ cost_material(Path *path,
* Determines and returns the cost of performing an Agg plan node,
* including the cost of its input.
*
+ * aggcosts can be NULL when there are no actual aggregate functions (i.e.,
+ * we are using a hashed Agg node just to do grouping).
+ *
* Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
* are for appropriately-sorted input.
*/
void
cost_agg(Path *path, PlannerInfo *root,
- AggStrategy aggstrategy, int numAggs,
+ AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
int numGroupCols, double numGroups,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples)
{
Cost startup_cost;
Cost total_cost;
+ AggClauseCosts dummy_aggcosts;
+
+ /* Use all-zero per-aggregate costs if NULL is passed */
+ if (aggcosts == NULL)
+ {
+ Assert(aggstrategy == AGG_HASHED);
+ MemSet(&dummy_aggcosts, 0, sizeof(AggClauseCosts));
+ aggcosts = &dummy_aggcosts;
+ }
/*
- * We charge one cpu_operator_cost per aggregate function per input tuple,
- * and another one per output tuple (corresponding to transfn and finalfn
- * calls respectively). If we are grouping, we charge an additional
- * cpu_operator_cost per grouping column per input tuple for grouping
- * comparisons.
+ * The transCost.per_tuple component of aggcosts should be charged once
+ * per input tuple, corresponding to the costs of evaluating the aggregate
+ * transfns and their input expressions (with any startup cost of course
+ * charged but once). The finalCost component is charged once per output
+ * tuple, corresponding to the costs of evaluating the finalfns.
+ *
+ * If we are grouping, we charge an additional cpu_operator_cost per
+ * grouping column per input tuple for grouping comparisons.
*
* We will produce a single output tuple if not grouping, and a tuple per
* group otherwise. We charge cpu_tuple_cost for each output tuple.
@@ -1366,15 +1381,13 @@ cost_agg(Path *path, PlannerInfo *root,
* there's roundoff error we might do the wrong thing. So be sure that
* the computations below form the same intermediate values in the same
* order.
- *
- * Note: ideally we should use the pg_proc.procost costs of each
- * aggregate's component functions, but for now that seems like an
- * excessive amount of work.
*/
if (aggstrategy == AGG_PLAIN)
{
startup_cost = input_total_cost;
- startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
+ startup_cost += aggcosts->transCost.startup;
+ startup_cost += aggcosts->transCost.per_tuple * input_tuples;
+ startup_cost += aggcosts->finalCost;
/* we aren't grouping */
total_cost = startup_cost + cpu_tuple_cost;
}
@@ -1384,19 +1397,21 @@ cost_agg(Path *path, PlannerInfo *root,
startup_cost = input_startup_cost;
total_cost = input_total_cost;
/* calcs phrased this way to match HASHED case, see note above */
- total_cost += cpu_operator_cost * input_tuples * numGroupCols;
- total_cost += cpu_operator_cost * input_tuples * numAggs;
- total_cost += cpu_operator_cost * numGroups * numAggs;
+ total_cost += aggcosts->transCost.startup;
+ total_cost += aggcosts->transCost.per_tuple * input_tuples;
+ total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
+ total_cost += aggcosts->finalCost * numGroups;
total_cost += cpu_tuple_cost * numGroups;
}
else
{
/* must be AGG_HASHED */
startup_cost = input_total_cost;
- startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
- startup_cost += cpu_operator_cost * input_tuples * numAggs;
+ startup_cost += aggcosts->transCost.startup;
+ startup_cost += aggcosts->transCost.per_tuple * input_tuples;
+ startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
total_cost = startup_cost;
- total_cost += cpu_operator_cost * numGroups * numAggs;
+ total_cost += aggcosts->finalCost * numGroups;
total_cost += cpu_tuple_cost * numGroups;
}
@@ -1413,25 +1428,53 @@ cost_agg(Path *path, PlannerInfo *root,
*/
void
cost_windowagg(Path *path, PlannerInfo *root,
- int numWindowFuncs, int numPartCols, int numOrderCols,
+ List *windowFuncs, int numPartCols, int numOrderCols,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples)
{
Cost startup_cost;
Cost total_cost;
+ ListCell *lc;
startup_cost = input_startup_cost;
total_cost = input_total_cost;
/*
- * We charge one cpu_operator_cost per window function per tuple (often a
- * drastic underestimate, but without a way to gauge how many tuples the
- * window function will fetch, it's hard to do better). We also charge
- * cpu_operator_cost per grouping column per tuple for grouping
- * comparisons, plus cpu_tuple_cost per tuple for general overhead.
- */
- total_cost += cpu_operator_cost * input_tuples * numWindowFuncs;
- total_cost += cpu_operator_cost * input_tuples * (numPartCols + numOrderCols);
+ * Window functions are assumed to cost their stated execution cost, plus
+ * the cost of evaluating their input expressions, per tuple. Since they
+ * may in fact evaluate their inputs at multiple rows during each cycle,
+ * this could be a drastic underestimate; but without a way to know how
+ * many rows the window function will fetch, it's hard to do better. In
+ * any case, it's a good estimate for all the built-in window functions,
+ * so we'll just do this for now.
+ */
+ foreach(lc, windowFuncs)
+ {
+ WindowFunc *wfunc = (WindowFunc *) lfirst(lc);
+ Cost wfunccost;
+ QualCost argcosts;
+
+ Assert(IsA(wfunc, WindowFunc));
+
+ wfunccost = get_func_cost(wfunc->winfnoid) * cpu_operator_cost;
+
+ /* also add the input expressions' cost to per-input-row costs */
+ cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root);
+ startup_cost += argcosts.startup;
+ wfunccost += argcosts.per_tuple;
+
+ total_cost += wfunccost * input_tuples;
+ }
+
+ /*
+ * We also charge cpu_operator_cost per grouping column per tuple for
+ * grouping comparisons, plus cpu_tuple_cost per tuple for general
+ * overhead.
+ *
+ * XXX this neglects costs of spooling the data to disk when it overflows
+ * work_mem. Sooner or later that should get accounted for.
+ */
+ total_cost += cpu_operator_cost * (numPartCols + numOrderCols) * input_tuples;
total_cost += cpu_tuple_cost * input_tuples;
path->startup_cost = startup_cost;
@@ -2640,17 +2683,12 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
* Vars and Consts are charged zero, and so are boolean operators (AND,
* OR, NOT). Simplistic, but a lot better than no model at all.
*
- * Note that Aggref and WindowFunc nodes are (and should be) treated like
- * Vars --- whatever execution cost they have is absorbed into
- * plan-node-specific costing. As far as expression evaluation is
- * concerned they're just like Vars.
- *
* Should we try to account for the possibility of short-circuit
* evaluation of AND/OR? Probably *not*, because that would make the
* results depend on the clause ordering, and we are not in any position
* to expect that the current ordering of the clauses is the one that's
- * going to end up being used. (Is it worth applying order_qual_clauses
- * much earlier in the planning process to fix this?)
+ * going to end up being used. The above per-RestrictInfo caching would
+ * not mix well with trying to re-order clauses anyway.
*/
if (IsA(node, FuncExpr))
{
@@ -2679,6 +2717,20 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
context->total.per_tuple += get_func_cost(saop->opfuncid) *
cpu_operator_cost * estimate_array_length(arraynode) * 0.5;
}
+ else if (IsA(node, Aggref) ||
+ IsA(node, WindowFunc))
+ {
+ /*
+ * Aggref and WindowFunc nodes are (and should be) treated like Vars,
+ * ie, zero execution cost in the current model, because they behave
+ * essentially like Vars in execQual.c. We disregard the costs of
+ * their input expressions for the same reason. The actual execution
+ * costs of the aggregate/window functions and their arguments have to
+ * be factored into plan-node-specific costing of the Agg or WindowAgg
+ * plan node.
+ */
+ return false; /* don't recurse into children */
+ }
else if (IsA(node, CoerceViaIO))
{
CoerceViaIO *iocoerce = (CoerceViaIO *) node;