Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
 -rw-r--r--  src/backend/optimizer/path/costsize.c | 470
 1 file changed, 227 insertions(+), 243 deletions(-)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index bb506678ce4..8a1df9e0a2d 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -49,7 +49,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.148 2005/10/05 17:19:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.149 2005/10/15 02:49:19 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -121,8 +121,8 @@ clamp_row_est(double nrows)
{
/*
* Force estimate to be at least one row, to make explain output look
- * better and to avoid possible divide-by-zero when interpolating
- * costs. Make it an integer, too.
+ * better and to avoid possible divide-by-zero when interpolating costs.
+ * Make it an integer, too.
*/
if (nrows < 1.0)
nrows = 1.0;
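
For context, a minimal sketch of the whole helper, assuming the integer rounding mentioned in the comment is done with rint() (the hunk shows only the lower clamp):

    #include <math.h>

    /* Illustrative sketch only, not the committed function: force the estimate
     * to at least one row, then round to an integer (rounding call assumed). */
    static double
    clamp_row_est_sketch(double nrows)
    {
        if (nrows < 1.0)
            nrows = 1.0;
        else
            nrows = rint(nrows);
        return nrows;
    }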
@@ -155,12 +155,11 @@ cost_seqscan(Path *path, PlannerInfo *root,
/*
* disk costs
*
- * The cost of reading a page sequentially is 1.0, by definition. Note
- * that the Unix kernel will typically do some amount of read-ahead
- * optimization, so that this cost is less than the true cost of
- * reading a page from disk. We ignore that issue here, but must take
- * it into account when estimating the cost of non-sequential
- * accesses!
+ * The cost of reading a page sequentially is 1.0, by definition. Note that
+ * the Unix kernel will typically do some amount of read-ahead
+ * optimization, so that this cost is less than the true cost of reading a
+ * page from disk. We ignore that issue here, but must take it into
+ * account when estimating the cost of non-sequential accesses!
*/
run_cost += baserel->pages; /* sequential fetches with cost 1.0 */
@@ -276,10 +275,10 @@ cost_index(IndexPath *path, PlannerInfo *root,
startup_cost += disable_cost;
/*
- * Call index-access-method-specific code to estimate the processing
- * cost for scanning the index, as well as the selectivity of the
- * index (ie, the fraction of main-table tuples we will have to
- * retrieve) and its correlation to the main-table tuple order.
+ * Call index-access-method-specific code to estimate the processing cost
+ * for scanning the index, as well as the selectivity of the index (ie,
+ * the fraction of main-table tuples we will have to retrieve) and its
+ * correlation to the main-table tuple order.
*/
OidFunctionCall7(index->amcostestimate,
PointerGetDatum(root),
@@ -292,8 +291,8 @@ cost_index(IndexPath *path, PlannerInfo *root,
/*
* Save amcostestimate's results for possible use in bitmap scan planning.
- * We don't bother to save indexStartupCost or indexCorrelation, because
- * a bitmap scan doesn't care about either.
+ * We don't bother to save indexStartupCost or indexCorrelation, because a
+ * bitmap scan doesn't care about either.
*/
path->indextotalcost = indexTotalCost;
path->indexselectivity = indexSelectivity;
@@ -366,19 +365,18 @@ cost_index(IndexPath *path, PlannerInfo *root,
}
/*
- * min_IO_cost corresponds to the perfectly correlated case
- * (csquared=1), max_IO_cost to the perfectly uncorrelated case
- * (csquared=0). Note that we just charge random_page_cost per page
- * in the uncorrelated case, rather than using
- * cost_nonsequential_access, since we've already accounted for
- * caching effects by using the Mackert model.
+ * min_IO_cost corresponds to the perfectly correlated case (csquared=1),
+ * max_IO_cost to the perfectly uncorrelated case (csquared=0). Note that
+ * we just charge random_page_cost per page in the uncorrelated case,
+ * rather than using cost_nonsequential_access, since we've already
+ * accounted for caching effects by using the Mackert model.
*/
min_IO_cost = ceil(indexSelectivity * T);
max_IO_cost = pages_fetched * random_page_cost;
/*
- * Now interpolate based on estimated index order correlation to get
- * total disk I/O cost for main table accesses.
+ * Now interpolate based on estimated index order correlation to get total
+ * disk I/O cost for main table accesses.
*/
csquared = indexCorrelation * indexCorrelation;
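
The interpolation the comment describes is linear in csquared; a minimal illustrative sketch (not the committed statement), where csquared = 1 yields min_IO_cost and csquared = 0 yields max_IO_cost:

    /* Illustrative only: blend the two I/O cost bounds by the squared correlation. */
    static double
    interpolate_io_cost(double min_IO_cost, double max_IO_cost, double csquared)
    {
        return max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
    }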
@@ -390,9 +388,9 @@ cost_index(IndexPath *path, PlannerInfo *root,
* Normally the indexquals will be removed from the list of restriction
* clauses that we have to evaluate as qpquals, so we should subtract
* their costs from baserestrictcost. But if we are doing a join then
- * some of the indexquals are join clauses and shouldn't be
- * subtracted. Rather than work out exactly how much to subtract, we
- * don't subtract anything.
+ * some of the indexquals are join clauses and shouldn't be subtracted.
+ * Rather than work out exactly how much to subtract, we don't subtract
+ * anything.
*/
startup_cost += baserel->baserestrictcost.startup;
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
@@ -467,9 +465,9 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
/*
* For small numbers of pages we should charge random_page_cost apiece,
* while if nearly all the table's pages are being read, it's more
- * appropriate to charge 1.0 apiece. The effect is nonlinear, too.
- * For lack of a better idea, interpolate like this to determine the
- * cost per page.
+ * appropriate to charge 1.0 apiece. The effect is nonlinear, too. For
+ * lack of a better idea, interpolate like this to determine the cost per
+ * page.
*/
if (pages_fetched >= 2.0)
cost_per_page = random_page_cost -
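
A hedged sketch of the nonlinear per-page interpolation the comment describes, assuming T stands for the relation's total page count as in the surrounding function:

    #include <math.h>

    /* Illustrative only: slide from random_page_cost (few pages fetched)
     * toward 1.0 (nearly the whole table fetched). */
    static double
    bitmap_cost_per_page(double pages_fetched, double T, double random_page_cost)
    {
        if (pages_fetched >= 2.0)
            return random_page_cost -
                (random_page_cost - 1.0) * sqrt(pages_fetched / T);
        return random_page_cost;
    }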
@@ -482,10 +480,10 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
/*
* Estimate CPU costs per tuple.
*
- * Often the indexquals don't need to be rechecked at each tuple ...
- * but not always, especially not if there are enough tuples involved
- * that the bitmaps become lossy. For the moment, just assume they
- * will be rechecked always.
+ * Often the indexquals don't need to be rechecked at each tuple ... but not
+ * always, especially not if there are enough tuples involved that the
+ * bitmaps become lossy. For the moment, just assume they will be
+ * rechecked always.
*/
startup_cost += baserel->baserestrictcost.startup;
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
@@ -527,7 +525,7 @@ cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
* Estimate the cost of a BitmapAnd node
*
* Note that this considers only the costs of index scanning and bitmap
- * creation, not the eventual heap access. In that sense the object isn't
+ * creation, not the eventual heap access. In that sense the object isn't
* truly a Path, but it has enough path-like properties (costs in particular)
* to warrant treating it as one.
*/
@@ -535,24 +533,24 @@ void
cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
{
Cost totalCost;
- Selectivity selec;
+ Selectivity selec;
ListCell *l;
/*
- * We estimate AND selectivity on the assumption that the inputs
- * are independent. This is probably often wrong, but we don't
- * have the info to do better.
+ * We estimate AND selectivity on the assumption that the inputs are
+ * independent. This is probably often wrong, but we don't have the info
+ * to do better.
*
* The runtime cost of the BitmapAnd itself is estimated at 100x
- * cpu_operator_cost for each tbm_intersect needed. Probably too
- * small, definitely too simplistic?
+ * cpu_operator_cost for each tbm_intersect needed. Probably too small,
+ * definitely too simplistic?
*/
totalCost = 0.0;
selec = 1.0;
foreach(l, path->bitmapquals)
{
- Path *subpath = (Path *) lfirst(l);
- Cost subCost;
+ Path *subpath = (Path *) lfirst(l);
+ Cost subCost;
Selectivity subselec;
cost_bitmap_tree_node(subpath, &subCost, &subselec);
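
A worked check of the loop above under the stated independence assumption, with two sub-paths whose selectivities are assumed to be 0.10 and 0.05 (one tbm_intersect is needed for two inputs):

    selec     = 0.10 * 0.05                            = 0.005
    totalCost = subCost1 + subCost2 + 100 * cpu_operator_cost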
@@ -578,25 +576,25 @@ void
cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
{
Cost totalCost;
- Selectivity selec;
+ Selectivity selec;
ListCell *l;
/*
- * We estimate OR selectivity on the assumption that the inputs
- * are non-overlapping, since that's often the case in "x IN (list)"
- * type situations. Of course, we clamp to 1.0 at the end.
+ * We estimate OR selectivity on the assumption that the inputs are
+ * non-overlapping, since that's often the case in "x IN (list)" type
+ * situations. Of course, we clamp to 1.0 at the end.
*
* The runtime cost of the BitmapOr itself is estimated at 100x
- * cpu_operator_cost for each tbm_union needed. Probably too
- * small, definitely too simplistic? We are aware that the tbm_unions
- * are optimized out when the inputs are BitmapIndexScans.
+ * cpu_operator_cost for each tbm_union needed. Probably too small,
+ * definitely too simplistic? We are aware that the tbm_unions are
+ * optimized out when the inputs are BitmapIndexScans.
*/
totalCost = 0.0;
selec = 0.0;
foreach(l, path->bitmapquals)
{
- Path *subpath = (Path *) lfirst(l);
- Cost subCost;
+ Path *subpath = (Path *) lfirst(l);
+ Cost subCost;
Selectivity subselec;
cost_bitmap_tree_node(subpath, &subCost, &subselec);
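
The corresponding worked equation for OR under the non-overlap assumption, with the same assumed sub-selectivities; the 100 * cpu_operator_cost union charge applies only when the tbm_union is not optimized out, per the comment:

    selec     = Min(1.0, 0.10 + 0.05)                  = 0.15
    totalCost = subCost1 + subCost2 + 100 * cpu_operator_cost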
@@ -661,10 +659,9 @@ cost_subqueryscan(Path *path, RelOptInfo *baserel)
Assert(baserel->rtekind == RTE_SUBQUERY);
/*
- * Cost of path is cost of evaluating the subplan, plus cost of
- * evaluating any restriction clauses that will be attached to the
- * SubqueryScan node, plus cpu_tuple_cost to account for selection and
- * projection overhead.
+ * Cost of path is cost of evaluating the subplan, plus cost of evaluating
+ * any restriction clauses that will be attached to the SubqueryScan node,
+ * plus cpu_tuple_cost to account for selection and projection overhead.
*/
path->startup_cost = baserel->subplan->startup_cost;
path->total_cost = baserel->subplan->total_cost;
@@ -694,8 +691,8 @@ cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
/*
* For now, estimate function's cost at one operator eval per function
- * call. Someday we should revive the function cost estimate columns
- * in pg_proc...
+ * call. Someday we should revive the function cost estimate columns in
+ * pg_proc...
*/
cpu_per_tuple = cpu_operator_cost;
@@ -758,9 +755,8 @@ cost_sort(Path *path, PlannerInfo *root,
startup_cost += disable_cost;
/*
- * We want to be sure the cost of a sort is never estimated as zero,
- * even if passed-in tuple count is zero. Besides, mustn't do
- * log(0)...
+ * We want to be sure the cost of a sort is never estimated as zero, even
+ * if passed-in tuple count is zero. Besides, mustn't do log(0)...
*/
if (tuples < 2.0)
tuples = 2.0;
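
A small illustrative sketch of why the clamp uses 2.0: a sort's comparison term grows roughly as N * log2(N), so N = 1 would zero it out and N = 0 would require log(0). (Sketch only; the committed formulas also distinguish in-memory from on-disk sorts.)

    #include <math.h>

    /* Illustrative only: the N * log2(N) shape of the comparison-count term. */
    static double
    comparison_term(double tuples)
    {
        if (tuples < 2.0)
            tuples = 2.0;           /* mirrors the clamp above */
        return tuples * (log(tuples) / log(2.0));
    }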
@@ -790,8 +786,8 @@ cost_sort(Path *path, PlannerInfo *root,
}
/*
- * Also charge a small amount (arbitrarily set equal to operator cost)
- * per extracted tuple.
+ * Also charge a small amount (arbitrarily set equal to operator cost) per
+ * extracted tuple.
*/
run_cost += cpu_operator_cost * tuples;
@@ -828,17 +824,16 @@ cost_material(Path *path,
/*
* Charge a very small amount per inserted tuple, to reflect bookkeeping
- * costs. We use cpu_tuple_cost/10 for this. This is needed to break
- * the tie that would otherwise exist between nestloop with A outer,
+ * costs. We use cpu_tuple_cost/10 for this. This is needed to break the
+ * tie that would otherwise exist between nestloop with A outer,
* materialized B inner and nestloop with B outer, materialized A inner.
* The extra cost ensures we'll prefer materializing the smaller rel.
*/
startup_cost += cpu_tuple_cost * 0.1 * tuples;
/*
- * Also charge a small amount per extracted tuple. We use
- * cpu_tuple_cost so that it doesn't appear worthwhile to materialize
- * a bare seqscan.
+ * Also charge a small amount per extracted tuple. We use cpu_tuple_cost
+ * so that it doesn't appear worthwhile to materialize a bare seqscan.
*/
run_cost += cpu_tuple_cost * tuples;
@@ -865,23 +860,22 @@ cost_agg(Path *path, PlannerInfo *root,
Cost total_cost;
/*
- * We charge one cpu_operator_cost per aggregate function per input
- * tuple, and another one per output tuple (corresponding to transfn
- * and finalfn calls respectively). If we are grouping, we charge an
- * additional cpu_operator_cost per grouping column per input tuple
- * for grouping comparisons.
+ * We charge one cpu_operator_cost per aggregate function per input tuple,
+ * and another one per output tuple (corresponding to transfn and finalfn
+ * calls respectively). If we are grouping, we charge an additional
+ * cpu_operator_cost per grouping column per input tuple for grouping
+ * comparisons.
*
* We will produce a single output tuple if not grouping, and a tuple per
* group otherwise. We charge cpu_tuple_cost for each output tuple.
*
- * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
- * same total CPU cost, but AGG_SORTED has lower startup cost. If the
- * input path is already sorted appropriately, AGG_SORTED should be
- * preferred (since it has no risk of memory overflow). This will
- * happen as long as the computed total costs are indeed exactly equal
- * --- but if there's roundoff error we might do the wrong thing. So
- * be sure that the computations below form the same intermediate
- * values in the same order.
+ * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the same
+ * total CPU cost, but AGG_SORTED has lower startup cost. If the input
+ * path is already sorted appropriately, AGG_SORTED should be preferred
+ * (since it has no risk of memory overflow). This will happen as long as
+ * the computed total costs are indeed exactly equal --- but if there's
+ * roundoff error we might do the wrong thing. So be sure that the
+ * computations below form the same intermediate values in the same order.
*/
if (aggstrategy == AGG_PLAIN)
{
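
A simplified sketch (hypothetical names, only two cost terms) of the ordering point made above: AGG_SORTED and AGG_HASHED accumulate the same terms in the same order, so their floating-point totals come out identical and only the startup/run split differs.

    /* Illustrative only, not the committed branches. */
    static void
    agg_cost_sketch(double input_startup, double input_total,
                    double per_input_term, double per_output_term,
                    double *sorted_startup, double *sorted_total,
                    double *hashed_startup, double *hashed_total)
    {
        /* AGG_SORTED: groups can be emitted on the fly */
        *sorted_startup = input_startup;
        *sorted_total = input_total;
        *sorted_total += per_input_term;    /* same terms ...        */
        *sorted_total += per_output_term;   /* ... in the same order */

        /* AGG_HASHED: all input must be read before the first output */
        *hashed_startup = input_total;
        *hashed_startup += per_input_term;
        *hashed_total = *hashed_startup;
        *hashed_total += per_output_term;
    }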
@@ -937,8 +931,8 @@ cost_group(Path *path, PlannerInfo *root,
total_cost = input_total_cost;
/*
- * Charge one cpu_operator_cost per comparison per input tuple. We
- * assume all columns get compared at most of the tuples.
+ * Charge one cpu_operator_cost per comparison per input tuple. We assume
+ * all columns get compared at most of the tuples.
*/
total_cost += cpu_operator_cost * input_tuples * numGroupCols;
@@ -968,10 +962,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
Selectivity joininfactor;
/*
- * If inner path is an indexscan, be sure to use its estimated output
- * row count, which may be lower than the restriction-clause-only row
- * count of its parent. (We don't include this case in the PATH_ROWS
- * macro because it applies *only* to a nestloop's inner relation.)
+ * If inner path is an indexscan, be sure to use its estimated output row
+ * count, which may be lower than the restriction-clause-only row count of
+ * its parent. (We don't include this case in the PATH_ROWS macro because
+ * it applies *only* to a nestloop's inner relation.)
*/
if (IsA(inner_path, IndexPath))
inner_path_rows = ((IndexPath *) inner_path)->rows;
@@ -982,11 +976,11 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
startup_cost += disable_cost;
/*
- * If we're doing JOIN_IN then we will stop scanning inner tuples for
- * an outer tuple as soon as we have one match. Account for the
- * effects of this by scaling down the cost estimates in proportion to
- * the JOIN_IN selectivity. (This assumes that all the quals attached
- * to the join are IN quals, which should be true.)
+ * If we're doing JOIN_IN then we will stop scanning inner tuples for an
+ * outer tuple as soon as we have one match. Account for the effects of
+ * this by scaling down the cost estimates in proportion to the JOIN_IN
+ * selectivity. (This assumes that all the quals attached to the join are
+ * IN quals, which should be true.)
*/
joininfactor = join_in_selectivity(path, root);
@@ -996,9 +990,9 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
* NOTE: clearly, we must pay both outer and inner paths' startup_cost
* before we can start returning tuples, so the join's startup cost is
* their sum. What's not so clear is whether the inner path's
- * startup_cost must be paid again on each rescan of the inner path.
- * This is not true if the inner path is materialized or is a
- * hashjoin, but probably is true otherwise.
+ * startup_cost must be paid again on each rescan of the inner path. This
+ * is not true if the inner path is materialized or is a hashjoin, but
+ * probably is true otherwise.
*/
startup_cost += outer_path->startup_cost + inner_path->startup_cost;
run_cost += outer_path->total_cost - outer_path->startup_cost;
@@ -1077,12 +1071,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
/*
* Compute cost and selectivity of the mergequals and qpquals (other
- * restriction clauses) separately. We use approx_selectivity here
- * for speed --- in most cases, any errors won't affect the result
- * much.
+ * restriction clauses) separately. We use approx_selectivity here for
+ * speed --- in most cases, any errors won't affect the result much.
*
- * Note: it's probably bogus to use the normal selectivity calculation
- * here when either the outer or inner path is a UniquePath.
+ * Note: it's probably bogus to use the normal selectivity calculation here
+ * when either the outer or inner path is a UniquePath.
*/
merge_selec = approx_selectivity(root, mergeclauses,
path->jpath.jointype);
@@ -1095,31 +1088,30 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows);
/*
- * When there are equal merge keys in the outer relation, the
- * mergejoin must rescan any matching tuples in the inner relation.
- * This means re-fetching inner tuples. Our cost model for this is
- * that a re-fetch costs the same as an original fetch, which is
- * probably an overestimate; but on the other hand we ignore the
- * bookkeeping costs of mark/restore. Not clear if it's worth
- * developing a more refined model.
+ * When there are equal merge keys in the outer relation, the mergejoin
+ * must rescan any matching tuples in the inner relation. This means
+ * re-fetching inner tuples. Our cost model for this is that a re-fetch
+ * costs the same as an original fetch, which is probably an overestimate;
+ * but on the other hand we ignore the bookkeeping costs of mark/restore.
+ * Not clear if it's worth developing a more refined model.
*
- * The number of re-fetches can be estimated approximately as size of
- * merge join output minus size of inner relation. Assume that the
- * distinct key values are 1, 2, ..., and denote the number of values
- * of each key in the outer relation as m1, m2, ...; in the inner
- * relation, n1, n2, ... Then we have
+ * The number of re-fetches can be estimated approximately as size of merge
+ * join output minus size of inner relation. Assume that the distinct key
+ * values are 1, 2, ..., and denote the number of values of each key in
+ * the outer relation as m1, m2, ...; in the inner relation, n1, n2, ...
+ * Then we have
*
* size of join = m1 * n1 + m2 * n2 + ...
*
- * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
- * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
+ * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 * n1
+ * + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
* relation
*
- * This equation works correctly for outer tuples having no inner match
- * (nk = 0), but not for inner tuples having no outer match (mk = 0);
- * we are effectively subtracting those from the number of rescanned
- * tuples, when we should not. Can we do better without expensive
- * selectivity computations?
+ * This equation works correctly for outer tuples having no inner match (nk =
+ * 0), but not for inner tuples having no outer match (mk = 0); we are
+ * effectively subtracting those from the number of rescanned tuples, when
+ * we should not. Can we do better without expensive selectivity
+ * computations?
*/
if (IsA(outer_path, UniquePath))
rescannedtuples = 0;
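
A quick numeric check of the identity in the comment above, assuming two distinct key values with outer counts m = (2, 1) and inner counts n = (3, 2):

    size of join       = m1*n1 + m2*n2         = 2*3 + 1*2 = 8
    size of inner rel  = n1 + n2               = 3 + 2     = 5
    rescanned tuples   = (m1-1)*n1 + (m2-1)*n2 = 3 + 0     = 3 = 8 - 5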
@@ -1140,9 +1132,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
* inputs that will actually need to be scanned. We use only the first
* (most significant) merge clause for this purpose.
*
- * Since this calculation is somewhat expensive, and will be the same for
- * all mergejoin paths associated with the merge clause, we cache the
- * results in the RestrictInfo node.
+ * Since this calculation is somewhat expensive, and will be the same for all
+ * mergejoin paths associated with the merge clause, we cache the results
+ * in the RestrictInfo node.
*/
if (mergeclauses && path->jpath.jointype != JOIN_FULL)
{
@@ -1181,9 +1173,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
/*
* Readjust scan selectivities to account for above rounding. This is
- * normally an insignificant effect, but when there are only a few
- * rows in the inputs, failing to do this makes for a large percentage
- * error.
+ * normally an insignificant effect, but when there are only a few rows in
+ * the inputs, failing to do this makes for a large percentage error.
*/
outerscansel = outer_rows / outer_path_rows;
innerscansel = inner_rows / inner_path_rows;
@@ -1231,20 +1222,20 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
/* CPU costs */
/*
- * If we're doing JOIN_IN then we will stop outputting inner tuples
- * for an outer tuple as soon as we have one match. Account for the
- * effects of this by scaling down the cost estimates in proportion to
- * the expected output size. (This assumes that all the quals
- * attached to the join are IN quals, which should be true.)
+ * If we're doing JOIN_IN then we will stop outputting inner tuples for an
+ * outer tuple as soon as we have one match. Account for the effects of
+ * this by scaling down the cost estimates in proportion to the expected
+ * output size. (This assumes that all the quals attached to the join are
+ * IN quals, which should be true.)
*/
joininfactor = join_in_selectivity(&path->jpath, root);
/*
- * The number of tuple comparisons needed is approximately number of
- * outer rows plus number of inner rows plus number of rescanned
- * tuples (can we refine this?). At each one, we need to evaluate the
- * mergejoin quals. NOTE: JOIN_IN mode does not save any work here,
- * so do NOT include joininfactor.
+ * The number of tuple comparisons needed is approximately number of outer
+ * rows plus number of inner rows plus number of rescanned tuples (can we
+ * refine this?). At each one, we need to evaluate the mergejoin quals.
+ * NOTE: JOIN_IN mode does not save any work here, so do NOT include
+ * joininfactor.
*/
startup_cost += merge_qual_cost.startup;
run_cost += merge_qual_cost.per_tuple *
@@ -1253,9 +1244,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
/*
* For each tuple that gets through the mergejoin proper, we charge
* cpu_tuple_cost plus the cost of evaluating additional restriction
- * clauses that are to be applied at the join. (This is pessimistic
- * since not all of the quals may get evaluated at each tuple.) This
- * work is skipped in JOIN_IN mode, so apply the factor.
+ * clauses that are to be applied at the join. (This is pessimistic since
+ * not all of the quals may get evaluated at each tuple.) This work is
+ * skipped in JOIN_IN mode, so apply the factor.
*/
startup_cost += qp_qual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
@@ -1290,9 +1281,9 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
double outer_path_rows = PATH_ROWS(outer_path);
double inner_path_rows = PATH_ROWS(inner_path);
double outerbytes = relation_byte_size(outer_path_rows,
- outer_path->parent->width);
+ outer_path->parent->width);
double innerbytes = relation_byte_size(inner_path_rows,
- inner_path->parent->width);
+ inner_path->parent->width);
int num_hashclauses = list_length(hashclauses);
int numbuckets;
int numbatches;
@@ -1306,12 +1297,11 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/*
* Compute cost and selectivity of the hashquals and qpquals (other
- * restriction clauses) separately. We use approx_selectivity here
- * for speed --- in most cases, any errors won't affect the result
- * much.
+ * restriction clauses) separately. We use approx_selectivity here for
+ * speed --- in most cases, any errors won't affect the result much.
*
- * Note: it's probably bogus to use the normal selectivity calculation
- * here when either the outer or inner path is a UniquePath.
+ * Note: it's probably bogus to use the normal selectivity calculation here
+ * when either the outer or inner path is a UniquePath.
*/
hash_selec = approx_selectivity(root, hashclauses,
path->jpath.jointype);
@@ -1329,13 +1319,12 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
startup_cost += inner_path->total_cost;
/*
- * Cost of computing hash function: must do it once per input tuple.
- * We charge one cpu_operator_cost for each column's hash function.
+ * Cost of computing hash function: must do it once per input tuple. We
+ * charge one cpu_operator_cost for each column's hash function.
*
- * XXX when a hashclause is more complex than a single operator, we
- * really should charge the extra eval costs of the left or right
- * side, as appropriate, here. This seems more work than it's worth
- * at the moment.
+ * XXX when a hashclause is more complex than a single operator, we really
+ * should charge the extra eval costs of the left or right side, as
+ * appropriate, here. This seems more work than it's worth at the moment.
*/
startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows;
run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
@@ -1345,17 +1334,17 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
inner_path->parent->width,
&numbuckets,
&numbatches);
- virtualbuckets = (double) numbuckets * (double) numbatches;
+ virtualbuckets = (double) numbuckets *(double) numbatches;
/*
- * Determine bucketsize fraction for inner relation. We use the
- * smallest bucketsize estimated for any individual hashclause; this
- * is undoubtedly conservative.
+ * Determine bucketsize fraction for inner relation. We use the smallest
+ * bucketsize estimated for any individual hashclause; this is undoubtedly
+ * conservative.
*
- * BUT: if inner relation has been unique-ified, we can assume it's good
- * for hashing. This is important both because it's the right answer,
- * and because we avoid contaminating the cache with a value that's
- * wrong for non-unique-ified paths.
+ * BUT: if inner relation has been unique-ified, we can assume it's good for
+ * hashing. This is important both because it's the right answer, and
+ * because we avoid contaminating the cache with a value that's wrong for
+ * non-unique-ified paths.
*/
if (IsA(inner_path, UniquePath))
innerbucketsize = 1.0 / virtualbuckets;
@@ -1370,13 +1359,12 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
Assert(IsA(restrictinfo, RestrictInfo));
/*
- * First we have to figure out which side of the hashjoin
- * clause is the inner side.
+ * First we have to figure out which side of the hashjoin clause
+ * is the inner side.
*
* Since we tend to visit the same clauses over and over when
- * planning a large query, we cache the bucketsize estimate in
- * the RestrictInfo node to avoid repeated lookups of
- * statistics.
+ * planning a large query, we cache the bucketsize estimate in the
+ * RestrictInfo node to avoid repeated lookups of statistics.
*/
if (bms_is_subset(restrictinfo->right_relids,
inner_path->parent->relids))
@@ -1388,7 +1376,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/* not cached yet */
thisbucketsize =
estimate_hash_bucketsize(root,
- get_rightop(restrictinfo->clause),
+ get_rightop(restrictinfo->clause),
virtualbuckets);
restrictinfo->right_bucketsize = thisbucketsize;
}
@@ -1404,7 +1392,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/* not cached yet */
thisbucketsize =
estimate_hash_bucketsize(root,
- get_leftop(restrictinfo->clause),
+ get_leftop(restrictinfo->clause),
virtualbuckets);
restrictinfo->left_bucketsize = thisbucketsize;
}
@@ -1417,10 +1405,10 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/*
* If inner relation is too big then we will need to "batch" the join,
- * which implies writing and reading most of the tuples to disk an
- * extra time. Charge one cost unit per page of I/O (correct since it
- * should be nice and sequential...). Writing the inner rel counts as
- * startup cost, all the rest as run cost.
+ * which implies writing and reading most of the tuples to disk an extra
+ * time. Charge one cost unit per page of I/O (correct since it should be
+ * nice and sequential...). Writing the inner rel counts as startup cost,
+ * all the rest as run cost.
*/
if (numbatches > 1)
{
@@ -1436,21 +1424,21 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/* CPU costs */
/*
- * If we're doing JOIN_IN then we will stop comparing inner tuples to
- * an outer tuple as soon as we have one match. Account for the
- * effects of this by scaling down the cost estimates in proportion to
- * the expected output size. (This assumes that all the quals
- * attached to the join are IN quals, which should be true.)
+ * If we're doing JOIN_IN then we will stop comparing inner tuples to an
+ * outer tuple as soon as we have one match. Account for the effects of
+ * this by scaling down the cost estimates in proportion to the expected
+ * output size. (This assumes that all the quals attached to the join are
+ * IN quals, which should be true.)
*/
joininfactor = join_in_selectivity(&path->jpath, root);
/*
- * The number of tuple comparisons needed is the number of outer
- * tuples times the typical number of tuples in a hash bucket, which
- * is the inner relation size times its bucketsize fraction. At each
- * one, we need to evaluate the hashjoin quals. (Note: charging the
- * full qual eval cost at each tuple is pessimistic, since we don't
- * evaluate the quals unless the hash values match exactly.)
+ * The number of tuple comparisons needed is the number of outer tuples
+ * times the typical number of tuples in a hash bucket, which is the inner
+ * relation size times its bucketsize fraction. At each one, we need to
+ * evaluate the hashjoin quals. (Note: charging the full qual eval cost
+ * at each tuple is pessimistic, since we don't evaluate the quals unless
+ * the hash values match exactly.)
*/
startup_cost += hash_qual_cost.startup;
run_cost += hash_qual_cost.per_tuple *
@@ -1460,8 +1448,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/*
* For each tuple that gets through the hashjoin proper, we charge
* cpu_tuple_cost plus the cost of evaluating additional restriction
- * clauses that are to be applied at the join. (This is pessimistic
- * since not all of the quals may get evaluated at each tuple.)
+ * clauses that are to be applied at the join. (This is pessimistic since
+ * not all of the quals may get evaluated at each tuple.)
*/
startup_cost += qp_qual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
@@ -1469,16 +1457,16 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/*
* Bias against putting larger relation on inside. We don't want an
- * absolute prohibition, though, since larger relation might have
- * better bucketsize --- and we can't trust the size estimates
- * unreservedly, anyway. Instead, inflate the run cost by the square
- * root of the size ratio. (Why square root? No real good reason,
- * but it seems reasonable...)
+ * absolute prohibition, though, since larger relation might have better
+ * bucketsize --- and we can't trust the size estimates unreservedly,
+ * anyway. Instead, inflate the run cost by the square root of the size
+ * ratio. (Why square root? No real good reason, but it seems
+ * reasonable...)
*
* Note: before 7.4 we implemented this by inflating startup cost; but if
- * there's a disable_cost component in the input paths' startup cost,
- * that unfairly penalizes the hash. Probably it'd be better to keep
- * track of disable penalty separately from cost.
+ * there's a disable_cost component in the input paths' startup cost, that
+ * unfairly penalizes the hash. Probably it'd be better to keep track of
+ * disable penalty separately from cost.
*/
if (innerbytes > outerbytes && outerbytes > 0)
run_cost *= sqrt(innerbytes / outerbytes);
@@ -1545,13 +1533,13 @@ cost_qual_eval_walker(Node *node, QualCost *total)
return false;
/*
- * Our basic strategy is to charge one cpu_operator_cost for each
- * operator or function node in the given tree. Vars and Consts are
- * charged zero, and so are boolean operators (AND, OR, NOT).
- * Simplistic, but a lot better than no model at all.
+ * Our basic strategy is to charge one cpu_operator_cost for each operator
+ * or function node in the given tree. Vars and Consts are charged zero,
+ * and so are boolean operators (AND, OR, NOT). Simplistic, but a lot
+ * better than no model at all.
*
- * Should we try to account for the possibility of short-circuit
- * evaluation of AND/OR?
+ * Should we try to account for the possibility of short-circuit evaluation
+ * of AND/OR?
*/
if (IsA(node, FuncExpr) ||
IsA(node, OpExpr) ||
@@ -1572,12 +1560,12 @@ cost_qual_eval_walker(Node *node, QualCost *total)
{
/*
* A subplan node in an expression typically indicates that the
- * subplan will be executed on each evaluation, so charge
- * accordingly. (Sub-selects that can be executed as InitPlans
- * have already been removed from the expression.)
+ * subplan will be executed on each evaluation, so charge accordingly.
+ * (Sub-selects that can be executed as InitPlans have already been
+ * removed from the expression.)
*
- * An exception occurs when we have decided we can implement the
- * subplan by hashing.
+ * An exception occurs when we have decided we can implement the subplan
+ * by hashing.
*
*/
SubPlan *subplan = (SubPlan *) node;
@@ -1586,32 +1574,31 @@ cost_qual_eval_walker(Node *node, QualCost *total)
if (subplan->useHashTable)
{
/*
- * If we are using a hash table for the subquery outputs, then
- * the cost of evaluating the query is a one-time cost. We
- * charge one cpu_operator_cost per tuple for the work of
- * loading the hashtable, too.
+ * If we are using a hash table for the subquery outputs, then the
+ * cost of evaluating the query is a one-time cost. We charge one
+ * cpu_operator_cost per tuple for the work of loading the
+ * hashtable, too.
*/
total->startup += plan->total_cost +
cpu_operator_cost * plan->plan_rows;
/*
- * The per-tuple costs include the cost of evaluating the
- * lefthand expressions, plus the cost of probing the
- * hashtable. Recursion into the exprs list will handle the
- * lefthand expressions properly, and will count one
- * cpu_operator_cost for each comparison operator. That is
- * probably too low for the probing cost, but it's hard to
- * make a better estimate, so live with it for now.
+ * The per-tuple costs include the cost of evaluating the lefthand
+ * expressions, plus the cost of probing the hashtable. Recursion
+ * into the exprs list will handle the lefthand expressions
+ * properly, and will count one cpu_operator_cost for each
+ * comparison operator. That is probably too low for the probing
+ * cost, but it's hard to make a better estimate, so live with it
+ * for now.
*/
}
else
{
/*
* Otherwise we will be rescanning the subplan output on each
- * evaluation. We need to estimate how much of the output we
- * will actually need to scan. NOTE: this logic should agree
- * with the estimates used by make_subplan() in
- * plan/subselect.c.
+ * evaluation. We need to estimate how much of the output we will
+ * actually need to scan. NOTE: this logic should agree with the
+ * estimates used by make_subplan() in plan/subselect.c.
*/
Cost plan_run_cost = plan->total_cost - plan->startup_cost;
@@ -1636,10 +1623,10 @@ cost_qual_eval_walker(Node *node, QualCost *total)
/*
* Also account for subplan's startup cost. If the subplan is
- * uncorrelated or undirect correlated, AND its topmost node
- * is a Sort or Material node, assume that we'll only need to
- * pay its startup cost once; otherwise assume we pay the
- * startup cost every time.
+ * uncorrelated or undirect correlated, AND its topmost node is a
+ * Sort or Material node, assume that we'll only need to pay its
+ * startup cost once; otherwise assume we pay the startup cost
+ * every time.
*/
if (subplan->parParam == NIL &&
(IsA(plan, Sort) ||
@@ -1761,9 +1748,9 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
/*
* Compute joinclause selectivity. Note that we are only considering
- * clauses that become restriction clauses at this join level; we are
- * not double-counting them because they were not considered in
- * estimating the sizes of the component rels.
+ * clauses that become restriction clauses at this join level; we are not
+ * double-counting them because they were not considered in estimating the
+ * sizes of the component rels.
*/
selec = clauselist_selectivity(root,
restrictlist,
@@ -1773,13 +1760,13 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
/*
* Basically, we multiply size of Cartesian product by selectivity.
*
- * If we are doing an outer join, take that into account: the output must
- * be at least as large as the non-nullable input. (Is there any
- * chance of being even smarter?)
+ * If we are doing an outer join, take that into account: the output must be
+ * at least as large as the non-nullable input. (Is there any chance of
+ * being even smarter?)
*
- * For JOIN_IN and variants, the Cartesian product is figured with
- * respect to a unique-ified input, and then we can clamp to the size
- * of the other input.
+ * For JOIN_IN and variants, the Cartesian product is figured with respect to
+ * a unique-ified input, and then we can clamp to the size of the other
+ * input.
*/
switch (jointype)
{
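
A hedged sketch of the clamp described above for the left-outer case, using hypothetical local names rather than the committed switch arms:

    /* Illustrative only: Cartesian product times selectivity, then clamp
     * so an outer join is at least as large as its non-nullable input. */
    static double
    joinrel_rows_sketch(double outer_rows, double inner_rows, double selec,
                        int is_left_outer_join)
    {
        double nrows = outer_rows * inner_rows * selec;

        if (is_left_outer_join && nrows < outer_rows)
            nrows = outer_rows;
        return nrows;
    }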
@@ -1848,12 +1835,11 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
return 1.0;
/*
- * Return 1.0 if the inner side is already known unique. The case
- * where the inner path is already a UniquePath probably cannot happen
- * in current usage, but check it anyway for completeness. The
- * interesting case is where we've determined the inner relation
- * itself is unique, which we can check by looking at the rows
- * estimate for its UniquePath.
+ * Return 1.0 if the inner side is already known unique. The case where
+ * the inner path is already a UniquePath probably cannot happen in
+ * current usage, but check it anyway for completeness. The interesting
+ * case is where we've determined the inner relation itself is unique,
+ * which we can check by looking at the rows estimate for its UniquePath.
*/
if (IsA(path->innerjoinpath, UniquePath))
return 1.0;
@@ -1866,10 +1852,9 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
/*
* Compute same result set_joinrel_size_estimates would compute for
- * JOIN_INNER. Note that we use the input rels' absolute size
- * estimates, not PATH_ROWS() which might be less; if we used
- * PATH_ROWS() we'd be double-counting the effects of any join clauses
- * used in input scans.
+ * JOIN_INNER. Note that we use the input rels' absolute size estimates,
+ * not PATH_ROWS() which might be less; if we used PATH_ROWS() we'd be
+ * double-counting the effects of any join clauses used in input scans.
*/
selec = clauselist_selectivity(root,
path->joinrestrictinfo,
@@ -1908,8 +1893,8 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
/*
* Estimate number of rows the function itself will return.
*
- * XXX no idea how to do this yet; but we can at least check whether
- * function returns set or not...
+ * XXX no idea how to do this yet; but we can at least check whether function
+ * returns set or not...
*/
if (expression_returns_set(rte->funcexpr))
rel->tuples = 1000;
@@ -1957,8 +1942,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
ndx = var->varattno - rel->min_attr;
/*
- * The width probably hasn't been cached yet, but may as well
- * check
+ * The width probably hasn't been cached yet, but may as well check
*/
if (rel->attr_widths[ndx] > 0)
{