diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2004-01-05 05:07:36 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2004-01-05 05:07:36 +0000 |
commit | 9091e8d1b233faf9994518fda7fcc171fddb53ac (patch) | |
tree | 572b200768bbfba3cfc121f1b4c12c79ab650d96 /src | |
parent | bf488a6842ef2bf43ab89337c8970971e84951da (diff) | |
download | postgresql-9091e8d1b233faf9994518fda7fcc171fddb53ac.tar.gz postgresql-9091e8d1b233faf9994518fda7fcc171fddb53ac.zip |
Add the ability to extract OR indexscan conditions from OR-of-AND
join conditions in which each OR subclause includes a constraint on
the same relation. This implements the other useful side-effect of
conversion to CNF format, without its unpleasant side-effects. As
per pghackers discussion of a few weeks ago.
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/nodes/copyfuncs.c | 7 | ||||
-rw-r--r-- | src/backend/nodes/equalfuncs.c | 5 | ||||
-rw-r--r-- | src/backend/nodes/outfuncs.c | 9 | ||||
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 15 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 282 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 57 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 14 | ||||
-rw-r--r-- | src/backend/optimizer/path/orindxpath.c | 211 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 3 | ||||
-rw-r--r-- | src/backend/optimizer/plan/initsplan.c | 29 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 10 | ||||
-rw-r--r-- | src/backend/optimizer/util/restrictinfo.c | 44 | ||||
-rw-r--r-- | src/include/nodes/relation.h | 33 | ||||
-rw-r--r-- | src/include/optimizer/cost.h | 3 | ||||
-rw-r--r-- | src/include/optimizer/paths.h | 4 | ||||
-rw-r--r-- | src/include/optimizer/restrictinfo.h | 5 |
16 files changed, 437 insertions, 294 deletions
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 29f55e0abb8..75c2fa898d9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.272 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.273 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1168,8 +1168,9 @@ _copyRestrictInfo(RestrictInfo *from) RestrictInfo *newnode = makeNode(RestrictInfo); COPY_NODE_FIELD(clause); - COPY_SCALAR_FIELD(ispusheddown); - COPY_SCALAR_FIELD(canjoin); + COPY_SCALAR_FIELD(is_pushed_down); + COPY_SCALAR_FIELD(valid_everywhere); + COPY_SCALAR_FIELD(can_join); COPY_BITMAPSET_FIELD(clause_relids); COPY_BITMAPSET_FIELD(left_relids); COPY_BITMAPSET_FIELD(right_relids); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 5c7e5384402..41b292a47cc 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.211 2003/12/30 23:53:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.212 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -560,7 +560,8 @@ static bool _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b) { COMPARE_NODE_FIELD(clause); - COMPARE_SCALAR_FIELD(ispusheddown); + COMPARE_SCALAR_FIELD(is_pushed_down); + COMPARE_SCALAR_FIELD(valid_everywhere); /* * We ignore all the remaining fields, since they may not be set yet, diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 3b8613fd19c..e91e8e0d170 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.225 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.226 2004/01/05 05:07:35 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -971,7 +971,7 @@ _outIndexPath(StringInfo str, IndexPath *node) WRITE_NODE_FIELD(indexqual); WRITE_NODE_FIELD(indexjoinclauses); WRITE_ENUM_FIELD(indexscandir, ScanDirection); - WRITE_FLOAT_FIELD(rows, "%.2f"); + WRITE_FLOAT_FIELD(rows, "%.0f"); } static void @@ -1073,8 +1073,9 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node) /* NB: this isn't a complete set of fields */ WRITE_NODE_FIELD(clause); - WRITE_BOOL_FIELD(ispusheddown); - WRITE_BOOL_FIELD(canjoin); + WRITE_BOOL_FIELD(is_pushed_down); + WRITE_BOOL_FIELD(valid_everywhere); + WRITE_BOOL_FIELD(can_join); WRITE_BITMAPSET_FIELD(clause_relids); WRITE_BITMAPSET_FIELD(left_relids); WRITE_BITMAPSET_FIELD(right_relids); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 1f979dd4094..2d724265f06 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.110 2003/12/17 17:07:48 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.111 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -151,6 +151,17 @@ set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte) /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); + /* Test any partial indexes of rel for applicability */ + check_partial_indexes(root, rel); + + /* + * Check to see if we can extract any restriction conditions from + * join quals that are OR-of-AND structures. If so, add them to the + * rel's restriction list, and recompute the size estimates. + */ + if (create_or_index_quals(root, rel)) + set_baserel_size_estimates(root, rel); + /* * Generate paths and add them to the rel's pathlist. * @@ -167,8 +178,6 @@ set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte) /* Consider index paths for both simple and OR index clauses */ create_index_paths(root, rel); - - /* create_index_paths must be done before create_or_index_paths */ create_or_index_paths(root, rel); /* Now find the cheapest of the paths for this rel */ diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 94202cda4b0..fa7dcdc9740 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -49,7 +49,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.119 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.120 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -107,12 +107,34 @@ static Selectivity estimate_hash_bucketsize(Query *root, Var *var, static bool cost_qual_eval_walker(Node *node, QualCost *total); static Selectivity approx_selectivity(Query *root, List *quals, JoinType jointype); +static Selectivity join_in_selectivity(JoinPath *path, Query *root); static void set_rel_width(Query *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); /* + * clamp_row_est + * Force a row-count estimate to a sane value. + */ +double +clamp_row_est(double nrows) +{ + /* + * Force estimate to be at least one row, to make explain output look + * better and to avoid possible divide-by-zero when interpolating + * costs. Make it an integer, too. + */ + if (nrows < 1.0) + nrows = 1.0; + else + nrows = ceil(nrows); + + return nrows; +} + + +/* * cost_seqscan * Determines and returns the cost of scanning a relation sequentially. */ @@ -300,10 +322,7 @@ cost_index(Path *path, Query *root, *---------- */ - tuples_fetched = indexSelectivity * baserel->tuples; - /* Don't believe estimates less than 1... */ - if (tuples_fetched < 1.0) - tuples_fetched = 1.0; + tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); /* This part is the Mackert and Lohman formula */ @@ -718,7 +737,6 @@ cost_nestloop(NestPath *path, Query *root) { Path *outer_path = path->outerjoinpath; Path *inner_path = path->innerjoinpath; - List *restrictlist = path->joinrestrictinfo; Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; @@ -728,6 +746,15 @@ cost_nestloop(NestPath *path, Query *root) double ntuples; Selectivity joininfactor; + /* + * If inner path is an indexscan, be sure to use its estimated output row + * count, which may be lower than the restriction-clause-only row count of + * its parent. (We don't include this case in the PATH_ROWS macro because + * it applies *only* to a nestloop's inner relation.) + */ + if (IsA(inner_path, IndexPath)) + inner_path_rows = ((IndexPath *) inner_path)->rows; + if (!enable_nestloop) startup_cost += disable_cost; @@ -735,26 +762,12 @@ cost_nestloop(NestPath *path, Query *root) * If we're doing JOIN_IN then we will stop scanning inner tuples for * an outer tuple as soon as we have one match. Account for the * effects of this by scaling down the cost estimates in proportion to - * the expected output size. (This assumes that all the quals - * attached to the join are IN quals, which should be true.) - * - * Note: it's probably bogus to use the normal selectivity calculation - * here when either the outer or inner path is a UniquePath. + * the JOIN_IN selectivity. (This assumes that all the quals + * attached to the join are IN quals, which should be true.) This would + * probably be the wrong approach if an input path is a UniquePath, but + * we'd never have that with JOIN_IN join type. */ - if (path->jointype == JOIN_IN) - { - Selectivity qual_selec = approx_selectivity(root, restrictlist, - path->jointype); - double qptuples; - - qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows); - if (qptuples > path->path.parent->rows) - joininfactor = path->path.parent->rows / qptuples; - else - joininfactor = 1.0; - } - else - joininfactor = 1.0; + joininfactor = join_in_selectivity(path, root); /* cost of source data */ @@ -785,23 +798,12 @@ cost_nestloop(NestPath *path, Query *root) (inner_path->total_cost - inner_path->startup_cost) * joininfactor; /* - * Compute number of tuples processed (not number emitted!). If inner - * path is an indexscan, be sure to use its estimated output row - * count, which may be lower than the restriction-clause-only row - * count of its parent. (We don't include this case in the PATH_ROWS - * macro because it applies *only* to a nestloop's inner relation.) - * Note: it is correct to use the unadjusted inner_path_rows in the - * above calculation for joininfactor, since otherwise we'd be - * double-counting the selectivity of the join clause being used for - * the index. + * Compute number of tuples processed (not number emitted!) */ - if (IsA(inner_path, IndexPath)) - inner_path_rows = ((IndexPath *) inner_path)->rows; - - ntuples = inner_path_rows * outer_path_rows; + ntuples = outer_path_rows * inner_path_rows * joininfactor; /* CPU costs */ - cost_qual_eval(&restrict_qual_cost, restrictlist); + cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo); startup_cost += restrict_qual_cost.startup; cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple; run_cost += cpu_per_tuple * ntuples; @@ -827,7 +829,6 @@ cost_mergejoin(MergePath *path, Query *root) { Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; - List *restrictlist = path->jpath.joinrestrictinfo; List *mergeclauses = path->path_mergeclauses; List *outersortkeys = path->outersortkeys; List *innersortkeys = path->innersortkeys; @@ -835,18 +836,15 @@ cost_mergejoin(MergePath *path, Query *root) Cost run_cost = 0; Cost cpu_per_tuple; Selectivity merge_selec; - Selectivity qp_selec; QualCost merge_qual_cost; QualCost qp_qual_cost; RestrictInfo *firstclause; - List *qpquals; double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double outer_rows, inner_rows; double mergejointuples, rescannedtuples; - double qptuples; double rescanratio; Selectivity outerscansel, innerscansel; @@ -868,16 +866,12 @@ cost_mergejoin(MergePath *path, Query *root) merge_selec = approx_selectivity(root, mergeclauses, path->jpath.jointype); cost_qual_eval(&merge_qual_cost, mergeclauses); - qpquals = set_ptrDifference(restrictlist, mergeclauses); - qp_selec = approx_selectivity(root, qpquals, - path->jpath.jointype); - cost_qual_eval(&qp_qual_cost, qpquals); - freeList(qpquals); + cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo); + qp_qual_cost.startup -= merge_qual_cost.startup; + qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple; /* approx # tuples passing the merge quals */ - mergejointuples = ceil(merge_selec * outer_path_rows * inner_path_rows); - /* approx # tuples passing qpquals as well */ - qptuples = ceil(mergejointuples * qp_selec); + mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows); /* * When there are equal merge keys in the outer relation, the @@ -948,13 +942,8 @@ cost_mergejoin(MergePath *path, Query *root) } /* convert selectivity to row count; must scan at least one row */ - - outer_rows = ceil(outer_path_rows * outerscansel); - if (outer_rows < 1) - outer_rows = 1; - inner_rows = ceil(inner_path_rows * innerscansel); - if (inner_rows < 1) - inner_rows = 1; + outer_rows = clamp_row_est(outer_path_rows * outerscansel); + inner_rows = clamp_row_est(inner_path_rows * innerscansel); /* * Readjust scan selectivities to account for above rounding. This is @@ -1012,13 +1001,11 @@ cost_mergejoin(MergePath *path, Query *root) * for an outer tuple as soon as we have one match. Account for the * effects of this by scaling down the cost estimates in proportion to * the expected output size. (This assumes that all the quals - * attached to the join are IN quals, which should be true.) + * attached to the join are IN quals, which should be true.) This would + * probably be the wrong approach if an input path is a UniquePath, but + * we'd never have that with JOIN_IN join type. */ - if (path->jpath.jointype == JOIN_IN && - qptuples > path->jpath.path.parent->rows) - joininfactor = path->jpath.path.parent->rows / qptuples; - else - joininfactor = 1.0; + joininfactor = join_in_selectivity(&path->jpath, root); /* * The number of tuple comparisons needed is approximately number of @@ -1060,17 +1047,14 @@ cost_hashjoin(HashPath *path, Query *root) { Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; - List *restrictlist = path->jpath.joinrestrictinfo; List *hashclauses = path->path_hashclauses; Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; Selectivity hash_selec; - Selectivity qp_selec; QualCost hash_qual_cost; QualCost qp_qual_cost; double hashjointuples; - double qptuples; double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double outerbytes = relation_byte_size(outer_path_rows, @@ -1084,7 +1068,6 @@ cost_hashjoin(HashPath *path, Query *root) Selectivity innerbucketsize; Selectivity joininfactor; List *hcl; - List *qpquals; if (!enable_hashjoin) startup_cost += disable_cost; @@ -1101,16 +1084,12 @@ cost_hashjoin(HashPath *path, Query *root) hash_selec = approx_selectivity(root, hashclauses, path->jpath.jointype); cost_qual_eval(&hash_qual_cost, hashclauses); - qpquals = set_ptrDifference(restrictlist, hashclauses); - qp_selec = approx_selectivity(root, qpquals, - path->jpath.jointype); - cost_qual_eval(&qp_qual_cost, qpquals); - freeList(qpquals); + cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo); + qp_qual_cost.startup -= hash_qual_cost.startup; + qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple; /* approx # tuples passing the hash quals */ - hashjointuples = ceil(hash_selec * outer_path_rows * inner_path_rows); - /* approx # tuples passing qpquals as well */ - qptuples = ceil(hashjointuples * qp_selec); + hashjointuples = clamp_row_est(hash_selec * outer_path_rows * inner_path_rows); /* cost of source data */ startup_cost += outer_path->startup_cost; @@ -1229,13 +1208,11 @@ cost_hashjoin(HashPath *path, Query *root) * an outer tuple as soon as we have one match. Account for the * effects of this by scaling down the cost estimates in proportion to * the expected output size. (This assumes that all the quals - * attached to the join are IN quals, which should be true.) + * attached to the join are IN quals, which should be true.) This would + * probably be the wrong approach if an input path is a UniquePath, but + * we'd never have that with JOIN_IN join type. */ - if (path->jpath.jointype == JOIN_IN && - qptuples > path->jpath.path.parent->rows) - joininfactor = path->jpath.path.parent->rows / qptuples; - else - joininfactor = 1.0; + joininfactor = join_in_selectivity(&path->jpath, root); /* * The number of tuple comparisons needed is the number of outer @@ -1245,7 +1222,7 @@ cost_hashjoin(HashPath *path, Query *root) */ startup_cost += hash_qual_cost.startup; run_cost += hash_qual_cost.per_tuple * - outer_path_rows * ceil(inner_path_rows * innerbucketsize) * + outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * joininfactor; /* @@ -1669,28 +1646,18 @@ approx_selectivity(Query *root, List *quals, JoinType jointype) void set_baserel_size_estimates(Query *root, RelOptInfo *rel) { - double temp; + double nrows; /* Should only be applied to base relations */ Assert(rel->relid > 0); - temp = rel->tuples * + nrows = rel->tuples * clauselist_selectivity(root, rel->baserestrictinfo, 0, JOIN_INNER); - /* - * Force estimate to be at least one row, to make explain output look - * better and to avoid possible divide-by-zero when interpolating - * cost. Make it an integer, too. - */ - if (temp < 1.0) - temp = 1.0; - else - temp = ceil(temp); - - rel->rows = temp; + rel->rows = clamp_row_est(nrows); cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo); @@ -1719,7 +1686,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel) * rel1, JOIN_RIGHT). Also, JOIN_IN should produce the same result as * JOIN_UNIQUE_INNER, likewise JOIN_REVERSE_IN == JOIN_UNIQUE_OUTER. * - * We set the same relnode fields as set_baserel_size_estimates() does. + * We set only the rows field here. The width field was already set by + * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void set_joinrel_size_estimates(Query *root, RelOptInfo *rel, @@ -1729,7 +1697,7 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, List *restrictlist) { Selectivity selec; - double temp; + double nrows; UniquePath *upath; /* @@ -1757,63 +1725,87 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, switch (jointype) { case JOIN_INNER: - temp = outer_rel->rows * inner_rel->rows * selec; + nrows = outer_rel->rows * inner_rel->rows * selec; break; case JOIN_LEFT: - temp = outer_rel->rows * inner_rel->rows * selec; - if (temp < outer_rel->rows) - temp = outer_rel->rows; + nrows = outer_rel->rows * inner_rel->rows * selec; + if (nrows < outer_rel->rows) + nrows = outer_rel->rows; break; case JOIN_RIGHT: - temp = outer_rel->rows * inner_rel->rows * selec; - if (temp < inner_rel->rows) - temp = inner_rel->rows; + nrows = outer_rel->rows * inner_rel->rows * selec; + if (nrows < inner_rel->rows) + nrows = inner_rel->rows; break; case JOIN_FULL: - temp = outer_rel->rows * inner_rel->rows * selec; - if (temp < outer_rel->rows) - temp = outer_rel->rows; - if (temp < inner_rel->rows) - temp = inner_rel->rows; + nrows = outer_rel->rows * inner_rel->rows * selec; + if (nrows < outer_rel->rows) + nrows = outer_rel->rows; + if (nrows < inner_rel->rows) + nrows = inner_rel->rows; break; case JOIN_IN: case JOIN_UNIQUE_INNER: upath = create_unique_path(root, inner_rel, inner_rel->cheapest_total_path); - temp = outer_rel->rows * upath->rows * selec; - if (temp > outer_rel->rows) - temp = outer_rel->rows; + nrows = outer_rel->rows * upath->rows * selec; + if (nrows > outer_rel->rows) + nrows = outer_rel->rows; break; case JOIN_REVERSE_IN: case JOIN_UNIQUE_OUTER: upath = create_unique_path(root, outer_rel, outer_rel->cheapest_total_path); - temp = upath->rows * inner_rel->rows * selec; - if (temp > inner_rel->rows) - temp = inner_rel->rows; + nrows = upath->rows * inner_rel->rows * selec; + if (nrows > inner_rel->rows) + nrows = inner_rel->rows; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); - temp = 0; /* keep compiler quiet */ + nrows = 0; /* keep compiler quiet */ break; } - /* - * Force estimate to be at least one row, to make explain output look - * better and to avoid possible divide-by-zero when interpolating - * cost. Make it an integer, too. - */ - if (temp < 1.0) - temp = 1.0; - else - temp = ceil(temp); + rel->rows = clamp_row_est(nrows); +} + +/* + * join_in_selectivity + * Determines the factor by which a JOIN_IN join's result is expected + * to be smaller than an ordinary inner join. + * + * 'path' is already filled in except for the cost fields + */ +static Selectivity +join_in_selectivity(JoinPath *path, Query *root) +{ + Selectivity selec; + double nrows; - rel->rows = temp; + if (path->jointype != JOIN_IN) + return 1.0; /* - * We need not compute the output width here, because - * build_joinrel_tlist already did. + * Compute same result set_joinrel_size_estimates would compute + * for JOIN_INNER. Note that we use the input rels' absolute size + * estimates, not PATH_ROWS() which might be less; if we used PATH_ROWS() + * we'd be double-counting the effects of any join clauses used in + * input scans. */ + selec = clauselist_selectivity(root, + path->joinrestrictinfo, + 0, + JOIN_INNER); + nrows = path->outerjoinpath->parent->rows * + path->innerjoinpath->parent->rows * selec; + + nrows = clamp_row_est(nrows); + + /* See if it's larger than the actual JOIN_IN size estimate */ + if (nrows > path->path.parent->rows) + return path->path.parent->rows / nrows; + else + return 1.0; } /* @@ -1823,17 +1815,11 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, * The rel's targetlist and restrictinfo list must have been constructed * already. * - * We set the following fields of the rel node: - * rows: the estimated number of output tuples (after applying - * restriction clauses). - * width: the estimated average output tuple width in bytes. - * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses. + * We set the same fields as set_baserel_size_estimates. */ void set_function_size_estimates(Query *root, RelOptInfo *rel) { - double temp; - /* Should only be applied to base relations that are functions */ Assert(rel->relid > 0); Assert(rel->rtekind == RTE_FUNCTION); @@ -1846,28 +1832,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel) */ rel->tuples = 1000; - /* Now estimate number of output rows */ - temp = rel->tuples * - clauselist_selectivity(root, - rel->baserestrictinfo, - 0, - JOIN_INNER); - - /* - * Force estimate to be at least one row, to make explain output look - * better and to avoid possible divide-by-zero when interpolating - * cost. Make it an integer, too. - */ - if (temp < 1.0) - temp = 1.0; - else - temp = ceil(temp); - - rel->rows = temp; - - cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo); - - set_rel_width(root, rel); + /* Now estimate number of output rows, etc */ + set_baserel_size_estimates(root, rel); } diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 0fa02d4f74f..83d129d0c9c 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.153 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.154 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -63,8 +63,7 @@ static bool match_join_clause_to_indexcol(RelOptInfo *rel, IndexOptInfo *index, RestrictInfo *rinfo); static Oid indexable_operator(Expr *clause, Oid opclass, bool indexkey_on_left); -static bool pred_test(List *predicate_list, List *restrictinfo_list, - List *joininfo_list); +static bool pred_test(List *predicate_list, List *restrictinfo_list); static bool pred_test_restrict_list(Expr *predicate, List *restrictinfo_list); static bool pred_test_recurse_clause(Expr *predicate, Node *clause); static bool pred_test_recurse_pred(Expr *predicate, Node *clause); @@ -114,12 +113,12 @@ static Const *string_to_const(const char *str, Oid datatype); * and avoid repeated computation. * * 'rel' is the relation for which we want to generate index paths + * + * Note: check_partial_indexes() must have been run previously. */ void create_index_paths(Query *root, RelOptInfo *rel) { - List *restrictinfo_list = rel->baserestrictinfo; - List *joininfo_list = rel->joininfo; Relids all_join_outerrelids = NULL; List *ilist; @@ -132,16 +131,9 @@ create_index_paths(Query *root, RelOptInfo *rel) bool index_is_ordered; Relids join_outerrelids; - /* - * If this is a partial index, we can only use it if it passes the - * predicate test. - */ - if (index->indpred != NIL) - { - if (!pred_test(index->indpred, restrictinfo_list, joininfo_list)) - continue; - index->predOK = true; /* set flag for orindxpaths.c */ - } + /* Ignore partial indexes that do not match the query */ + if (index->indpred != NIL && !index->predOK) + continue; /* * 1. Match the index against non-OR restriction clauses. @@ -336,7 +328,7 @@ group_clauses_by_indexkey_for_join(Query *root, RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); /* Can't use pushed-down clauses in outer join */ - if (isouterjoin && rinfo->ispusheddown) + if (isouterjoin && rinfo->is_pushed_down) continue; if (match_clause_to_indexcol(rel, @@ -365,7 +357,7 @@ group_clauses_by_indexkey_for_join(Query *root, RestrictInfo *rinfo = (RestrictInfo *) lfirst(j); /* Can't use pushed-down clauses in outer join */ - if (isouterjoin && rinfo->ispusheddown) + if (isouterjoin && rinfo->is_pushed_down) continue; if (match_join_clause_to_indexcol(rel, @@ -737,6 +729,32 @@ indexable_operator(Expr *clause, Oid opclass, bool indexkey_on_left) ****************************************************************************/ /* + * check_partial_indexes + * Check each partial index of the relation, and mark it predOK or not + * depending on whether the predicate is satisfied for this query. + */ +void +check_partial_indexes(Query *root, RelOptInfo *rel) +{ + List *restrictinfo_list = rel->baserestrictinfo; + List *ilist; + + foreach(ilist, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); + + /* + * If this is a partial index, we can only use it if it passes the + * predicate test. + */ + if (index->indpred == NIL) + continue; /* ignore non-partial indexes */ + + index->predOK = pred_test(index->indpred, restrictinfo_list); + } +} + +/* * pred_test * Does the "predicate inclusion test" for partial indexes. * @@ -751,7 +769,7 @@ indexable_operator(Expr *clause, Oid opclass, bool indexkey_on_left) * to CNF format). --Nels, Jan '93 */ static bool -pred_test(List *predicate_list, List *restrictinfo_list, List *joininfo_list) +pred_test(List *predicate_list, List *restrictinfo_list) { List *pred; @@ -1464,8 +1482,7 @@ make_innerjoin_index_path(Query *root, rel->relid, /* do not use 0! */ JOIN_INNER); /* Like costsize.c, force estimate to be at least one row */ - if (pathnode->rows < 1.0) - pathnode->rows = 1.0; + pathnode->rows = clamp_row_est(pathnode->rows); cost_index(&pathnode->path, root, rel, index, indexquals, true); diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 91cd0ab22a5..8a666e80d89 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.84 2003/12/30 23:53:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.85 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -690,7 +690,7 @@ hash_inner_and_outer(Query *root, { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); - if (!restrictinfo->canjoin || + if (!restrictinfo->can_join || restrictinfo->hashjoinoperator == InvalidOid) continue; /* not hashjoinable */ @@ -698,7 +698,7 @@ hash_inner_and_outer(Query *root, * If processing an outer join, only use its own join clauses for * hashing. For inner joins we need not be so picky. */ - if (isouterjoin && restrictinfo->ispusheddown) + if (isouterjoin && restrictinfo->is_pushed_down) continue; /* @@ -804,17 +804,17 @@ select_mergejoin_clauses(RelOptInfo *joinrel, */ if (isouterjoin) { - if (restrictinfo->ispusheddown) + if (restrictinfo->is_pushed_down) continue; switch (jointype) { case JOIN_RIGHT: - if (!restrictinfo->canjoin || + if (!restrictinfo->can_join || restrictinfo->mergejoinoperator == InvalidOid) return NIL; /* not mergejoinable */ break; case JOIN_FULL: - if (!restrictinfo->canjoin || + if (!restrictinfo->can_join || restrictinfo->mergejoinoperator == InvalidOid) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -826,7 +826,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel, } } - if (!restrictinfo->canjoin || + if (!restrictinfo->can_join || restrictinfo->mergejoinoperator == InvalidOid) continue; /* not mergejoinable */ diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c index bea2a8e4161..02e486ec35c 100644 --- a/src/backend/optimizer/path/orindxpath.c +++ b/src/backend/optimizer/path/orindxpath.c @@ -8,20 +8,21 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.55 2004/01/04 00:07:32 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.56 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/restrictinfo.h" -static IndexPath *best_or_subclause_indices(Query *root, RelOptInfo *rel, +static IndexPath *best_or_subclause_indexes(Query *root, RelOptInfo *rel, List *subclauses); static bool best_or_subclause_index(Query *root, RelOptInfo *rel, @@ -32,16 +33,166 @@ static bool best_or_subclause_index(Query *root, Cost *retTotalCost); +/*---------- + * create_or_index_quals + * Examine join OR-of-AND quals to see if any useful restriction OR + * clauses can be extracted. If so, add them to the query. + * + * Although a join clause must reference other relations overall, + * an OR of ANDs clause might contain sub-clauses that reference just this + * relation and can be used to build a restriction clause. + * For example consider + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); + * We can transform this into + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) + * AND (a.x = 42 OR a.x = 44) + * AND (b.y = 43 OR b.z = 45); + * which opens the potential to build OR indexscans on a and b. In essence + * this is a partial transformation to CNF (AND of ORs format). It is not + * complete, however, because we do not unravel the original OR --- doing so + * would usually bloat the qualification expression to little gain. + * + * The added quals are partially redundant with the original OR, and therefore + * will cause the size of the joinrel to be underestimated when it is finally + * formed. (This would be true of a full transformation to CNF as well; the + * fault is not really in the transformation, but in clauselist_selectivity's + * inability to recognize redundant conditions.) To minimize the collateral + * damage, we want to minimize the number of quals added. Therefore we do + * not add every possible extracted restriction condition to the query. + * Instead, we search for the single restriction condition that generates + * the most useful (cheapest) OR indexscan, and add only that condition. + * This is a pretty ad-hoc heuristic, but quite useful. + * + * We can then compensate for the redundancy of the added qual by poking + * the recorded selectivity of the original OR clause, thereby ensuring + * the added qual doesn't change the estimated size of the joinrel when + * it is finally formed. This is a MAJOR HACK: it depends on the fact + * that clause selectivities are cached and on the fact that the same + * RestrictInfo node will appear in every joininfo list that might be used + * when the joinrel is formed. And it probably isn't right in cases where + * the size estimation is nonlinear (i.e., outer and IN joins). But it + * beats not doing anything. + * + * NOTE: one might think this messiness could be worked around by generating + * the indexscan path with a small path->rows value, and not touching the + * rel's baserestrictinfo or rel->rows. However, that does not work. + * The optimizer's fundamental design assumes that every general-purpose + * Path for a given relation generates the same number of rows. Without + * this assumption we'd not be able to optimize solely on the cost of Paths, + * but would have to take number of output rows into account as well. + * (Perhaps someday that'd be worth doing, but it's a pretty big change...) + * + * 'rel' is the relation entry for which quals are to be created + * + * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE. + * If no quals available, returns FALSE and doesn't change rel. + * + * Note: check_partial_indexes() must have been run previously. + *---------- + */ +bool +create_or_index_quals(Query *root, RelOptInfo *rel) +{ + IndexPath *bestpath = NULL; + RestrictInfo *bestrinfo = NULL; + FastList orclauses; + List *orclause; + Expr *indxqual_or_expr; + RestrictInfo *or_rinfo; + Selectivity or_selec, + orig_selec; + List *i; + + /* + * We use the best_or_subclause_indexes() machinery to locate the + * best combination of restriction subclauses. Note we must ignore + * any joinclauses that are not marked valid_everywhere, because they + * cannot be pushed down due to outer-join rules. + */ + foreach(i, rel->joininfo) + { + JoinInfo *joininfo = (JoinInfo *) lfirst(i); + List *j; + + foreach(j, joininfo->jinfo_restrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(j); + + if (restriction_is_or_clause(rinfo) && + rinfo->valid_everywhere) + { + IndexPath *pathnode; + + pathnode = best_or_subclause_indexes(root, + rel, + ((BoolExpr *) rinfo->orclause)->args); + + if (pathnode) + { + if (bestpath == NULL || + pathnode->path.total_cost < bestpath->path.total_cost) + { + bestpath = pathnode; + bestrinfo = rinfo; + } + } + } + } + } + + /* Fail if no suitable clauses found */ + if (bestpath == NULL) + return false; + + /* + * Build an expression representation of the indexqual, expanding + * the implicit OR and AND semantics of the first- and + * second-level lists. + */ + FastListInit(&orclauses); + foreach(orclause, bestpath->indexqual) + FastAppend(&orclauses, make_ands_explicit(lfirst(orclause))); + indxqual_or_expr = make_orclause(FastListValue(&orclauses)); + + /* + * And add it to the rel's restriction list. + */ + or_rinfo = make_restrictinfo(indxqual_or_expr, true, true); + rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo); + + /* + * Adjust the original OR clause's cached selectivity to compensate + * for the selectivity of the added (but redundant) lower-level qual. + * This should result in the join rel getting approximately the same + * rows estimate as it would have gotten without all these shenanigans. + * (XXX major hack alert ... this depends on the assumption that the + * selectivity will stay cached ...) + */ + or_selec = clause_selectivity(root, (Node *) or_rinfo, + 0, JOIN_INNER); + if (or_selec > 0 && or_selec < 1) + { + orig_selec = clause_selectivity(root, (Node *) bestrinfo, + 0, JOIN_INNER); + bestrinfo->this_selec = orig_selec / or_selec; + /* clamp result to sane range */ + if (bestrinfo->this_selec > 1) + bestrinfo->this_selec = 1; + } + + /* Tell caller to recompute rel's rows estimate */ + return true; +} + /* * create_or_index_paths - * Creates multi-scan index paths for indices that match OR clauses. + * Creates multi-scan index paths for indexes that match OR clauses. * * 'rel' is the relation entry for which the paths are to be created * * Returns nothing, but adds paths to rel->pathlist via add_path(). * - * Note: create_index_paths() must have been run already, since it does - * the heavy lifting to determine whether partial indexes may be used. + * Note: check_partial_indexes() must have been run previously. */ void create_or_index_paths(Query *root, RelOptInfo *rel) @@ -60,7 +211,7 @@ create_or_index_paths(Query *root, RelOptInfo *rel) { IndexPath *pathnode; - pathnode = best_or_subclause_indices(root, + pathnode = best_or_subclause_indexes(root, rel, ((BoolExpr *) rinfo->orclause)->args); @@ -68,49 +219,10 @@ create_or_index_paths(Query *root, RelOptInfo *rel) add_path(rel, (Path *) pathnode); } } - - /* - * Also consider join clauses that are ORs. Although a join clause - * must reference other relations overall, an OR of ANDs clause might - * contain sub-clauses that reference just our relation and can be - * used to build a non-join indexscan. For example consider - * WHERE (a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45); - * We could build an OR indexscan on a.x using those subclauses. - * - * XXX don't enable this code quite yet. Although the plans it creates - * are correct, and possibly even useful, we are totally confused about - * the number of rows returned, leading to poor choices of join plans - * above the indexscan. Need to restructure the way join sizes are - * calculated before this will really work. - */ -#ifdef NOT_YET - foreach(i, rel->joininfo) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(i); - List *j; - - foreach(j, joininfo->jinfo_restrictinfo) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(j); - - if (restriction_is_or_clause(rinfo)) - { - IndexPath *pathnode; - - pathnode = best_or_subclause_indices(root, - rel, - ((BoolExpr *) rinfo->orclause)->args); - - if (pathnode) - add_path(rel, (Path *) pathnode); - } - } - } -#endif } /* - * best_or_subclause_indices + * best_or_subclause_indexes * Determine the best index to be used in conjunction with each subclause * of an OR clause, and build a Path for a multi-index scan. * @@ -134,7 +246,7 @@ create_or_index_paths(Query *root, RelOptInfo *rel) * single tuple more than once). */ static IndexPath * -best_or_subclause_indices(Query *root, +best_or_subclause_indexes(Query *root, RelOptInfo *rel, List *subclauses) { @@ -202,7 +314,10 @@ best_or_subclause_indices(Query *root, /* We don't actually care what order the index scans in. */ pathnode->indexscandir = NoMovementScanDirection; - /* XXX this may be wrong when using join OR clauses... */ + /* + * The number of rows is the same as the parent rel's estimate, since + * this isn't a join inner indexscan. + */ pathnode->rows = rel->rows; return pathnode; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 906e1f7f257..d20b967b05c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.161 2003/11/29 19:51:50 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.162 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -643,6 +643,7 @@ create_unique_plan(Query *root, UniquePath *best_path) plan = (Plan *) make_unique(my_tlist, plan, sortList); } + /* Adjust output size estimate (other fields should be OK already) */ plan->plan_rows = best_path->rows; return plan; diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index bcfdb25403c..c4412030cc0 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.96 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.97 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,7 +37,7 @@ static void mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels); static void distribute_qual_to_rels(Query *root, Node *clause, - bool ispusheddown, + bool is_pushed_down, bool isdeduced, Relids outerjoin_nonnullable, Relids qualscope); @@ -356,7 +356,7 @@ mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels) * equijoined vars. * * 'clause': the qual clause to be distributed - * 'ispusheddown': if TRUE, force the clause to be marked 'ispusheddown' + * 'is_pushed_down': if TRUE, force the clause to be marked 'is_pushed_down' * (this indicates the clause came from a FromExpr, not a JoinExpr) * 'isdeduced': TRUE if the qual came from implied-equality deduction * 'outerjoin_nonnullable': NULL if not an outer-join qual, else the set of @@ -365,16 +365,17 @@ mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels) * * 'qualscope' identifies what level of JOIN the qual came from. For a top * level qual (WHERE qual), qualscope lists all baserel ids and in addition - * 'ispusheddown' will be TRUE. + * 'is_pushed_down' will be TRUE. */ static void distribute_qual_to_rels(Query *root, Node *clause, - bool ispusheddown, + bool is_pushed_down, bool isdeduced, Relids outerjoin_nonnullable, Relids qualscope) { Relids relids; + bool valid_everywhere; bool can_be_equijoin; RestrictInfo *restrictinfo; RelOptInfo *rel; @@ -415,6 +416,7 @@ distribute_qual_to_rels(Query *root, Node *clause, * the vars were equal). */ Assert(bms_equal(relids, qualscope)); + valid_everywhere = true; can_be_equijoin = true; } else if (bms_overlap(relids, outerjoin_nonnullable)) @@ -433,6 +435,7 @@ distribute_qual_to_rels(Query *root, Node *clause, * result, so we treat it the same as an ordinary inner-join qual. */ relids = qualscope; + valid_everywhere = false; can_be_equijoin = false; } else @@ -447,18 +450,26 @@ distribute_qual_to_rels(Query *root, Node *clause, * time we are called, the outerjoinset of each baserel will show * exactly those outer joins that are below the qual in the join * tree. + * + * We also need to determine whether the qual is "valid everywhere", + * which is true if the qual mentions no variables that are involved + * in lower-level outer joins (this may be an overly strong test). */ Relids addrelids = NULL; Relids tmprelids; int relno; + valid_everywhere = true; tmprelids = bms_copy(relids); while ((relno = bms_first_member(tmprelids)) >= 0) { RelOptInfo *rel = find_base_rel(root, relno); if (rel->outerjoinset != NULL) + { addrelids = bms_add_members(addrelids, rel->outerjoinset); + valid_everywhere = false; + } } bms_free(tmprelids); @@ -489,13 +500,15 @@ distribute_qual_to_rels(Query *root, Node *clause, * same joinrel. A qual originating from WHERE is always considered * "pushed down". */ - if (!ispusheddown) - ispusheddown = !bms_equal(relids, qualscope); + if (!is_pushed_down) + is_pushed_down = !bms_equal(relids, qualscope); /* * Build the RestrictInfo node itself. */ - restrictinfo = make_restrictinfo((Expr *) clause, ispusheddown); + restrictinfo = make_restrictinfo((Expr *) clause, + is_pushed_down, + valid_everywhere); /* * Figure out where to attach it. diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 55581774f2f..94109ee01e3 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.96 2003/11/29 19:51:51 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.97 2004/01/05 05:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -373,13 +373,6 @@ create_index_path(Query *root, */ pathnode->rows = rel->rows; - /* - * Not sure if this is necessary, but it should help if the statistics - * are too far off - */ - if (index->indpred && index->tuples < pathnode->rows) - pathnode->rows = index->tuples; - cost_index(&pathnode->path, root, rel, index, indexquals, false); return pathnode; @@ -398,6 +391,7 @@ create_tidscan_path(Query *root, RelOptInfo *rel, List *tideval) pathnode->path.pathtype = T_TidScan; pathnode->path.parent = rel; pathnode->path.pathkeys = NIL; + pathnode->tideval = tideval; cost_tidscan(&pathnode->path, root, rel, tideval); diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index c72e2306aca..0ab4a75c5c1 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.23 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.24 2004/01/05 05:07:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,7 +20,8 @@ #include "optimizer/var.h" -static Expr *make_sub_restrictinfos(Expr *clause, bool ispusheddown); +static Expr *make_sub_restrictinfos(Expr *clause, bool is_pushed_down, + bool valid_everywhere); static bool join_clause_is_redundant(Query *root, RestrictInfo *rinfo, List *reference_list, @@ -32,18 +33,22 @@ static bool join_clause_is_redundant(Query *root, * * Build a RestrictInfo node containing the given subexpression. * - * The ispusheddown flag must be supplied by the caller. We initialize - * fields that depend only on the given subexpression, leaving others that - * depend on context (or may never be needed at all) to be filled later. + * The is_pushed_down and valid_everywhere flags must be supplied by the + * caller. + * + * We initialize fields that depend only on the given subexpression, leaving + * others that depend on context (or may never be needed at all) to be filled + * later. */ RestrictInfo * -make_restrictinfo(Expr *clause, bool ispusheddown) +make_restrictinfo(Expr *clause, bool is_pushed_down, bool valid_everywhere) { RestrictInfo *restrictinfo = makeNode(RestrictInfo); restrictinfo->clause = clause; - restrictinfo->ispusheddown = ispusheddown; - restrictinfo->canjoin = false; /* may get set below */ + restrictinfo->is_pushed_down = is_pushed_down; + restrictinfo->valid_everywhere = valid_everywhere; + restrictinfo->can_join = false; /* may get set below */ /* * If it's a binary opclause, set up left/right relids info. @@ -67,7 +72,7 @@ make_restrictinfo(Expr *clause, bool ispusheddown) !bms_is_empty(restrictinfo->right_relids) && !bms_overlap(restrictinfo->left_relids, restrictinfo->right_relids)) - restrictinfo->canjoin = true; + restrictinfo->can_join = true; } else { @@ -84,7 +89,9 @@ make_restrictinfo(Expr *clause, bool ispusheddown) */ if (or_clause((Node *) clause)) { - restrictinfo->orclause = make_sub_restrictinfos(clause, ispusheddown); + restrictinfo->orclause = make_sub_restrictinfos(clause, + is_pushed_down, + valid_everywhere); } else { @@ -126,7 +133,8 @@ make_restrictinfo(Expr *clause, bool ispusheddown) * Recursively insert sub-RestrictInfo nodes into a boolean expression. */ static Expr * -make_sub_restrictinfos(Expr *clause, bool ispusheddown) +make_sub_restrictinfos(Expr *clause, bool is_pushed_down, + bool valid_everywhere) { if (or_clause((Node *) clause)) { @@ -136,7 +144,8 @@ make_sub_restrictinfos(Expr *clause, bool ispusheddown) foreach(temp, ((BoolExpr *) clause)->args) orlist = lappend(orlist, make_sub_restrictinfos(lfirst(temp), - ispusheddown)); + is_pushed_down, + valid_everywhere)); return make_orclause(orlist); } else if (and_clause((Node *) clause)) @@ -147,11 +156,14 @@ make_sub_restrictinfos(Expr *clause, bool ispusheddown) foreach(temp, ((BoolExpr *) clause)->args) andlist = lappend(andlist, make_sub_restrictinfos(lfirst(temp), - ispusheddown)); + is_pushed_down, + valid_everywhere)); return make_andclause(andlist); } else - return (Expr *) make_restrictinfo(clause, ispusheddown); + return (Expr *) make_restrictinfo(clause, + is_pushed_down, + valid_everywhere); } /* @@ -207,7 +219,7 @@ get_actual_join_clauses(List *restrictinfo_list, { RestrictInfo *clause = (RestrictInfo *) lfirst(temp); - if (clause->ispusheddown) + if (clause->is_pushed_down) *otherquals = lappend(*otherquals, clause->clause); else *joinquals = lappend(*joinquals, clause->clause); @@ -348,7 +360,7 @@ join_clause_is_redundant(Query *root, if (refrinfo->mergejoinoperator != InvalidOid && rinfo->left_pathkey == refrinfo->left_pathkey && rinfo->right_pathkey == refrinfo->right_pathkey && - (rinfo->ispusheddown == refrinfo->ispusheddown || + (rinfo->is_pushed_down == refrinfo->is_pushed_down || !IS_OUTER_JOIN(jointype))) { redundant = true; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 010adcc8592..14486591da9 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.90 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.91 2004/01/05 05:07:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -321,6 +321,8 @@ typedef struct Path { NodeTag type; + NodeTag pathtype; /* tag identifying scan/join method */ + RelOptInfo *parent; /* the relation this path can build */ /* estimated execution costs for path (see costsize.c for more info) */ @@ -329,8 +331,6 @@ typedef struct Path Cost total_cost; /* total cost (assuming all tuples * fetched) */ - NodeTag pathtype; /* tag identifying scan/join method */ - List *pathkeys; /* sort ordering of path's output */ /* pathkeys is a List of Lists of PathKeyItem nodes; see above */ } Path; @@ -389,6 +389,9 @@ typedef struct IndexPath /* * TidPath represents a scan by TID + * + * tideval is an implicitly OR'ed list of quals of the form CTID = something. + * Note they are bare quals, not RestrictInfos. */ typedef struct TidPath { @@ -570,13 +573,17 @@ typedef struct HashPath * When we do form the outer join's joinrel, we still need to distinguish * those quals that are actually in that join's JOIN/ON condition from those * that appeared higher in the tree and were pushed down to the join rel - * because they used no other rels. That's what the ispusheddown flag is for; - * it tells us that a qual came from a point above the join of the specific - * set of base rels that it uses (or that the JoinInfo structures claim it - * uses). A clause that originally came from WHERE will *always* have its - * ispusheddown flag set; a clause that came from an INNER JOIN condition, - * but doesn't use all the rels being joined, will also have ispusheddown set - * because it will get attached to some lower joinrel. + * because they used no other rels. That's what the is_pushed_down flag is + * for; it tells us that a qual came from a point above the join of the + * specific set of base rels that it uses (or that the JoinInfo structures + * claim it uses). A clause that originally came from WHERE will *always* + * have its is_pushed_down flag set; a clause that came from an INNER JOIN + * condition, but doesn't use all the rels being joined, will also have + * is_pushed_down set because it will get attached to some lower joinrel. + * + * We also store a valid_everywhere flag, which says that the clause is not + * affected by any lower-level outer join, and therefore any conditions it + * asserts can be presumed true throughout the plan tree. * * In general, the referenced clause might be arbitrarily complex. The * kinds of clauses we can handle as indexscan quals, mergejoin clauses, @@ -602,7 +609,9 @@ typedef struct RestrictInfo Expr *clause; /* the represented clause of WHERE or JOIN */ - bool ispusheddown; /* TRUE if clause was pushed down in level */ + bool is_pushed_down; /* TRUE if clause was pushed down in level */ + + bool valid_everywhere; /* TRUE if valid on every level */ /* * This flag is set true if the clause looks potentially useful as a @@ -611,7 +620,7 @@ typedef struct RestrictInfo * (Whether the operator is actually merge or hash joinable isn't * checked, however.) */ - bool canjoin; + bool can_join; /* The set of relids (varnos) referenced in the clause: */ Relids clause_relids; diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 15540a66643..002edfd91e8 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.59 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.60 2004/01/05 05:07:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -49,6 +49,7 @@ extern bool enable_nestloop; extern bool enable_mergejoin; extern bool enable_hashjoin; +extern double clamp_row_est(double nrows); extern void cost_seqscan(Path *path, Query *root, RelOptInfo *baserel); extern void cost_index(Path *path, Query *root, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 3e60d608020..e1a46bcb3f8 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.71 2004/01/04 00:07:32 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.72 2004/01/05 05:07:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -43,11 +43,13 @@ extern List *group_clauses_by_indexkey_for_or(RelOptInfo *rel, Expr *orsubclause); extern List *expand_indexqual_conditions(IndexOptInfo *index, List *clausegroups); +extern void check_partial_indexes(Query *root, RelOptInfo *rel); /* * orindxpath.c * additional routines for indexable OR clauses */ +extern bool create_or_index_quals(Query *root, RelOptInfo *rel); extern void create_or_index_paths(Query *root, RelOptInfo *rel); /* diff --git a/src/include/optimizer/restrictinfo.h b/src/include/optimizer/restrictinfo.h index 362ed26a61b..022bfa253a3 100644 --- a/src/include/optimizer/restrictinfo.h +++ b/src/include/optimizer/restrictinfo.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.21 2004/01/04 00:07:32 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.22 2004/01/05 05:07:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,7 +16,8 @@ #include "nodes/relation.h" -extern RestrictInfo *make_restrictinfo(Expr *clause, bool ispusheddown); +extern RestrictInfo *make_restrictinfo(Expr *clause, bool is_pushed_down, + bool valid_everywhere); extern bool restriction_is_or_clause(RestrictInfo *restrictinfo); extern List *get_actual_clauses(List *restrictinfo_list); extern void get_actual_join_clauses(List *restrictinfo_list, |