diff options
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/nodes/outfuncs.c | 15 | ||||
-rw-r--r-- | src/backend/optimizer/README | 3 | ||||
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 6 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 82 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 196 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 7 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 22 |
7 files changed, 324 insertions, 7 deletions
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 454a8d08867..0a7aa250dbb 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.363 2009/07/30 02:45:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.364 2009/09/17 20:49:28 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1422,6 +1422,16 @@ _outUniquePath(StringInfo str, UniquePath *node) } static void +_outNoOpPath(StringInfo str, NoOpPath *node) +{ + WRITE_NODE_TYPE("NOOPPATH"); + + _outPathInfo(str, (Path *) node); + + WRITE_NODE_FIELD(subpath); +} + +static void _outNestPath(StringInfo str, NestPath *node) { WRITE_NODE_TYPE("NESTPATH"); @@ -2634,6 +2644,9 @@ _outNode(StringInfo str, void *obj) case T_UniquePath: _outUniquePath(str, obj); break; + case T_NoOpPath: + _outNoOpPath(str, obj); + break; case T_NestPath: _outNestPath(str, obj); break; diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index f406e642db3..560e42981a5 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.50 2009/07/21 02:02:44 tgl Exp $ +$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.51 2009/09/17 20:49:28 tgl Exp $ Optimizer ========= @@ -354,6 +354,7 @@ RelOptInfo - a relation or joined relations NestPath - nested-loop joins MergePath - merge joins HashPath - hash joins + NoOpPath - same as its input path (used when a join is removed) EquivalenceClass - a data structure representing a set of values known equal diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index ada9140552e..5b7f0ff0e3f 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.185 2009/09/02 17:52:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.186 2009/09/17 20:49:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1387,6 +1387,10 @@ print_path(PlannerInfo *root, Path *path, int indent) ptype = "Unique"; subpath = ((UniquePath *) path)->subpath; break; + case T_NoOpPath: + ptype = "NoOp"; + subpath = ((NoOpPath *) path)->subpath; + break; case T_NestPath: ptype = "NestLoop"; join = true; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 3930acf05a7..7bb5d8993cc 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.241 2009/08/04 16:08:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.242 2009/09/17 20:49:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1918,6 +1918,86 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel, return clause_list; } +/* + * relation_has_unique_index_for + * Determine whether the relation provably has at most one row satisfying + * a set of equality conditions, because the conditions constrain all + * columns of some unique index. + * + * The conditions are provided as a list of RestrictInfo nodes, where the + * caller has already determined that each condition is a mergejoinable + * equality with an expression in this relation on one side, and an + * expression not involving this relation on the other. The transient + * outer_is_left flag is used to identify which side we should look at: + * left side if outer_is_left is false, right side if it is true. + */ +bool +relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, + List *restrictlist) +{ + ListCell *ic; + + /* Short-circuit the easy case */ + if (restrictlist == NIL) + return false; + + /* Examine each index of the relation ... */ + foreach(ic, rel->indexlist) + { + IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic); + int c; + + /* + * If the index is not unique or if it's a partial index that doesn't + * match the query, it's useless here. + */ + if (!ind->unique || (ind->indpred != NIL && !ind->predOK)) + continue; + + /* + * Try to find each index column in the list of conditions. This is + * O(n^2) or worse, but we expect all the lists to be short. + */ + for (c = 0; c < ind->ncolumns; c++) + { + ListCell *lc; + + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *rexpr; + + /* + * The condition's equality operator must be a member of the + * index opfamily, else it is not asserting the right kind + * of equality behavior for this index. We check this first + * since it's probably cheaper than match_index_to_operand(). + */ + if (!list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c])) + continue; + + /* OK, see if the condition operand matches the index key */ + if (rinfo->outer_is_left) + rexpr = get_rightop(rinfo->clause); + else + rexpr = get_leftop(rinfo->clause); + + if (match_index_to_operand(rexpr, c, ind)) + break; /* found a match; column is unique */ + } + + if (lc == NULL) + break; /* no match; this index doesn't help us */ + } + + /* Matched all columns of this index? */ + if (c == ind->ncolumns) + return true; + } + + return false; +} + /**************************************************************************** * ---- PATH CREATION UTILITIES ---- diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 269c4824b6a..827e18a5028 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.123 2009/09/12 22:12:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.124 2009/09/17 20:49:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,11 @@ #include "optimizer/paths.h" +static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, JoinType jointype); +static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel); static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, @@ -79,10 +84,25 @@ add_paths_to_joinrel(PlannerInfo *root, List *mergeclause_list = NIL; /* + * 0. Consider join removal. This is always the most efficient strategy, + * so if it works, there's no need to consider anything further. + */ + if (join_is_removable(root, joinrel, outerrel, innerrel, + restrictlist, jointype)) + { + generate_outer_only(root, joinrel, outerrel); + return; + } + + /* * Find potential mergejoin clauses. We can skip this if we are not * interested in doing a mergejoin. However, mergejoin is currently our * only way of implementing full outer joins, so override mergejoin * disable if it's a full join. + * + * Note: do this after join_is_removable(), because this sets the + * outer_is_left flags in the mergejoin clauses, while join_is_removable + * uses those flags for its own purposes. */ if (enable_mergejoin || jointype == JOIN_FULL) mergeclause_list = select_mergejoin_clauses(root, @@ -134,6 +154,180 @@ add_paths_to_joinrel(PlannerInfo *root, } /* + * join_is_removable + * Determine whether we need not perform the join at all, because + * it will just duplicate its left input. + * + * This is true for a left join for which the join condition cannot match + * more than one inner-side row. (There are other possibly interesting + * cases, but we don't have the infrastructure to prove them.) + * + * Note: there is no need to consider the symmetrical case of duplicating the + * right input, because add_paths_to_joinrel() will be called with each rel + * on the outer side. + */ +static bool +join_is_removable(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype) +{ + List *clause_list = NIL; + ListCell *l; + int attroff; + + /* + * Currently, we only know how to remove left joins to a baserel with + * unique indexes. We can check most of these criteria pretty trivially + * to avoid doing useless extra work. But checking whether any of the + * indexes are unique would require iterating over the indexlist, so for + * now we just make sure there are indexes of some sort or other. If none + * of them are unique, join removal will still fail, just slightly later. + */ + if (jointype != JOIN_LEFT || + innerrel->reloptkind == RELOPT_JOINREL || + innerrel->rtekind != RTE_RELATION || + innerrel->indexlist == NIL) + return false; + + /* + * We can't remove the join if any inner-rel attributes are used above + * the join. + * + * As a micro-optimization, it seems better to start with max_attr and + * count down rather than starting with min_attr and counting up, on the + * theory that the system attributes are somewhat less likely to be wanted + * and should be tested last. + */ + for (attroff = innerrel->max_attr - innerrel->min_attr; + attroff >= 0; + attroff--) + { + if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids)) + return false; + } + + /* + * Search for mergejoinable clauses that constrain the inner rel against + * either the outer rel or a pseudoconstant. If an operator is + * mergejoinable then it behaves like equality for some btree opclass, + * so it's what we want. The mergejoinability test also eliminates + * clauses containing volatile functions, which we couldn't depend on. + */ + foreach(l, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * We are always considering an outer join here, so ignore pushed-down + * clauses. Also ignore anything that doesn't have a mergejoinable + * operator. + */ + if (restrictinfo->is_pushed_down) + continue; + + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause is usable with these input rels. All the vars + * needed on each side of the clause must be available from one or the + * other of the input rels. + */ + if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) && + bms_is_subset(restrictinfo->right_relids, innerrel->relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) && + bms_is_subset(restrictinfo->right_relids, outerrel->relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Now examine the rel's restriction clauses for var = const clauses */ + foreach(l, innerrel->baserestrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * Note: can_join won't be set for a restriction clause, but + * mergeopfamilies will be if it has a mergejoinable operator + * and doesn't contain volatile functions. + */ + if (restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * The clause certainly doesn't refer to anything but the given + * rel. If either side is pseudoconstant then we can use it. + */ + if (bms_is_empty(restrictinfo->left_relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_empty(restrictinfo->right_relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Now examine the indexes to see if we have a matching unique index */ + if (relation_has_unique_index_for(root, innerrel, clause_list)) + return true; + + /* + * Some day it would be nice to check for other methods of establishing + * distinctness. + */ + return false; +} + +/* + * generate_outer_only + * Generate "join" paths when we have found the join is removable. + */ +static void +generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel) +{ + ListCell *lc; + + /* + * For the moment, replicate all of the outerrel's paths as join paths. + * Some of them might not really be interesting above the join, if they + * have sort orderings that have no real use except to do a mergejoin + * for the join we've just found we don't need. But distinguishing that + * case probably isn't worth the extra code it would take. + */ + foreach(lc, outerrel->pathlist) + { + Path *outerpath = (Path *) lfirst(lc); + + add_path(joinrel, (Path *) + create_noop_path(root, joinrel, outerpath)); + } +} + +/* * sort_inner_and_outer * Create mergejoin join paths by explicitly sorting both the outer and * inner join relations on each available merge ordering. diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ac9b96229a1..0bb53d33089 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.262 2009/09/12 22:12:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.263 2009/09/17 20:49:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -164,6 +164,11 @@ create_plan(PlannerInfo *root, Path *best_path) case T_WorkTableScan: plan = create_scan_plan(root, best_path); break; + case T_Join: + /* this is only used for no-op joins */ + Assert(IsA(best_path, NoOpPath)); + plan = create_plan(root, ((NoOpPath *) best_path)->subpath); + break; case T_HashJoin: case T_MergeJoin: case T_NestLoop: diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e2de24956a2..e9733892cc2 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.153 2009/09/12 22:12:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.154 2009/09/17 20:49:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1216,6 +1216,26 @@ distinct_col_search(int colno, List *colnos, List *opids) } /* + * create_noop_path + * Creates a path equivalent to the input subpath, but having a different + * parent rel. This is used when a join is found to be removable. + */ +NoOpPath * +create_noop_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) +{ + NoOpPath *pathnode = makeNode(NoOpPath); + + pathnode->path.pathtype = T_Join; /* by convention */ + pathnode->path.parent = rel; + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost; + pathnode->path.pathkeys = subpath->pathkeys; + pathnode->subpath = subpath; + + return pathnode; +} + +/* * create_subqueryscan_path * Creates a path corresponding to a sequential scan of a subquery, * returning the pathnode. |