diff options
Diffstat (limited to 'src/backend/optimizer/plan/analyzejoins.c')
-rw-r--r-- | src/backend/optimizer/plan/analyzejoins.c | 413 |
1 files changed, 413 insertions, 0 deletions
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c new file mode 100644 index 00000000000..da6482b4c3c --- /dev/null +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -0,0 +1,413 @@ +/*------------------------------------------------------------------------- + * + * analyzejoins.c + * Routines for simplifying joins after initial query analysis + * + * While we do a great deal of join simplification in prep/prepjointree.c, + * certain optimizations cannot be performed at that stage for lack of + * detailed information about the query. The routines here are invoked + * after initsplan.c has done its work, and can do additional join removal + * and simplification steps based on the information extracted. The penalty + * is that we have to work harder to clean up after ourselves when we modify + * the query, since the derived data structures have to be updated too. + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/optimizer/plan/analyzejoins.c,v 1.1 2010/03/28 22:59:32 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" + +/* local functions */ +static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static void remove_rel_from_query(PlannerInfo *root, int relid); +static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); + + +/* + * remove_useless_joins + * Check for relations that don't actually need to be joined at all, + * and remove them from the query. + * + * We are passed the current joinlist and return the updated list. Other + * data structures that have to be updated are accessible via "root". + */ +List * +remove_useless_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + + /* + * We are only interested in relations that are left-joined to, so we + * can scan the join_info_list to find them easily. + */ +restart: + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + int innerrelid; + int nremoved; + + /* Skip if not removable */ + if (!join_is_removable(root, sjinfo)) + continue; + + /* + * Currently, join_is_removable can only succeed when the sjinfo's + * righthand is a single baserel. Remove that rel from the query and + * joinlist. + */ + innerrelid = bms_singleton_member(sjinfo->min_righthand); + + remove_rel_from_query(root, innerrelid); + + /* We verify that exactly one reference gets removed from joinlist */ + nremoved = 0; + joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved); + if (nremoved != 1) + elog(ERROR, "failed to find relation %d in joinlist", innerrelid); + + /* + * We can delete this SpecialJoinInfo from the list too, since it's no + * longer of interest. + */ + root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); + + /* + * Restart the scan. This is necessary to ensure we find all + * removable joins independently of ordering of the join_info_list + * (note that removal of attr_needed bits may make a join appear + * removable that did not before). Also, since we just deleted the + * current list cell, we'd have to have some kluge to continue the + * list scan anyway. + */ + goto restart; + } + + return joinlist; +} + +/* + * clause_sides_match_join + * Determine whether a join clause is of the right form to use in this join. + * + * We already know that the clause is a binary opclause referencing only the + * rels in the current join. The point here is to check whether it has the + * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr", + * rather than mixing outer and inner vars on either side. If it matches, + * we set the transient flag outer_is_left to identify which side is which. + */ +static inline bool +clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, + Relids innerrelids) +{ + if (bms_is_subset(rinfo->left_relids, outerrelids) && + bms_is_subset(rinfo->right_relids, innerrelids)) + { + /* lefthand side is outer */ + rinfo->outer_is_left = true; + return true; + } + else if (bms_is_subset(rinfo->left_relids, innerrelids) && + bms_is_subset(rinfo->right_relids, outerrelids)) + { + /* righthand side is outer */ + rinfo->outer_is_left = false; + return true; + } + return false; /* no good for these input relations */ +} + +/* + * join_is_removable + * Check whether we need not perform this special join at all, because + * it will just duplicate its left input. + * + * This is true for a left join for which the join condition cannot match + * more than one inner-side row. (There are other possibly interesting + * cases, but we don't have the infrastructure to prove them.) We also + * have to check that the inner side doesn't generate any variables needed + * above the join. + */ +static bool +join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) +{ + int innerrelid; + RelOptInfo *innerrel; + Relids joinrelids; + List *clause_list = NIL; + ListCell *l; + int attroff; + + /* + * Currently, we only know how to remove left joins to a baserel with + * unique indexes. We can check most of these criteria pretty trivially + * to avoid doing useless extra work. But checking whether any of the + * indexes are unique would require iterating over the indexlist, so for + * now we just make sure there are indexes of some sort or other. If none + * of them are unique, join removal will still fail, just slightly later. + */ + if (sjinfo->jointype != JOIN_LEFT || + sjinfo->delay_upper_joins || + bms_membership(sjinfo->min_righthand) != BMS_SINGLETON) + return false; + + innerrelid = bms_singleton_member(sjinfo->min_righthand); + innerrel = find_base_rel(root, innerrelid); + + if (innerrel->reloptkind != RELOPT_BASEREL || + innerrel->rtekind != RTE_RELATION || + innerrel->indexlist == NIL) + return false; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + + /* + * We can't remove the join if any inner-rel attributes are used above the + * join. + * + * Note that this test only detects use of inner-rel attributes in higher + * join conditions and the target list. There might be such attributes in + * pushed-down conditions at this join, too. We check that case below. + * + * As a micro-optimization, it seems better to start with max_attr and + * count down rather than starting with min_attr and counting up, on the + * theory that the system attributes are somewhat less likely to be wanted + * and should be tested last. + */ + for (attroff = innerrel->max_attr - innerrel->min_attr; + attroff >= 0; + attroff--) + { + if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids)) + return false; + } + + /* + * Similarly check that the inner rel doesn't produce any PlaceHolderVars + * that will be used above the join. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids) && + !bms_is_subset(phinfo->ph_needed, joinrelids)) + return false; + } + + /* + * Search for mergejoinable clauses that constrain the inner rel against + * either the outer rel or a pseudoconstant. If an operator is + * mergejoinable then it behaves like equality for some btree opclass, so + * it's what we want. The mergejoinability test also eliminates clauses + * containing volatile functions, which we couldn't depend on. + */ + foreach(l, innerrel->joininfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* Ignore clauses not pertinent to this join */ + if (!bms_is_subset(restrictinfo->required_relids, joinrelids)) + continue; + + /* + * If we find a pushed-down clause, it must have come from above the + * outer join and it must contain references to the inner rel. (If it + * had only outer-rel variables, it'd have been pushed down into the + * outer rel.) Therefore, we can conclude that join removal is unsafe + * without any examination of the clause contents. + */ + if (restrictinfo->is_pushed_down) + return false; + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer". + */ + if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand, + innerrel->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Now examine the rel's restriction clauses for var = const clauses */ + foreach(l, innerrel->baserestrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * Note: can_join won't be set for a restriction clause, but + * mergeopfamilies will be if it has a mergejoinable operator and + * doesn't contain volatile functions. + */ + if (restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * The clause certainly doesn't refer to anything but the given rel. + * If either side is pseudoconstant then we can use it. + */ + if (bms_is_empty(restrictinfo->left_relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_empty(restrictinfo->right_relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Now examine the indexes to see if we have a matching unique index */ + if (relation_has_unique_index_for(root, innerrel, clause_list)) + return true; + + /* + * Some day it would be nice to check for other methods of establishing + * distinctness. + */ + return false; +} + + +/* + * Remove the target relid from the planner's data structures, having + * determined that there is no need to include it in the query. + * + * We are not terribly thorough here. We must make sure that the rel is + * no longer treated as a baserel, and that attributes of other baserels + * are no longer marked as being needed at joins involving this rel. + * In particular, we don't bother removing join quals involving the rel from + * the joininfo lists; they'll just get ignored, since we will never form a + * join relation at which they could be evaluated. + */ +static void +remove_rel_from_query(PlannerInfo *root, int relid) +{ + RelOptInfo *rel = find_base_rel(root, relid); + Index rti; + ListCell *l; + + /* + * Mark the rel as "dead" to show it is no longer part of the join tree. + * (Removing it from the baserel array altogether seems too risky.) + */ + rel->reloptkind = RELOPT_DEADREL; + + /* + * Remove references to the rel from other baserels' attr_needed arrays. + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *otherrel = root->simple_rel_array[rti]; + int attroff; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (otherrel == NULL) + continue; + + Assert(otherrel->relid == rti); /* sanity check on array */ + + /* no point in processing target rel itself */ + if (otherrel == rel) + continue; + + for (attroff = otherrel->max_attr - otherrel->min_attr; + attroff >= 0; + attroff--) + { + otherrel->attr_needed[attroff] = + bms_del_member(otherrel->attr_needed[attroff], relid); + } + } + + /* + * Likewise remove references from PlaceHolderVar data structures. + * + * Here we have a special case: if a PHV's eval_at set is just the target + * relid, we want to leave it that way instead of reducing it to the empty + * set. An empty eval_at set would confuse later processing since it + * would match every possible eval placement. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid); + if (bms_is_empty(phinfo->ph_eval_at)) /* oops, belay that */ + phinfo->ph_eval_at = bms_add_member(phinfo->ph_eval_at, relid); + + phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid); + } +} + +/* + * Remove any occurrences of the target relid from a joinlist structure. + * + * It's easiest to build a whole new list structure, so we handle it that + * way. Efficiency is not a big deal here. + * + * *nremoved is incremented by the number of occurrences removed (there + * should be exactly one, but the caller checks that). + */ +static List * +remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) +{ + List *result = NIL; + ListCell *jl; + + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; + + if (varno == relid) + (*nremoved)++; + else + result = lappend(result, jlnode); + } + else if (IsA(jlnode, List)) + { + /* Recurse to handle subproblem */ + List *sublist; + + sublist = remove_rel_from_joinlist((List *) jlnode, + relid, nremoved); + /* Avoid including empty sub-lists in the result */ + if (sublist) + result = lappend(result, sublist); + } + else + { + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + } + } + + return result; +} |