diff options
Diffstat (limited to 'src/backend/optimizer/plan/analyzejoins.c')
-rw-r--r-- | src/backend/optimizer/plan/analyzejoins.c | 295 |
1 files changed, 281 insertions, 14 deletions
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 129fc3dfae6..773f8a458e2 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -22,17 +22,21 @@ */ #include "postgres.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" -#include "optimizer/var.h" +#include "optimizer/tlist.h" +#include "utils/lsyscache.h" /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); +static Oid distinct_col_search(int colno, List *colnos, List *opids); /* @@ -147,18 +151,15 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; + Query *subquery = NULL; Relids joinrelids; List *clause_list = NIL; ListCell *l; int attroff; /* - * Currently, we only know how to remove left joins to a baserel with - * unique indexes. We can check most of these criteria pretty trivially - * to avoid doing useless extra work. But checking whether any of the - * indexes are unique would require iterating over the indexlist, so for - * now we just make sure there are indexes of some sort or other. If none - * of them are unique, join removal will still fail, just slightly later. + * Must be a non-delaying left join to a single baserel, else we aren't + * going to be able to do anything with it. */ if (sjinfo->jointype != JOIN_LEFT || sjinfo->delay_upper_joins || @@ -168,11 +169,39 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) innerrelid = bms_singleton_member(sjinfo->min_righthand); innerrel = find_base_rel(root, innerrelid); - if (innerrel->reloptkind != RELOPT_BASEREL || - innerrel->rtekind != RTE_RELATION || - innerrel->indexlist == NIL) + if (innerrel->reloptkind != RELOPT_BASEREL) return false; + /* + * Before we go to the effort of checking whether any innerrel variables + * are needed above the join, make a quick check to eliminate cases in + * which we will surely be unable to prove uniqueness of the innerrel. + */ + if (innerrel->rtekind == RTE_RELATION) + { + /* + * For a plain-relation innerrel, we only know how to prove uniqueness + * by reference to unique indexes. If there are no indexes then + * there's certainly no unique indexes so there's no point in going + * further. + */ + if (innerrel->indexlist == NIL) + return false; + } + else if (innerrel->rtekind == RTE_SUBQUERY) + { + subquery = root->simple_rte_array[innerrelid]->subquery; + + /* + * If the subquery has no qualities that support distinctness proofs + * then there's no point in going further. + */ + if (!query_supports_distinctness(subquery)) + return false; + } + else + return false; /* unsupported rtekind */ + /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -272,12 +301,64 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) /* * relation_has_unique_index_for automatically adds any usable restriction - * clauses for the innerrel, so we needn't do that here. + * clauses for the innerrel, so we needn't do that here. (XXX we are not + * considering restriction clauses for subqueries; is that worth doing?) */ - /* Now examine the indexes to see if we have a matching unique index */ - if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL)) - return true; + if (innerrel->rtekind == RTE_RELATION) + { + /* Now examine the indexes to see if we have a matching unique index */ + if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL)) + return true; + } + else /* innerrel->rtekind == RTE_SUBQUERY */ + { + List *colnos = NIL; + List *opids = NIL; + + /* + * Build the argument lists for query_is_distinct_for: a list of + * output column numbers that the query needs to be distinct over, and + * a list of equality operators that the output columns need to be + * distinct according to. + */ + foreach(l, clause_list) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Oid op; + Var *var; + + /* + * Get the equality operator we need uniqueness according to. + * (This might be a cross-type operator and thus not exactly the + * same operator the subquery would consider; that's all right + * since query_is_distinct_for can resolve such cases.) The + * mergejoinability test above should have selected only OpExprs. + */ + Assert(IsA(rinfo->clause, OpExpr)); + op = ((OpExpr *) rinfo->clause)->opno; + + /* clause_sides_match_join identified the inner side for us */ + if (rinfo->outer_is_left) + var = (Var *) get_rightop(rinfo->clause); + else + var = (Var *) get_leftop(rinfo->clause); + + /* + * If inner side isn't a Var referencing a subquery output column, + * this clause doesn't help us. + */ + if (!var || !IsA(var, Var) || + var->varno != innerrelid || var->varlevelsup != 0) + continue; + + colnos = lappend_int(colnos, var->varattno); + opids = lappend_oid(opids, op); + } + + if (query_is_distinct_for(subquery, colnos, opids)) + return true; + } /* * Some day it would be nice to check for other methods of establishing @@ -481,3 +562,189 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) return result; } + + +/* + * query_supports_distinctness - could the query possibly be proven distinct + * on some set of output columns? + * + * This is effectively a pre-checking function for query_is_distinct_for(). + * It must return TRUE if query_is_distinct_for() could possibly return TRUE + * with this query, but it should not expend a lot of cycles. The idea is + * that callers can avoid doing possibly-expensive processing to compute + * query_is_distinct_for()'s argument lists if the call could not possibly + * succeed. + */ +bool +query_supports_distinctness(Query *query) +{ + if (query->distinctClause != NIL || + query->groupClause != NIL || + query->hasAggs || + query->havingQual || + query->setOperations) + return true; + + return false; +} + +/* + * query_is_distinct_for - does query never return duplicates of the + * specified columns? + * + * query is a not-yet-planned subquery (in current usage, it's always from + * a subquery RTE, which the planner avoids scribbling on). + * + * colnos is an integer list of output column numbers (resno's). We are + * interested in whether rows consisting of just these columns are certain + * to be distinct. "Distinctness" is defined according to whether the + * corresponding upper-level equality operators listed in opids would think + * the values are distinct. (Note: the opids entries could be cross-type + * operators, and thus not exactly the equality operators that the subquery + * would use itself. We use equality_ops_are_compatible() to check + * compatibility. That looks at btree or hash opfamily membership, and so + * should give trustworthy answers for all operators that we might need + * to deal with here.) + */ +bool +query_is_distinct_for(Query *query, List *colnos, List *opids) +{ + ListCell *l; + Oid opid; + + Assert(list_length(colnos) == list_length(opids)); + + /* + * A set-returning function in the query's targetlist can result in + * returning duplicate rows, if the SRF is evaluated after the + * de-duplication step; so we play it safe and say "no" if there are any + * SRFs. (We could be certain that it's okay if SRFs appear only in the + * specified columns, since those must be evaluated before de-duplication; + * but it doesn't presently seem worth the complication to check that.) + */ + if (expression_returns_set((Node *) query->targetList)) + return false; + + /* + * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the + * columns in the DISTINCT clause appear in colnos and operator semantics + * match. + */ + if (query->distinctClause) + { + foreach(l, query->distinctClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sgc, + query->targetList); + + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !equality_ops_are_compatible(opid, sgc->eqop)) + break; /* exit early if no match */ + } + if (l == NULL) /* had matches for all? */ + return true; + } + + /* + * Similarly, GROUP BY guarantees uniqueness if all the grouped columns + * appear in colnos and operator semantics match. + */ + if (query->groupClause) + { + foreach(l, query->groupClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sgc, + query->targetList); + + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !equality_ops_are_compatible(opid, sgc->eqop)) + break; /* exit early if no match */ + } + if (l == NULL) /* had matches for all? */ + return true; + } + else + { + /* + * If we have no GROUP BY, but do have aggregates or HAVING, then the + * result is at most one row so it's surely unique, for any operators. + */ + if (query->hasAggs || query->havingQual) + return true; + } + + /* + * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, + * except with ALL. + */ + if (query->setOperations) + { + SetOperationStmt *topop = (SetOperationStmt *) query->setOperations; + + Assert(IsA(topop, SetOperationStmt)); + Assert(topop->op != SETOP_NONE); + + if (!topop->all) + { + ListCell *lg; + + /* We're good if all the nonjunk output columns are in colnos */ + lg = list_head(topop->groupClauses); + foreach(l, query->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + SortGroupClause *sgc; + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + /* non-resjunk columns should have grouping clauses */ + Assert(lg != NULL); + sgc = (SortGroupClause *) lfirst(lg); + lg = lnext(lg); + + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !equality_ops_are_compatible(opid, sgc->eqop)) + break; /* exit early if no match */ + } + if (l == NULL) /* had matches for all? */ + return true; + } + } + + /* + * XXX Are there any other cases in which we can easily see the result + * must be distinct? + * + * If you do add more smarts to this function, be sure to update + * query_supports_distinctness() to match. + */ + + return false; +} + +/* + * distinct_col_search - subroutine for query_is_distinct_for + * + * If colno is in colnos, return the corresponding element of opids, + * else return InvalidOid. (Ordinarily colnos would not contain duplicates, + * but if it does, we arbitrarily select the first match.) + */ +static Oid +distinct_col_search(int colno, List *colnos, List *opids) +{ + ListCell *lc1, + *lc2; + + forboth(lc1, colnos, lc2, opids) + { + if (colno == lfirst_int(lc1)) + return lfirst_oid(lc2); + } + return InvalidOid; +} |