diff options
-rw-r--r-- | doc/src/sgml/config.sgml | 16 | ||||
-rw-r--r-- | src/backend/optimizer/path/equivclass.c | 3 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 39 | ||||
-rw-r--r-- | src/backend/optimizer/plan/analyzejoins.c | 1240 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planmain.c | 5 | ||||
-rw-r--r-- | src/backend/optimizer/prep/prepunion.c | 9 | ||||
-rw-r--r-- | src/backend/rewrite/rewriteManip.c | 126 | ||||
-rw-r--r-- | src/backend/utils/misc/guc_tables.c | 10 | ||||
-rw-r--r-- | src/include/nodes/pathnodes.h | 40 | ||||
-rw-r--r-- | src/include/optimizer/optimizer.h | 2 | ||||
-rw-r--r-- | src/include/optimizer/paths.h | 3 | ||||
-rw-r--r-- | src/include/optimizer/planmain.h | 6 | ||||
-rw-r--r-- | src/include/rewrite/rewriteManip.h | 4 | ||||
-rw-r--r-- | src/test/regress/expected/equivclass.out | 30 | ||||
-rw-r--r-- | src/test/regress/expected/join.out | 1083 | ||||
-rw-r--r-- | src/test/regress/expected/sysviews.out | 3 | ||||
-rw-r--r-- | src/test/regress/sql/equivclass.sql | 16 | ||||
-rw-r--r-- | src/test/regress/sql/join.sql | 494 | ||||
-rw-r--r-- | src/tools/pgindent/typedefs.list | 2 |
19 files changed, 2983 insertions, 148 deletions
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 60829b79d83..336630ce417 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -5544,6 +5544,22 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class=" </listitem> </varlistentry> + <varlistentry id="guc-enable_self_join_elimination" xreflabel="enable_self_join_elimination"> + <term><varname>enable_self_join_elimination</varname> (<type>boolean</type>) + <indexterm> + <primary><varname>enable_self_join_elimination</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Enables or disables the query planner's optimization which analyses + the query tree and replaces self joins with semantically equivalent + single scans. Takes into consideration only plain tables. + The default is <literal>on</literal>. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-enable-seqscan" xreflabel="enable_seqscan"> <term><varname>enable_seqscan</varname> (<type>boolean</type>) <indexterm> diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 7cafaca33c5..0f9ecf5ee8b 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -852,7 +852,8 @@ find_computable_ec_member(PlannerInfo *root, exprvars = pull_var_clause((Node *) exprs, PVC_INCLUDE_AGGREGATES | PVC_INCLUDE_WINDOWFUNCS | - PVC_INCLUDE_PLACEHOLDERS); + PVC_INCLUDE_PLACEHOLDERS | + PVC_INCLUDE_CONVERTROWTYPES); foreach(lc, ec->ec_members) { diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 6e2051efc65..a43ca16d683 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -4163,6 +4163,22 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, List *exprlist, List *oprlist) { + return relation_has_unique_index_ext(root, rel, restrictlist, + exprlist, oprlist, NULL); +} + +/* + * relation_has_unique_index_ext + * Same as relation_has_unique_index_for(), but supports extra_clauses + * parameter. If extra_clauses isn't NULL, return baserestrictinfo clauses + * which were used to derive uniqueness. + */ +bool +relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, + List *restrictlist, + List *exprlist, List *oprlist, + List **extra_clauses) +{ ListCell *ic; Assert(list_length(exprlist) == list_length(oprlist)); @@ -4217,6 +4233,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, { IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic); int c; + List *exprs = NIL; /* * If the index is not unique, or not immediately enforced, or if it's @@ -4268,6 +4285,24 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, if (match_index_to_operand(rexpr, c, ind)) { matched = true; /* column is unique */ + + if (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) + { + MemoryContext oldMemCtx = + MemoryContextSwitchTo(root->planner_cxt); + + /* + * Add filter clause into a list allowing caller to + * know if uniqueness have made not only by join + * clauses. + */ + Assert(bms_is_empty(rinfo->left_relids) || + bms_is_empty(rinfo->right_relids)); + if (extra_clauses) + exprs = lappend(exprs, rinfo); + MemoryContextSwitchTo(oldMemCtx); + } + break; } } @@ -4310,7 +4345,11 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, /* Matched all key columns of this index? */ if (c == ind->nkeycolumns) + { + if (extra_clauses) + *extra_clauses = exprs; return true; + } } return false; diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index b33fc671775..3aa04d0d4e1 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -22,6 +22,7 @@ */ #include "postgres.h" +#include "catalog/pg_class.h" #include "nodes/nodeFuncs.h" #include "optimizer/joininfo.h" #include "optimizer/optimizer.h" @@ -30,27 +31,48 @@ #include "optimizer/placeholder.h" #include "optimizer/planmain.h" #include "optimizer/restrictinfo.h" +#include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" +/* + * Utility structure. A sorting procedure is needed to simplify the search + * of SJE-candidate baserels referencing the same database relation. Having + * collected all baserels from the query jointree, the planner sorts them + * according to the reloid value, groups them with the next pass and attempts + * to remove self-joins. + * + * Preliminary sorting prevents quadratic behavior that can be harmful in the + * case of numerous joins. + */ +typedef struct +{ + int relid; + Oid reloid; +} SelfJoinCandidate; + +bool enable_self_join_elimination; + /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); -static void remove_rel_from_query(PlannerInfo *root, int relid, - SpecialJoinInfo *sjinfo); +static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid, + SpecialJoinInfo *sjinfo); static void remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid); static void remove_rel_from_eclass(EquivalenceClass *ec, - int relid, int ojrelid); + int relid, int ojrelid, int subst); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, - List *clause_list); + List *clause_list, List **extra_clauses); static Oid distinct_col_search(int colno, List *colnos, List *opids); static bool is_innerrel_unique_for(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist); + List *restrictlist, + List **extra_clauses); +static int self_join_candidates_cmp(const void *a, const void *b); /* @@ -88,7 +110,7 @@ restart: */ innerrelid = bms_singleton_member(sjinfo->min_righthand); - remove_rel_from_query(root, innerrelid, sjinfo); + remove_leftjoinrel_from_query(root, innerrelid, sjinfo); /* We verify that exactly one reference gets removed from joinlist */ nremoved = 0; @@ -276,7 +298,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * Now that we have the relevant equality join clauses, try to prove the * innerrel distinct. */ - if (rel_is_distinct_for(root, innerrel, clause_list)) + if (rel_is_distinct_for(root, innerrel, clause_list, NULL)) return true; /* @@ -288,36 +310,31 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) /* - * Remove the target relid and references to the target join from the + * Remove the target rel->relid and references to the target join from the * planner's data structures, having determined that there is no need - * to include them in the query. + * to include them in the query. Optionally replace them with subst if subst + * is non-negative. * - * We are not terribly thorough here. We only bother to update parts of - * the planner's data structures that will actually be consulted later. + * This function updates only parts needed for both left-join removal and + * self-join removal. */ static void -remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) +remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel, + int subst, SpecialJoinInfo *sjinfo, + Relids joinrelids) { - RelOptInfo *rel = find_base_rel(root, relid); - int ojrelid = sjinfo->ojrelid; - Relids joinrelids; - Relids join_plus_commute; - List *joininfos; + int relid = rel->relid; + int ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1; Index rti; ListCell *l; - /* Compute the relid set for the join we are considering */ - joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); - Assert(ojrelid != 0); - joinrelids = bms_add_member(joinrelids, ojrelid); - /* * Update all_baserels and related relid sets. */ - root->all_baserels = bms_del_member(root->all_baserels, relid); - root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid); - root->all_query_rels = bms_del_member(root->all_query_rels, relid); - root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid); + root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst); + root->outer_join_rels = adjust_relid_set(root->outer_join_rels, ojrelid, subst); + root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst); + root->all_query_rels = adjust_relid_set(root->all_query_rels, ojrelid, subst); /* * Likewise remove references from SpecialJoinInfo data structures. @@ -341,20 +358,33 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) sjinf->min_righthand = bms_copy(sjinf->min_righthand); sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand); sjinf->syn_righthand = bms_copy(sjinf->syn_righthand); - /* Now remove relid and ojrelid bits from the sets: */ - sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid); - sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid); - sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid); - sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid); - sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid); - sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid); - sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid); - sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid); - /* relid cannot appear in these fields, but ojrelid can: */ - sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid); - sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid); - sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid); - sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid); + /* Now remove relid from the sets: */ + sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst); + sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst); + sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst); + sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst); + + if (sjinfo != NULL) + { + Assert(subst <= 0 && ojrelid > 0); + + /* Remove ojrelid bits from the sets: */ + sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid); + sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid); + sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid); + sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid); + /* relid cannot appear in these fields, but ojrelid can: */ + sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid); + sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid); + sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid); + sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid); + } + else + { + Assert(subst > 0 && ojrelid == -1); + + ChangeVarNodes((Node *) sjinf->semi_rhs_exprs, relid, subst, 0); + } } /* @@ -375,10 +405,10 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) { PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); - Assert(!bms_is_member(relid, phinfo->ph_lateral)); + Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral)); if (bms_is_subset(phinfo->ph_needed, joinrelids) && bms_is_member(relid, phinfo->ph_eval_at) && - !bms_is_member(ojrelid, phinfo->ph_eval_at)) + (sjinfo == NULL || !bms_is_member(ojrelid, phinfo->ph_eval_at))) { root->placeholder_list = foreach_delete_current(root->placeholder_list, l); @@ -388,22 +418,113 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) { PlaceHolderVar *phv = phinfo->ph_var; - phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid); - phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid); + phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst); + phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, ojrelid, subst); Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */ /* Reduce ph_needed to contain only "relation 0"; see below */ if (bms_is_member(0, phinfo->ph_needed)) phinfo->ph_needed = bms_make_singleton(0); else phinfo->ph_needed = NULL; - phv->phrels = bms_del_member(phv->phrels, relid); - phv->phrels = bms_del_member(phv->phrels, ojrelid); + + phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst); + + /* + * ph_lateral might contain rels mentioned in ph_eval_at after the + * replacement, remove them. + */ + phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at); + /* ph_lateral might or might not be empty */ + + phv->phrels = adjust_relid_set(phv->phrels, relid, subst); + phv->phrels = adjust_relid_set(phv->phrels, ojrelid, subst); Assert(!bms_is_empty(phv->phrels)); + + ChangeVarNodes((Node *) phv->phexpr, relid, subst, 0); + Assert(phv->phnullingrels == NULL); /* no need to adjust */ } } /* + * Likewise remove references from EquivalenceClasses. + */ + foreach(l, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(l); + + if (bms_is_member(relid, ec->ec_relids) || + (sjinfo == NULL || bms_is_member(ojrelid, ec->ec_relids))) + remove_rel_from_eclass(ec, relid, ojrelid, subst); + } + + /* + * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar + * ph_needed relid sets. These have to be known accurately, else we may + * fail to remove other now-removable outer joins. And our removal of the + * join clause(s) for this outer join may mean that Vars that were + * formerly needed no longer are. So we have to do this honestly by + * repeating the construction of those relid sets. We can cheat to one + * small extent: we can avoid re-examining the targetlist and HAVING qual + * by preserving "relation 0" bits from the existing relid sets. This is + * safe because we'd never remove such references. + * + * So, start by removing all other bits from attr_needed sets and + * lateral_vars lists. (We already did this above for ph_needed.) + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *otherrel = root->simple_rel_array[rti]; + int attroff; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (otherrel == NULL) + continue; + + Assert(otherrel->relid == rti); /* sanity check on array */ + + for (attroff = otherrel->max_attr - otherrel->min_attr; + attroff >= 0; + attroff--) + { + if (bms_is_member(0, otherrel->attr_needed[attroff])) + otherrel->attr_needed[attroff] = bms_make_singleton(0); + else + otherrel->attr_needed[attroff] = NULL; + } + + if (subst > 0) + ChangeVarNodes((Node *) otherrel->lateral_vars, relid, subst, 0); + } +} + +/* + * Remove the target relid and references to the target join from the + * planner's data structures, having determined that there is no need + * to include them in the query. + * + * We are not terribly thorough here. We only bother to update parts of + * the planner's data structures that will actually be consulted later. + */ +static void +remove_leftjoinrel_from_query(PlannerInfo *root, int relid, + SpecialJoinInfo *sjinfo) +{ + RelOptInfo *rel = find_base_rel(root, relid); + int ojrelid = sjinfo->ojrelid; + Relids joinrelids; + Relids join_plus_commute; + List *joininfos; + ListCell *l; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + Assert(ojrelid != 0); + joinrelids = bms_add_member(joinrelids, ojrelid); + + remove_rel_from_query(root, rel, -1, sjinfo, joinrelids); + + /* * Remove any joinquals referencing the rel from the joininfo lists. * * In some cases, a joinqual has to be put back after deleting its @@ -466,18 +587,6 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) } /* - * Likewise remove references from EquivalenceClasses. - */ - foreach(l, root->eq_classes) - { - EquivalenceClass *ec = (EquivalenceClass *) lfirst(l); - - if (bms_is_member(relid, ec->ec_relids) || - bms_is_member(ojrelid, ec->ec_relids)) - remove_rel_from_eclass(ec, relid, ojrelid); - } - - /* * There may be references to the rel in root->fkey_list, but if so, * match_foreign_keys_to_quals() will get rid of them. */ @@ -493,42 +602,6 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) pfree(rel); /* - * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar - * ph_needed relid sets. These have to be known accurately, else we may - * fail to remove other now-removable outer joins. And our removal of the - * join clause(s) for this outer join may mean that Vars that were - * formerly needed no longer are. So we have to do this honestly by - * repeating the construction of those relid sets. We can cheat to one - * small extent: we can avoid re-examining the targetlist and HAVING qual - * by preserving "relation 0" bits from the existing relid sets. This is - * safe because we'd never remove such references. - * - * So, start by removing all other bits from attr_needed sets. (We - * already did this above for ph_needed.) - */ - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *otherrel = root->simple_rel_array[rti]; - int attroff; - - /* there may be empty slots corresponding to non-baserel RTEs */ - if (otherrel == NULL) - continue; - - Assert(otherrel->relid == rti); /* sanity check on array */ - - for (attroff = otherrel->max_attr - otherrel->min_attr; - attroff >= 0; - attroff--) - { - if (bms_is_member(0, otherrel->attr_needed[attroff])) - otherrel->attr_needed[attroff] = bms_make_singleton(0); - else - otherrel->attr_needed[attroff] = NULL; - } - } - - /* * Now repeat construction of attr_needed bits coming from all other * sources. */ @@ -607,13 +680,13 @@ remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid) * level(s). */ static void -remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid) +remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid, int subst) { ListCell *lc; /* Fix up the EC's overall relids */ - ec->ec_relids = bms_del_member(ec->ec_relids, relid); - ec->ec_relids = bms_del_member(ec->ec_relids, ojrelid); + ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst); + ec->ec_relids = adjust_relid_set(ec->ec_relids, ojrelid, subst); /* * Fix up the member expressions. Any non-const member that ends with @@ -625,11 +698,11 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid) EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); if (bms_is_member(relid, cur_em->em_relids) || - bms_is_member(ojrelid, cur_em->em_relids)) + (ojrelid != -1 && bms_is_member(ojrelid, cur_em->em_relids))) { Assert(!cur_em->em_is_const); - cur_em->em_relids = bms_del_member(cur_em->em_relids, relid); - cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid); + cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst); + cur_em->em_relids = adjust_relid_set(cur_em->em_relids, ojrelid, subst); if (bms_is_empty(cur_em->em_relids)) ec->ec_members = foreach_delete_current(ec->ec_members, lc); } @@ -640,7 +713,10 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); - remove_rel_from_restrictinfo(rinfo, relid, ojrelid); + if (ojrelid == -1) + ChangeVarNodes((Node *) rinfo, relid, subst, 0); + else + remove_rel_from_restrictinfo(rinfo, relid, ojrelid); } /* @@ -844,9 +920,15 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) * Note that the passed-in clause_list may be destructively modified! This * is OK for current uses, because the clause_list is built by the caller for * the sole purpose of passing to this function. + * + * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses, + * looking like "x = const" if distinctness is derived from such clauses, not + * joininfo clauses. Pass NULL to the extra_clauses if this value is not + * needed. */ static bool -rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list, + List **extra_clauses) { /* * We could skip a couple of tests here if we assume all callers checked @@ -859,10 +941,11 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) { /* * Examine the indexes to see if we have a matching unique index. - * relation_has_unique_index_for automatically adds any usable + * relation_has_unique_index_ext automatically adds any usable * restriction clauses for the rel, so we needn't do that here. */ - if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) + if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL, + extra_clauses)) return true; } else if (rel->rtekind == RTE_SUBQUERY) @@ -1177,8 +1260,34 @@ innerrel_is_unique(PlannerInfo *root, List *restrictlist, bool force_cache) { + return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel, + jointype, restrictlist, force_cache, NULL); +} + +/* + * innerrel_is_unique_ext + * Do the same as innerrel_is_unique(), but also set to (*extra_clauses) + * additional clauses from a baserestrictinfo list used to prove the + * uniqueness. + * + * A non-NULL extra_clauses indicates that we're checking for self-join and + * correspondingly dealing with filtered clauses. + */ +bool +innerrel_is_unique_ext(PlannerInfo *root, + Relids joinrelids, + Relids outerrelids, + RelOptInfo *innerrel, + JoinType jointype, + List *restrictlist, + bool force_cache, + List **extra_clauses) +{ MemoryContext old_context; ListCell *lc; + UniqueRelInfo *uniqueRelInfo; + List *outer_exprs = NIL; + bool self_join = (extra_clauses != NULL); /* Certainly can't prove uniqueness when there are no joinclauses */ if (restrictlist == NIL) @@ -1193,17 +1302,28 @@ innerrel_is_unique(PlannerInfo *root, /* * Query the cache to see if we've managed to prove that innerrel is - * unique for any subset of this outerrel. We don't need an exact match, - * as extra outerrels can't make the innerrel any less unique (or more - * formally, the restrictlist for a join to a superset outerrel must be a - * superset of the conditions we successfully used before). + * unique for any subset of this outerrel. For non-self-join search, we + * don't need an exact match, as extra outerrels can't make the innerrel + * any less unique (or more formally, the restrictlist for a join to a + * superset outerrel must be a superset of the conditions we successfully + * used before). For self-join search, we require an exact match of + * outerrels because we need extra clauses to be valid for our case. Also, + * for self-join checking we've filtered the clauses list. Thus, we can + * match only the result cached for a self-join search for another + * self-join check. */ foreach(lc, innerrel->unique_for_rels) { - Relids unique_for_rels = (Relids) lfirst(lc); + uniqueRelInfo = (UniqueRelInfo *) lfirst(lc); - if (bms_is_subset(unique_for_rels, outerrelids)) + if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) || + (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) && + uniqueRelInfo->self_join)) + { + if (extra_clauses) + *extra_clauses = uniqueRelInfo->extra_clauses; return true; /* Success! */ + } } /* @@ -1220,7 +1340,8 @@ innerrel_is_unique(PlannerInfo *root, /* No cached information, so try to make the proof. */ if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel, - jointype, restrictlist)) + jointype, restrictlist, + self_join ? &outer_exprs : NULL)) { /* * Cache the positive result for future probes, being sure to keep it @@ -1233,10 +1354,16 @@ innerrel_is_unique(PlannerInfo *root, * supersets of them anyway. */ old_context = MemoryContextSwitchTo(root->planner_cxt); + uniqueRelInfo = makeNode(UniqueRelInfo); + uniqueRelInfo->outerrelids = bms_copy(outerrelids); + uniqueRelInfo->self_join = self_join; + uniqueRelInfo->extra_clauses = outer_exprs; innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, - bms_copy(outerrelids)); + uniqueRelInfo); MemoryContextSwitchTo(old_context); + if (extra_clauses) + *extra_clauses = outer_exprs; return true; /* Success! */ } else @@ -1282,7 +1409,8 @@ is_innerrel_unique_for(PlannerInfo *root, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist) + List *restrictlist, + List **extra_clauses) { List *clause_list = NIL; ListCell *lc; @@ -1312,17 +1440,895 @@ is_innerrel_unique_for(PlannerInfo *root, continue; /* not mergejoinable */ /* - * Check if clause has the form "outer op inner" or "inner op outer", - * and if so mark which side is inner. + * Check if the clause has the form "outer op inner" or "inner op + * outer", and if so mark which side is inner. */ if (!clause_sides_match_join(restrictinfo, outerrelids, innerrel->relids)) continue; /* no good for these input relations */ - /* OK, add to list */ + /* OK, add to the list */ clause_list = lappend(clause_list, restrictinfo); } /* Let rel_is_distinct_for() do the hard work */ - return rel_is_distinct_for(root, innerrel, clause_list); + return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses); +} + +/* + * Update EC members to point to the remaining relation instead of the removed + * one, removing duplicates. + * + * Restriction clauses for base relations are already distributed to + * the respective baserestrictinfo lists (see + * generate_implied_equalities_for_column). The above code has already processed + * this list and updated these clauses to reference the remaining + * relation, so that we can skip them here based on their relids. + * + * Likewise, we have already processed the join clauses that join the + * removed relation to the remaining one. + * + * Finally, there might be join clauses tying the removed relation to + * some third relation. We can't just delete the source clauses and + * regenerate them from the EC because the corresponding equality + * operators might be missing (see the handling of ec_broken). + * Therefore, we will update the references in the source clauses. + * + * Derived clauses can be generated again, so it is simpler just to + * delete them. + */ +static void +update_eclasses(EquivalenceClass *ec, int from, int to) +{ + List *new_members = NIL; + List *new_sources = NIL; + + foreach_node(EquivalenceMember, em, ec->ec_members) + { + bool is_redundant = false; + + if (!bms_is_member(from, em->em_relids)) + { + new_members = lappend(new_members, em); + continue; + } + + em->em_relids = adjust_relid_set(em->em_relids, from, to); + em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to); + + /* We only process inner joins */ + ChangeVarNodes((Node *) em->em_expr, from, to, 0); + + foreach_node(EquivalenceMember, other, new_members) + { + if (!equal(em->em_relids, other->em_relids)) + continue; + + if (equal(em->em_expr, other->em_expr)) + { + is_redundant = true; + break; + } + } + + if (!is_redundant) + new_members = lappend(new_members, em); + } + + list_free(ec->ec_members); + ec->ec_members = new_members; + + list_free(ec->ec_derives); + ec->ec_derives = NULL; + + /* Update EC source expressions */ + foreach_node(RestrictInfo, rinfo, ec->ec_sources) + { + bool is_redundant = false; + + if (!bms_is_member(from, rinfo->required_relids)) + { + new_sources = lappend(new_sources, rinfo); + continue; + } + + ChangeVarNodes((Node *) rinfo, from, to, 0); + + /* + * After switching the clause to the remaining relation, check it for + * redundancy with existing ones. We don't have to check for + * redundancy with derived clauses, because we've just deleted them. + */ + foreach_node(RestrictInfo, other, new_sources) + { + if (!equal(rinfo->clause_relids, other->clause_relids)) + continue; + + if (equal(rinfo->clause, other->clause)) + { + is_redundant = true; + break; + } + } + + if (!is_redundant) + new_sources = lappend(new_sources, rinfo); + } + + list_free(ec->ec_sources); + ec->ec_sources = new_sources; + ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to); +} + +/* + * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field, + * which makes almost every RestrictInfo unique. This type of comparison is + * useful when removing duplicates while moving RestrictInfo's from removed + * relation to remaining relation during self-join elimination. + * + * XXX: In the future, we might remove the 'rinfo_serial' field completely and + * get rid of this function. + */ +static bool +restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b) +{ + int saved_rinfo_serial = a->rinfo_serial; + bool result; + + a->rinfo_serial = b->rinfo_serial; + result = equal(a, b); + a->rinfo_serial = saved_rinfo_serial; + + return result; +} + +/* + * This function adds all non-redundant clauses to the keeping relation + * during self-join elimination. That is a contradictory operation. On the + * one hand, we reduce the length of the `restrict` lists, which can + * impact planning or executing time. Additionally, we improve the + * accuracy of cardinality estimation. On the other hand, it is one more + * place that can make planning time much longer in specific cases. It + * would have been better to avoid calling the equal() function here, but + * it's the only way to detect duplicated inequality expressions. + * + * (*keep_rinfo_list) is given by pointer because it might be altered by + * distribute_restrictinfo_to_rels(). + */ +static void +add_non_redundant_clauses(PlannerInfo *root, + List *rinfo_candidates, + List **keep_rinfo_list, + Index removed_relid) +{ + foreach_node(RestrictInfo, rinfo, rinfo_candidates) + { + bool is_redundant = false; + + Assert(!bms_is_member(removed_relid, rinfo->required_relids)); + + foreach_node(RestrictInfo, src, (*keep_rinfo_list)) + { + if (!bms_equal(src->clause_relids, rinfo->clause_relids)) + /* Can't compare trivially different clauses */ + continue; + + if (src == rinfo || + (rinfo->parent_ec != NULL && + src->parent_ec == rinfo->parent_ec) || + restrict_infos_logically_equal(rinfo, src)) + { + is_redundant = true; + break; + } + } + if (!is_redundant) + distribute_restrictinfo_to_rels(root, rinfo); + } +} + +/* + * Remove a relation after we have proven that it participates only in an + * unneeded unique self-join. + * + * Replace any links in planner info structures. + * + * Transfer join and restriction clauses from the removed relation to the + * remaining one. We change the Vars of the clause to point to the + * remaining relation instead of the removed one. The clauses that require + * a subset of joinrelids become restriction clauses of the remaining + * relation, and others remain join clauses. We append them to + * baserestrictinfo and joininfo, respectively, trying not to introduce + * duplicates. + * + * We also have to process the 'joinclauses' list here, because it + * contains EC-derived join clauses which must become filter clauses. It + * is not enough to just correct the ECs because the EC-derived + * restrictions are generated before join removal (see + * generate_base_implied_equalities). + * + * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all + * cached relids and relid bitmapsets can be correctly cleaned during the + * self-join elimination procedure. + */ +static void +remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, + RelOptInfo *toKeep, RelOptInfo *toRemove, + List *restrictlist) +{ + List *joininfos; + ListCell *lc; + int i; + List *jinfo_candidates = NIL; + List *binfo_candidates = NIL; + + Assert(toKeep->relid > 0); + Assert(toRemove->relid > 0); + + /* + * Replace the index of the removing table with the keeping one. The + * technique of removing/distributing restrictinfo is used here to attach + * just appeared (for keeping relation) join clauses and avoid adding + * duplicates of those that already exist in the joininfo list. + */ + joininfos = list_copy(toRemove->joininfo); + foreach_node(RestrictInfo, rinfo, joininfos) + { + remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); + ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0); + + if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE) + jinfo_candidates = lappend(jinfo_candidates, rinfo); + else + binfo_candidates = lappend(binfo_candidates, rinfo); + } + + /* + * Concatenate restrictlist to the list of base restrictions of the + * removing table just to simplify the replacement procedure: all of them + * weren't connected to any keeping relations and need to be added to some + * rels. + */ + toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo, + restrictlist); + foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo) + { + ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0); + + if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE) + jinfo_candidates = lappend(jinfo_candidates, rinfo); + else + binfo_candidates = lappend(binfo_candidates, rinfo); + } + + /* + * Now, add all non-redundant clauses to the keeping relation. + */ + add_non_redundant_clauses(root, binfo_candidates, + &toKeep->baserestrictinfo, toRemove->relid); + add_non_redundant_clauses(root, jinfo_candidates, + &toKeep->joininfo, toRemove->relid); + + list_free(binfo_candidates); + list_free(jinfo_candidates); + + /* + * Arrange equivalence classes, mentioned removing a table, with the + * keeping one: varno of removing table should be replaced in members and + * sources lists. Also, remove duplicated elements if this replacement + * procedure created them. + */ + i = -1; + while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0) + { + EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i); + + update_eclasses(ec, toRemove->relid, toKeep->relid); + toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i); + } + + /* + * Transfer the targetlist and attr_needed flags. + */ + + foreach(lc, toRemove->reltarget->exprs) + { + Node *node = lfirst(lc); + + ChangeVarNodes(node, toRemove->relid, toKeep->relid, 0); + if (!list_member(toKeep->reltarget->exprs, node)) + toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node); + } + + for (i = toKeep->min_attr; i <= toKeep->max_attr; i++) + { + int attno = i - toKeep->min_attr; + + toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno], + toRemove->relid, toKeep->relid); + toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno], + toRemove->attr_needed[attno]); + } + + /* + * If the removed relation has a row mark, transfer it to the remaining + * one. + * + * If both rels have row marks, just keep the one corresponding to the + * remaining relation because we verified earlier that they have the same + * strength. + */ + if (rmark) + { + if (kmark) + { + Assert(kmark->markType == rmark->markType); + + root->rowMarks = list_delete_ptr(root->rowMarks, rmark); + } + else + { + /* Shouldn't have inheritance children here. */ + Assert(rmark->rti == rmark->prti); + + rmark->rti = rmark->prti = toKeep->relid; + } + } + + /* + * Replace varno in all the query structures, except nodes RangeTblRef + * otherwise later remove_rel_from_joinlist will yield errors. + */ + ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid, 0, false); + + /* Replace links in the planner info */ + remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL); + + /* At last, replace varno in root targetlist and HAVING clause */ + ChangeVarNodes((Node *) root->processed_tlist, toRemove->relid, toKeep->relid, 0); + ChangeVarNodes((Node *) root->processed_groupClause, toRemove->relid, toKeep->relid, 0); + + adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid); + adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid); + + /* + * There may be references to the rel in root->fkey_list, but if so, + * match_foreign_keys_to_quals() will get rid of them. + */ + + /* + * Finally, remove the rel from the baserel array to prevent it from being + * referenced again. (We can't do this earlier because + * remove_join_clause_from_rels will touch it.) + */ + root->simple_rel_array[toRemove->relid] = NULL; + + /* And nuke the RelOptInfo, just in case there's another access path. */ + pfree(toRemove); + + /* + * Now repeat construction of attr_needed bits coming from all other + * sources. + */ + rebuild_placeholder_attr_needed(root); + rebuild_joinclause_attr_needed(root); + rebuild_eclass_attr_needed(root); + rebuild_lateral_attr_needed(root); +} + +/* + * split_selfjoin_quals + * Processes 'joinquals' by building two lists: one containing the quals + * where the columns/exprs are on either side of the join match and + * another one containing the remaining quals. + * + * 'joinquals' must only contain quals for a RTE_RELATION being joined to + * itself. + */ +static void +split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals, + List **otherjoinquals, int from, int to) +{ + List *sjoinquals = NIL; + List *ojoinquals = NIL; + + foreach_node(RestrictInfo, rinfo, joinquals) + { + OpExpr *expr; + Node *leftexpr; + Node *rightexpr; + + /* In general, clause looks like F(arg1) = G(arg2) */ + if (!rinfo->mergeopfamilies || + bms_num_members(rinfo->clause_relids) != 2 || + bms_membership(rinfo->left_relids) != BMS_SINGLETON || + bms_membership(rinfo->right_relids) != BMS_SINGLETON) + { + ojoinquals = lappend(ojoinquals, rinfo); + continue; + } + + expr = (OpExpr *) rinfo->clause; + + if (!IsA(expr, OpExpr) || list_length(expr->args) != 2) + { + ojoinquals = lappend(ojoinquals, rinfo); + continue; + } + + leftexpr = get_leftop(rinfo->clause); + rightexpr = copyObject(get_rightop(rinfo->clause)); + + if (leftexpr && IsA(leftexpr, RelabelType)) + leftexpr = (Node *) ((RelabelType *) leftexpr)->arg; + if (rightexpr && IsA(rightexpr, RelabelType)) + rightexpr = (Node *) ((RelabelType *) rightexpr)->arg; + + /* + * Quite an expensive operation, narrowing the use case. For example, + * when we have cast of the same var to different (but compatible) + * types. + */ + ChangeVarNodes(rightexpr, bms_singleton_member(rinfo->right_relids), + bms_singleton_member(rinfo->left_relids), 0); + + if (equal(leftexpr, rightexpr)) + sjoinquals = lappend(sjoinquals, rinfo); + else + ojoinquals = lappend(ojoinquals, rinfo); + } + + *selfjoinquals = sjoinquals; + *otherjoinquals = ojoinquals; +} + +/* + * Check for a case when uniqueness is at least partly derived from a + * baserestrictinfo clause. In this case, we have a chance to return only + * one row (if such clauses on both sides of SJ are equal) or nothing (if they + * are different). + */ +static bool +match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses, + Index relid) +{ + foreach_node(RestrictInfo, rinfo, uclauses) + { + Expr *clause; + Node *iclause; + Node *c1; + bool matched = false; + + Assert(outer->relid > 0 && relid > 0); + + /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */ + Assert(bms_is_empty(rinfo->left_relids) ^ + bms_is_empty(rinfo->right_relids)); + + clause = (Expr *) copyObject(rinfo->clause); + ChangeVarNodes((Node *) clause, relid, outer->relid, 0); + + iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) : + get_leftop(clause); + c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) : + get_rightop(clause); + + /* + * Compare these left and right sides with the corresponding sides of + * the outer's filters. If no one is detected - return immediately. + */ + foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo) + { + Node *oclause; + Node *c2; + + if (orinfo->mergeopfamilies == NIL) + /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */ + continue; + + Assert(is_opclause(orinfo->clause)); + + oclause = bms_is_empty(orinfo->left_relids) ? + get_rightop(orinfo->clause) : get_leftop(orinfo->clause); + c2 = (bms_is_empty(orinfo->left_relids) ? + get_leftop(orinfo->clause) : get_rightop(orinfo->clause)); + + if (equal(iclause, oclause) && equal(c1, c2)) + { + matched = true; + break; + } + } + + if (!matched) + return false; + } + + return true; +} + +/* + * Find and remove unique self-joins in a group of base relations that have + * the same Oid. + * + * Returns a set of relids that were removed. + */ +static Relids +remove_self_joins_one_group(PlannerInfo *root, Relids relids) +{ + Relids result = NULL; + int k; /* Index of kept relation */ + int r = -1; /* Index of removed relation */ + + while ((r = bms_next_member(relids, r)) > 0) + { + RelOptInfo *inner = root->simple_rel_array[r]; + + k = r; + + while ((k = bms_next_member(relids, k)) > 0) + { + Relids joinrelids = NULL; + RelOptInfo *outer = root->simple_rel_array[k]; + List *restrictlist; + List *selfjoinquals; + List *otherjoinquals; + ListCell *lc; + bool jinfo_check = true; + PlanRowMark *omark = NULL; + PlanRowMark *imark = NULL; + List *uclauses = NIL; + + /* A sanity check: the relations have the same Oid. */ + Assert(root->simple_rte_array[k]->relid == + root->simple_rte_array[r]->relid); + + /* + * It is impossible to eliminate the join of two relations if they + * belong to different rules of order. Otherwise, the planner + * can't find any variants of the correct query plan. + */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc); + + if ((bms_is_member(k, info->syn_lefthand) ^ + bms_is_member(r, info->syn_lefthand)) || + (bms_is_member(k, info->syn_righthand) ^ + bms_is_member(r, info->syn_righthand))) + { + jinfo_check = false; + break; + } + } + if (!jinfo_check) + continue; + + /* + * Check Row Marks equivalence. We can't remove the join if the + * relations have row marks of different strength (e.g., one is + * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for + * EvalPlanQual rechecking). + */ + foreach(lc, root->rowMarks) + { + PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc); + + if (rowMark->rti == k) + { + Assert(imark == NULL); + imark = rowMark; + } + else if (rowMark->rti == r) + { + Assert(omark == NULL); + omark = rowMark; + } + + if (omark && imark) + break; + } + if (omark && imark && omark->markType != imark->markType) + continue; + + /* + * We only deal with base rels here, so their relids bitset + * contains only one member -- their relid. + */ + joinrelids = bms_add_member(joinrelids, r); + joinrelids = bms_add_member(joinrelids, k); + + /* + * PHVs should not impose any constraints on removing self-joins. + */ + + /* + * At this stage, joininfo lists of inner and outer can contain + * only clauses required for a superior outer join that can't + * influence this optimization. So, we can avoid to call the + * build_joinrel_restrictlist() routine. + */ + restrictlist = generate_join_implied_equalities(root, joinrelids, + inner->relids, + outer, NULL); + if (restrictlist == NIL) + continue; + + /* + * Process restrictlist to separate the self-join quals from the + * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to + * otherjoinquals. + */ + split_selfjoin_quals(root, restrictlist, &selfjoinquals, + &otherjoinquals, inner->relid, outer->relid); + + Assert(list_length(restrictlist) == + (list_length(selfjoinquals) + list_length(otherjoinquals))); + + /* + * To enable SJE for the only degenerate case without any self + * join clauses at all, add baserestrictinfo to this list. The + * degenerate case works only if both sides have the same clause. + * So doesn't matter which side to add. + */ + selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo); + + /* + * Determine if the inner table can duplicate outer rows. We must + * bypass the unique rel cache here since we're possibly using a + * subset of join quals. We can use 'force_cache' == true when all + * join quals are self-join quals. Otherwise, we could end up + * putting false negatives in the cache. + */ + if (!innerrel_is_unique_ext(root, joinrelids, inner->relids, + outer, JOIN_INNER, selfjoinquals, + list_length(otherjoinquals) == 0, + &uclauses)) + continue; + + /* + * 'uclauses' is the copy of outer->baserestrictinfo that are + * associated with an index. We proved by matching selfjoinquals + * to a unique index that the outer relation has at most one + * matching row for each inner row. Sometimes that is not enough. + * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the + * unique index is (a,b). Having non-empty uclauses, we must + * validate that the inner baserestrictinfo contains the same + * expressions, or we won't match the same row on each side of the + * join. + */ + if (!match_unique_clauses(root, inner, uclauses, outer->relid)) + continue; + + /* + * We can remove either relation, so remove the inner one in order + * to simplify this loop. + */ + remove_self_join_rel(root, omark, imark, outer, inner, restrictlist); + + result = bms_add_member(result, r); + + /* We have removed the outer relation, try the next one. */ + break; + } + } + + return result; +} + +/* + * Gather indexes of base relations from the joinlist and try to eliminate self + * joins. + */ +static Relids +remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove) +{ + ListCell *jl; + Relids relids = NULL; + SelfJoinCandidate *candidates = NULL; + int i; + int j; + int numRels; + + /* Collect indexes of base relations of the join tree */ + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; + RangeTblEntry *rte = root->simple_rte_array[varno]; + + /* + * We only consider ordinary relations as candidates to be + * removed, and these relations should not have TABLESAMPLE + * clauses specified. Removing a relation with TABLESAMPLE clause + * could potentially change the syntax of the query. Because of + * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or + * Query->mergeTargetRelation associated rel cannot be eliminated. + */ + if (rte->rtekind == RTE_RELATION && + rte->relkind == RELKIND_RELATION && + rte->tablesample == NULL && + varno != root->parse->resultRelation && + varno != root->parse->mergeTargetRelation) + { + Assert(!bms_is_member(varno, relids)); + relids = bms_add_member(relids, varno); + } + } + else if (IsA(jlnode, List)) + { + /* Recursively go inside the sub-joinlist */ + toRemove = remove_self_joins_recurse(root, (List *) jlnode, + toRemove); + } + else + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + } + + numRels = bms_num_members(relids); + + /* Need at least two relations for the join */ + if (numRels < 2) + return toRemove; + + /* + * In order to find relations with the same oid we first build an array of + * candidates and then sort it by oid. + */ + candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) * + numRels); + i = -1; + j = 0; + while ((i = bms_next_member(relids, i)) >= 0) + { + candidates[j].relid = i; + candidates[j].reloid = root->simple_rte_array[i]->relid; + j++; + } + + qsort(candidates, numRels, sizeof(SelfJoinCandidate), + self_join_candidates_cmp); + + /* + * Iteratively form a group of relation indexes with the same oid and + * launch the routine that detects self-joins in this group and removes + * excessive range table entries. + * + * At the end of the iteration, exclude the group from the overall relids + * list. So each next iteration of the cycle will involve less and less + * value of relids. + */ + i = 0; + for (j = 1; j < numRels + 1; j++) + { + if (j == numRels || candidates[j].reloid != candidates[i].reloid) + { + if (j - i >= 2) + { + /* Create a group of relation indexes with the same oid */ + Relids group = NULL; + Relids removed; + + while (i < j) + { + group = bms_add_member(group, candidates[i].relid); + i++; + } + relids = bms_del_members(relids, group); + + /* + * Try to remove self-joins from a group of identical entries. + * Make the next attempt iteratively - if something is deleted + * from a group, changes in clauses and equivalence classes + * can give us a chance to find more candidates. + */ + do + { + Assert(!bms_overlap(group, toRemove)); + removed = remove_self_joins_one_group(root, group); + toRemove = bms_add_members(toRemove, removed); + group = bms_del_members(group, removed); + } while (!bms_is_empty(removed) && + bms_membership(group) == BMS_MULTIPLE); + bms_free(removed); + bms_free(group); + } + else + { + /* Single relation, just remove it from the set */ + relids = bms_del_member(relids, candidates[i].relid); + i = j; + } + } + } + + Assert(bms_is_empty(relids)); + + return toRemove; +} + +/* + * Compare self-join candidates by their oids. + */ +static int +self_join_candidates_cmp(const void *a, const void *b) +{ + const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a; + const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b; + + if (ca->reloid != cb->reloid) + return (ca->reloid < cb->reloid ? -1 : 1); + else + return 0; +} + +/* + * Find and remove useless self joins. + * + * Search for joins where a relation is joined to itself. If the join clause + * for each tuple from one side of the join is proven to match the same + * physical row (or nothing) on the other side, that self-join can be + * eliminated from the query. Suitable join clauses are assumed to be in the + * form of X = X, and can be replaced with NOT NULL clauses. + * + * For the sake of simplicity, we don't apply this optimization to special + * joins. Here is a list of what we could do in some particular cases: + * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins, + * and then removed normally. + * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND + * (IS NULL on join columns OR NOT inner quals)'. + * 'a a1 left join a a2': could simplify to a scan like inner but without + * NOT NULL conditions on join columns. + * 'a a1 left join (a a2 join b)': can't simplify this, because join to b + * can both remove rows and introduce duplicates. + * + * To search for removable joins, we order all the relations on their Oid, + * go over each set with the same Oid, and consider each pair of relations + * in this set. + * + * To remove the join, we mark one of the participating relations as dead + * and rewrite all references to it to point to the remaining relation. + * This includes modifying RestrictInfos, EquivalenceClasses, and + * EquivalenceMembers. We also have to modify the row marks. The join clauses + * of the removed relation become either restriction or join clauses, based on + * whether they reference any relations not participating in the removed join. + * + * 'joinlist' is the top-level joinlist of the query. If it has any + * references to the removed relations, we update them to point to the + * remaining ones. + */ +List * +remove_useless_self_joins(PlannerInfo *root, List *joinlist) +{ + Relids toRemove = NULL; + int relid = -1; + + if (!enable_self_join_elimination || joinlist == NIL || + (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List))) + return joinlist; + + /* + * Merge pairs of relations participated in self-join. Remove unnecessary + * range table entries. + */ + toRemove = remove_self_joins_recurse(root, joinlist, toRemove); + + if (unlikely(toRemove != NULL)) + { + /* At the end, remove orphaned relation links */ + while ((relid = bms_next_member(toRemove, relid)) >= 0) + { + int nremoved = 0; + + joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved); + if (nremoved != 1) + elog(ERROR, "failed to find relation %d in joinlist", relid); + } + } + + return joinlist; } diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index ade23fd9d56..5467e094ca7 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -234,6 +234,11 @@ query_planner(PlannerInfo *root, reduce_unique_semijoins(root); /* + * Remove self joins on a unique column. + */ + joinlist = remove_useless_self_joins(root, joinlist); + + /* * Now distribute "placeholders" to base rels as needed. This has to be * done after join removal because removal could change whether a * placeholder is evaluable at a base rel. diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 7c27dc24e21..eab44da65b8 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -639,14 +639,17 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, * otherwise do statistical estimation. * * XXX you don't really want to know about this: we do the estimation - * using the subquery's original targetlist expressions, not the + * using the subroot->parse's original targetlist expressions, not the * subroot->processed_tlist which might seem more appropriate. The reason * is that if the subquery is itself a setop, it may return a * processed_tlist containing "varno 0" Vars generated by * generate_append_tlist, and those would confuse estimate_num_groups * mightily. We ought to get rid of the "varno 0" hack, but that requires * a redesign of the parsetree representation of setops, so that there can - * be an RTE corresponding to each setop's output. + * be an RTE corresponding to each setop's output. Note, we use this not + * subquery's targetlist but subroot->parse's targetlist, because it was + * revised by self-join removal. subquery's targetlist might contain the + * references to the removed relids. */ if (pNumGroups) { @@ -659,7 +662,7 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, *pNumGroups = rel->cheapest_total_path->rows; else *pNumGroups = estimate_num_groups(subroot, - get_tlist_exprs(subquery->targetList, false), + get_tlist_exprs(subroot->parse->targetList, false), rel->cheapest_total_path->rows, NULL, NULL); diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index a115b217c91..9433548d279 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -64,7 +64,6 @@ static bool locate_windowfunc_walker(Node *node, locate_windowfunc_context *context); static bool checkExprHasSubLink_walker(Node *node, void *context); static Relids offset_relid_set(Relids relids, int offset); -static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid); static Node *add_nulling_relids_mutator(Node *node, add_nulling_relids_context *context); static Node *remove_nulling_relids_mutator(Node *node, @@ -543,6 +542,8 @@ offset_relid_set(Relids relids, int offset) * (identified by sublevels_up and rt_index), and change their varno fields * to 'new_index'. The varnosyn fields are changed too. Also, adjust other * nodes that contain rangetable indexes, such as RangeTblRef and JoinExpr. + * Specifying 'change_RangeTblRef' to false allows skipping RangeTblRef. + * See ChangeVarNodesExtended for details. * * NOTE: although this has the form of a walker, we cheat and modify the * nodes in-place. The given expression tree should have been copied @@ -554,6 +555,7 @@ typedef struct int rt_index; int new_index; int sublevels_up; + bool change_RangeTblRef; } ChangeVarNodes_context; static bool @@ -586,7 +588,7 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context) cexpr->cvarno = context->new_index; return false; } - if (IsA(node, RangeTblRef)) + if (IsA(node, RangeTblRef) && context->change_RangeTblRef) { RangeTblRef *rtr = (RangeTblRef *) node; @@ -633,6 +635,75 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context) } return false; } + if (IsA(node, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) node; + int relid = -1; + bool is_req_equal = + (rinfo->required_relids == rinfo->clause_relids); + bool clause_relids_is_multiple = + (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); + + if (bms_is_member(context->rt_index, rinfo->clause_relids)) + { + expression_tree_walker((Node *) rinfo->clause, ChangeVarNodes_walker, (void *) context); + expression_tree_walker((Node *) rinfo->orclause, ChangeVarNodes_walker, (void *) context); + + rinfo->clause_relids = + adjust_relid_set(rinfo->clause_relids, context->rt_index, context->new_index); + rinfo->num_base_rels = bms_num_members(rinfo->clause_relids); + rinfo->left_relids = + adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index); + rinfo->right_relids = + adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index); + } + + if (is_req_equal) + rinfo->required_relids = rinfo->clause_relids; + else + rinfo->required_relids = + adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index); + + rinfo->outer_relids = + adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index); + rinfo->incompatible_relids = + adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index); + + if (rinfo->mergeopfamilies && + bms_get_singleton_member(rinfo->clause_relids, &relid) && + clause_relids_is_multiple && + relid == context->new_index && IsA(rinfo->clause, OpExpr)) + { + Expr *leftOp; + Expr *rightOp; + + leftOp = (Expr *) get_leftop(rinfo->clause); + rightOp = (Expr *) get_rightop(rinfo->clause); + + /* + * For self-join elimination, changing varnos could transform + * "t1.a = t2.a" into "t1.a = t1.a". That is always true as long + * as "t1.a" is not null. We use qual() to check for such a case, + * and then we replace the qual for a check for not null + * (NullTest). + */ + if (leftOp != NULL && equal(leftOp, rightOp)) + { + NullTest *ntest = makeNode(NullTest); + + ntest->arg = leftOp; + ntest->nulltesttype = IS_NOT_NULL; + ntest->argisrow = false; + ntest->location = -1; + rinfo->clause = (Expr *) ntest; + rinfo->mergeopfamilies = NIL; + rinfo->left_em = NULL; + rinfo->right_em = NULL; + } + Assert(rinfo->orclause == NULL); + } + return false; + } if (IsA(node, AppendRelInfo)) { AppendRelInfo *appinfo = (AppendRelInfo *) node; @@ -665,32 +736,32 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context) return expression_tree_walker(node, ChangeVarNodes_walker, context); } +/* + * ChangeVarNodesExtended - similar to ChangeVarNodes, but has additional + * 'change_RangeTblRef' param + * + * ChangeVarNodes changes a given node and all of its underlying nodes. + * However, self-join elimination (SJE) needs to skip the RangeTblRef node + * type. During SJE's last step, remove_rel_from_joinlist() removes + * remaining RangeTblRefs with target relid. If ChangeVarNodes() replaces + * the target relid before, remove_rel_from_joinlist() fails to identify + * the nodes to delete. + */ void -ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up) +ChangeVarNodesExtended(Node *node, int rt_index, int new_index, + int sublevels_up, bool change_RangeTblRef) { ChangeVarNodes_context context; context.rt_index = rt_index; context.new_index = new_index; context.sublevels_up = sublevels_up; + context.change_RangeTblRef = change_RangeTblRef; - /* - * Must be prepared to start with a Query or a bare expression tree; if - * it's a Query, go straight to query_tree_walker to make sure that - * sublevels_up doesn't get incremented prematurely. - */ if (node && IsA(node, Query)) { Query *qry = (Query *) node; - /* - * If we are starting at a Query, and sublevels_up is zero, then we - * must also fix rangetable indexes in the Query itself --- namely - * resultRelation, mergeTargetRelation, exclRelIndex and rowMarks - * entries. sublevels_up cannot be zero when recursing into a - * subquery, so there's no need to have the same logic inside - * ChangeVarNodes_walker. - */ if (sublevels_up == 0) { ListCell *l; @@ -701,7 +772,6 @@ ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up) if (qry->mergeTargetRelation == rt_index) qry->mergeTargetRelation = new_index; - /* this is unlikely to ever be used, but ... */ if (qry->onConflict && qry->onConflict->exclRelIndex == rt_index) qry->onConflict->exclRelIndex = new_index; @@ -719,15 +789,22 @@ ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up) ChangeVarNodes_walker(node, &context); } +void +ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up) +{ + ChangeVarNodesExtended(node, rt_index, new_index, sublevels_up, true); +} + /* - * Substitute newrelid for oldrelid in a Relid set + * adjust_relid_set - substitute newrelid for oldrelid in a Relid set * - * Note: some extensions may pass a special varno such as INDEX_VAR for - * oldrelid. bms_is_member won't like that, but we should tolerate it. - * (Perhaps newrelid could also be a special varno, but there had better - * not be a reason to inject that into a nullingrels or phrels set.) + * Attempt to remove oldrelid from a Relid set (as long as it's not a special + * varno). If oldrelid was found and removed, insert newrelid into a Relid + * set (as long as it's not a special varno). Therefore, when oldrelid is + * a special varno, this function does nothing. When newrelid is a special + * varno, this function behaves as delete. */ -static Relids +Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid) { if (!IS_SPECIAL_VARNO(oldrelid) && bms_is_member(oldrelid, relids)) @@ -736,7 +813,8 @@ adjust_relid_set(Relids relids, int oldrelid, int newrelid) relids = bms_copy(relids); /* Remove old, add new */ relids = bms_del_member(relids, oldrelid); - relids = bms_add_member(relids, newrelid); + if (!IS_SPECIAL_VARNO(newrelid)) + relids = bms_add_member(relids, newrelid); } return relids; } diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 42728189322..cce73314609 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -989,6 +989,16 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, { + {"enable_self_join_elimination", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enable removal of unique self-joins."), + NULL, + GUC_EXPLAIN | GUC_NOT_IN_SAMPLE + }, + &enable_self_join_elimination, + true, + NULL, NULL, NULL + }, + { {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables reordering of GROUP BY keys."), NULL, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 00c700cc3e7..fbf05322c75 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -201,6 +201,11 @@ typedef struct PlannerGlobal * Not all fields are printed. (In some cases, there is no print support for * the field type; in others, doing so would lead to infinite recursion or * bloat dump output more than seems useful.) + * + * NOTE: When adding new entries containing relids and relid bitmapsets, + * remember to check that they will be correctly processed by + * the remove_self_join_rel function - relid of removing relation will be + * correctly replaced with the keeping one. *---------- */ #ifndef HAVE_PLANNERINFO_TYPEDEF @@ -753,7 +758,7 @@ typedef struct PartitionSchemeData *PartitionScheme; * populate these fields, for base rels; but someday they might be used for * join rels too: * - * unique_for_rels - list of Relid sets, each one being a set of other + * unique_for_rels - list of UniqueRelInfo, each one being a set of other * rels for which this one has been proven unique * non_unique_for_rels - list of Relid sets, each one being a set of * other rels for which we have tried and failed to prove @@ -992,7 +997,7 @@ typedef struct RelOptInfo /* * cache space for remembering if we have proven this relation unique */ - /* known unique for these other relid set(s) */ + /* known unique for these other relid set(s) given in UniqueRelInfo(s) */ List *unique_for_rels; /* known not unique for these set(s) */ List *non_unique_for_rels; @@ -3463,4 +3468,35 @@ typedef struct AggTransInfo bool initValueIsNull; } AggTransInfo; +/* + * UniqueRelInfo caches a fact that a relation is unique when being joined + * to other relation(s). + */ +typedef struct UniqueRelInfo +{ + pg_node_attr(no_copy_equal, no_read, no_query_jumble) + + NodeTag type; + + /* + * The relation in consideration is unique when being joined with this set + * of other relation(s). + */ + Relids outerrelids; + + /* + * The relation in consideration is unique when considering only clauses + * suitable for self-join (passed split_selfjoin_quals()). + */ + bool self_join; + + /* + * Additional clauses from a baserestrictinfo list that were used to prove + * the uniqueness. We cache it for the self-join checking procedure: a + * self-join can be removed if the outer relation contains strictly the + * same set of clauses. + */ + List *extra_clauses; +} UniqueRelInfo; + #endif /* PATHNODES_H */ diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index bcf8ed645c2..78e05d88c8e 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -192,6 +192,8 @@ extern SortGroupClause *get_sortgroupref_clause_noerr(Index sortref, * output list */ #define PVC_RECURSE_PLACEHOLDERS 0x0020 /* recurse into PlaceHolderVar * arguments */ +#define PVC_INCLUDE_CONVERTROWTYPES 0x0040 /* include ConvertRowtypeExprs in + * output list */ extern Bitmapset *pull_varnos(PlannerInfo *root, Node *node); extern Bitmapset *pull_varnos_of_level(PlannerInfo *root, Node *node, int levelsup); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 46955d128f0..bc5dfd7db41 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -72,6 +72,9 @@ extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel); extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, List *exprlist, List *oprlist); +extern bool relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, + List *restrictlist, List *exprlist, + List *oprlist, List **extra_clauses); extern bool indexcol_is_bool_constant_for_query(PlannerInfo *root, IndexOptInfo *index, int indexcol); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index fee3378bbe3..5a930199611 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -20,6 +20,7 @@ /* GUC parameters */ #define DEFAULT_CURSOR_TUPLE_FRACTION 0.1 extern PGDLLIMPORT double cursor_tuple_fraction; +extern PGDLLIMPORT bool enable_self_join_elimination; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); @@ -113,6 +114,11 @@ extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); extern bool innerrel_is_unique(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, bool force_cache); +extern bool innerrel_is_unique_ext(PlannerInfo *root, Relids joinrelids, + Relids outerrelids, RelOptInfo *innerrel, + JoinType jointype, List *restrictlist, + bool force_cache, List **uclauses); +extern List *remove_useless_self_joins(PlannerInfo *root, List *jointree); /* * prototypes for plan/setrefs.c diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h index 512823033b9..5ec475c63e9 100644 --- a/src/include/rewrite/rewriteManip.h +++ b/src/include/rewrite/rewriteManip.h @@ -15,6 +15,7 @@ #define REWRITEMANIP_H #include "nodes/parsenodes.h" +#include "nodes/pathnodes.h" struct AttrMap; /* avoid including attmap.h here */ @@ -41,11 +42,14 @@ typedef enum ReplaceVarsNoMatchOption } ReplaceVarsNoMatchOption; +extern Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid); extern void CombineRangeTables(List **dst_rtable, List **dst_perminfos, List *src_rtable, List *src_perminfos); extern void OffsetVarNodes(Node *node, int offset, int sublevels_up); extern void ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up); +extern void ChangeVarNodesExtended(Node *node, int rt_index, int new_index, + int sublevels_up, bool change_RangeTblRef); extern void IncrementVarSublevelsUp(Node *node, int delta_sublevels_up, int min_sublevels_up); extern void IncrementVarSublevelsUp_rtable(List *rtable, diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out index 56227505009..ad8ab294ff6 100644 --- a/src/test/regress/expected/equivclass.out +++ b/src/test/regress/expected/equivclass.out @@ -434,6 +434,36 @@ explain (costs off) Filter: ((unique1 IS NOT NULL) AND (unique2 IS NOT NULL)) (2 rows) +-- Test that broken ECs are processed correctly during self join removal. +-- Disable merge joins so that we don't get an error about missing commutator. +-- Test both orientations of the join clause, because only one of them breaks +-- the EC. +set enable_mergejoin to off; +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on m.ff + n.ff = p.f1; + QUERY PLAN +--------------------------------------- + Nested Loop + Join Filter: ((n.ff + n.ff) = p.f1) + -> Seq Scan on ec0 n + -> Materialize + -> Seq Scan on ec1 p +(5 rows) + +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1; + QUERY PLAN +--------------------------------------------------------------- + Nested Loop + Join Filter: ((p.f1)::bigint = ((n.ff + n.ff))::int8alias1) + -> Seq Scan on ec0 n + -> Materialize + -> Seq Scan on ec1 p +(5 rows) + +reset enable_mergejoin; -- this could be converted, but isn't at present explain (costs off) select * from tenk1 where unique1 = unique1 or unique2 = unique2; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 3ffc066b1f8..a57bb18c24f 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5966,6 +5966,27 @@ select c.id, ss.a from c -> Seq Scan on c (7 rows) +-- check the case when the placeholder relates to an outer join and its +-- inner in the press field but actually uses only the outer side of the join +explain (costs off) +SELECT q.val FROM b LEFT JOIN ( + SELECT (q1.z IS NOT NULL) AS val + FROM b LEFT JOIN ( + SELECT (t1.b_id IS NOT NULL) AS z FROM a t1 LEFT JOIN a t2 USING (id) + ) AS q1 + ON true +) AS q ON true; + QUERY PLAN +------------------------------------------ + Nested Loop Left Join + -> Seq Scan on b + -> Materialize + -> Nested Loop Left Join + -> Seq Scan on b b_1 + -> Materialize + -> Seq Scan on a t1 +(7 rows) + CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id); CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10); -- test join removals on a partitioned table @@ -6378,6 +6399,1068 @@ select * from (0 rows) -- +-- test that semi- or inner self-joins on a unique column are removed +-- +-- enable only nestloop to get more predictable plans +set enable_hashjoin to off; +set enable_mergejoin to off; +create table sj (a int unique, b int, c int unique); +insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1); +analyze sj; +-- Trivial self-join case. +explain (costs off) +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + QUERY PLAN +----------------------------------------------- + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b = (a - 1))) +(2 rows) + +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + a | b | c +---+---+--- + 2 | 1 | 1 +(1 row) + +-- Self-join removal performs after a subquery pull-up process and could remove +-- such kind of self-join too. Check this option. +explain (costs off) +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + QUERY PLAN +------------------------------------------ + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b < 10)) +(2 rows) + +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + a | b | c +---+---+--- + 2 | 1 | 1 +(1 row) + +-- Don't remove self-join for the case of equality of two different unique columns. +explain (costs off) +select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null; + QUERY PLAN +--------------------------------------- + Nested Loop + Join Filter: (t1.a = t2.c) + -> Seq Scan on sj t2 + -> Materialize + -> Seq Scan on sj t1 + Filter: (b IS NOT NULL) +(6 rows) + +-- Ensure that relations with TABLESAMPLE clauses are not considered as +-- candidates to be removed +explain (costs off) +select * from sj t1 + join lateral + (select * from sj tablesample system(t1.b)) s + on t1.a = s.a; + QUERY PLAN +--------------------------------------- + Nested Loop + -> Seq Scan on sj t1 + -> Memoize + Cache Key: t1.a, t1.b + Cache Mode: binary + -> Sample Scan on sj + Sampling: system (t1.b) + Filter: (t1.a = a) +(8 rows) + +-- Ensure that SJE does not form a self-referential lateral dependency +explain (costs off) +select * from sj t1 + left join lateral + (select t1.a as t1a, * from sj t2) s + on true +where t1.a = s.a; + QUERY PLAN +--------------------------- + Seq Scan on sj t2 + Filter: (a IS NOT NULL) +(2 rows) + +-- Degenerated case. +explain (costs off) +select * from + (select a as x from sj where false) as q1, + (select a as y from sj where false) as q2 +where q1.x = q2.y; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +-- We can't use a cross-EC generated self join qual because of current logic of +-- the generate_join_implied_equalities routine. +explain (costs off) +select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (t1.a = t2.b) + -> Seq Scan on sj t1 + Filter: (a = b) + -> Seq Scan on sj t2 + Filter: (b = a) +(6 rows) + +explain (costs off) +select * from sj t1, sj t2, sj t3 +where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a and + t1.b = t3.b and t3.b = t3.a; + QUERY PLAN +------------------------------------ + Nested Loop + Join Filter: (t1.a = t3.b) + -> Nested Loop + Join Filter: (t1.a = t2.b) + -> Seq Scan on sj t1 + Filter: (a = b) + -> Seq Scan on sj t2 + Filter: (b = a) + -> Seq Scan on sj t3 + Filter: (b = a) +(10 rows) + +-- Double self-join removal. +-- Use a condition on "b + 1", not on "b", for the second join, so that +-- the equivalence class is different from the first one, and we can +-- test the non-ec code path. +explain (costs off) +select * +from sj t1 + join sj t2 on t1.a = t2.a and t1.b = t2.b + join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1; + QUERY PLAN +--------------------------------------------------------------------------- + Seq Scan on sj t3 + Filter: ((a IS NOT NULL) AND (b IS NOT NULL) AND ((b + 1) IS NOT NULL)) +(2 rows) + +-- subselect that references the removed relation +explain (costs off) +select t1.a, (select a from sj where a = t2.a and a = t1.a) +from sj t1, sj t2 +where t1.a = t2.a; + QUERY PLAN +------------------------------------------ + Seq Scan on sj t2 + Filter: (a IS NOT NULL) + SubPlan 1 + -> Result + One-Time Filter: (t2.a = t2.a) + -> Seq Scan on sj + Filter: (a = t2.a) +(7 rows) + +-- self-join under outer join +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on x.a = z.q1; + QUERY PLAN +------------------------------------ + Nested Loop Left Join + Join Filter: (y.a = z.q1) + -> Seq Scan on sj y + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on int8_tbl z +(6 rows) + +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on y.a = z.q1; + QUERY PLAN +------------------------------------ + Nested Loop Left Join + Join Filter: (y.a = z.q1) + -> Seq Scan on sj y + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on int8_tbl z +(6 rows) + +explain (costs off) +select * from ( + select t1.*, t2.a as ax from sj t1 join sj t2 + on (t1.a = t2.a and t1.c * t1.c = t2.c + 2 and t2.b is null) +) as q1 +left join + (select t3.* from sj t3, sj t4 where t3.c = t4.c) as q2 +on q1.ax = q2.a; + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop Left Join + Join Filter: (t2.a = t4.a) + -> Seq Scan on sj t2 + Filter: ((b IS NULL) AND (a IS NOT NULL) AND ((c * c) = (c + 2))) + -> Seq Scan on sj t4 + Filter: (c IS NOT NULL) +(6 rows) + +-- Test that placeholders are updated correctly after join removal +explain (costs off) +select * from (values (1)) x +left join (select coalesce(y.q1, 1) from int8_tbl y + right join sj j1 inner join sj j2 on j1.a = j2.a + on true) z +on true; + QUERY PLAN +------------------------------------------ + Nested Loop Left Join + -> Result + -> Nested Loop Left Join + -> Seq Scan on sj j2 + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on int8_tbl y +(7 rows) + +-- Test that references to the removed rel in lateral subqueries are replaced +-- correctly after join removal +explain (verbose, costs off) +select t3.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select t1.a offset 0) t3 on true; + QUERY PLAN +------------------------------------ + Nested Loop + Output: (t2.a) + -> Seq Scan on public.sj t2 + Output: t2.a, t2.b, t2.c + Filter: (t2.a IS NOT NULL) + -> Result + Output: t2.a +(7 rows) + +explain (verbose, costs off) +select t3.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select * from (select t1.a offset 0) offset 0) t3 on true; + QUERY PLAN +------------------------------------ + Nested Loop + Output: (t2.a) + -> Seq Scan on public.sj t2 + Output: t2.a, t2.b, t2.c + Filter: (t2.a IS NOT NULL) + -> Result + Output: t2.a +(7 rows) + +explain (verbose, costs off) +select t4.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select t3.a from sj t3, (select t1.a) offset 0) t4 on true; + QUERY PLAN +------------------------------------ + Nested Loop + Output: t3.a + -> Seq Scan on public.sj t2 + Output: t2.a, t2.b, t2.c + Filter: (t2.a IS NOT NULL) + -> Seq Scan on public.sj t3 + Output: t3.a +(7 rows) + +-- Check updating of semi_rhs_exprs links from upper-level semi join to +-- the removing relation +explain (verbose, costs off) +select t1.a from sj t1 where t1.b in ( + select t2.b from sj t2 join sj t3 on t2.c=t3.c); + QUERY PLAN +------------------------------------------ + Nested Loop Semi Join + Output: t1.a + Join Filter: (t1.b = t3.b) + -> Seq Scan on public.sj t1 + Output: t1.a, t1.b, t1.c + -> Materialize + Output: t3.c, t3.b + -> Seq Scan on public.sj t3 + Output: t3.c, t3.b + Filter: (t3.c IS NOT NULL) +(10 rows) + +-- +-- SJE corner case: uniqueness of an inner is [partially] derived from +-- baserestrictinfo clauses. +-- XXX: We really should allow SJE for these corner cases? +-- +INSERT INTO sj VALUES (3, 1, 3); +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: (a = 2) + -> Seq Scan on sj j2 + Filter: (a = 3) +(6 rows) + +-- Return one row +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; + a | b | c | a | b | c +---+---+---+---+---+--- + 2 | 1 | 1 | 3 | 1 | 3 +(1 row) + +-- Remove SJ, define uniqueness by a constant +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; + QUERY PLAN +----------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = 2)) +(2 rows) + +-- Return one row +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; + a | b | c | a | b | c +---+---+---+---+---+--- + 2 | 1 | 1 | 2 | 1 | 1 +(1 row) + +-- Remove SJ, define uniqueness by a constant expression +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer)) +(2 rows) + +-- Return one row +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a; + a | b | c | a | b | c +---+---+---+---+---+--- + 3 | 1 | 3 | 3 | 1 | 3 +(1 row) + +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; + QUERY PLAN +----------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = 1)) +(2 rows) + +-- Return no rows +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; + a | b | c | a | b | c +---+---+---+---+---+--- +(0 rows) + +-- Shuffle a clause. Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; + QUERY PLAN +----------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = 1)) +(2 rows) + +-- Return no rows +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; + a | b | c | a | b | c +---+---+---+---+---+--- +(0 rows) + +-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL' +-- after SJ elimination it shouldn't be a mergejoinable clause. +EXPLAIN (COSTS OFF) +SELECT t4.* +FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3 +JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42; + QUERY PLAN +--------------------------------- + Nested Loop + Join Filter: (t1.b = t2.b) + -> Seq Scan on sj t2 + Filter: (a = 42) + -> Seq Scan on sj t1 + Filter: (a IS NOT NULL) +(6 rows) + +SELECT t4.* +FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3 +JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42; + a | b | c +---+---+--- +(0 rows) + +-- Functional index +CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a)); +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1; + QUERY PLAN +----------------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND ((a * a) = 1)) +(2 rows) + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2; + QUERY PLAN +------------------------------- + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: ((a * a) = 1) + -> Seq Scan on sj j2 + Filter: ((a * a) = 2) +(6 rows) + +-- Restriction contains expressions in both sides, Remove SJ. +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND ((a * a) = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer)) +(2 rows) + +-- Empty set of rows should be returned +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a); + a | b | c | a | b | c +---+---+---+---+---+--- +(0 rows) + +-- Restriction contains volatile function - disable SJE feature. +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.c/3) = (random()/3 + 3)::int + AND (random()/3 + 3)::int = (j2.a*j2.c/3); + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: (((a * c) / 3) = (((random() / '3'::double precision) + '3'::double precision))::integer) + -> Seq Scan on sj j2 + Filter: ((((random() / '3'::double precision) + '3'::double precision))::integer = ((a * c) / 3)) +(6 rows) + +-- Return one row +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.c/3) = (random()/3 + 3)::int + AND (random()/3 + 3)::int = (j2.a*j2.c/3); + a | b | c | a | b | c +---+---+---+---+---+--- + 3 | 1 | 3 | 3 | 1 | 3 +(1 row) + +-- Multiple filters +CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c); +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c; + QUERY PLAN +----------------------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = 2) AND (c = 3)) +(2 rows) + +-- Don't remove SJ +EXPLAIN (COSTS OFF) + SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c; + QUERY PLAN +--------------------------------------- + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: ((2 = a) AND (c = 3)) + -> Seq Scan on sj j2 + Filter: ((c = 3) AND (a = 1)) +(6 rows) + +CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b); +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: (a = 2) + -> Seq Scan on sj j2 +(5 rows) + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j2 + Filter: (2 = a) + -> Seq Scan on sj j1 +(5 rows) + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1); + QUERY PLAN +--------------------------------------------------------------- + Nested Loop + Join Filter: ((j1.b = j2.b) AND ((j1.a = 1) OR (j2.a = 1))) + -> Seq Scan on sj j1 + -> Materialize + -> Seq Scan on sj j2 +(5 rows) + +DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx; +-- Test that OR predicated are updated correctly after join removal +CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT); +CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag); +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM tab_with_flag +WHERE + (is_flag IS NULL OR is_flag = 0) + AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3)); + QUERY PLAN +----------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tab_with_flag + Recheck Cond: (id = ANY ('{2,3}'::integer[])) + Filter: ((is_flag IS NULL) OR (is_flag = 0)) + -> Bitmap Index Scan on tab_with_flag_pkey + Index Cond: (id = ANY ('{2,3}'::integer[])) +(6 rows) + +DROP TABLE tab_with_flag; +-- HAVING clause +explain (costs off) +select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1; + QUERY PLAN +--------------------------------- + HashAggregate + Group Key: q.b + Filter: (sum(q.a) = 1) + -> Seq Scan on sj q + Filter: (a IS NOT NULL) +(5 rows) + +-- update lateral references and range table entry reference +explain (verbose, costs off) +select 1 from (select x.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + QUERY PLAN +------------------------------------------------------ + Nested Loop + Output: 1 + -> Seq Scan on public.sj y + Output: y.a, y.b, y.c + Filter: (y.a IS NOT NULL) + -> Function Scan on pg_catalog.generate_series gs + Output: gs.i + Function Call: generate_series(1, y.a) +(8 rows) + +explain (verbose, costs off) +select 1 from (select y.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + QUERY PLAN +------------------------------------------------------ + Nested Loop + Output: 1 + -> Seq Scan on public.sj y + Output: y.a, y.b, y.c + Filter: (y.a IS NOT NULL) + -> Function Scan on pg_catalog.generate_series gs + Output: gs.i + Function Call: generate_series(1, y.a) +(8 rows) + +-- Test that a non-EC-derived join clause is processed correctly. Use an +-- outer join so that we can't form an EC. +explain (costs off) select * from sj p join sj q on p.a = q.a + left join sj r on p.a + q.a = r.a; + QUERY PLAN +------------------------------------ + Nested Loop Left Join + Join Filter: ((q.a + q.a) = r.a) + -> Seq Scan on sj q + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on sj r +(6 rows) + +-- FIXME this constant false filter doesn't look good. Should we merge +-- equivalence classes? +explain (costs off) +select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2; + QUERY PLAN +----------------------------------------------------- + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b = 2) AND (b = 1)) +(2 rows) + +-- Check that attr_needed is updated correctly after self-join removal. In this +-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2. +-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b. +-- Use index scan for k1 so that we don't get 'b' from physical tlist used for +-- seqscan. Also disable reordering of joins because this test depends on a +-- particular join tree. +create table sk (a int, b int); +create index on sk(a); +set join_collapse_limit to 1; +set enable_seqscan to off; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b; + QUERY PLAN +----------------------------------------------------- + Nested Loop + Join Filter: (k1.b = j2.b) + -> Nested Loop + -> Index Scan using sk_a_idx on sk k1 + -> Index Only Scan using sk_a_idx on sk k2 + Index Cond: (a = k1.a) + -> Materialize + -> Index Scan using sj_a_key on sj j2 + Index Cond: (a IS NOT NULL) +(9 rows) + +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b; + QUERY PLAN +----------------------------------------------------- + Nested Loop + Join Filter: (k1.b = j2.b) + -> Nested Loop + -> Index Scan using sk_a_idx on sk k1 + -> Index Only Scan using sk_a_idx on sk k2 + Index Cond: (a = k1.a) + -> Materialize + -> Index Scan using sj_a_key on sj j2 + Index Cond: (a IS NOT NULL) +(9 rows) + +reset join_collapse_limit; +reset enable_seqscan; +-- Check that clauses from the join filter list is not lost on the self-join removal +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; + QUERY PLAN +------------------------------------------ + Seq Scan on public.emp1 e2 + Output: e2.id, e2.code, e2.id, e2.code + Filter: (e2.code <> e2.code) +(3 rows) + +-- Shuffle self-joined relations. Only in the case of iterative deletion +-- attempts explains of these queries will be identical. +CREATE UNIQUE INDEX ON emp1((id*id)); +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id; + QUERY PLAN +----------------------------------------- + Aggregate + -> Seq Scan on emp1 c3 + Filter: ((id * id) IS NOT NULL) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id; + QUERY PLAN +----------------------------------------- + Aggregate + -> Seq Scan on emp1 c3 + Filter: ((id * id) IS NOT NULL) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id; + QUERY PLAN +----------------------------------------- + Aggregate + -> Seq Scan on emp1 c3 + Filter: ((id * id) IS NOT NULL) +(3 rows) + +-- Check the usage of a parse tree by the set operations (bug #18170) +EXPLAIN (COSTS OFF) +SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id +WHERE c2.id IS NOT NULL +EXCEPT ALL +SELECT c3.code FROM emp1 c3; + QUERY PLAN +--------------------------- + HashSetOp Except All + -> Seq Scan on emp1 c2 + -> Seq Scan on emp1 c3 +(3 rows) + +-- Check that SJE removes references from PHVs correctly +explain (costs off) +select * from emp1 t1 left join + (select coalesce(t3.code, 1) from emp1 t2 + left join (emp1 t3 join emp1 t4 on t3.id = t4.id) + on true) +on true; + QUERY PLAN +--------------------------------------------- + Nested Loop Left Join + -> Seq Scan on emp1 t1 + -> Materialize + -> Nested Loop Left Join + -> Seq Scan on emp1 t2 + -> Materialize + -> Seq Scan on emp1 t4 +(7 rows) + +-- Check that SJE removes the whole PHVs correctly +explain (verbose, costs off) +select 1 from emp1 t1 left join + ((select 1 as x, * from emp1 t2) s1 inner join + (select * from emp1 t3) s2 on s1.id = s2.id) + on true +where s1.x = 1; + QUERY PLAN +---------------------------------------- + Nested Loop + Output: 1 + -> Seq Scan on public.emp1 t1 + Output: t1.id, t1.code + -> Materialize + Output: t3.id + -> Seq Scan on public.emp1 t3 + Output: t3.id + Filter: (1 = 1) +(9 rows) + +-- Check that PHVs do not impose any constraints on removing self joins +explain (verbose, costs off) +select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join + lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true; + QUERY PLAN +---------------------------------------------------------- + Nested Loop Left Join + Output: t2.id, t2.code, t2.id, t2.code, (t2.id), t3.t3 + -> Seq Scan on public.emp1 t2 + Output: t2.id, t2.code + -> Function Scan on pg_catalog.generate_series t3 + Output: t3.t3, t2.id + Function Call: generate_series(1, 1) +(7 rows) + +explain (verbose, costs off) +select * from generate_series(1,10) t1(id) left join + lateral (select t1.id as t1id, t2.id from emp1 t2 join emp1 t3 on t2.id = t3.id) +on true; + QUERY PLAN +------------------------------------------------------ + Nested Loop Left Join + Output: t1.id, (t1.id), t3.id + -> Function Scan on pg_catalog.generate_series t1 + Output: t1.id + Function Call: generate_series(1, 10) + -> Seq Scan on public.emp1 t3 + Output: t3.id, t1.id +(7 rows) + +-- Check that SJE replaces join clauses involving the removed rel correctly +explain (costs off) +select * from emp1 t1 + inner join emp1 t2 on t1.id = t2.id + left join emp1 t3 on t1.id > 1 and t1.id < 2; + QUERY PLAN +---------------------------------------------- + Nested Loop Left Join + Join Filter: ((t2.id > 1) AND (t2.id < 2)) + -> Seq Scan on emp1 t2 + -> Materialize + -> Seq Scan on emp1 t3 +(5 rows) + +-- Check that SJE doesn't replace the target relation +EXPLAIN (COSTS OFF) +WITH t1 AS (SELECT * FROM emp1) +UPDATE emp1 SET code = t1.code + 1 FROM t1 +WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code; + QUERY PLAN +------------------------------------------------------- + Update on emp1 + -> Nested Loop + -> Seq Scan on emp1 + -> Index Scan using emp1_pkey on emp1 emp1_1 + Index Cond: (id = emp1.id) +(5 rows) + +INSERT INTO emp1 VALUES (1, 1), (2, 1); +WITH t1 AS (SELECT * FROM emp1) +UPDATE emp1 SET code = t1.code + 1 FROM t1 +WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code; + id | code | code +----+------+------ + 1 | 2 | 1 + 2 | 2 | 1 +(2 rows) + +TRUNCATE emp1; +EXPLAIN (COSTS OFF) +UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a; + QUERY PLAN +------------------------------------- + Update on sj sq + -> Nested Loop + Join Filter: (sq.a = sz.a) + -> Seq Scan on sj sq + -> Materialize + -> Seq Scan on sj sz +(6 rows) + +CREATE RULE sj_del_rule AS ON DELETE TO sj + DO INSTEAD + UPDATE sj SET a = 1 WHERE a = old.a; +EXPLAIN (COSTS OFF) DELETE FROM sj; + QUERY PLAN +-------------------------------------- + Update on sj sj_1 + -> Nested Loop + Join Filter: (sj.a = sj_1.a) + -> Seq Scan on sj sj_1 + -> Materialize + -> Seq Scan on sj +(6 rows) + +DROP RULE sj_del_rule ON sj CASCADE; +-- Check that SJE does not mistakenly omit qual clauses (bug #18187) +insert into emp1 values (1, 1); +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + ?column? +---------- +(0 rows) + +-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness +-- made with different set of quals +insert into emp1 values (2, 1); +explain (costs off) +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); + QUERY PLAN +--------------------------------------------- + Nested Loop + -> Seq Scan on emp1 t1 + -> Index Scan using emp1_pkey on emp1 t2 + Index Cond: (id = t1.code) + Filter: (code > 0) +(5 rows) + +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); + id | code +----+------ + 1 | 1 + 2 | 1 +(2 rows) + +-- We can remove the join even if we find the join can't duplicate rows and +-- the base quals of each side are different. In the following case we end up +-- moving quals over to s1 to make it so it can't match any rows. +create table sl(a int, b int, c int); +create unique index on sl(a, b); +vacuum analyze sl; +-- Both sides are unique, but base quals are different +explain (costs off) +select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (t1.a = t2.a) + -> Seq Scan on sl t1 + Filter: (b = 1) + -> Seq Scan on sl t2 + Filter: (b = 2) +(6 rows) + +-- Check NullTest in baserestrictinfo list +explain (costs off) +select * from sl t1, sl t2 +where t1.a = t2.a and t1.b = 1 and t2.b = 2 + and t1.c IS NOT NULL and t2.c IS NOT NULL + and t2.b IS NOT NULL and t1.b IS NOT NULL + and t1.a IS NOT NULL and t2.a IS NOT NULL; + QUERY PLAN +--------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (t1.a = t2.a) + -> Seq Scan on sl t1 + Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 1)) + -> Seq Scan on sl t2 + Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 2)) +(6 rows) + +explain (verbose, costs off) +select * from sl t1, sl t2 +where t1.b = t2.b and t2.a = 3 and t1.a = 3 + and t1.c IS NOT NULL and t2.c IS NOT NULL + and t2.b IS NOT NULL and t1.b IS NOT NULL + and t1.a IS NOT NULL and t2.a IS NOT NULL; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Seq Scan on public.sl t2 + Output: t2.a, t2.b, t2.c, t2.a, t2.b, t2.c + Filter: ((t2.c IS NOT NULL) AND (t2.b IS NOT NULL) AND (t2.a IS NOT NULL) AND (t2.a = 3)) +(3 rows) + +-- Join qual isn't mergejoinable, but inner is unique. +EXPLAIN (COSTS OFF) +SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1; + QUERY PLAN +------------------------------- + Nested Loop + Join Filter: (n1.a <> n2.a) + -> Seq Scan on sj n2 + Filter: (a = 1) + -> Seq Scan on sj n1 +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM +(SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl +WHERE q0.a = 1; + QUERY PLAN +------------------------------- + Nested Loop + Join Filter: (n1.a <> n2.a) + -> Nested Loop + -> Seq Scan on sl + -> Seq Scan on sj n2 + Filter: (a = 1) + -> Seq Scan on sj n1 +(7 rows) + +-- Check optimization disabling if it will violate special join conditions. +-- Two identical joined relations satisfies self join removal conditions but +-- stay in different special join infos. +CREATE TABLE sj_t1 (id serial, a int); +CREATE TABLE sj_t2 (id serial, a int); +CREATE TABLE sj_t3 (id serial, a int); +CREATE TABLE sj_t4 (id serial, a int); +CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id); +CREATE UNIQUE INDEX ON sj_t2 USING btree (id); +EXPLAIN (COSTS OFF) +SELECT * FROM sj_t1 +JOIN ( + SELECT sj_t2.id AS id FROM sj_t2 + WHERE EXISTS + ( + SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id + ) + ) t2t3t4 +ON sj_t1.id = t2t3t4.id +JOIN ( + SELECT sj_t2.id AS id FROM sj_t2 + WHERE EXISTS + ( + SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id + ) + ) _t2t3t4 +ON sj_t1.id = _t2t3t4.id; + QUERY PLAN +------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (sj_t3.id = sj_t1.id) + -> Nested Loop + Join Filter: (sj_t2.id = sj_t3.id) + -> Nested Loop Semi Join + -> Nested Loop + -> HashAggregate + Group Key: sj_t3.id + -> Nested Loop + -> Seq Scan on sj_t4 + -> Materialize + -> Bitmap Heap Scan on sj_t3 + Recheck Cond: (a = 1) + -> Bitmap Index Scan on sj_t3_a_id_idx + Index Cond: (a = 1) + -> Index Only Scan using sj_t2_id_idx on sj_t2 sj_t2_1 + Index Cond: (id = sj_t3.id) + -> Nested Loop + -> Index Only Scan using sj_t3_a_id_idx on sj_t3 sj_t3_1 + Index Cond: ((a = 1) AND (id = sj_t3.id)) + -> Seq Scan on sj_t4 sj_t4_1 + -> Index Only Scan using sj_t2_id_idx on sj_t2 + Index Cond: (id = sj_t2_1.id) + -> Seq Scan on sj_t1 +(24 rows) + +-- +-- Test RowMarks-related code +-- +-- Both sides have explicit LockRows marks +EXPLAIN (COSTS OFF) +SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE; + QUERY PLAN +--------------------------------- + LockRows + -> Seq Scan on sj a2 + Filter: (a IS NOT NULL) +(3 rows) + +reset enable_hashjoin; +reset enable_mergejoin; +-- -- Test hints given on incorrect column references are useful -- select t1.uunique1 from diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 352abc0bd42..83228cfca29 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -168,10 +168,11 @@ select name, setting from pg_settings where name like 'enable%'; enable_partitionwise_aggregate | off enable_partitionwise_join | off enable_presorted_aggregate | on + enable_self_join_elimination | on enable_seqscan | on enable_sort | on enable_tidscan | on -(23 rows) +(24 rows) -- There are always wait event descriptions for various types. InjectionPoint -- may be present or absent, depending on history since last postmaster start. diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql index 28ed7910d01..7fc2159349b 100644 --- a/src/test/regress/sql/equivclass.sql +++ b/src/test/regress/sql/equivclass.sql @@ -259,6 +259,22 @@ drop user regress_user_ectest; explain (costs off) select * from tenk1 where unique1 = unique1 and unique2 = unique2; +-- Test that broken ECs are processed correctly during self join removal. +-- Disable merge joins so that we don't get an error about missing commutator. +-- Test both orientations of the join clause, because only one of them breaks +-- the EC. +set enable_mergejoin to off; + +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on m.ff + n.ff = p.f1; + +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1; + +reset enable_mergejoin; + -- this could be converted, but isn't at present explain (costs off) select * from tenk1 where unique1 = unique1 or unique2 = unique2; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index c7349eab933..c29d13b9fed 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2175,6 +2175,17 @@ select c.id, ss.a from c left join (select d.a from onerow, d left join b on d.a = b.id) ss on c.id = ss.a; +-- check the case when the placeholder relates to an outer join and its +-- inner in the press field but actually uses only the outer side of the join +explain (costs off) +SELECT q.val FROM b LEFT JOIN ( + SELECT (q1.z IS NOT NULL) AS val + FROM b LEFT JOIN ( + SELECT (t1.b_id IS NOT NULL) AS z FROM a t1 LEFT JOIN a t2 USING (id) + ) AS q1 + ON true +) AS q ON true; + CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id); CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10); @@ -2410,6 +2421,489 @@ select * from int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok -- +-- test that semi- or inner self-joins on a unique column are removed +-- + +-- enable only nestloop to get more predictable plans +set enable_hashjoin to off; +set enable_mergejoin to off; + +create table sj (a int unique, b int, c int unique); +insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1); +analyze sj; + +-- Trivial self-join case. +explain (costs off) +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + +-- Self-join removal performs after a subquery pull-up process and could remove +-- such kind of self-join too. Check this option. +explain (costs off) +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + +-- Don't remove self-join for the case of equality of two different unique columns. +explain (costs off) +select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null; + +-- Ensure that relations with TABLESAMPLE clauses are not considered as +-- candidates to be removed +explain (costs off) +select * from sj t1 + join lateral + (select * from sj tablesample system(t1.b)) s + on t1.a = s.a; + +-- Ensure that SJE does not form a self-referential lateral dependency +explain (costs off) +select * from sj t1 + left join lateral + (select t1.a as t1a, * from sj t2) s + on true +where t1.a = s.a; + +-- Degenerated case. +explain (costs off) +select * from + (select a as x from sj where false) as q1, + (select a as y from sj where false) as q2 +where q1.x = q2.y; + +-- We can't use a cross-EC generated self join qual because of current logic of +-- the generate_join_implied_equalities routine. +explain (costs off) +select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a; +explain (costs off) +select * from sj t1, sj t2, sj t3 +where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a and + t1.b = t3.b and t3.b = t3.a; + +-- Double self-join removal. +-- Use a condition on "b + 1", not on "b", for the second join, so that +-- the equivalence class is different from the first one, and we can +-- test the non-ec code path. +explain (costs off) +select * +from sj t1 + join sj t2 on t1.a = t2.a and t1.b = t2.b + join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1; + +-- subselect that references the removed relation +explain (costs off) +select t1.a, (select a from sj where a = t2.a and a = t1.a) +from sj t1, sj t2 +where t1.a = t2.a; + +-- self-join under outer join +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on x.a = z.q1; + +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on y.a = z.q1; + +explain (costs off) +select * from ( + select t1.*, t2.a as ax from sj t1 join sj t2 + on (t1.a = t2.a and t1.c * t1.c = t2.c + 2 and t2.b is null) +) as q1 +left join + (select t3.* from sj t3, sj t4 where t3.c = t4.c) as q2 +on q1.ax = q2.a; + +-- Test that placeholders are updated correctly after join removal +explain (costs off) +select * from (values (1)) x +left join (select coalesce(y.q1, 1) from int8_tbl y + right join sj j1 inner join sj j2 on j1.a = j2.a + on true) z +on true; + +-- Test that references to the removed rel in lateral subqueries are replaced +-- correctly after join removal +explain (verbose, costs off) +select t3.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select t1.a offset 0) t3 on true; + +explain (verbose, costs off) +select t3.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select * from (select t1.a offset 0) offset 0) t3 on true; + +explain (verbose, costs off) +select t4.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select t3.a from sj t3, (select t1.a) offset 0) t4 on true; + +-- Check updating of semi_rhs_exprs links from upper-level semi join to +-- the removing relation +explain (verbose, costs off) +select t1.a from sj t1 where t1.b in ( + select t2.b from sj t2 join sj t3 on t2.c=t3.c); + +-- +-- SJE corner case: uniqueness of an inner is [partially] derived from +-- baserestrictinfo clauses. +-- XXX: We really should allow SJE for these corner cases? +-- + +INSERT INTO sj VALUES (3, 1, 3); + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; +-- Return one row +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; + +-- Remove SJ, define uniqueness by a constant +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; +-- Return one row +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; + +-- Remove SJ, define uniqueness by a constant expression +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a; +-- Return one row +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a; + +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; +-- Return no rows +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; + +-- Shuffle a clause. Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; +-- Return no rows +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; + +-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL' +-- after SJ elimination it shouldn't be a mergejoinable clause. +EXPLAIN (COSTS OFF) +SELECT t4.* +FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3 +JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42; +SELECT t4.* +FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3 +JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42; + +-- Functional index +CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a)); + +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1; +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2; + +-- Restriction contains expressions in both sides, Remove SJ. +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a); +-- Empty set of rows should be returned +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a); + +-- Restriction contains volatile function - disable SJE feature. +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.c/3) = (random()/3 + 3)::int + AND (random()/3 + 3)::int = (j2.a*j2.c/3); +-- Return one row +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.c/3) = (random()/3 + 3)::int + AND (random()/3 + 3)::int = (j2.a*j2.c/3); + +-- Multiple filters +CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c); + +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c; + +-- Don't remove SJ +EXPLAIN (COSTS OFF) + SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c; + +CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b); + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2; + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a; + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1); + +DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx; + +-- Test that OR predicated are updated correctly after join removal +CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT); +CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag); + +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM tab_with_flag +WHERE + (is_flag IS NULL OR is_flag = 0) + AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3)); +DROP TABLE tab_with_flag; + +-- HAVING clause +explain (costs off) +select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1; + +-- update lateral references and range table entry reference +explain (verbose, costs off) +select 1 from (select x.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + +explain (verbose, costs off) +select 1 from (select y.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + +-- Test that a non-EC-derived join clause is processed correctly. Use an +-- outer join so that we can't form an EC. +explain (costs off) select * from sj p join sj q on p.a = q.a + left join sj r on p.a + q.a = r.a; + +-- FIXME this constant false filter doesn't look good. Should we merge +-- equivalence classes? +explain (costs off) +select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2; + +-- Check that attr_needed is updated correctly after self-join removal. In this +-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2. +-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b. +-- Use index scan for k1 so that we don't get 'b' from physical tlist used for +-- seqscan. Also disable reordering of joins because this test depends on a +-- particular join tree. +create table sk (a int, b int); +create index on sk(a); +set join_collapse_limit to 1; +set enable_seqscan to off; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b; +reset join_collapse_limit; +reset enable_seqscan; + +-- Check that clauses from the join filter list is not lost on the self-join removal +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; + +-- Shuffle self-joined relations. Only in the case of iterative deletion +-- attempts explains of these queries will be identical. +CREATE UNIQUE INDEX ON emp1((id*id)); + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id; + +-- Check the usage of a parse tree by the set operations (bug #18170) +EXPLAIN (COSTS OFF) +SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id +WHERE c2.id IS NOT NULL +EXCEPT ALL +SELECT c3.code FROM emp1 c3; + +-- Check that SJE removes references from PHVs correctly +explain (costs off) +select * from emp1 t1 left join + (select coalesce(t3.code, 1) from emp1 t2 + left join (emp1 t3 join emp1 t4 on t3.id = t4.id) + on true) +on true; + +-- Check that SJE removes the whole PHVs correctly +explain (verbose, costs off) +select 1 from emp1 t1 left join + ((select 1 as x, * from emp1 t2) s1 inner join + (select * from emp1 t3) s2 on s1.id = s2.id) + on true +where s1.x = 1; + +-- Check that PHVs do not impose any constraints on removing self joins +explain (verbose, costs off) +select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join + lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true; + +explain (verbose, costs off) +select * from generate_series(1,10) t1(id) left join + lateral (select t1.id as t1id, t2.id from emp1 t2 join emp1 t3 on t2.id = t3.id) +on true; + +-- Check that SJE replaces join clauses involving the removed rel correctly +explain (costs off) +select * from emp1 t1 + inner join emp1 t2 on t1.id = t2.id + left join emp1 t3 on t1.id > 1 and t1.id < 2; + +-- Check that SJE doesn't replace the target relation +EXPLAIN (COSTS OFF) +WITH t1 AS (SELECT * FROM emp1) +UPDATE emp1 SET code = t1.code + 1 FROM t1 +WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code; + +INSERT INTO emp1 VALUES (1, 1), (2, 1); + +WITH t1 AS (SELECT * FROM emp1) +UPDATE emp1 SET code = t1.code + 1 FROM t1 +WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code; + +TRUNCATE emp1; + +EXPLAIN (COSTS OFF) +UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a; + +CREATE RULE sj_del_rule AS ON DELETE TO sj + DO INSTEAD + UPDATE sj SET a = 1 WHERE a = old.a; +EXPLAIN (COSTS OFF) DELETE FROM sj; +DROP RULE sj_del_rule ON sj CASCADE; + +-- Check that SJE does not mistakenly omit qual clauses (bug #18187) +insert into emp1 values (1, 1); +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + +-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness +-- made with different set of quals +insert into emp1 values (2, 1); +explain (costs off) +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); + +-- We can remove the join even if we find the join can't duplicate rows and +-- the base quals of each side are different. In the following case we end up +-- moving quals over to s1 to make it so it can't match any rows. +create table sl(a int, b int, c int); +create unique index on sl(a, b); +vacuum analyze sl; + +-- Both sides are unique, but base quals are different +explain (costs off) +select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2; + +-- Check NullTest in baserestrictinfo list +explain (costs off) +select * from sl t1, sl t2 +where t1.a = t2.a and t1.b = 1 and t2.b = 2 + and t1.c IS NOT NULL and t2.c IS NOT NULL + and t2.b IS NOT NULL and t1.b IS NOT NULL + and t1.a IS NOT NULL and t2.a IS NOT NULL; +explain (verbose, costs off) +select * from sl t1, sl t2 +where t1.b = t2.b and t2.a = 3 and t1.a = 3 + and t1.c IS NOT NULL and t2.c IS NOT NULL + and t2.b IS NOT NULL and t1.b IS NOT NULL + and t1.a IS NOT NULL and t2.a IS NOT NULL; + +-- Join qual isn't mergejoinable, but inner is unique. +EXPLAIN (COSTS OFF) +SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1; + +EXPLAIN (COSTS OFF) +SELECT * FROM +(SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl +WHERE q0.a = 1; + +-- Check optimization disabling if it will violate special join conditions. +-- Two identical joined relations satisfies self join removal conditions but +-- stay in different special join infos. +CREATE TABLE sj_t1 (id serial, a int); +CREATE TABLE sj_t2 (id serial, a int); +CREATE TABLE sj_t3 (id serial, a int); +CREATE TABLE sj_t4 (id serial, a int); + +CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id); +CREATE UNIQUE INDEX ON sj_t2 USING btree (id); + +EXPLAIN (COSTS OFF) +SELECT * FROM sj_t1 +JOIN ( + SELECT sj_t2.id AS id FROM sj_t2 + WHERE EXISTS + ( + SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id + ) + ) t2t3t4 +ON sj_t1.id = t2t3t4.id +JOIN ( + SELECT sj_t2.id AS id FROM sj_t2 + WHERE EXISTS + ( + SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id + ) + ) _t2t3t4 +ON sj_t1.id = _t2t3t4.id; + +-- +-- Test RowMarks-related code +-- + +-- Both sides have explicit LockRows marks +EXPLAIN (COSTS OFF) +SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE; + +reset enable_hashjoin; +reset enable_mergejoin; + +-- -- Test hints given on incorrect column references are useful -- diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index b6c170ac249..bce4214503d 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2593,6 +2593,7 @@ SeenRelsEntry SelectLimit SelectStmt Selectivity +SelfJoinCandidate SemTPadded SemiAntiJoinFactors SeqScan @@ -4056,6 +4057,7 @@ unicodeStyleColumnFormat unicodeStyleFormat unicodeStyleRowFormat unicode_linestyle +UniqueRelInfo unit_conversion unlogged_relation_entry utf_local_conversion_func |