aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/optimizer/path/joinpath.c32
-rw-r--r--src/backend/optimizer/plan/analyzejoins.c121
-rw-r--r--src/backend/optimizer/plan/planmain.c6
-rw-r--r--src/include/optimizer/planmain.h5
-rw-r--r--src/test/regress/expected/join.out28
-rw-r--r--src/test/regress/expected/updatable_views.out2
-rw-r--r--src/test/regress/sql/join.sql14
7 files changed, 181 insertions, 27 deletions
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 39e2ddda906..c130d2f17f2 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -126,13 +126,15 @@ add_paths_to_joinrel(PlannerInfo *root,
*
* We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
* matter since the executor can make the equivalent optimization anyway;
- * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, if
- * the LHS covers all of the associated semijoin's min_lefthand, then it's
- * appropriate to set inner_unique because the path produced by
- * create_unique_path will be unique relative to the LHS. (If we have an
- * LHS that's only part of the min_lefthand, that is *not* true.) For
- * JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid letting that value escape
- * this module.
+ * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
+ * must be considering a semijoin whose inner side is not provably unique
+ * (else reduce_unique_semijoins would've simplified it), so there's no
+ * point in calling innerrel_is_unique. However, if the LHS covers all of
+ * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+ * because the path produced by create_unique_path will be unique relative
+ * to the LHS. (If we have an LHS that's only part of the min_lefthand,
+ * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
+ * letting that value escape this module.
*/
switch (jointype)
{
@@ -145,12 +147,20 @@ add_paths_to_joinrel(PlannerInfo *root,
outerrel->relids);
break;
case JOIN_UNIQUE_OUTER:
- extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
- JOIN_INNER, restrictlist);
+ extra.inner_unique = innerrel_is_unique(root,
+ outerrel->relids,
+ innerrel,
+ JOIN_INNER,
+ restrictlist,
+ false);
break;
default:
- extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
- jointype, restrictlist);
+ extra.inner_unique = innerrel_is_unique(root,
+ outerrel->relids,
+ innerrel,
+ jointype,
+ restrictlist,
+ false);
break;
}
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 69b9be4d76b..34317fe7782 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -42,7 +42,7 @@ static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
List *clause_list);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
static bool is_innerrel_unique_for(PlannerInfo *root,
- RelOptInfo *outerrel,
+ Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
List *restrictlist);
@@ -496,6 +496,88 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
/*
+ * reduce_unique_semijoins
+ * Check for semijoins that can be simplified to plain inner joins
+ * because the inner relation is provably unique for the join clauses.
+ *
+ * Ideally this would happen during reduce_outer_joins, but we don't have
+ * enough information at that point.
+ *
+ * To perform the strength reduction when applicable, we need only delete
+ * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
+ * bother fixing the join type attributed to it in the query jointree,
+ * since that won't be consulted again.)
+ */
+void
+reduce_unique_semijoins(PlannerInfo *root)
+{
+ ListCell *lc;
+ ListCell *next;
+
+ /*
+ * Scan the join_info_list to find semijoins. We can't use foreach
+ * because we may delete the current cell.
+ */
+ for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+ int innerrelid;
+ RelOptInfo *innerrel;
+ Relids joinrelids;
+ List *restrictlist;
+
+ next = lnext(lc);
+
+ /*
+ * Must be a non-delaying semijoin to a single baserel, else we aren't
+ * going to be able to do anything with it. (It's probably not
+ * possible for delay_upper_joins to be set on a semijoin, but we
+ * might as well check.)
+ */
+ if (sjinfo->jointype != JOIN_SEMI ||
+ sjinfo->delay_upper_joins)
+ continue;
+
+ if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
+ continue;
+
+ innerrel = find_base_rel(root, innerrelid);
+
+ /*
+ * Before we trouble to run generate_join_implied_equalities, make a
+ * quick check to eliminate cases in which we will surely be unable to
+ * prove uniqueness of the innerrel.
+ */
+ if (!rel_supports_distinctness(root, innerrel))
+ continue;
+
+ /* Compute the relid set for the join we are considering */
+ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
+ /*
+ * Since we're only considering a single-rel RHS, any join clauses it
+ * has must be clauses linking it to the semijoin's min_lefthand. We
+ * can also consider EC-derived join clauses.
+ */
+ restrictlist =
+ list_concat(generate_join_implied_equalities(root,
+ joinrelids,
+ sjinfo->min_lefthand,
+ innerrel),
+ innerrel->joininfo);
+
+ /* Test whether the innerrel is unique for those clauses. */
+ if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel,
+ JOIN_SEMI, restrictlist, true))
+ continue;
+
+ /* OK, remove the SpecialJoinInfo from the list. */
+ root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
+ }
+}
+
+
+/*
* rel_supports_distinctness
* Could the relation possibly be proven distinct on some set of columns?
*
@@ -857,6 +939,10 @@ distinct_col_search(int colno, List *colnos, List *opids)
* Check if the innerrel provably contains at most one tuple matching any
* tuple from the outerrel, based on join clauses in the 'restrictlist'.
*
+ * We need an actual RelOptInfo for the innerrel, but it's sufficient to
+ * identify the outerrel by its Relids. This asymmetry supports use of this
+ * function before joinrels have been built.
+ *
* The proof must be made based only on clauses that will be "joinquals"
* rather than "otherquals" at execution. For an inner join there's no
* difference; but if the join is outer, we must ignore pushed-down quals,
@@ -867,13 +953,18 @@ distinct_col_search(int colno, List *colnos, List *opids)
*
* The actual proof is undertaken by is_innerrel_unique_for(); this function
* is a frontend that is mainly concerned with caching the answers.
+ * In particular, the force_cache argument allows overriding the internal
+ * heuristic about whether to cache negative answers; it should be "true"
+ * if making an inquiry that is not part of the normal bottom-up join search
+ * sequence.
*/
bool
innerrel_is_unique(PlannerInfo *root,
- RelOptInfo *outerrel,
+ Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
- List *restrictlist)
+ List *restrictlist,
+ bool force_cache)
{
MemoryContext old_context;
ListCell *lc;
@@ -900,7 +991,7 @@ innerrel_is_unique(PlannerInfo *root,
{
Relids unique_for_rels = (Relids) lfirst(lc);
- if (bms_is_subset(unique_for_rels, outerrel->relids))
+ if (bms_is_subset(unique_for_rels, outerrelids))
return true; /* Success! */
}
@@ -912,12 +1003,12 @@ innerrel_is_unique(PlannerInfo *root,
{
Relids unique_for_rels = (Relids) lfirst(lc);
- if (bms_is_subset(outerrel->relids, unique_for_rels))
+ if (bms_is_subset(outerrelids, unique_for_rels))
return false;
}
/* No cached information, so try to make the proof. */
- if (is_innerrel_unique_for(root, outerrel, innerrel,
+ if (is_innerrel_unique_for(root, outerrelids, innerrel,
jointype, restrictlist))
{
/*
@@ -932,7 +1023,7 @@ innerrel_is_unique(PlannerInfo *root,
*/
old_context = MemoryContextSwitchTo(root->planner_cxt);
innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
- bms_copy(outerrel->relids));
+ bms_copy(outerrelids));
MemoryContextSwitchTo(old_context);
return true; /* Success! */
@@ -949,15 +1040,19 @@ innerrel_is_unique(PlannerInfo *root,
* from smaller to larger. It is useful in GEQO mode, where the
* knowledge can be carried across successive planning attempts; and
* it's likely to be useful when using join-search plugins, too. Hence
- * cache only when join_search_private is non-NULL. (Yeah, that's a
- * hack, but it seems reasonable.)
+ * cache when join_search_private is non-NULL. (Yeah, that's a hack,
+ * but it seems reasonable.)
+ *
+ * Also, allow callers to override that heuristic and force caching;
+ * that's useful for reduce_unique_semijoins, which calls here before
+ * the normal join search starts.
*/
- if (root->join_search_private)
+ if (force_cache || root->join_search_private)
{
old_context = MemoryContextSwitchTo(root->planner_cxt);
innerrel->non_unique_for_rels =
lappend(innerrel->non_unique_for_rels,
- bms_copy(outerrel->relids));
+ bms_copy(outerrelids));
MemoryContextSwitchTo(old_context);
}
@@ -972,7 +1067,7 @@ innerrel_is_unique(PlannerInfo *root,
*/
static bool
is_innerrel_unique_for(PlannerInfo *root,
- RelOptInfo *outerrel,
+ Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
List *restrictlist)
@@ -1007,7 +1102,7 @@ is_innerrel_unique_for(PlannerInfo *root,
* Check if clause has the form "outer op inner" or "inner op outer",
* and if so mark which side is inner.
*/
- if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+ if (!clause_sides_match_join(restrictinfo, outerrelids,
innerrel->relids))
continue; /* no good for these input relations */
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index ef0de3fb1a9..74de3b818f7 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -193,6 +193,12 @@ query_planner(PlannerInfo *root, List *tlist,
joinlist = remove_useless_joins(root, joinlist);
/*
+ * Also, reduce any semijoins with unique inner rels to plain inner joins.
+ * Likewise, this can't be done until now for lack of needed info.
+ */
+ reduce_unique_semijoins(root);
+
+ /*
* Now distribute "placeholders" to base rels as needed. This has to be
* done after join removal because removal could change whether a
* placeholder is evaluable at a base rel.
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 5df68a22a60..e773c0f7eda 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -103,11 +103,12 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
* prototypes for plan/analyzejoins.c
*/
extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+extern void reduce_unique_semijoins(PlannerInfo *root);
extern bool query_supports_distinctness(Query *query);
extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
extern bool innerrel_is_unique(PlannerInfo *root,
- RelOptInfo *outerrel, RelOptInfo *innerrel,
- JoinType jointype, List *restrictlist);
+ Relids outerrelids, RelOptInfo *innerrel,
+ JoinType jointype, List *restrictlist, bool force_cache);
/*
* prototypes for plan/setrefs.c
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 87ff3657a34..d08b1e1ae53 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5663,3 +5663,31 @@ where exists (select 1 from tenk1 t3
Index Cond: (t2.hundred = t3.tenthous)
(18 rows)
+-- ... unless it actually is unique
+create table j3 as select unique1, tenthous from onek;
+vacuum analyze j3;
+create unique index on j3(unique1, tenthous);
+explain (verbose, costs off)
+select t1.unique1, t2.hundred
+from onek t1, tenk1 t2
+where exists (select 1 from j3
+ where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
+ and t1.unique1 < 1;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Nested Loop
+ Output: t1.unique1, t2.hundred
+ -> Nested Loop
+ Output: t1.unique1, j3.tenthous
+ -> Index Only Scan using onek_unique1 on public.onek t1
+ Output: t1.unique1
+ Index Cond: (t1.unique1 < 1)
+ -> Index Only Scan using j3_unique1_tenthous_idx on public.j3
+ Output: j3.unique1, j3.tenthous
+ Index Cond: (j3.unique1 = t1.unique1)
+ -> Index Only Scan using tenk1_hundred on public.tenk1 t2
+ Output: t2.hundred
+ Index Cond: (t2.hundred = j3.tenthous)
+(13 rows)
+
+drop table j3;
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index aa06d1d454e..f6b51a54c31 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -1673,7 +1673,7 @@ EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5;
QUERY PLAN
-----------------------------------------------------------------
Update on base_tbl b
- -> Hash Semi Join
+ -> Hash Join
Hash Cond: (b.a = r.a)
-> Seq Scan on base_tbl b
-> Hash
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index a36e29f462e..c3994ea531c 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1864,3 +1864,17 @@ from onek t1, tenk1 t2
where exists (select 1 from tenk1 t3
where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred)
and t1.unique1 < 1;
+
+-- ... unless it actually is unique
+create table j3 as select unique1, tenthous from onek;
+vacuum analyze j3;
+create unique index on j3(unique1, tenthous);
+
+explain (verbose, costs off)
+select t1.unique1, t2.hundred
+from onek t1, tenk1 t2
+where exists (select 1 from j3
+ where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
+ and t1.unique1 < 1;
+
+drop table j3;