7 files changed, 181 insertions, 27 deletions
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 39e2ddda906..c130d2f17f2 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -126,13 +126,15 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 *
 	 * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
 	 * matter since the executor can make the equivalent optimization anyway;
-	 * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, if
-	 * the LHS covers all of the associated semijoin's min_lefthand, then it's
-	 * appropriate to set inner_unique because the path produced by
-	 * create_unique_path will be unique relative to the LHS.  (If we have an
-	 * LHS that's only part of the min_lefthand, that is *not* true.)  For
-	 * JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid letting that value escape
-	 * this module.
+	 * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
+	 * must be considering a semijoin whose inner side is not provably unique
+	 * (else reduce_unique_semijoins would've simplified it), so there's no
+	 * point in calling innerrel_is_unique.  However, if the LHS covers all of
+	 * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+	 * because the path produced by create_unique_path will be unique relative
+	 * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
+	 * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
+	 * letting that value escape this module.
 	 */
 	switch (jointype)
 	{
@@ -145,12 +147,20 @@ add_paths_to_joinrel(PlannerInfo *root,
 											   outerrel->relids);
 			break;
 		case JOIN_UNIQUE_OUTER:
-			extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
-													JOIN_INNER, restrictlist);
+			extra.inner_unique = innerrel_is_unique(root,
+													outerrel->relids,
+													innerrel,
+													JOIN_INNER,
+													restrictlist,
+													false);
 			break;
 		default:
-			extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
-													jointype, restrictlist);
+			extra.inner_unique = innerrel_is_unique(root,
+													outerrel->relids,
+													innerrel,
+													jointype,
+													restrictlist,
+													false);
 			break;
 	}
 
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 69b9be4d76b..34317fe7782 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -42,7 +42,7 @@ static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
 					List *clause_list);
 static Oid	distinct_col_search(int colno, List *colnos, List *opids);
 static bool is_innerrel_unique_for(PlannerInfo *root,
-					   RelOptInfo *outerrel,
+					   Relids outerrelids,
 					   RelOptInfo *innerrel,
 					   JoinType jointype,
 					   List *restrictlist);
@@ -496,6 +496,88 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
 
 
 /*
+ * reduce_unique_semijoins
+ *		Check for semijoins that can be simplified to plain inner joins
+ *		because the inner relation is provably unique for the join clauses.
+ *
+ * Ideally this would happen during reduce_outer_joins, but we don't have
+ * enough information at that point.
+ *
+ * To perform the strength reduction when applicable, we need only delete
+ * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
+ * bother fixing the join type attributed to it in the query jointree,
+ * since that won't be consulted again.)
+ */
+void
+reduce_unique_semijoins(PlannerInfo *root)
+{
+	ListCell   *lc;
+	ListCell   *next;
+
+	/*
+	 * Scan the join_info_list to find semijoins.  We can't use foreach
+	 * because we may delete the current cell.
+	 */
+	for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
+	{
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+		int			innerrelid;
+		RelOptInfo *innerrel;
+		Relids		joinrelids;
+		List	   *restrictlist;
+
+		next = lnext(lc);
+
+		/*
+		 * Must be a non-delaying semijoin to a single baserel, else we aren't
+		 * going to be able to do anything with it.  (It's probably not
+		 * possible for delay_upper_joins to be set on a semijoin, but we
+		 * might as well check.)
+		 */
+		if (sjinfo->jointype != JOIN_SEMI ||
+			sjinfo->delay_upper_joins)
+			continue;
+
+		if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
+			continue;
+
+		innerrel = find_base_rel(root, innerrelid);
+
+		/*
+		 * Before we trouble to run generate_join_implied_equalities, make a
+		 * quick check to eliminate cases in which we will surely be unable to
+		 * prove uniqueness of the innerrel.
+		 */
+		if (!rel_supports_distinctness(root, innerrel))
+			continue;
+
+		/* Compute the relid set for the join we are considering */
+		joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
+		/*
+		 * Since we're only considering a single-rel RHS, any join clauses it
+		 * has must be clauses linking it to the semijoin's min_lefthand.  We
+		 * can also consider EC-derived join clauses.
+		 */
+		restrictlist =
+			list_concat(generate_join_implied_equalities(root,
+														 joinrelids,
+														 sjinfo->min_lefthand,
+														 innerrel),
+						innerrel->joininfo);
+
+		/* Test whether the innerrel is unique for those clauses. */
+		if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel,
+								JOIN_SEMI, restrictlist, true))
+			continue;
+
+		/* OK, remove the SpecialJoinInfo from the list. */
+		root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
+	}
+}
+
+
+/*
  * rel_supports_distinctness
  *		Could the relation possibly be proven distinct on some set of columns?
  *
@@ -857,6 +939,10 @@ distinct_col_search(int colno, List *colnos, List *opids)
  *	  Check if the innerrel provably contains at most one tuple matching any
  *	  tuple from the outerrel, based on join clauses in the 'restrictlist'.
  *
+ * We need an actual RelOptInfo for the innerrel, but it's sufficient to
+ * identify the outerrel by its Relids.  This asymmetry supports use of this
+ * function before joinrels have been built.
+ *
  * The proof must be made based only on clauses that will be "joinquals"
  * rather than "otherquals" at execution.  For an inner join there's no
  * difference; but if the join is outer, we must ignore pushed-down quals,
@@ -867,13 +953,18 @@ distinct_col_search(int colno, List *colnos, List *opids)
  *
  * The actual proof is undertaken by is_innerrel_unique_for(); this function
  * is a frontend that is mainly concerned with caching the answers.
+ * In particular, the force_cache argument allows overriding the internal
+ * heuristic about whether to cache negative answers; it should be "true"
+ * if making an inquiry that is not part of the normal bottom-up join search
+ * sequence.
  */
 bool
 innerrel_is_unique(PlannerInfo *root,
-				   RelOptInfo *outerrel,
+				   Relids outerrelids,
 				   RelOptInfo *innerrel,
 				   JoinType jointype,
-				   List *restrictlist)
+				   List *restrictlist,
+				   bool force_cache)
 {
 	MemoryContext old_context;
 	ListCell   *lc;
@@ -900,7 +991,7 @@ innerrel_is_unique(PlannerInfo *root,
 	{
 		Relids		unique_for_rels = (Relids) lfirst(lc);
 
-		if (bms_is_subset(unique_for_rels, outerrel->relids))
+		if (bms_is_subset(unique_for_rels, outerrelids))
 			return true;		/* Success! */
 	}
 
@@ -912,12 +1003,12 @@ innerrel_is_unique(PlannerInfo *root,
 	{
 		Relids		unique_for_rels = (Relids) lfirst(lc);
 
-		if (bms_is_subset(outerrel->relids, unique_for_rels))
+		if (bms_is_subset(outerrelids, unique_for_rels))
 			return false;
 	}
 
 	/* No cached information, so try to make the proof. */
-	if (is_innerrel_unique_for(root, outerrel, innerrel,
+	if (is_innerrel_unique_for(root, outerrelids, innerrel,
 							   jointype, restrictlist))
 	{
 		/*
@@ -932,7 +1023,7 @@ innerrel_is_unique(PlannerInfo *root,
 		 */
 		old_context = MemoryContextSwitchTo(root->planner_cxt);
 		innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
-											bms_copy(outerrel->relids));
+											bms_copy(outerrelids));
 		MemoryContextSwitchTo(old_context);
 
 		return true;			/* Success! */
@@ -949,15 +1040,19 @@ innerrel_is_unique(PlannerInfo *root,
 		 * from smaller to larger.  It is useful in GEQO mode, where the
 		 * knowledge can be carried across successive planning attempts; and
 		 * it's likely to be useful when using join-search plugins, too. Hence
-		 * cache only when join_search_private is non-NULL.  (Yeah, that's a
-		 * hack, but it seems reasonable.)
+		 * cache when join_search_private is non-NULL.  (Yeah, that's a hack,
+		 * but it seems reasonable.)
+		 *
+		 * Also, allow callers to override that heuristic and force caching;
+		 * that's useful for reduce_unique_semijoins, which calls here before
+		 * the normal join search starts.
 		 */
-		if (root->join_search_private)
+		if (force_cache || root->join_search_private)
 		{
 			old_context = MemoryContextSwitchTo(root->planner_cxt);
 			innerrel->non_unique_for_rels =
 				lappend(innerrel->non_unique_for_rels,
-						bms_copy(outerrel->relids));
+						bms_copy(outerrelids));
 			MemoryContextSwitchTo(old_context);
 		}
 
@@ -972,7 +1067,7 @@ innerrel_is_unique(PlannerInfo *root,
  */
 static bool
 is_innerrel_unique_for(PlannerInfo *root,
-					   RelOptInfo *outerrel,
+					   Relids outerrelids,
 					   RelOptInfo *innerrel,
 					   JoinType jointype,
 					   List *restrictlist)
@@ -1007,7 +1102,7 @@ is_innerrel_unique_for(PlannerInfo *root,
 		 * Check if clause has the form "outer op inner" or "inner op outer",
 		 * and if so mark which side is inner.
 		 */
-		if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+		if (!clause_sides_match_join(restrictinfo, outerrelids,
 									 innerrel->relids))
 			continue;			/* no good for these input relations */
 
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index ef0de3fb1a9..74de3b818f7 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -193,6 +193,12 @@ query_planner(PlannerInfo *root, List *tlist,
 	joinlist = remove_useless_joins(root, joinlist);
 
 	/*
+	 * Also, reduce any semijoins with unique inner rels to plain inner joins.
+	 * Likewise, this can't be done until now for lack of needed info.
+	 */
+	reduce_unique_semijoins(root);
+
+	/*
 	 * Now distribute "placeholders" to base rels as needed.  This has to be
 	 * done after join removal because removal could change whether a
 	 * placeholder is evaluable at a base rel.
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 5df68a22a60..e773c0f7eda 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -103,11 +103,12 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
  * prototypes for plan/analyzejoins.c
  */
 extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+extern void reduce_unique_semijoins(PlannerInfo *root);
 extern bool query_supports_distinctness(Query *query);
 extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
 extern bool innerrel_is_unique(PlannerInfo *root,
-				   RelOptInfo *outerrel, RelOptInfo *innerrel,
-				   JoinType jointype, List *restrictlist);
+				   Relids outerrelids, RelOptInfo *innerrel,
+				   JoinType jointype, List *restrictlist, bool force_cache);
 
 /*
  * prototypes for plan/setrefs.c
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 87ff3657a34..d08b1e1ae53 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5663,3 +5663,31 @@ where exists (select 1 from tenk1 t3
          Index Cond: (t2.hundred = t3.tenthous)
 (18 rows)
 
+-- ... unless it actually is unique
+create table j3 as select unique1, tenthous from onek;
+vacuum analyze j3;
+create unique index on j3(unique1, tenthous);
+explain (verbose, costs off)
+select t1.unique1, t2.hundred
+from onek t1, tenk1 t2
+where exists (select 1 from j3
+              where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
+      and t1.unique1 < 1;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Nested Loop
+   Output: t1.unique1, t2.hundred
+   ->  Nested Loop
+         Output: t1.unique1, j3.tenthous
+         ->  Index Only Scan using onek_unique1 on public.onek t1
+               Output: t1.unique1
+               Index Cond: (t1.unique1 < 1)
+         ->  Index Only Scan using j3_unique1_tenthous_idx on public.j3
+               Output: j3.unique1, j3.tenthous
+               Index Cond: (j3.unique1 = t1.unique1)
+   ->  Index Only Scan using tenk1_hundred on public.tenk1 t2
+         Output: t2.hundred
+         Index Cond: (t2.hundred = j3.tenthous)
+(13 rows)
+
+drop table j3;
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index aa06d1d454e..f6b51a54c31 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -1673,7 +1673,7 @@ EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5;
                            QUERY PLAN                            
 -----------------------------------------------------------------
  Update on base_tbl b
-   ->  Hash Semi Join
+   ->  Hash Join
          Hash Cond: (b.a = r.a)
          ->  Seq Scan on base_tbl b
          ->  Hash
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index a36e29f462e..c3994ea531c 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1864,3 +1864,17 @@ from onek t1, tenk1 t2
 where exists (select 1 from tenk1 t3
               where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred)
       and t1.unique1 < 1;
+
+-- ... unless it actually is unique
+create table j3 as select unique1, tenthous from onek;
+vacuum analyze j3;
+create unique index on j3(unique1, tenthous);
+
+explain (verbose, costs off)
+select t1.unique1, t2.hundred
+from onek t1, tenk1 t2
+where exists (select 1 from j3
+              where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
+      and t1.unique1 < 1;
+
+drop table j3;