2 files changed, 42 insertions, 38 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index cdb18d978db..f062c6b9f1e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2214,6 +2214,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 		Cost		inner_run_cost = workspace->inner_run_cost;
 		Cost		inner_rescan_run_cost = workspace->inner_rescan_run_cost;
 		double		outer_matched_rows;
+		double		outer_unmatched_rows;
 		Selectivity inner_scan_frac;
 
 		/*
@@ -2226,6 +2227,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 		 * least 1, no such clamp is needed now.)
 		 */
 		outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
+		outer_unmatched_rows = outer_path_rows - outer_matched_rows;
 		inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
 
 		/*
@@ -2269,7 +2271,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 			 * of a nonempty scan.  We consider that these are all rescans,
 			 * since we used inner_run_cost once already.
 			 */
-			run_cost += (outer_path_rows - outer_matched_rows) *
+			run_cost += outer_unmatched_rows *
 				inner_rescan_run_cost / inner_path_rows;
 
 			/*
@@ -2287,20 +2289,28 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 			 * difficult to estimate whether that will happen (and it could
 			 * not happen if there are any unmatched outer rows!), so be
 			 * conservative and always charge the whole first-scan cost once.
+			 * We consider this charge to correspond to the first unmatched
+			 * outer row, unless there isn't one in our estimate, in which
+			 * case blame it on the first matched row.
 			 */
+
+			/* First, count all unmatched join tuples as being processed */
+			ntuples += outer_unmatched_rows * inner_path_rows;
+
+			/* Now add the forced full scan, and decrement appropriate count */
 			run_cost += inner_run_cost;
+			if (outer_unmatched_rows >= 1)
+				outer_unmatched_rows -= 1;
+			else
+				outer_matched_rows -= 1;
 
 			/* Add inner run cost for additional outer tuples having matches */
-			if (outer_matched_rows > 1)
-				run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
-
-			/* Add inner run cost for unmatched outer tuples */
-			run_cost += (outer_path_rows - outer_matched_rows) *
-				inner_rescan_run_cost;
+			if (outer_matched_rows > 0)
+				run_cost += outer_matched_rows * inner_rescan_run_cost * inner_scan_frac;
 
-			/* And count the unmatched join tuples as being processed */
-			ntuples += (outer_path_rows - outer_matched_rows) *
-				inner_path_rows;
+			/* Add inner run cost for additional unmatched outer tuples */
+			if (outer_unmatched_rows > 0)
+				run_cost += outer_unmatched_rows * inner_rescan_run_cost;
 		}
 	}
 	else
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d08b1e1ae53..9cad4e69924 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5476,48 +5476,44 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-                  QUERY PLAN                   
------------------------------------------------
+               QUERY PLAN                
+-----------------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Seq Scan on public.j1
-         Output: j1.id
-   ->  Materialize
+   ->  Unique
          Output: j3.id
-         ->  Unique
+         ->  Sort
                Output: j3.id
-               ->  Sort
+               Sort Key: j3.id
+               ->  Seq Scan on public.j3
                      Output: j3.id
-                     Sort Key: j3.id
-                     ->  Seq Scan on public.j3
-                           Output: j3.id
-(15 rows)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+(13 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-                  QUERY PLAN                   
------------------------------------------------
+               QUERY PLAN                
+-----------------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Seq Scan on public.j1
-         Output: j1.id
-   ->  Materialize
+   ->  Group
          Output: j3.id
-         ->  Group
+         Group Key: j3.id
+         ->  Sort
                Output: j3.id
-               Group Key: j3.id
-               ->  Sort
+               Sort Key: j3.id
+               ->  Seq Scan on public.j3
                      Output: j3.id
-                     Sort Key: j3.id
-                     ->  Seq Scan on public.j3
-                           Output: j3.id
-(16 rows)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+(14 rows)
 
 drop table j1;
 drop table j2;
@@ -5558,13 +5554,11 @@ inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
    Output: j1.id1, j1.id2, j2.id1, j2.id2
    Inner Unique: true
    Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2))
+   ->  Seq Scan on public.j2
+         Output: j2.id1, j2.id2
    ->  Seq Scan on public.j1
          Output: j1.id1, j1.id2
-   ->  Materialize
-         Output: j2.id1, j2.id2
-         ->  Seq Scan on public.j2
-               Output: j2.id1, j2.id2
-(10 rows)
+(8 rows)
 
 -- ensure we don't detect the join to be unique when quals are not part of the
 -- join condition