diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 30 | ||||
-rw-r--r-- | src/test/regress/expected/join.out | 50 |
2 files changed, 42 insertions, 38 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index cdb18d978db..f062c6b9f1e 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2214,6 +2214,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, Cost inner_run_cost = workspace->inner_run_cost; Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost; double outer_matched_rows; + double outer_unmatched_rows; Selectivity inner_scan_frac; /* @@ -2226,6 +2227,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, * least 1, no such clamp is needed now.) */ outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac); + outer_unmatched_rows = outer_path_rows - outer_matched_rows; inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0); /* @@ -2269,7 +2271,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, * of a nonempty scan. We consider that these are all rescans, * since we used inner_run_cost once already. */ - run_cost += (outer_path_rows - outer_matched_rows) * + run_cost += outer_unmatched_rows * inner_rescan_run_cost / inner_path_rows; /* @@ -2287,20 +2289,28 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, * difficult to estimate whether that will happen (and it could * not happen if there are any unmatched outer rows!), so be * conservative and always charge the whole first-scan cost once. + * We consider this charge to correspond to the first unmatched + * outer row, unless there isn't one in our estimate, in which + * case blame it on the first matched row. */ + + /* First, count all unmatched join tuples as being processed */ + ntuples += outer_unmatched_rows * inner_path_rows; + + /* Now add the forced full scan, and decrement appropriate count */ run_cost += inner_run_cost; + if (outer_unmatched_rows >= 1) + outer_unmatched_rows -= 1; + else + outer_matched_rows -= 1; /* Add inner run cost for additional outer tuples having matches */ - if (outer_matched_rows > 1) - run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac; - - /* Add inner run cost for unmatched outer tuples */ - run_cost += (outer_path_rows - outer_matched_rows) * - inner_rescan_run_cost; + if (outer_matched_rows > 0) + run_cost += outer_matched_rows * inner_rescan_run_cost * inner_scan_frac; - /* And count the unmatched join tuples as being processed */ - ntuples += (outer_path_rows - outer_matched_rows) * - inner_path_rows; + /* Add inner run cost for additional unmatched outer tuples */ + if (outer_unmatched_rows > 0) + run_cost += outer_unmatched_rows * inner_rescan_run_cost; } } else diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index d08b1e1ae53..9cad4e69924 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5476,48 +5476,44 @@ select * from j1 natural join j2; explain (verbose, costs off) select * from j1 inner join (select distinct id from j3) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------------------ + QUERY PLAN +----------------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> Seq Scan on public.j1 - Output: j1.id - -> Materialize + -> Unique Output: j3.id - -> Unique + -> Sort Output: j3.id - -> Sort + Sort Key: j3.id + -> Seq Scan on public.j3 Output: j3.id - Sort Key: j3.id - -> Seq Scan on public.j3 - Output: j3.id -(15 rows) + -> Seq Scan on public.j1 + Output: j1.id +(13 rows) -- ensure group by clause allows the inner to become unique explain (verbose, costs off) select * from j1 inner join (select id from j3 group by id) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------------------ + QUERY PLAN +----------------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> Seq Scan on public.j1 - Output: j1.id - -> Materialize + -> Group Output: j3.id - -> Group + Group Key: j3.id + -> Sort Output: j3.id - Group Key: j3.id - -> Sort + Sort Key: j3.id + -> Seq Scan on public.j3 Output: j3.id - Sort Key: j3.id - -> Seq Scan on public.j3 - Output: j3.id -(16 rows) + -> Seq Scan on public.j1 + Output: j1.id +(14 rows) drop table j1; drop table j2; @@ -5558,13 +5554,11 @@ inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; Output: j1.id1, j1.id2, j2.id1, j2.id2 Inner Unique: true Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2)) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 -> Seq Scan on public.j1 Output: j1.id1, j1.id2 - -> Materialize - Output: j2.id1, j2.id2 - -> Seq Scan on public.j2 - Output: j2.id1, j2.id2 -(10 rows) +(8 rows) -- ensure we don't detect the join to be unique when quals are not part of the -- join condition |