aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlexander Korotkov <akorotkov@postgresql.org>2023-11-04 03:30:18 +0200
committerAlexander Korotkov <akorotkov@postgresql.org>2023-11-04 03:30:18 +0200
commitec63622c03ff7be36d32c08aaa7d853856451488 (patch)
treea43f0a94c18fdf23a5890348493b6b3ab484c450 /src
parenta237a07d5351d7bb0afc6e0c41410d52915e47c8 (diff)
downloadpostgresql-ec63622c03ff7be36d32c08aaa7d853856451488.tar.gz
postgresql-ec63622c03ff7be36d32c08aaa7d853856451488.zip
Fix usage of the parse tree for estimate_num_groups() in set operations
recurse_set_operations() uses the parse tree for the group number estimation, because of the "varno 0" hack. At the same time 2489d76c49 made root->parse and corresponding parent_root->simple_rte_array[]->subquery distinct copies of the parse tree, while d3d55ce571 introduced self-join removal replacing relid of removed relation only in one of the copies. The present commit fixes this bug by making recurse_set_operations() call estimate_num_groups() with the copy of the parse tree processed by self-join removal. In future, we may think about maintaining just one copy of the parse tree and/or keeping removed relids as aliases. Reported-by: Zuming Jiang Bug: #18170 Discussion: https://postgr.es/m/flat/18170-f1d17bf9a0d58b24%40postgresql.org Author: Richard Guo, Alexander Korotkov Reviewed-by: Andrei Lepikhov
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/prep/prepunion.c7
-rw-r--r--src/test/regress/expected/join.out17
-rw-r--r--src/test/regress/sql/join.sql7
3 files changed, 29 insertions, 2 deletions
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 0c68ec011be..8eaa734916d 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -324,7 +324,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
* anyway; otherwise do statistical estimation.
*
* XXX you don't really want to know about this: we do the estimation
- * using the subquery's original targetlist expressions, not the
+ * using the subroot->parse's original targetlist expressions, not the
* subroot->processed_tlist which might seem more appropriate. The
* reason is that if the subquery is itself a setop, it may return a
* processed_tlist containing "varno 0" Vars generated by
@@ -332,6 +332,9 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
* mightily. We ought to get rid of the "varno 0" hack, but that
* requires a redesign of the parsetree representation of setops, so
* that there can be an RTE corresponding to each setop's output.
+ * Note, we use this not subquery's targetlist but subroot->parse's
+ * targetlist, because it was revised by self-join removal. subquery's
+ * targetlist might contain the references to the removed relids.
*/
if (pNumGroups)
{
@@ -341,7 +344,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
*pNumGroups = subpath->rows;
else
*pNumGroups = estimate_num_groups(subroot,
- get_tlist_exprs(subquery->targetList, false),
+ get_tlist_exprs(subroot->parse->targetList, false),
subpath->rows,
NULL,
NULL);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 446959e3c5f..892ea5f1702 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6759,6 +6759,23 @@ WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id;
Filter: ((id IS NOT NULL) AND ((id * id) IS NOT NULL))
(3 rows)
+-- Check the usage of a parse tree by the set operations (bug #18170)
+explain (costs off)
+SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id
+WHERE c2.id IS NOT NULL
+EXCEPT ALL
+SELECT c3.code FROM emp1 c3;
+ QUERY PLAN
+----------------------------------------------
+ HashSetOp Except All
+ -> Append
+ -> Subquery Scan on "*SELECT* 1"
+ -> Seq Scan on emp1 c2
+ Filter: (id IS NOT NULL)
+ -> Subquery Scan on "*SELECT* 2"
+ -> Seq Scan on emp1 c3
+(7 rows)
+
-- We can remove the join even if we find the join can't duplicate rows and
-- the base quals of each side are different. In the following case we end up
-- moving quals over to s1 to make it so it can't match any rows.
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 66dd03dd88b..559c36dc074 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2573,6 +2573,13 @@ explain (costs off)
SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id;
+-- Check the usage of a parse tree by the set operations (bug #18170)
+explain (costs off)
+SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id
+WHERE c2.id IS NOT NULL
+EXCEPT ALL
+SELECT c3.code FROM emp1 c3;
+
-- We can remove the join even if we find the join can't duplicate rows and
-- the base quals of each side are different. In the following case we end up
-- moving quals over to s1 to make it so it can't match any rows.