Don't assume a subquery's output is unique if there's a SRF in its tlist.

While the x output of "select x from t group by x" can be presumed unique, this does not hold for "select x, generate_series(1,10) from t group by x", because we may expand the set-returning function after the grouping step. (Perhaps that should be re-thought; but considering all the other oddities involved with SRFs in targetlists, it seems unlikely we'll change it.) Put a check in query_is_distinct_for() so it's not fooled by such cases. Back-patch to all supported branches. David Rowley
author: Tom Lane <tgl@sss.pgh.pa.us> 2014-07-08 14:03:26 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2014-07-08 14:03:26 -0400
commit: fa21a760b2e16b68196da685f29033304b41d4bc (patch)
tree: 08b3c3272a3781d8add82698b65bfee1bd1a2243
parent: d9d125d92ae933d27d6522405b2c7a2002619615 (diff)
download: postgresql-fa21a760b2e16b68196da685f29033304b41d4bc.tar.gz
postgresql-fa21a760b2e16b68196da685f29033304b41d4bc.zip
3 files changed, 55 insertions, 0 deletions
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index df4e9247d79..53f085a1fbc 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1205,6 +1205,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	Assert(list_length(colnos) == list_length(opids));
 
 	/*
+	 * A set-returning function in the query's targetlist can result in
+	 * returning duplicate rows, if the SRF is evaluated after the
+	 * de-duplication step; so we play it safe and say "no" if there are any
+	 * SRFs.  (We could be certain that it's okay if SRFs appear only in the
+	 * specified columns, since those must be evaluated before de-duplication;
+	 * but it doesn't presently seem worth the complication to check that.)
+	 */
+	if (expression_returns_set((Node *) query->targetList))
+		return false;
+
+	/*
 	 * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
 	 * columns in the DISTINCT clause appear in colnos and operator semantics
 	 * match.
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index d25ff91fde7..83ce9384a81 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -742,3 +742,36 @@ select * from int4_tbl where
   0
 (1 row)
 
+--
+-- Check for incorrect optimization when IN subquery contains a SRF
+--
+set enable_hashjoin to 0;
+explain (verbose, costs off)
+select * from int4_tbl o where (f1, f1) in
+  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Nested Loop Semi Join
+   Output: o.f1
+   Join Filter: (o.f1 = "ANY_subquery".f1)
+   ->  Seq Scan on public.int4_tbl o
+         Output: o.f1
+   ->  Materialize
+         Output: "ANY_subquery".f1, "ANY_subquery".g
+         ->  Subquery Scan on "ANY_subquery"
+               Output: "ANY_subquery".f1, "ANY_subquery".g
+               Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
+               ->  HashAggregate
+                     Output: i.f1, (generate_series(1, 2) / 10)
+                     ->  Seq Scan on public.int4_tbl i
+                           Output: i.f1
+(14 rows)
+
+select * from int4_tbl o where (f1, f1) in
+  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+ f1 
+----
+  0
+(1 row)
+
+reset enable_hashjoin;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 326fd70e4a0..53e5f0c89a3 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -422,3 +422,14 @@ select * from int4_tbl where
 select * from int4_tbl where
   (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
   (select ten from tenk1 b);
+
+--
+-- Check for incorrect optimization when IN subquery contains a SRF
+--
+set enable_hashjoin to 0;
+explain (verbose, costs off)
+select * from int4_tbl o where (f1, f1) in
+  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+select * from int4_tbl o where (f1, f1) in
+  (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
+reset enable_hashjoin;
author	Tom Lane <tgl@sss.pgh.pa.us>	2014-07-08 14:03:26 -0400
committer	Tom Lane <tgl@sss.pgh.pa.us>	2014-07-08 14:03:26 -0400
commit	fa21a760b2e16b68196da685f29033304b41d4bc (patch)
tree	08b3c3272a3781d8add82698b65bfee1bd1a2243
parent	d9d125d92ae933d27d6522405b2c7a2002619615 (diff)
download	postgresql-fa21a760b2e16b68196da685f29033304b41d4bc.tar.gz postgresql-fa21a760b2e16b68196da685f29033304b41d4bc.zip