3 files changed, 257 insertions, 91 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 7e3f166d152..08acb1d01be 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -69,13 +69,14 @@ static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					 RangeTblEntry *rte);
 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
-						  bool *differentTypes);
+						  bool *unsafeColumns);
 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
-					  bool *differentTypes);
+					  bool *unsafeColumns);
+static void check_output_expressions(Query *subquery, bool *unsafeColumns);
 static void compare_tlist_datatypes(List *tlist, List *colTypes,
-						bool *differentTypes);
+						bool *unsafeColumns);
 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
-					  bool *differentTypes);
+					  bool *unsafeColumns);
 static void subquery_push_qual(Query *subquery,
 				   RangeTblEntry *rte, Index rti, Node *qual);
 static void recurse_push_qual(Node *setOp, Query *topquery,
@@ -705,7 +706,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 {
 	Query	   *parse = root->parse;
 	Query	   *subquery = rte->subquery;
-	bool	   *differentTypes;
+	bool	   *unsafeColumns;
 	double		tuple_fraction;
 	PlannerInfo *subroot;
 	List	   *pathkeys;
@@ -717,8 +718,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	 */
 	subquery = copyObject(subquery);
 
-	/* We need a workspace for keeping track of set-op type coercions */
-	differentTypes = (bool *)
+	/*
+	 * We need a workspace for keeping track of unsafe-to-reference columns.
+	 * unsafeColumns[i] is set TRUE if we've found that output column i of the
+	 * subquery is unsafe to use in a pushed-down qual.
+	 */
+	unsafeColumns = (bool *)
 		palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
 
 	/*
@@ -742,7 +747,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	 * push down a pushable qual, because it'd result in a worse plan?
 	 */
 	if (rel->baserestrictinfo != NIL &&
-		subquery_is_pushdown_safe(subquery, subquery, differentTypes))
+		subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
 	{
 		/* OK to consider pushing down individual quals */
 		List	   *upperrestrictlist = NIL;
@@ -754,7 +759,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 			Node	   *clause = (Node *) rinfo->clause;
 
 			if (!rinfo->pseudoconstant &&
-				qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
+				qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
 			{
 				/* Push it down */
 				subquery_push_qual(subquery, rte, rti, clause);
@@ -768,7 +773,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		rel->baserestrictinfo = upperrestrictlist;
 	}
 
-	pfree(differentTypes);
+	pfree(unsafeColumns);
 
 	/*
 	 * We can safely pass the outer tuple_fraction down to the subquery if the
@@ -1169,17 +1174,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
  * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
  * quals into it, because that could change the results.
  *
- * 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
- * push quals into each component query, but the quals can only reference
- * subquery columns that suffer no type coercions in the set operation.
- * Otherwise there are possible semantic gotchas.  So, we check the
- * component queries to see if any of them have different output types;
- * differentTypes[k] is set true if column k has different type in any
- * component.
+ * In addition, we make several checks on the subquery's output columns
+ * to see if it is safe to reference them in pushed-down quals.  If output
+ * column k is found to be unsafe to reference, we set unsafeColumns[k] to
+ * TRUE, but we don't reject the subquery overall since column k might
+ * not be referenced by some/all quals.  The unsafeColumns[] array will be
+ * consulted later by qual_is_pushdown_safe().	It's better to do it this
+ * way than to make the checks directly in qual_is_pushdown_safe(), because
+ * when the subquery involves set operations we have to check the output
+ * expressions in each arm of the set op.
  */
 static bool
 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
-						  bool *differentTypes)
+						  bool *unsafeColumns)
 {
 	SetOperationStmt *topop;
 
@@ -1191,13 +1198,22 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
 	if (subquery->hasWindowFuncs)
 		return false;
 
+	/*
+	 * If we're at a leaf query, check for unsafe expressions in its target
+	 * list, and mark any unsafe ones in unsafeColumns[].  (Non-leaf nodes in
+	 * setop trees have only simple Vars in their tlists, so no need to check
+	 * them.)
+	 */
+	if (subquery->setOperations == NULL)
+		check_output_expressions(subquery, unsafeColumns);
+
 	/* Are we at top level, or looking at a setop component? */
 	if (subquery == topquery)
 	{
 		/* Top level, so check any component queries */
 		if (subquery->setOperations != NULL)
 			if (!recurse_pushdown_safe(subquery->setOperations, topquery,
-									   differentTypes))
+									   unsafeColumns))
 				return false;
 	}
 	else
@@ -1210,7 +1226,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
 		Assert(topop && IsA(topop, SetOperationStmt));
 		compare_tlist_datatypes(subquery->targetList,
 								topop->colTypes,
-								differentTypes);
+								unsafeColumns);
 	}
 	return true;
 }
@@ -1220,7 +1236,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
  */
 static bool
 recurse_pushdown_safe(Node *setOp, Query *topquery,
-					  bool *differentTypes)
+					  bool *unsafeColumns)
 {
 	if (IsA(setOp, RangeTblRef))
 	{
@@ -1229,19 +1245,19 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
 		Query	   *subquery = rte->subquery;
 
 		Assert(subquery != NULL);
-		return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
+		return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
 	}
 	else if (IsA(setOp, SetOperationStmt))
 	{
 		SetOperationStmt *op = (SetOperationStmt *) setOp;
 
-		/* EXCEPT is no good */
+		/* EXCEPT is no good (point 3 for subquery_is_pushdown_safe) */
 		if (op->op == SETOP_EXCEPT)
 			return false;
 		/* Else recurse */
-		if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
+		if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
 			return false;
-		if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
+		if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
 			return false;
 	}
 	else
@@ -1253,17 +1269,92 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
 }
 
 /*
- * Compare tlist's datatypes against the list of set-operation result types.
- * For any items that are different, mark the appropriate element of
- * differentTypes[] to show that this column will have type conversions.
+ * check_output_expressions - check subquery's output expressions for safety
+ *
+ * There are several cases in which it's unsafe to push down an upper-level
+ * qual if it references a particular output column of a subquery.	We check
+ * each output column of the subquery and set unsafeColumns[k] to TRUE if
+ * that column is unsafe for a pushed-down qual to reference.  The conditions
+ * checked here are:
+ *
+ * 1. We must not push down any quals that refer to subselect outputs that
+ * return sets, else we'd introduce functions-returning-sets into the
+ * subquery's WHERE/HAVING quals.
+ *
+ * 2. We must not push down any quals that refer to subselect outputs that
+ * contain volatile functions, for fear of introducing strange results due
+ * to multiple evaluation of a volatile function.
+ *
+ * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
+ * refer to non-DISTINCT output columns, because that could change the set
+ * of rows returned.  (This condition is vacuous for DISTINCT, because then
+ * there are no non-DISTINCT output columns, so we needn't check.  But note
+ * we are assuming that the qual can't distinguish values that the DISTINCT
+ * operator sees as equal.	This is a bit shaky but we have no way to test
+ * for the case, and it's unlikely enough that we shouldn't refuse the
+ * optimization just because it could theoretically happen.)
+ */
+static void
+check_output_expressions(Query *subquery, bool *unsafeColumns)
+{
+	ListCell   *lc;
+
+	foreach(lc, subquery->targetList)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+		if (tle->resjunk)
+			continue;			/* ignore resjunk columns */
+
+		/* We need not check further if output col is already known unsafe */
+		if (unsafeColumns[tle->resno])
+			continue;
+
+		/* Functions returning sets are unsafe (point 1) */
+		if (expression_returns_set((Node *) tle->expr))
+		{
+			unsafeColumns[tle->resno] = true;
+			continue;
+		}
+
+		/* Volatile functions are unsafe (point 2) */
+		if (contain_volatile_functions((Node *) tle->expr))
+		{
+			unsafeColumns[tle->resno] = true;
+			continue;
+		}
+
+		/* If subquery uses DISTINCT ON, check point 3 */
+		if (subquery->hasDistinctOn &&
+			!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
+		{
+			/* non-DISTINCT column, so mark it unsafe */
+			unsafeColumns[tle->resno] = true;
+			continue;
+		}
+	}
+}
+
+/*
+ * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
+ * push quals into each component query, but the quals can only reference
+ * subquery columns that suffer no type coercions in the set operation.
+ * Otherwise there are possible semantic gotchas.  So, we check the
+ * component queries to see if any of them have output types different from
+ * the top-level setop outputs.  unsafeColumns[k] is set true if column k
+ * has different type in any component.
  *
  * We don't have to care about typmods here: the only allowed difference
  * between set-op input and output typmods is input is a specific typmod
  * and output is -1, and that does not require a coercion.
+ *
+ * tlist is a subquery tlist.
+ * colTypes is an OID list of the top-level setop's output column types.
+ * unsafeColumns[] is the result array.
  */
 static void
 compare_tlist_datatypes(List *tlist, List *colTypes,
-						bool *differentTypes)
+						bool *unsafeColumns)
 {
 	ListCell   *l;
 	ListCell   *colType = list_head(colTypes);
@@ -1277,7 +1368,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
 		if (colType == NULL)
 			elog(ERROR, "wrong number of tlist entries");
 		if (exprType((Node *) tle->expr) != lfirst_oid(colType))
-			differentTypes[tle->resno] = true;
+			unsafeColumns[tle->resno] = true;
 		colType = lnext(colType);
 	}
 	if (colType != NULL)
@@ -1300,34 +1391,15 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
  * (since there is no easy way to name that within the subquery itself).
  *
  * 3. The qual must not refer to any subquery output columns that were
- * found to have inconsistent types across a set operation tree by
- * subquery_is_pushdown_safe().
- *
- * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
- * refer to non-DISTINCT output columns, because that could change the set
- * of rows returned.  (This condition is vacuous for DISTINCT, because then
- * there are no non-DISTINCT output columns, so we needn't check.  But note
- * we are assuming that the qual can't distinguish values that the DISTINCT
- * operator sees as equal.	This is a bit shaky but we have no way to test
- * for the case, and it's unlikely enough that we shouldn't refuse the
- * optimization just because it could theoretically happen.)
- *
- * 5. We must not push down any quals that refer to subselect outputs that
- * return sets, else we'd introduce functions-returning-sets into the
- * subquery's WHERE/HAVING quals.
- *
- * 6. We must not push down any quals that refer to subselect outputs that
- * contain volatile functions, for fear of introducing strange results due
- * to multiple evaluation of a volatile function.
+ * found to be unsafe to reference by subquery_is_pushdown_safe().
  */
 static bool
 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
-					  bool *differentTypes)
+					  bool *unsafeColumns)
 {
 	bool		safe = true;
 	List	   *vars;
 	ListCell   *vl;
-	Bitmapset  *tested = NULL;
 
 	/* Refuse subselects (point 1) */
 	if (contain_subplans(qual))
@@ -1350,7 +1422,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
 	foreach(vl, vars)
 	{
 		Var		   *var = (Var *) lfirst(vl);
-		TargetEntry *tle;
 
 		/*
 		 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
@@ -1366,6 +1437,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
 		}
 
 		Assert(var->varno == rti);
+		Assert(var->varattno >= 0);
 
 		/* Check point 2 */
 		if (var->varattno == 0)
@@ -1374,45 +1446,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
 			break;
 		}
 
-		/*
-		 * We use a bitmapset to avoid testing the same attno more than once.
-		 * (NB: this only works because subquery outputs can't have negative
-		 * attnos.)
-		 */
-		if (bms_is_member(var->varattno, tested))
-			continue;
-		tested = bms_add_member(tested, var->varattno);
-
 		/* Check point 3 */
-		if (differentTypes[var->varattno])
-		{
-			safe = false;
-			break;
-		}
-
-		/* Must find the tlist element referenced by the Var */
-		tle = get_tle_by_resno(subquery->targetList, var->varattno);
-		Assert(tle != NULL);
-		Assert(!tle->resjunk);
-
-		/* If subquery uses DISTINCT ON, check point 4 */
-		if (subquery->hasDistinctOn &&
-			!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
-		{
-			/* non-DISTINCT column, so fail */
-			safe = false;
-			break;
-		}
-
-		/* Refuse functions returning sets (point 5) */
-		if (expression_returns_set((Node *) tle->expr))
-		{
-			safe = false;
-			break;
-		}
-
-		/* Refuse volatile functions (point 6) */
-		if (contain_volatile_functions((Node *) tle->expr))
+		if (unsafeColumns[var->varattno])
 		{
 			safe = false;
 			break;
@@ -1420,7 +1455,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
 	}
 
 	list_free(vars);
-	bms_free(tested);
 
 	return safe;
 }
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 8db4b6d02a2..2c30e485c82 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -517,3 +517,93 @@ explain (costs off)
          ->  Seq Scan on tenk1 b
 (3 rows)
 
+-- Test that we push quals into UNION sub-selects only when it's safe
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, 2 AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+                 QUERY PLAN                 
+--------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: (1), (2)
+         ->  Append
+               ->  Result
+               ->  Result
+                     One-Time Filter: false
+(7 rows)
+
+SELECT * FROM
+  (SELECT 1 AS t, 2 AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+ t | x 
+---+---
+ 1 | 2
+(1 row)
+
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, generate_series(1,10) AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Sort
+   Sort Key: ss.x
+   ->  Subquery Scan on ss
+         Filter: (ss.x < 4)
+         ->  Unique
+               ->  Sort
+                     Sort Key: (1), (generate_series(1, 10))
+                     ->  Append
+                           ->  Result
+                           ->  Result
+(10 rows)
+
+SELECT * FROM
+  (SELECT 1 AS t, generate_series(1,10) AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+ t | x 
+---+---
+ 1 | 1
+ 1 | 2
+ 1 | 3
+(3 rows)
+
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, (random()*3)::int AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
+ Subquery Scan on ss
+   Filter: (ss.x > 3)
+   ->  Unique
+         ->  Sort
+               Sort Key: (1), (((random() * 3::double precision))::integer)
+               ->  Append
+                     ->  Result
+                     ->  Result
+(8 rows)
+
+SELECT * FROM
+  (SELECT 1 AS t, (random()*3)::int AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
+ t | x 
+---+---
+ 2 | 4
+(1 row)
+
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index 752ae470f0d..f3c9d113827 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -207,3 +207,45 @@ explain (costs off)
    UNION ALL
    SELECT 2 AS t, * FROM tenk1 b) c
  WHERE t = 2;
+
+-- Test that we push quals into UNION sub-selects only when it's safe
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, 2 AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+
+SELECT * FROM
+  (SELECT 1 AS t, 2 AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, generate_series(1,10) AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+
+SELECT * FROM
+  (SELECT 1 AS t, generate_series(1,10) AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, (random()*3)::int AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
+
+SELECT * FROM
+  (SELECT 1 AS t, (random()*3)::int AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;