aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/path/allpaths.c216
-rw-r--r--src/test/regress/expected/union.out90
-rw-r--r--src/test/regress/sql/union.sql42
3 files changed, 257 insertions, 91 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 7e3f166d152..08acb1d01be 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -69,13 +69,14 @@ static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
- bool *differentTypes);
+ bool *unsafeColumns);
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
- bool *differentTypes);
+ bool *unsafeColumns);
+static void check_output_expressions(Query *subquery, bool *unsafeColumns);
static void compare_tlist_datatypes(List *tlist, List *colTypes,
- bool *differentTypes);
+ bool *unsafeColumns);
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
- bool *differentTypes);
+ bool *unsafeColumns);
static void subquery_push_qual(Query *subquery,
RangeTblEntry *rte, Index rti, Node *qual);
static void recurse_push_qual(Node *setOp, Query *topquery,
@@ -705,7 +706,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
{
Query *parse = root->parse;
Query *subquery = rte->subquery;
- bool *differentTypes;
+ bool *unsafeColumns;
double tuple_fraction;
PlannerInfo *subroot;
List *pathkeys;
@@ -717,8 +718,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
*/
subquery = copyObject(subquery);
- /* We need a workspace for keeping track of set-op type coercions */
- differentTypes = (bool *)
+ /*
+ * We need a workspace for keeping track of unsafe-to-reference columns.
+ * unsafeColumns[i] is set TRUE if we've found that output column i of the
+ * subquery is unsafe to use in a pushed-down qual.
+ */
+ unsafeColumns = (bool *)
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
/*
@@ -742,7 +747,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
* push down a pushable qual, because it'd result in a worse plan?
*/
if (rel->baserestrictinfo != NIL &&
- subquery_is_pushdown_safe(subquery, subquery, differentTypes))
+ subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
{
/* OK to consider pushing down individual quals */
List *upperrestrictlist = NIL;
@@ -754,7 +759,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Node *clause = (Node *) rinfo->clause;
if (!rinfo->pseudoconstant &&
- qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
+ qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
{
/* Push it down */
subquery_push_qual(subquery, rte, rti, clause);
@@ -768,7 +773,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
rel->baserestrictinfo = upperrestrictlist;
}
- pfree(differentTypes);
+ pfree(unsafeColumns);
/*
* We can safely pass the outer tuple_fraction down to the subquery if the
@@ -1169,17 +1174,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
* quals into it, because that could change the results.
*
- * 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
- * push quals into each component query, but the quals can only reference
- * subquery columns that suffer no type coercions in the set operation.
- * Otherwise there are possible semantic gotchas. So, we check the
- * component queries to see if any of them have different output types;
- * differentTypes[k] is set true if column k has different type in any
- * component.
+ * In addition, we make several checks on the subquery's output columns
+ * to see if it is safe to reference them in pushed-down quals. If output
+ * column k is found to be unsafe to reference, we set unsafeColumns[k] to
+ * TRUE, but we don't reject the subquery overall since column k might
+ * not be referenced by some/all quals. The unsafeColumns[] array will be
+ * consulted later by qual_is_pushdown_safe(). It's better to do it this
+ * way than to make the checks directly in qual_is_pushdown_safe(), because
+ * when the subquery involves set operations we have to check the output
+ * expressions in each arm of the set op.
*/
static bool
subquery_is_pushdown_safe(Query *subquery, Query *topquery,
- bool *differentTypes)
+ bool *unsafeColumns)
{
SetOperationStmt *topop;
@@ -1191,13 +1198,22 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
if (subquery->hasWindowFuncs)
return false;
+ /*
+ * If we're at a leaf query, check for unsafe expressions in its target
+ * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
+ * setop trees have only simple Vars in their tlists, so no need to check
+ * them.)
+ */
+ if (subquery->setOperations == NULL)
+ check_output_expressions(subquery, unsafeColumns);
+
/* Are we at top level, or looking at a setop component? */
if (subquery == topquery)
{
/* Top level, so check any component queries */
if (subquery->setOperations != NULL)
if (!recurse_pushdown_safe(subquery->setOperations, topquery,
- differentTypes))
+ unsafeColumns))
return false;
}
else
@@ -1210,7 +1226,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
Assert(topop && IsA(topop, SetOperationStmt));
compare_tlist_datatypes(subquery->targetList,
topop->colTypes,
- differentTypes);
+ unsafeColumns);
}
return true;
}
@@ -1220,7 +1236,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
*/
static bool
recurse_pushdown_safe(Node *setOp, Query *topquery,
- bool *differentTypes)
+ bool *unsafeColumns)
{
if (IsA(setOp, RangeTblRef))
{
@@ -1229,19 +1245,19 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
Query *subquery = rte->subquery;
Assert(subquery != NULL);
- return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
+ return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
}
else if (IsA(setOp, SetOperationStmt))
{
SetOperationStmt *op = (SetOperationStmt *) setOp;
- /* EXCEPT is no good */
+ /* EXCEPT is no good (point 3 for subquery_is_pushdown_safe) */
if (op->op == SETOP_EXCEPT)
return false;
/* Else recurse */
- if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
+ if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
return false;
- if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
+ if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
return false;
}
else
@@ -1253,17 +1269,92 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
}
/*
- * Compare tlist's datatypes against the list of set-operation result types.
- * For any items that are different, mark the appropriate element of
- * differentTypes[] to show that this column will have type conversions.
+ * check_output_expressions - check subquery's output expressions for safety
+ *
+ * There are several cases in which it's unsafe to push down an upper-level
+ * qual if it references a particular output column of a subquery. We check
+ * each output column of the subquery and set unsafeColumns[k] to TRUE if
+ * that column is unsafe for a pushed-down qual to reference. The conditions
+ * checked here are:
+ *
+ * 1. We must not push down any quals that refer to subselect outputs that
+ * return sets, else we'd introduce functions-returning-sets into the
+ * subquery's WHERE/HAVING quals.
+ *
+ * 2. We must not push down any quals that refer to subselect outputs that
+ * contain volatile functions, for fear of introducing strange results due
+ * to multiple evaluation of a volatile function.
+ *
+ * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
+ * refer to non-DISTINCT output columns, because that could change the set
+ * of rows returned. (This condition is vacuous for DISTINCT, because then
+ * there are no non-DISTINCT output columns, so we needn't check. But note
+ * we are assuming that the qual can't distinguish values that the DISTINCT
+ * operator sees as equal. This is a bit shaky but we have no way to test
+ * for the case, and it's unlikely enough that we shouldn't refuse the
+ * optimization just because it could theoretically happen.)
+ */
+static void
+check_output_expressions(Query *subquery, bool *unsafeColumns)
+{
+ ListCell *lc;
+
+ foreach(lc, subquery->targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+ if (tle->resjunk)
+ continue; /* ignore resjunk columns */
+
+ /* We need not check further if output col is already known unsafe */
+ if (unsafeColumns[tle->resno])
+ continue;
+
+ /* Functions returning sets are unsafe (point 1) */
+ if (expression_returns_set((Node *) tle->expr))
+ {
+ unsafeColumns[tle->resno] = true;
+ continue;
+ }
+
+ /* Volatile functions are unsafe (point 2) */
+ if (contain_volatile_functions((Node *) tle->expr))
+ {
+ unsafeColumns[tle->resno] = true;
+ continue;
+ }
+
+ /* If subquery uses DISTINCT ON, check point 3 */
+ if (subquery->hasDistinctOn &&
+ !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
+ {
+ /* non-DISTINCT column, so mark it unsafe */
+ unsafeColumns[tle->resno] = true;
+ continue;
+ }
+ }
+}
+
+/*
+ * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
+ * push quals into each component query, but the quals can only reference
+ * subquery columns that suffer no type coercions in the set operation.
+ * Otherwise there are possible semantic gotchas. So, we check the
+ * component queries to see if any of them have output types different from
+ * the top-level setop outputs. unsafeColumns[k] is set true if column k
+ * has different type in any component.
*
* We don't have to care about typmods here: the only allowed difference
* between set-op input and output typmods is input is a specific typmod
* and output is -1, and that does not require a coercion.
+ *
+ * tlist is a subquery tlist.
+ * colTypes is an OID list of the top-level setop's output column types.
+ * unsafeColumns[] is the result array.
*/
static void
compare_tlist_datatypes(List *tlist, List *colTypes,
- bool *differentTypes)
+ bool *unsafeColumns)
{
ListCell *l;
ListCell *colType = list_head(colTypes);
@@ -1277,7 +1368,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
if (colType == NULL)
elog(ERROR, "wrong number of tlist entries");
if (exprType((Node *) tle->expr) != lfirst_oid(colType))
- differentTypes[tle->resno] = true;
+ unsafeColumns[tle->resno] = true;
colType = lnext(colType);
}
if (colType != NULL)
@@ -1300,34 +1391,15 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
* (since there is no easy way to name that within the subquery itself).
*
* 3. The qual must not refer to any subquery output columns that were
- * found to have inconsistent types across a set operation tree by
- * subquery_is_pushdown_safe().
- *
- * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
- * refer to non-DISTINCT output columns, because that could change the set
- * of rows returned. (This condition is vacuous for DISTINCT, because then
- * there are no non-DISTINCT output columns, so we needn't check. But note
- * we are assuming that the qual can't distinguish values that the DISTINCT
- * operator sees as equal. This is a bit shaky but we have no way to test
- * for the case, and it's unlikely enough that we shouldn't refuse the
- * optimization just because it could theoretically happen.)
- *
- * 5. We must not push down any quals that refer to subselect outputs that
- * return sets, else we'd introduce functions-returning-sets into the
- * subquery's WHERE/HAVING quals.
- *
- * 6. We must not push down any quals that refer to subselect outputs that
- * contain volatile functions, for fear of introducing strange results due
- * to multiple evaluation of a volatile function.
+ * found to be unsafe to reference by subquery_is_pushdown_safe().
*/
static bool
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
- bool *differentTypes)
+ bool *unsafeColumns)
{
bool safe = true;
List *vars;
ListCell *vl;
- Bitmapset *tested = NULL;
/* Refuse subselects (point 1) */
if (contain_subplans(qual))
@@ -1350,7 +1422,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
foreach(vl, vars)
{
Var *var = (Var *) lfirst(vl);
- TargetEntry *tle;
/*
* XXX Punt if we find any PlaceHolderVars in the restriction clause.
@@ -1366,6 +1437,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
}
Assert(var->varno == rti);
+ Assert(var->varattno >= 0);
/* Check point 2 */
if (var->varattno == 0)
@@ -1374,45 +1446,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
break;
}
- /*
- * We use a bitmapset to avoid testing the same attno more than once.
- * (NB: this only works because subquery outputs can't have negative
- * attnos.)
- */
- if (bms_is_member(var->varattno, tested))
- continue;
- tested = bms_add_member(tested, var->varattno);
-
/* Check point 3 */
- if (differentTypes[var->varattno])
- {
- safe = false;
- break;
- }
-
- /* Must find the tlist element referenced by the Var */
- tle = get_tle_by_resno(subquery->targetList, var->varattno);
- Assert(tle != NULL);
- Assert(!tle->resjunk);
-
- /* If subquery uses DISTINCT ON, check point 4 */
- if (subquery->hasDistinctOn &&
- !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
- {
- /* non-DISTINCT column, so fail */
- safe = false;
- break;
- }
-
- /* Refuse functions returning sets (point 5) */
- if (expression_returns_set((Node *) tle->expr))
- {
- safe = false;
- break;
- }
-
- /* Refuse volatile functions (point 6) */
- if (contain_volatile_functions((Node *) tle->expr))
+ if (unsafeColumns[var->varattno])
{
safe = false;
break;
@@ -1420,7 +1455,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
}
list_free(vars);
- bms_free(tested);
return safe;
}
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 8db4b6d02a2..2c30e485c82 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -517,3 +517,93 @@ explain (costs off)
-> Seq Scan on tenk1 b
(3 rows)
+-- Test that we push quals into UNION sub-selects only when it's safe
+explain (costs off)
+SELECT * FROM
+ (SELECT 1 AS t, 2 AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+ QUERY PLAN
+--------------------------------------------
+ Unique
+ -> Sort
+ Sort Key: (1), (2)
+ -> Append
+ -> Result
+ -> Result
+ One-Time Filter: false
+(7 rows)
+
+SELECT * FROM
+ (SELECT 1 AS t, 2 AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+ t | x
+---+---
+ 1 | 2
+(1 row)
+
+explain (costs off)
+SELECT * FROM
+ (SELECT 1 AS t, generate_series(1,10) AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+ QUERY PLAN
+-------------------------------------------------------------
+ Sort
+ Sort Key: ss.x
+ -> Subquery Scan on ss
+ Filter: (ss.x < 4)
+ -> Unique
+ -> Sort
+ Sort Key: (1), (generate_series(1, 10))
+ -> Append
+ -> Result
+ -> Result
+(10 rows)
+
+SELECT * FROM
+ (SELECT 1 AS t, generate_series(1,10) AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+ t | x
+---+---
+ 1 | 1
+ 1 | 2
+ 1 | 3
+(3 rows)
+
+explain (costs off)
+SELECT * FROM
+ (SELECT 1 AS t, (random()*3)::int AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
+ QUERY PLAN
+----------------------------------------------------------------------------
+ Subquery Scan on ss
+ Filter: (ss.x > 3)
+ -> Unique
+ -> Sort
+ Sort Key: (1), (((random() * 3::double precision))::integer)
+ -> Append
+ -> Result
+ -> Result
+(8 rows)
+
+SELECT * FROM
+ (SELECT 1 AS t, (random()*3)::int AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
+ t | x
+---+---
+ 2 | 4
+(1 row)
+
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index 752ae470f0d..f3c9d113827 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -207,3 +207,45 @@ explain (costs off)
UNION ALL
SELECT 2 AS t, * FROM tenk1 b) c
WHERE t = 2;
+
+-- Test that we push quals into UNION sub-selects only when it's safe
+explain (costs off)
+SELECT * FROM
+ (SELECT 1 AS t, 2 AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+
+SELECT * FROM
+ (SELECT 1 AS t, 2 AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+
+explain (costs off)
+SELECT * FROM
+ (SELECT 1 AS t, generate_series(1,10) AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+
+SELECT * FROM
+ (SELECT 1 AS t, generate_series(1,10) AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+
+explain (costs off)
+SELECT * FROM
+ (SELECT 1 AS t, (random()*3)::int AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
+
+SELECT * FROM
+ (SELECT 1 AS t, (random()*3)::int AS x
+ UNION
+ SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;