aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2014-11-22 19:12:38 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2014-11-22 19:12:38 -0500
commitb62f94c60386796fd88256c5b7b1e8301c345166 (patch)
treebdc238f5b1cd5d4c4b8d551f1b90e8a58b3de93f /src
parent9c58101117d25f174c8d8013befdc33c632922d0 (diff)
downloadpostgresql-b62f94c60386796fd88256c5b7b1e8301c345166.tar.gz
postgresql-b62f94c60386796fd88256c5b7b1e8301c345166.zip
Allow simplification of EXISTS() subqueries containing LIMIT.
The locution "EXISTS(SELECT ... LIMIT 1)" seems to be rather common among people who don't realize that the database already performs optimizations equivalent to putting LIMIT 1 in the sub-select. Unfortunately, this was actually making things worse, because it prevented us from optimizing such EXISTS clauses into semi or anti joins. Teach simplify_EXISTS_query() to suppress constant-positive LIMIT clauses. That fixes the semi/anti-join case, and may help marginally even for cases that have to be left as sub-SELECTs. Marti Raudsepp, reviewed by David Rowley
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/plan/subselect.c52
-rw-r--r--src/test/regress/expected/subselect.out40
-rw-r--r--src/test/regress/sql/subselect.sql13
3 files changed, 96 insertions, 9 deletions
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 9111b56e92a..42b6d0a2cb9 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -70,7 +70,7 @@ static Node *convert_testexpr_mutator(Node *node,
static bool subplan_is_hashable(Plan *plan);
static bool testexpr_is_hashable(Node *testexpr);
static bool hash_ok_operator(OpExpr *expr);
-static bool simplify_EXISTS_query(Query *query);
+static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
Node **testexpr, List **paramIds);
static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
@@ -452,7 +452,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
* If it's an EXISTS subplan, we might be able to simplify it.
*/
if (subLinkType == EXISTS_SUBLINK)
- simple_exists = simplify_EXISTS_query(subquery);
+ simple_exists = simplify_EXISTS_query(root, subquery);
/*
* For an EXISTS subplan, tell lower-level planner to expect that only the
@@ -518,7 +518,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* Make a second copy of the original subquery */
subquery = (Query *) copyObject(orig_subquery);
/* and re-simplify */
- simple_exists = simplify_EXISTS_query(subquery);
+ simple_exists = simplify_EXISTS_query(root, subquery);
Assert(simple_exists);
/* See if it can be converted to an ANY query */
subquery = convert_EXISTS_to_ANY(root, subquery,
@@ -1359,7 +1359,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
* targetlist, we have to fail, because the pullup operation leaves us
* with noplace to evaluate the targetlist.
*/
- if (!simplify_EXISTS_query(subselect))
+ if (!simplify_EXISTS_query(root, subselect))
return NULL;
/*
@@ -1486,13 +1486,14 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
* Returns TRUE if was able to discard the targetlist, else FALSE.
*/
static bool
-simplify_EXISTS_query(Query *query)
+simplify_EXISTS_query(PlannerInfo *root, Query *query)
{
/*
* We don't try to simplify at all if the query uses set operations,
- * aggregates, modifying CTEs, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE;
- * none of these seem likely in normal usage and their possible effects
- * are complex.
+ * aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE; none
+ * of these seem likely in normal usage and their possible effects are
+ * complex. (Note: we could ignore an "OFFSET 0" clause, but that
+ * traditionally is used as an optimization fence, so we don't.)
*/
if (query->commandType != CMD_SELECT ||
query->setOperations ||
@@ -1501,11 +1502,44 @@ simplify_EXISTS_query(Query *query)
query->hasModifyingCTE ||
query->havingQual ||
query->limitOffset ||
- query->limitCount ||
query->rowMarks)
return false;
/*
+ * LIMIT with a constant positive (or NULL) value doesn't affect the
+ * semantics of EXISTS, so let's ignore such clauses. This is worth doing
+ * because people accustomed to certain other DBMSes may be in the habit
+ * of writing EXISTS(SELECT ... LIMIT 1) as an optimization. If there's a
+ * LIMIT with anything else as argument, though, we can't simplify.
+ */
+ if (query->limitCount)
+ {
+ /*
+ * The LIMIT clause has not yet been through eval_const_expressions,
+ * so we have to apply that here. It might seem like this is a waste
+ * of cycles, since the only case plausibly worth worrying about is
+ * "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)",
+ * so we have to fold constants or we're not going to recognize it.
+ */
+ Node *node = eval_const_expressions(root, query->limitCount);
+ Const *limit;
+
+ /* Might as well update the query if we simplified the clause. */
+ query->limitCount = node;
+
+ if (!IsA(node, Const))
+ return false;
+
+ limit = (Const *) node;
+ Assert(limit->consttype == INT8OID);
+ if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0)
+ return false;
+
+ /* Whether or not the targetlist is safe, we can drop the LIMIT. */
+ query->limitCount = NULL;
+ }
+
+ /*
* Mustn't throw away the targetlist if it contains set-returning
* functions; those could affect whether zero rows are returned!
*/
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 01c91308f33..b14410fd222 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -222,6 +222,46 @@ from int8_tbl group by q1 order by q1;
(2 rows)
--
+-- Check EXISTS simplification with LIMIT
+--
+explain (costs off)
+select * from int4_tbl o where exists
+ (select 1 from int4_tbl i where i.f1=o.f1 limit null);
+ QUERY PLAN
+------------------------------------
+ Hash Semi Join
+ Hash Cond: (o.f1 = i.f1)
+ -> Seq Scan on int4_tbl o
+ -> Hash
+ -> Seq Scan on int4_tbl i
+(5 rows)
+
+explain (costs off)
+select * from int4_tbl o where not exists
+ (select 1 from int4_tbl i where i.f1=o.f1 limit 1);
+ QUERY PLAN
+------------------------------------
+ Hash Anti Join
+ Hash Cond: (o.f1 = i.f1)
+ -> Seq Scan on int4_tbl o
+ -> Hash
+ -> Seq Scan on int4_tbl i
+(5 rows)
+
+explain (costs off)
+select * from int4_tbl o where exists
+ (select 1 from int4_tbl i where i.f1=o.f1 limit 0);
+ QUERY PLAN
+--------------------------------------
+ Seq Scan on int4_tbl o
+ Filter: (SubPlan 1)
+ SubPlan 1
+ -> Limit
+ -> Seq Scan on int4_tbl i
+ Filter: (f1 = o.f1)
+(6 rows)
+
+--
-- Test cases to catch unpleasant interactions between IN-join processing
-- and subquery pullup.
--
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 56707e26bbf..4be2e40a000 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -93,6 +93,19 @@ select q1, float8(count(*)) / (select count(*) from int8_tbl)
from int8_tbl group by q1 order by q1;
--
+-- Check EXISTS simplification with LIMIT
+--
+explain (costs off)
+select * from int4_tbl o where exists
+ (select 1 from int4_tbl i where i.f1=o.f1 limit null);
+explain (costs off)
+select * from int4_tbl o where not exists
+ (select 1 from int4_tbl i where i.f1=o.f1 limit 1);
+explain (costs off)
+select * from int4_tbl o where exists
+ (select 1 from int4_tbl i where i.f1=o.f1 limit 0);
+
+--
-- Test cases to catch unpleasant interactions between IN-join processing
-- and subquery pullup.
--