aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2004-01-05 18:04:39 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2004-01-05 18:04:39 +0000
commit 5c74ce23db52ec862b9f35cfa5f6b327820dda47 (patch)
tree 4fc58b16fa517e704b76c89eed4f451a2de6c2a4 /src
parent cce442da6d6c047b9b86133eb449d3cfbb0fa713 (diff)
download postgresql-5c74ce23db52ec862b9f35cfa5f6b327820dda47.tar.gz
postgresql-5c74ce23db52ec862b9f35cfa5f6b327820dda47.zip
Improve UniquePath logic to detect the case where the input is already
known unique (eg, it is a SELECT DISTINCT ... subquery), and not do a redundant unique-ification step.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/nodes/outfuncs.c 4
-rw-r--r-- src/backend/optimizer/plan/createplan.c 8
-rw-r--r-- src/backend/optimizer/util/clauses.c 17
-rw-r--r-- src/backend/optimizer/util/pathnode.c 34
-rw-r--r-- src/include/nodes/relation.h 21
-rw-r--r-- src/include/optimizer/clauses.h 3
6 files changed, 72 insertions, 15 deletions
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index e91e8e0d170..cc466f5ae00 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.226 2004/01/05 05:07:35 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.227 2004/01/05 18:04:38 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@@ -1023,7 +1023,7 @@ _outUniquePath(StringInfo str, UniquePath *node)
_outPathInfo(str, (Path *) node);
WRITE_NODE_FIELD(subpath);
- WRITE_BOOL_FIELD(use_hash);
+ WRITE_ENUM_FIELD(umethod, UniquePathMethod);
WRITE_FLOAT_FIELD(rows, "%.0f");
}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index d20b967b05c..ace9029ce68 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.162 2004/01/05 05:07:35 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.163 2004/01/05 18:04:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -605,10 +605,14 @@ create_unique_plan(Query *root, UniquePath *best_path)
subplan->targetlist = newtlist;
}
+ /* Done if we don't need to do any actual unique-ifying */
+ if (best_path->umethod == UNIQUE_PATH_NOOP)
+ return subplan;
+
/* Copy tlist again to make one we can put sorting labels on */
my_tlist = copyObject(subplan->targetlist);
- if (best_path->use_hash)
+ if (best_path->umethod == UNIQUE_PATH_HASH)
{
long numGroups;
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 38312314703..b2b2d7d02bb 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.159 2004/01/04 03:51:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.160 2004/01/05 18:04:39 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -921,6 +921,21 @@ has_distinct_on_clause(Query *query)
return false;
}
+/*
+ * Test whether a query uses simple DISTINCT, ie, has a distinct-list that
+ * is the same as the set of output columns.
+ */
+bool
+has_distinct_clause(Query *query)
+{
+ /* Is there a DISTINCT clause at all? */
+ if (query->distinctClause == NIL)
+ return false;
+
+ /* It's DISTINCT if it's not DISTINCT ON */
+ return !has_distinct_on_clause(query);
+}
+
/*****************************************************************************
* *
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 94109ee01e3..a2a6b35cd32 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.97 2004/01/05 05:07:35 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.98 2004/01/05 18:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,12 +20,14 @@
#include "executor/executor.h"
#include "miscadmin.h"
#include "nodes/plannodes.h"
+#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/restrictinfo.h"
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
+#include "parser/parsetree.h"
#include "utils/memutils.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
@@ -547,6 +549,30 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
pathnode->subpath = subpath;
/*
+ * If the input is a subquery that uses DISTINCT, we don't need to do
+ * anything; its output is already unique. (Are there any other cases
+ * in which we can easily prove the input must be distinct?)
+ */
+ if (rel->rtekind == RTE_SUBQUERY)
+ {
+ RangeTblEntry *rte = rt_fetch(rel->relid, root->rtable);
+ Query *subquery = rte->subquery;
+
+ if (has_distinct_clause(subquery))
+ {
+ pathnode->umethod = UNIQUE_PATH_NOOP;
+ pathnode->rows = rel->rows;
+ pathnode->path.startup_cost = subpath->startup_cost;
+ pathnode->path.total_cost = subpath->total_cost;
+ pathnode->path.pathkeys = subpath->pathkeys;
+
+ rel->cheapest_unique_path = (Path *) pathnode;
+
+ return pathnode;
+ }
+ }
+
+ /*
* Try to identify the targetlist that will actually be unique-ified.
* In current usage, this routine is only used for sub-selects of IN
* clauses, so we should be able to find the tlist in in_info_list.
@@ -599,7 +625,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
* compare costs. We only try this if we know the targetlist for sure
* (else we can't be sure about the datatypes involved).
*/
- pathnode->use_hash = false;
+ pathnode->umethod = UNIQUE_PATH_SORT;
if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
{
/*
@@ -617,11 +643,11 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
subpath->total_cost,
rel->rows);
if (agg_path.total_cost < sort_path.total_cost)
- pathnode->use_hash = true;
+ pathnode->umethod = UNIQUE_PATH_HASH;
}
}
- if (pathnode->use_hash)
+ if (pathnode->umethod == UNIQUE_PATH_HASH)
{
pathnode->path.startup_cost = agg_path.startup_cost;
pathnode->path.total_cost = agg_path.total_cost;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 14486591da9..166f1242b0e 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.91 2004/01/05 05:07:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.92 2004/01/05 18:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -442,15 +442,26 @@ typedef struct MaterialPath
* its subpath.
*
* This is unlike the other Path nodes in that it can actually generate
- * two different plans: either hash-based or sort-based implementation.
- * The decision is sufficiently localized that it's not worth having two
- * separate Path node types.
+ * different plans: either hash-based or sort-based implementation, or a
+ * no-op if the input path can be proven distinct already. The decision
+ * is sufficiently localized that it's not worth having separate Path node
+ * types. (Note: in the no-op case, we could eliminate the UniquePath node
+ * entirely and just return the subpath; but it's convenient to have a
+ * UniquePath in the path tree to signal upper-level routines that the input
+ * is known distinct.)
*/
+typedef enum
+{
+ UNIQUE_PATH_NOOP, /* input is known unique already */
+ UNIQUE_PATH_HASH, /* use hashing */
+ UNIQUE_PATH_SORT /* use sorting */
+} UniquePathMethod;
+
typedef struct UniquePath
{
Path path;
Path *subpath;
- bool use_hash;
+ UniquePathMethod umethod;
double rows; /* estimated number of result tuples */
} UniquePath;
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index c948a9039fb..947c4467e7b 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.71 2004/01/04 03:51:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.72 2004/01/05 18:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -57,6 +57,7 @@ extern bool is_pseudo_constant_clause(Node *clause);
extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
extern List *pull_constant_clauses(List *quals, List **constantQual);
+extern bool has_distinct_clause(Query *query);
extern bool has_distinct_on_clause(Query *query);
extern int NumRelids(Node *clause);