diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2004-01-05 18:04:39 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2004-01-05 18:04:39 +0000 |
commit | 5c74ce23db52ec862b9f35cfa5f6b327820dda47 (patch) | |
tree | 4fc58b16fa517e704b76c89eed4f451a2de6c2a4 /src | |
parent | cce442da6d6c047b9b86133eb449d3cfbb0fa713 (diff) | |
download | postgresql-5c74ce23db52ec862b9f35cfa5f6b327820dda47.tar.gz postgresql-5c74ce23db52ec862b9f35cfa5f6b327820dda47.zip |
Improve UniquePath logic to detect the case where the input is already
known unique (e.g., it is a SELECT DISTINCT ... subquery), and skip the
redundant unique-ification step.
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/nodes/outfuncs.c | 4 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 8 | ||||
-rw-r--r-- | src/backend/optimizer/util/clauses.c | 17 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 34 | ||||
-rw-r--r-- | src/include/nodes/relation.h | 21 | ||||
-rw-r--r-- | src/include/optimizer/clauses.h | 3 |
6 files changed, 72 insertions, 15 deletions
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e91e8e0d170..cc466f5ae00 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.226 2004/01/05 05:07:35 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.227 2004/01/05 18:04:38 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1023,7 +1023,7 @@ _outUniquePath(StringInfo str, UniquePath *node) _outPathInfo(str, (Path *) node); WRITE_NODE_FIELD(subpath); - WRITE_BOOL_FIELD(use_hash); + WRITE_ENUM_FIELD(umethod, UniquePathMethod); WRITE_FLOAT_FIELD(rows, "%.0f"); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index d20b967b05c..ace9029ce68 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.162 2004/01/05 05:07:35 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.163 2004/01/05 18:04:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -605,10 +605,14 @@ create_unique_plan(Query *root, UniquePath *best_path) subplan->targetlist = newtlist; } + /* Done if we don't need to do any actual unique-ifying */ + if (best_path->umethod == UNIQUE_PATH_NOOP) + return subplan; + /* Copy tlist again to make one we can put sorting labels on */ my_tlist = copyObject(subplan->targetlist); - if (best_path->use_hash) + if (best_path->umethod == UNIQUE_PATH_HASH) { long numGroups; diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 38312314703..b2b2d7d02bb 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: 
pgsql/src/backend/optimizer/util/clauses.c,v 1.159 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.160 2004/01/05 18:04:39 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -921,6 +921,21 @@ has_distinct_on_clause(Query *query) return false; } +/* + * Test whether a query uses simple DISTINCT, ie, has a distinct-list that + * is the same as the set of output columns. + */ +bool +has_distinct_clause(Query *query) +{ + /* Is there a DISTINCT clause at all? */ + if (query->distinctClause == NIL) + return false; + + /* It's DISTINCT if it's not DISTINCT ON */ + return !has_distinct_on_clause(query); +} + /***************************************************************************** * * diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 94109ee01e3..a2a6b35cd32 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.97 2004/01/05 05:07:35 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.98 2004/01/05 18:04:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,12 +20,14 @@ #include "executor/executor.h" #include "miscadmin.h" #include "nodes/plannodes.h" +#include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/restrictinfo.h" #include "parser/parse_expr.h" #include "parser/parse_oper.h" +#include "parser/parsetree.h" #include "utils/memutils.h" #include "utils/selfuncs.h" #include "utils/syscache.h" @@ -547,6 +549,30 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath) pathnode->subpath = subpath; /* + * If the input is a subquery that uses DISTINCT, we don't need to do + * anything; its output is already unique. 
(Are there any other cases + * in which we can easily prove the input must be distinct?) + */ + if (rel->rtekind == RTE_SUBQUERY) + { + RangeTblEntry *rte = rt_fetch(rel->relid, root->rtable); + Query *subquery = rte->subquery; + + if (has_distinct_clause(subquery)) + { + pathnode->umethod = UNIQUE_PATH_NOOP; + pathnode->rows = rel->rows; + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost; + pathnode->path.pathkeys = subpath->pathkeys; + + rel->cheapest_unique_path = (Path *) pathnode; + + return pathnode; + } + } + + /* * Try to identify the targetlist that will actually be unique-ified. * In current usage, this routine is only used for sub-selects of IN * clauses, so we should be able to find the tlist in in_info_list. @@ -599,7 +625,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath) * compare costs. We only try this if we know the targetlist for sure * (else we can't be sure about the datatypes involved). */ - pathnode->use_hash = false; + pathnode->umethod = UNIQUE_PATH_SORT; if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist)) { /* @@ -617,11 +643,11 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath) subpath->total_cost, rel->rows); if (agg_path.total_cost < sort_path.total_cost) - pathnode->use_hash = true; + pathnode->umethod = UNIQUE_PATH_HASH; } } - if (pathnode->use_hash) + if (pathnode->umethod == UNIQUE_PATH_HASH) { pathnode->path.startup_cost = agg_path.startup_cost; pathnode->path.total_cost = agg_path.total_cost; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 14486591da9..166f1242b0e 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.91 2004/01/05 05:07:36 tgl Exp $ + * 
$PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.92 2004/01/05 18:04:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -442,15 +442,26 @@ typedef struct MaterialPath * its subpath. * * This is unlike the other Path nodes in that it can actually generate - * two different plans: either hash-based or sort-based implementation. - * The decision is sufficiently localized that it's not worth having two - * separate Path node types. + * different plans: either hash-based or sort-based implementation, or a + * no-op if the input path can be proven distinct already. The decision + * is sufficiently localized that it's not worth having separate Path node + * types. (Note: in the no-op case, we could eliminate the UniquePath node + * entirely and just return the subpath; but it's convenient to have a + * UniquePath in the path tree to signal upper-level routines that the input + * is known distinct.) */ +typedef enum +{ + UNIQUE_PATH_NOOP, /* input is known unique already */ + UNIQUE_PATH_HASH, /* use hashing */ + UNIQUE_PATH_SORT /* use sorting */ +} UniquePathMethod; + typedef struct UniquePath { Path path; Path *subpath; - bool use_hash; + UniquePathMethod umethod; double rows; /* estimated number of result tuples */ } UniquePath; diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index c948a9039fb..947c4467e7b 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.71 2004/01/04 03:51:52 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.72 2004/01/05 18:04:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -57,6 +57,7 @@ extern bool is_pseudo_constant_clause(Node *clause); extern bool 
is_pseudo_constant_clause_relids(Node *clause, Relids relids); extern List *pull_constant_clauses(List *quals, List **constantQual); +extern bool has_distinct_clause(Query *query); extern bool has_distinct_on_clause(Query *query); extern int NumRelids(Node *clause); |