diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2008-08-07 01:11:52 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2008-08-07 01:11:52 +0000 |
commit | 2d1d96b1cea8f67a095e8f28372af4081605f681 (patch) | |
tree | 91be573dfa6eacbe8a4421d700af3fadc3d1bda8 /src/backend/optimizer/util | |
parent | 3d40d5e70ebe21b7d52467987bffad8aea16f29b (diff) | |
download | postgresql-2d1d96b1cea8f67a095e8f28372af4081605f681.tar.gz postgresql-2d1d96b1cea8f67a095e8f28372af4081605f681.zip |
Teach the system how to use hashing for UNION. (INTERSECT/EXCEPT will follow,
but seem like a separate patch since most of the remaining work is on the
executor side.) I took the opportunity to push selection of the grouping
operators for set operations into the parser where it belongs. Otherwise this
is just a small exercise in making prepunion.c consider both alternatives.
As with the recent DISTINCT patch, this means we can UNION on datatypes that
can hash but not sort, and it means that UNION without ORDER BY is no longer
certain to produce sorted output.
Diffstat (limited to 'src/backend/optimizer/util')
-rw-r--r-- | src/backend/optimizer/util/clauses.c | 5 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 29 | ||||
-rw-r--r-- | src/backend/optimizer/util/tlist.c | 106 |
3 files changed, 121 insertions, 19 deletions
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 04ef2224c91..858d4abcbd8 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.261 2008/08/07 01:11:50 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -3933,6 +3933,8 @@ expression_tree_walker(Node *node, return true; if (walker(setop->rarg, context)) return true; + + /* groupClauses are deemed uninteresting */ } break; case T_InClauseInfo: @@ -4535,6 +4537,7 @@ expression_tree_mutator(Node *node, FLATCOPY(newnode, setop, SetOperationStmt); MUTATE(newnode->larg, setop->larg, Node *); MUTATE(newnode->rarg, setop->rarg, Node *); + /* We do not mutate groupClauses by default */ return (Node *) newnode; } break; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index ad3c10ac27b..655443e9efe 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.145 2008/08/07 01:11:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,7 +24,6 @@ #include "optimizer/paths.h" #include "optimizer/tlist.h" #include "parser/parse_expr.h" -#include "parser/parse_oper.h" #include "parser/parsetree.h" #include "utils/selfuncs.h" #include "utils/lsyscache.h" @@ -1003,12 +1002,6 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) /* * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, * except with ALL. - * - * XXX this code knows that prepunion.c will adopt the default sort/group - * operators for each column datatype to determine uniqueness. It'd - * probably be better if these operators were chosen at parse time and - * stored into the parsetree, instead of leaving bits of the planner to - * decide semantics. */ if (query->setOperations) { @@ -1019,24 +1012,26 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) if (!topop->all) { + ListCell *lg; + /* We're good if all the nonjunk output columns are in colnos */ + lg = list_head(topop->groupClauses); foreach(l, query->targetList) { TargetEntry *tle = (TargetEntry *) lfirst(l); - Oid tle_eq_opr; + SortGroupClause *sgc; if (tle->resjunk) continue; /* ignore resjunk columns */ + /* non-resjunk columns should have grouping clauses */ + Assert(lg != NULL); + sgc = (SortGroupClause *) lfirst(lg); + lg = lnext(lg); + opid = distinct_col_search(tle->resno, colnos, opids); - if (!OidIsValid(opid)) - break; /* exit early if no match */ - /* check for compatible semantics */ - get_sort_group_operators(exprType((Node *) tle->expr), - false, false, false, - NULL, &tle_eq_opr, NULL); - if (!OidIsValid(tle_eq_opr) || - !equality_ops_are_compatible(opid, tle_eq_opr)) + if (!OidIsValid(opid) || + !equality_ops_are_compatible(opid, sgc->eqop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index fc0880ad56c..a2c627fd4d5 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.80 2008/08/07 01:11:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,6 +18,7 @@ #include "optimizer/tlist.h" #include "optimizer/var.h" #include "parser/parse_expr.h" +#include "utils/lsyscache.h" /***************************************************************************** @@ -202,6 +203,109 @@ get_sortgrouplist_exprs(List *sgClauses, List *targetList) } +/***************************************************************************** + * Functions to extract data from a list of SortGroupClauses + * + * These don't really belong in tlist.c, but they are sort of related to the + * functions just above, and they don't seem to deserve their own file. + *****************************************************************************/ + +/* + * extract_grouping_ops - make an array of the equality operator OIDs + * for a SortGroupClause list + */ +Oid * +extract_grouping_ops(List *groupClause) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *groupOperators; + ListCell *glitem; + + groupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + + groupOperators[colno] = groupcl->eqop; + Assert(OidIsValid(groupOperators[colno])); + colno++; + } + + return groupOperators; +} + +/* + * extract_grouping_cols - make an array of the grouping column resnos + * for a SortGroupClause list + */ +AttrNumber * +extract_grouping_cols(List *groupClause, List *tlist) +{ + AttrNumber *grpColIdx; + int numCols = list_length(groupClause); + int colno = 0; + ListCell *glitem; + + grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpColIdx[colno++] = tle->resno; + } + + return grpColIdx; +} + +/* + * grouping_is_sortable - is it possible to implement grouping list by sorting? + * + * This is easy since the parser will have included a sortop if one exists. + */ +bool +grouping_is_sortable(List *groupClause) +{ + ListCell *glitem; + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + + if (!OidIsValid(groupcl->sortop)) + return false; + } + return true; +} + +/* + * grouping_is_hashable - is it possible to implement grouping list by hashing? + * + * We assume hashing is OK if the equality operators are marked oprcanhash. + * (If there isn't actually a supporting hash function, the executor will + * complain at runtime; but this is a misdeclaration of the operator, not + * a system bug.) + */ +bool +grouping_is_hashable(List *groupClause) +{ + ListCell *glitem; + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + + if (!op_hashjoinable(groupcl->eqop)) + return false; + } + return true; +} + + + /* * Does tlist have same output datatypes as listed in colTypes? * |