aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/plan/planmain.c4
-rw-r--r--src/backend/optimizer/plan/planner.c31
-rw-r--r--src/backend/optimizer/prep/prepunion.c32
-rw-r--r--src/backend/parser/analyze.c7
-rw-r--r--src/backend/parser/parse_clause.c199
-rw-r--r--src/include/nodes/parsenodes.h17
-rw-r--r--src/include/parser/parse_clause.h7
7 files changed, 173 insertions, 124 deletions
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 3776cf9b347..d25a5509b4c 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.106 2008/01/11 04:02:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.107 2008/07/31 22:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -288,7 +288,7 @@ query_planner(PlannerInfo *root, List *tlist,
* levels of sort --- and, therefore, certainly need to read all the
* tuples --- unless ORDER BY is a subset of GROUP BY.
*/
- if (parse->groupClause && parse->sortClause &&
+ if (root->group_pathkeys && root->sort_pathkeys &&
!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
tuple_fraction = 0.0;
}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 9248022d8b3..6ebbc5b7bc3 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.234 2008/07/10 02:14:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.235 2008/07/31 22:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -826,6 +826,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/*
* Calculate pathkeys that represent result ordering requirements
*/
+ Assert(parse->distinctClause == NIL);
sort_pathkeys = make_pathkeys_for_sortclauses(root,
parse->sortClause,
tlist,
@@ -864,17 +865,29 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* Calculate pathkeys that represent grouping/ordering requirements.
* Stash them in PlannerInfo so that query_planner can canonicalize
* them after EquivalenceClasses have been formed.
+ *
+ * Note: for the moment, DISTINCT is always implemented via sort/uniq,
+ * and we set the sort_pathkeys to be the more rigorous of the
+ * DISTINCT and ORDER BY requirements. This should be changed
+ * someday, but DISTINCT ON is a bit of a problem ...
*/
root->group_pathkeys =
make_pathkeys_for_sortclauses(root,
parse->groupClause,
tlist,
false);
- root->sort_pathkeys =
- make_pathkeys_for_sortclauses(root,
- parse->sortClause,
- tlist,
- false);
+ if (list_length(parse->distinctClause) > list_length(parse->sortClause))
+ root->sort_pathkeys =
+ make_pathkeys_for_sortclauses(root,
+ parse->distinctClause,
+ tlist,
+ false);
+ else
+ root->sort_pathkeys =
+ make_pathkeys_for_sortclauses(root,
+ parse->sortClause,
+ tlist,
+ false);
/*
* Will need actual number of aggregates for estimating costs.
@@ -903,9 +916,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* by ORDER BY --- but that might just leave us failing to exploit an
* available sort order at all. Needs more thought...)
*/
- if (parse->groupClause)
+ if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys;
- else if (parse->sortClause)
+ else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
else
root->query_pathkeys = NIL;
@@ -1172,7 +1185,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* If we were not able to make the plan come out in the right order, add
* an explicit sort step.
*/
- if (parse->sortClause)
+ if (sort_pathkeys)
{
if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
{
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index ed18cf5e3fe..b30cedee9fc 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.147 2008/06/19 00:46:04 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.148 2008/07/31 22:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -69,6 +69,7 @@ static List *generate_setop_tlist(List *colTypes, int flag,
static List *generate_append_tlist(List *colTypes, bool flag,
List *input_plans,
List *refnames_tlist);
+static List *generate_setop_sortlist(List *targetlist);
static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
Index rti);
static void make_inh_translation_lists(Relation oldrelation,
@@ -319,7 +320,7 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
{
List *sortList;
- sortList = addAllTargetsToSortList(NULL, NIL, tlist, false);
+ sortList = generate_setop_sortlist(tlist);
if (sortList)
{
plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan);
@@ -384,7 +385,7 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
* Sort the child results, then add a SetOp plan node to generate the
* correct output.
*/
- sortList = addAllTargetsToSortList(NULL, NIL, tlist, false);
+ sortList = generate_setop_sortlist(tlist);
if (sortList == NIL) /* nothing to sort on? */
{
@@ -675,6 +676,31 @@ generate_append_tlist(List *colTypes, bool flag,
return tlist;
}
+/*
+ * generate_setop_sortlist
+ * Build a SortClause list enumerating all the non-resjunk tlist entries,
+ * using default ordering properties.
+ */
+static List *
+generate_setop_sortlist(List *targetlist)
+{
+ List *sortlist = NIL;
+ ListCell *l;
+
+ foreach(l, targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ if (!tle->resjunk)
+ sortlist = addTargetToSortList(NULL, tle,
+ sortlist, targetlist,
+ SORTBY_DEFAULT,
+ SORTBY_NULLS_DEFAULT,
+ NIL, false);
+ }
+ return sortlist;
+}
+
/*
* find_all_inheritors -
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 520b5037255..329b1bc881a 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -17,7 +17,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.373 2008/07/18 20:26:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.374 2008/07/31 22:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -711,6 +711,8 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
/*
* Transform sorting/grouping stuff. Do ORDER BY first because both
* transformGroupClause and transformDistinctClause need the results.
+ * Note that these functions can also change the targetList, so it's
+ * passed to them by reference.
*/
qry->sortClause = transformSortClause(pstate,
stmt->sortClause,
@@ -725,8 +727,9 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
qry->distinctClause = transformDistinctClause(pstate,
stmt->distinctClause,
&qry->targetList,
- &qry->sortClause);
+ qry->sortClause);
+ /* transform LIMIT */
qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset,
"OFFSET");
qry->limitCount = transformLimitClause(pstate, stmt->limitCount,
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index f532b26d374..d82573fd03d 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.170 2008/06/19 00:46:05 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.171 2008/07/31 22:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1454,16 +1454,23 @@ transformSortClause(ParseState *pstate,
* transformDistinctClause -
* transform a DISTINCT or DISTINCT ON clause
*
- * Since we may need to add items to the query's sortClause list, that list
- * is passed by reference. Likewise for the targetlist.
+ * Since we may need to add items to the query's targetlist, that list
+ * is passed by reference.
+ *
+ * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as
+ * possible into the distinctClause. This avoids a possible need to re-sort,
+ * and allows the user to determine the equality semantics used by DISTINCT,
+ * should she be working with a datatype that has more than one btree equality
+ * operator.
*/
List *
transformDistinctClause(ParseState *pstate, List *distinctlist,
- List **targetlist, List **sortClause)
+ List **targetlist, List *sortClause)
{
List *result = NIL;
ListCell *slitem;
ListCell *dlitem;
+ ListCell *tlitem;
/* No work if there was no DISTINCT clause */
if (distinctlist == NIL)
@@ -1474,25 +1481,21 @@ transformDistinctClause(ParseState *pstate, List *distinctlist,
/* We had SELECT DISTINCT */
/*
- * All non-resjunk elements from target list that are not already in
- * the sort list should be added to it. (We don't really care what
- * order the DISTINCT fields are checked in, so we can leave the
- * user's ORDER BY spec alone, and just add additional sort keys to it
- * to ensure that all targetlist items get sorted.)
- */
- *sortClause = addAllTargetsToSortList(pstate,
- *sortClause,
- *targetlist,
- true);
-
- /*
- * Now, DISTINCT list consists of all non-resjunk sortlist items.
- * Actually, all the sortlist items had better be non-resjunk!
- * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item that
- * does not appear anywhere in the SELECT targetlist, and we can't
- * implement that with only one sorting pass...
+ * The distinctClause should consist of all ORDER BY items followed
+ * by all other non-resjunk targetlist items. There must not be any
+ * resjunk ORDER BY items --- that would imply that we are sorting
+ * by a value that isn't necessarily unique within a DISTINCT group,
+ * so the results wouldn't be well-defined. This construction
+ * ensures we follow the rule that sortClause and distinctClause match;
+ * in fact the sortClause will always be a prefix of distinctClause.
+ *
+ * Note a corner case: the same TLE could be in the ORDER BY list
+ * multiple times with different sortops. We have to include it in
+ * the distinctClause the same way to preserve the prefix property.
+ * The net effect will be that the TLE value will be made unique
+ * according to both sortops.
*/
- foreach(slitem, *sortClause)
+ foreach(slitem, sortClause)
{
SortClause *scl = (SortClause *) lfirst(slitem);
TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
@@ -1504,71 +1507,103 @@ transformDistinctClause(ParseState *pstate, List *distinctlist,
else
result = lappend(result, copyObject(scl));
}
+
+ /*
+ * Now add any remaining non-resjunk tlist items, using default
+ * sorting semantics for their data types.
+ */
+ foreach(tlitem, *targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(tlitem);
+
+ if (tle->resjunk)
+ continue; /* ignore junk */
+ if (targetIsInSortList(tle, InvalidOid, result))
+ continue; /* already in list (with some semantics) */
+ result = addTargetToSortList(pstate, tle,
+ result, *targetlist,
+ SORTBY_DEFAULT,
+ SORTBY_NULLS_DEFAULT,
+ NIL, true);
+ }
}
else
{
/* We had SELECT DISTINCT ON (expr, ...) */
+ Bitmapset *refnos = NULL;
+ int sortgroupref;
+ bool skipped_sortitem;
/*
- * If the user writes both DISTINCT ON and ORDER BY, then the two
- * expression lists must match (until one or the other runs out).
- * Otherwise the ORDER BY requires a different sort order than the
- * DISTINCT does, and we can't implement that with only one sort pass
- * (and if we do two passes, the results will be rather
- * unpredictable). However, it's OK to have more DISTINCT ON
- * expressions than ORDER BY expressions; we can just add the extra
- * DISTINCT values to the sort list, much as we did above for ordinary
- * DISTINCT fields.
- *
- * Actually, it'd be OK for the common prefixes of the two lists to
- * match in any order, but implementing that check seems like more
- * trouble than it's worth.
+ * Add all the DISTINCT ON expressions to the tlist (if not already
+ * present, they are added as resjunk items). Assign sortgroupref
+ * numbers to them, and form a bitmapset of these numbers. (A
+ * bitmapset is convenient here because we don't care about order
+ * and we can discard duplicates.)
*/
- ListCell *nextsortlist = list_head(*sortClause);
-
foreach(dlitem, distinctlist)
{
+ Node *dexpr = (Node *) lfirst(dlitem);
TargetEntry *tle;
- tle = findTargetlistEntry(pstate, lfirst(dlitem),
+ tle = findTargetlistEntry(pstate, dexpr,
targetlist, DISTINCT_ON_CLAUSE);
+ sortgroupref = assignSortGroupRef(tle, *targetlist);
+ refnos = bms_add_member(refnos, sortgroupref);
+ }
- if (nextsortlist != NULL)
- {
- SortClause *scl = (SortClause *) lfirst(nextsortlist);
+ /*
+ * If the user writes both DISTINCT ON and ORDER BY, adopt the
+ * sorting semantics from ORDER BY items that match DISTINCT ON
+ * items, and also adopt their column sort order. We insist that
+ * the distinctClause and sortClause match, so throw error if we
+ * find the need to add any more distinctClause items after we've
+ * skipped an ORDER BY item that wasn't in DISTINCT ON.
+ */
+ skipped_sortitem = false;
+ foreach(slitem, sortClause)
+ {
+ SortClause *scl = (SortClause *) lfirst(slitem);
- if (tle->ressortgroupref != scl->tleSortGroupRef)
+ if (bms_is_member(scl->tleSortGroupRef, refnos))
+ {
+ if (skipped_sortitem)
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
- result = lappend(result, copyObject(scl));
- nextsortlist = lnext(nextsortlist);
+ else
+ result = lappend(result, copyObject(scl));
}
else
{
- *sortClause = addTargetToSortList(pstate, tle,
- *sortClause, *targetlist,
- SORTBY_DEFAULT,
- SORTBY_NULLS_DEFAULT,
- NIL, true);
+ skipped_sortitem = true;
+ }
+ }
- /*
- * Probably, the tle should always have been added at the end
- * of the sort list ... but search to be safe.
- */
- foreach(slitem, *sortClause)
- {
- SortClause *scl = (SortClause *) lfirst(slitem);
+ /*
+ * Now add any remaining DISTINCT ON items, using default sorting
+ * semantics for their data types. (Note: this is pretty
+ * questionable; if the ORDER BY list doesn't include all the DISTINCT
+ * ON items and more besides, you certainly aren't using DISTINCT ON
+ * in the intended way, and you probably aren't going to get
+ * consistent results. It might be better to throw an error or warning
+ * here. But historically we've allowed it, so keep doing so.)
+ */
+ while ((sortgroupref = bms_first_member(refnos)) >= 0)
+ {
+ TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist);
- if (tle->ressortgroupref == scl->tleSortGroupRef)
- {
- result = lappend(result, copyObject(scl));
- break;
- }
- }
- if (slitem == NULL) /* should not happen */
- elog(ERROR, "failed to add DISTINCT ON clause to target list");
- }
+ if (targetIsInSortList(tle, InvalidOid, result))
+ continue; /* already in list (with some semantics) */
+ if (skipped_sortitem)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
+ result = addTargetToSortList(pstate, tle,
+ result, *targetlist,
+ SORTBY_DEFAULT,
+ SORTBY_NULLS_DEFAULT,
+ NIL, true);
}
}
@@ -1576,38 +1611,8 @@ transformDistinctClause(ParseState *pstate, List *distinctlist,
}
/*
- * addAllTargetsToSortList
- * Make sure all non-resjunk targets in the targetlist are in the
- * ORDER BY list, adding the not-yet-sorted ones to the end of the list.
- * This is typically used to help implement SELECT DISTINCT.
- *
- * See addTargetToSortList for info about pstate and resolveUnknown inputs.
- *
- * Returns the updated ORDER BY list.
- */
-List *
-addAllTargetsToSortList(ParseState *pstate, List *sortlist,
- List *targetlist, bool resolveUnknown)
-{
- ListCell *l;
-
- foreach(l, targetlist)
- {
- TargetEntry *tle = (TargetEntry *) lfirst(l);
-
- if (!tle->resjunk)
- sortlist = addTargetToSortList(pstate, tle,
- sortlist, targetlist,
- SORTBY_DEFAULT,
- SORTBY_NULLS_DEFAULT,
- NIL, resolveUnknown);
- }
- return sortlist;
-}
-
-/*
* addTargetToSortList
- * If the given targetlist entry isn't already in the ORDER BY list,
+ * If the given targetlist entry isn't already in the SortClause list,
* add it to the end of the list, using the given sort ordering info.
*
* If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not,
@@ -1615,7 +1620,7 @@ addAllTargetsToSortList(ParseState *pstate, List *sortlist,
* pstate should be provided if resolveUnknown is TRUE, but can be NULL
* otherwise.
*
- * Returns the updated ORDER BY list.
+ * Returns the updated SortClause list.
*/
List *
addTargetToSortList(ParseState *pstate, TargetEntry *tle,
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index e6de4b49c85..fa78b4b188b 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.368 2008/07/18 03:32:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.369 2008/07/31 22:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -613,14 +613,19 @@ typedef struct RangeTblEntry
* tleSortGroupRef must match ressortgroupref of exactly one entry of the
* associated targetlist; that is the expression to be sorted (or grouped) by.
* sortop is the OID of the ordering operator (a "<" or ">" operator).
- * nulls_first does about what you'd expect.
+ * nulls_first means about what you'd expect.
*
* SortClauses are also used to identify targets that we will do a "Unique"
* filter step on (for SELECT DISTINCT and SELECT DISTINCT ON). The
- * distinctClause list is simply a copy of the relevant members of the
- * sortClause list. Note that distinctClause can be a subset of sortClause,
- * but cannot have members not present in sortClause; and the members that
- * do appear must be in the same order as in sortClause.
+ * distinctClause list is a list of SortClauses for the expressions to be
+ * unique-ified. (As per comment for GroupClause, this overspecifies the
+ * semantics.) In SELECT DISTINCT, the distinctClause list is typically
+ * longer than the ORDER BY list, while in SELECT DISTINCT ON it's typically
+ * shorter. The two lists must match up to the end of the shorter one ---
+ * the parser rearranges the distinctClause if necessary to make this true.
+ * (This restriction ensures that only one sort step is needed to both
+ * satisfy the ORDER BY and set up for the Unique step. This is semantically
+ * necessary for DISTINCT ON, and offers no real drawback for DISTINCT.)
*/
typedef struct SortClause
{
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index f2a6bf01ba8..6fc357522e7 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.49 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.50 2008/07/31 22:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -31,11 +31,8 @@ extern List *transformGroupClause(ParseState *pstate, List *grouplist,
extern List *transformSortClause(ParseState *pstate, List *orderlist,
List **targetlist, bool resolveUnknown);
extern List *transformDistinctClause(ParseState *pstate, List *distinctlist,
- List **targetlist, List **sortClause);
+ List **targetlist, List *sortClause);
-extern List *addAllTargetsToSortList(ParseState *pstate,
- List *sortlist, List *targetlist,
- bool resolveUnknown);
extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
List *sortlist, List *targetlist,
SortByDir sortby_dir, SortByNulls sortby_nulls,