aboutsummaryrefslogtreecommitdiff
path: root/src/backend/parser/parse_clause.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/parser/parse_clause.c')
-rw-r--r--src/backend/parser/parse_clause.c199
1 files changed, 102 insertions, 97 deletions
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index f532b26d374..d82573fd03d 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.170 2008/06/19 00:46:05 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.171 2008/07/31 22:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1454,16 +1454,23 @@ transformSortClause(ParseState *pstate,
* transformDistinctClause -
* transform a DISTINCT or DISTINCT ON clause
*
- * Since we may need to add items to the query's sortClause list, that list
- * is passed by reference. Likewise for the targetlist.
+ * Since we may need to add items to the query's targetlist, that list
+ * is passed by reference.
+ *
+ * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as
+ * possible into the distinctClause. This avoids a possible need to re-sort,
+ * and allows the user to determine the equality semantics used by DISTINCT,
+ * should she be working with a datatype that has more than one btree equality
+ * operator.
*/
List *
transformDistinctClause(ParseState *pstate, List *distinctlist,
- List **targetlist, List **sortClause)
+ List **targetlist, List *sortClause)
{
List *result = NIL;
ListCell *slitem;
ListCell *dlitem;
+ ListCell *tlitem;
/* No work if there was no DISTINCT clause */
if (distinctlist == NIL)
@@ -1474,25 +1481,21 @@ transformDistinctClause(ParseState *pstate, List *distinctlist,
/* We had SELECT DISTINCT */
/*
- * All non-resjunk elements from target list that are not already in
- * the sort list should be added to it. (We don't really care what
- * order the DISTINCT fields are checked in, so we can leave the
- * user's ORDER BY spec alone, and just add additional sort keys to it
- * to ensure that all targetlist items get sorted.)
- */
- *sortClause = addAllTargetsToSortList(pstate,
- *sortClause,
- *targetlist,
- true);
-
- /*
- * Now, DISTINCT list consists of all non-resjunk sortlist items.
- * Actually, all the sortlist items had better be non-resjunk!
- * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item that
- * does not appear anywhere in the SELECT targetlist, and we can't
- * implement that with only one sorting pass...
+ * The distinctClause should consist of all ORDER BY items followed
+ * by all other non-resjunk targetlist items. There must not be any
+ * resjunk ORDER BY items --- that would imply that we are sorting
+ * by a value that isn't necessarily unique within a DISTINCT group,
+ * so the results wouldn't be well-defined. This construction
+ * ensures we follow the rule that sortClause and distinctClause match;
+ * in fact the sortClause will always be a prefix of distinctClause.
+ *
+ * Note a corner case: the same TLE could be in the ORDER BY list
+ * multiple times with different sortops. We have to include it in
+ * the distinctClause the same way to preserve the prefix property.
+ * The net effect will be that the TLE value will be made unique
+ * according to both sortops.
*/
- foreach(slitem, *sortClause)
+ foreach(slitem, sortClause)
{
SortClause *scl = (SortClause *) lfirst(slitem);
TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
@@ -1504,71 +1507,103 @@ transformDistinctClause(ParseState *pstate, List *distinctlist,
else
result = lappend(result, copyObject(scl));
}
+
+ /*
+ * Now add any remaining non-resjunk tlist items, using default
+ * sorting semantics for their data types.
+ */
+ foreach(tlitem, *targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(tlitem);
+
+ if (tle->resjunk)
+ continue; /* ignore junk */
+ if (targetIsInSortList(tle, InvalidOid, result))
+ continue; /* already in list (with some semantics) */
+ result = addTargetToSortList(pstate, tle,
+ result, *targetlist,
+ SORTBY_DEFAULT,
+ SORTBY_NULLS_DEFAULT,
+ NIL, true);
+ }
}
else
{
/* We had SELECT DISTINCT ON (expr, ...) */
+ Bitmapset *refnos = NULL;
+ int sortgroupref;
+ bool skipped_sortitem;
/*
- * If the user writes both DISTINCT ON and ORDER BY, then the two
- * expression lists must match (until one or the other runs out).
- * Otherwise the ORDER BY requires a different sort order than the
- * DISTINCT does, and we can't implement that with only one sort pass
- * (and if we do two passes, the results will be rather
- * unpredictable). However, it's OK to have more DISTINCT ON
- * expressions than ORDER BY expressions; we can just add the extra
- * DISTINCT values to the sort list, much as we did above for ordinary
- * DISTINCT fields.
- *
- * Actually, it'd be OK for the common prefixes of the two lists to
- * match in any order, but implementing that check seems like more
- * trouble than it's worth.
+ * Add all the DISTINCT ON expressions to the tlist (if not already
+ * present, they are added as resjunk items). Assign sortgroupref
+ * numbers to them, and form a bitmapset of these numbers. (A
+ * bitmapset is convenient here because we don't care about order
+ * and we can discard duplicates.)
*/
- ListCell *nextsortlist = list_head(*sortClause);
-
foreach(dlitem, distinctlist)
{
+ Node *dexpr = (Node *) lfirst(dlitem);
TargetEntry *tle;
- tle = findTargetlistEntry(pstate, lfirst(dlitem),
+ tle = findTargetlistEntry(pstate, dexpr,
targetlist, DISTINCT_ON_CLAUSE);
+ sortgroupref = assignSortGroupRef(tle, *targetlist);
+ refnos = bms_add_member(refnos, sortgroupref);
+ }
- if (nextsortlist != NULL)
- {
- SortClause *scl = (SortClause *) lfirst(nextsortlist);
+ /*
+ * If the user writes both DISTINCT ON and ORDER BY, adopt the
+ * sorting semantics from ORDER BY items that match DISTINCT ON
+ * items, and also adopt their column sort order. We insist that
+ * the distinctClause and sortClause match, so throw error if we
+ * find the need to add any more distinctClause items after we've
+ * skipped an ORDER BY item that wasn't in DISTINCT ON.
+ */
+ skipped_sortitem = false;
+ foreach(slitem, sortClause)
+ {
+ SortClause *scl = (SortClause *) lfirst(slitem);
- if (tle->ressortgroupref != scl->tleSortGroupRef)
+ if (bms_is_member(scl->tleSortGroupRef, refnos))
+ {
+ if (skipped_sortitem)
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
- result = lappend(result, copyObject(scl));
- nextsortlist = lnext(nextsortlist);
+ else
+ result = lappend(result, copyObject(scl));
}
else
{
- *sortClause = addTargetToSortList(pstate, tle,
- *sortClause, *targetlist,
- SORTBY_DEFAULT,
- SORTBY_NULLS_DEFAULT,
- NIL, true);
+ skipped_sortitem = true;
+ }
+ }
- /*
- * Probably, the tle should always have been added at the end
- * of the sort list ... but search to be safe.
- */
- foreach(slitem, *sortClause)
- {
- SortClause *scl = (SortClause *) lfirst(slitem);
+ /*
+ * Now add any remaining DISTINCT ON items, using default sorting
+ * semantics for their data types. (Note: this is pretty
+ * questionable; if the ORDER BY list doesn't include all the DISTINCT
+ * ON items and more besides, you certainly aren't using DISTINCT ON
+ * in the intended way, and you probably aren't going to get
+ * consistent results. It might be better to throw an error or warning
+ * here. But historically we've allowed it, so keep doing so.)
+ */
+ while ((sortgroupref = bms_first_member(refnos)) >= 0)
+ {
+ TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist);
- if (tle->ressortgroupref == scl->tleSortGroupRef)
- {
- result = lappend(result, copyObject(scl));
- break;
- }
- }
- if (slitem == NULL) /* should not happen */
- elog(ERROR, "failed to add DISTINCT ON clause to target list");
- }
+ if (targetIsInSortList(tle, InvalidOid, result))
+ continue; /* already in list (with some semantics) */
+ if (skipped_sortitem)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
+ result = addTargetToSortList(pstate, tle,
+ result, *targetlist,
+ SORTBY_DEFAULT,
+ SORTBY_NULLS_DEFAULT,
+ NIL, true);
}
}
@@ -1576,38 +1611,8 @@ transformDistinctClause(ParseState *pstate, List *distinctlist,
}
/*
- * addAllTargetsToSortList
- * Make sure all non-resjunk targets in the targetlist are in the
- * ORDER BY list, adding the not-yet-sorted ones to the end of the list.
- * This is typically used to help implement SELECT DISTINCT.
- *
- * See addTargetToSortList for info about pstate and resolveUnknown inputs.
- *
- * Returns the updated ORDER BY list.
- */
-List *
-addAllTargetsToSortList(ParseState *pstate, List *sortlist,
- List *targetlist, bool resolveUnknown)
-{
- ListCell *l;
-
- foreach(l, targetlist)
- {
- TargetEntry *tle = (TargetEntry *) lfirst(l);
-
- if (!tle->resjunk)
- sortlist = addTargetToSortList(pstate, tle,
- sortlist, targetlist,
- SORTBY_DEFAULT,
- SORTBY_NULLS_DEFAULT,
- NIL, resolveUnknown);
- }
- return sortlist;
-}
-
-/*
* addTargetToSortList
- * If the given targetlist entry isn't already in the ORDER BY list,
+ * If the given targetlist entry isn't already in the SortClause list,
* add it to the end of the list, using the given sort ordering info.
*
* If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not,
@@ -1615,7 +1620,7 @@ addAllTargetsToSortList(ParseState *pstate, List *sortlist,
* pstate should be provided if resolveUnknown is TRUE, but can be NULL
* otherwise.
*
- * Returns the updated ORDER BY list.
+ * Returns the updated SortClause list.
*/
List *
addTargetToSortList(ParseState *pstate, TargetEntry *tle,