diff options
Diffstat (limited to 'src/backend/parser/parse_clause.c')
-rw-r--r-- | src/backend/parser/parse_clause.c | 199 |
1 files changed, 102 insertions, 97 deletions
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index f532b26d374..d82573fd03d 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.170 2008/06/19 00:46:05 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.171 2008/07/31 22:47:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1454,16 +1454,23 @@ transformSortClause(ParseState *pstate, * transformDistinctClause - * transform a DISTINCT or DISTINCT ON clause * - * Since we may need to add items to the query's sortClause list, that list - * is passed by reference. Likewise for the targetlist. + * Since we may need to add items to the query's targetlist, that list + * is passed by reference. + * + * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as + * possible into the distinctClause. This avoids a possible need to re-sort, + * and allows the user to determine the equality semantics used by DISTINCT, + * should she be working with a datatype that has more than one btree equality + * operator. */ List * transformDistinctClause(ParseState *pstate, List *distinctlist, - List **targetlist, List **sortClause) + List **targetlist, List *sortClause) { List *result = NIL; ListCell *slitem; ListCell *dlitem; + ListCell *tlitem; /* No work if there was no DISTINCT clause */ if (distinctlist == NIL) @@ -1474,25 +1481,21 @@ transformDistinctClause(ParseState *pstate, List *distinctlist, /* We had SELECT DISTINCT */ /* - * All non-resjunk elements from target list that are not already in - * the sort list should be added to it. (We don't really care what - * order the DISTINCT fields are checked in, so we can leave the - * user's ORDER BY spec alone, and just add additional sort keys to it - * to ensure that all targetlist items get sorted.) - */ - *sortClause = addAllTargetsToSortList(pstate, - *sortClause, - *targetlist, - true); - - /* - * Now, DISTINCT list consists of all non-resjunk sortlist items. - * Actually, all the sortlist items had better be non-resjunk! - * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item that - * does not appear anywhere in the SELECT targetlist, and we can't - * implement that with only one sorting pass... + * The distinctClause should consist of all ORDER BY items followed + * by all other non-resjunk targetlist items. There must not be any + * resjunk ORDER BY items --- that would imply that we are sorting + * by a value that isn't necessarily unique within a DISTINCT group, + * so the results wouldn't be well-defined. This construction + * ensures we follow the rule that sortClause and distinctClause match; + * in fact the sortClause will always be a prefix of distinctClause. + * + * Note a corner case: the same TLE could be in the ORDER BY list + * multiple times with different sortops. We have to include it in + * the distinctClause the same way to preserve the prefix property. + * The net effect will be that the TLE value will be made unique + * according to both sortops. */ - foreach(slitem, *sortClause) + foreach(slitem, sortClause) { SortClause *scl = (SortClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist); @@ -1504,71 +1507,103 @@ transformDistinctClause(ParseState *pstate, List *distinctlist, else result = lappend(result, copyObject(scl)); } + + /* + * Now add any remaining non-resjunk tlist items, using default + * sorting semantics for their data types. + */ + foreach(tlitem, *targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(tlitem); + + if (tle->resjunk) + continue; /* ignore junk */ + if (targetIsInSortList(tle, InvalidOid, result)) + continue; /* already in list (with some semantics) */ + result = addTargetToSortList(pstate, tle, + result, *targetlist, + SORTBY_DEFAULT, + SORTBY_NULLS_DEFAULT, + NIL, true); + } } else { /* We had SELECT DISTINCT ON (expr, ...) */ + Bitmapset *refnos = NULL; + int sortgroupref; + bool skipped_sortitem; /* - * If the user writes both DISTINCT ON and ORDER BY, then the two - * expression lists must match (until one or the other runs out). - * Otherwise the ORDER BY requires a different sort order than the - * DISTINCT does, and we can't implement that with only one sort pass - * (and if we do two passes, the results will be rather - * unpredictable). However, it's OK to have more DISTINCT ON - * expressions than ORDER BY expressions; we can just add the extra - * DISTINCT values to the sort list, much as we did above for ordinary - * DISTINCT fields. - * - * Actually, it'd be OK for the common prefixes of the two lists to - * match in any order, but implementing that check seems like more - * trouble than it's worth. + * Add all the DISTINCT ON expressions to the tlist (if not already + * present, they are added as resjunk items). Assign sortgroupref + * numbers to them, and form a bitmapset of these numbers. (A + * bitmapset is convenient here because we don't care about order + * and we can discard duplicates.) */ - ListCell *nextsortlist = list_head(*sortClause); - foreach(dlitem, distinctlist) { + Node *dexpr = (Node *) lfirst(dlitem); TargetEntry *tle; - tle = findTargetlistEntry(pstate, lfirst(dlitem), + tle = findTargetlistEntry(pstate, dexpr, targetlist, DISTINCT_ON_CLAUSE); + sortgroupref = assignSortGroupRef(tle, *targetlist); + refnos = bms_add_member(refnos, sortgroupref); + } - if (nextsortlist != NULL) - { - SortClause *scl = (SortClause *) lfirst(nextsortlist); + /* + * If the user writes both DISTINCT ON and ORDER BY, adopt the + * sorting semantics from ORDER BY items that match DISTINCT ON + * items, and also adopt their column sort order. We insist that + * the distinctClause and sortClause match, so throw error if we + * find the need to add any more distinctClause items after we've + * skipped an ORDER BY item that wasn't in DISTINCT ON. + */ + skipped_sortitem = false; + foreach(slitem, sortClause) + { + SortClause *scl = (SortClause *) lfirst(slitem); - if (tle->ressortgroupref != scl->tleSortGroupRef) + if (bms_is_member(scl->tleSortGroupRef, refnos)) + { + if (skipped_sortitem) ereport(ERROR, (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); - result = lappend(result, copyObject(scl)); - nextsortlist = lnext(nextsortlist); + else + result = lappend(result, copyObject(scl)); } else { - *sortClause = addTargetToSortList(pstate, tle, - *sortClause, *targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, true); + skipped_sortitem = true; + } + } - /* - * Probably, the tle should always have been added at the end - * of the sort list ... but search to be safe. - */ - foreach(slitem, *sortClause) - { - SortClause *scl = (SortClause *) lfirst(slitem); + /* + * Now add any remaining DISTINCT ON items, using default sorting + * semantics for their data types. (Note: this is pretty + * questionable; if the ORDER BY list doesn't include all the DISTINCT + * ON items and more besides, you certainly aren't using DISTINCT ON + * in the intended way, and you probably aren't going to get + * consistent results. It might be better to throw an error or warning + * here. But historically we've allowed it, so keep doing so.) + */ + while ((sortgroupref = bms_first_member(refnos)) >= 0) + { + TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist); - if (tle->ressortgroupref == scl->tleSortGroupRef) - { - result = lappend(result, copyObject(scl)); - break; - } - } - if (slitem == NULL) /* should not happen */ - elog(ERROR, "failed to add DISTINCT ON clause to target list"); - } + if (targetIsInSortList(tle, InvalidOid, result)) + continue; /* already in list (with some semantics) */ + if (skipped_sortitem) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); + result = addTargetToSortList(pstate, tle, + result, *targetlist, + SORTBY_DEFAULT, + SORTBY_NULLS_DEFAULT, + NIL, true); } } @@ -1576,38 +1611,8 @@ transformDistinctClause(ParseState *pstate, List *distinctlist, } /* - * addAllTargetsToSortList - * Make sure all non-resjunk targets in the targetlist are in the - * ORDER BY list, adding the not-yet-sorted ones to the end of the list. - * This is typically used to help implement SELECT DISTINCT. - * - * See addTargetToSortList for info about pstate and resolveUnknown inputs. - * - * Returns the updated ORDER BY list. - */ -List * -addAllTargetsToSortList(ParseState *pstate, List *sortlist, - List *targetlist, bool resolveUnknown) -{ - ListCell *l; - - foreach(l, targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (!tle->resjunk) - sortlist = addTargetToSortList(pstate, tle, - sortlist, targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, resolveUnknown); - } - return sortlist; -} - -/* * addTargetToSortList - * If the given targetlist entry isn't already in the ORDER BY list, + * If the given targetlist entry isn't already in the SortClause list, * add it to the end of the list, using the given sort ordering info. * * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not, @@ -1615,7 +1620,7 @@ addAllTargetsToSortList(ParseState *pstate, List *sortlist, * pstate should be provided if resolveUnknown is TRUE, but can be NULL * otherwise. * - * Returns the updated ORDER BY list. + * Returns the updated SortClause list. */ List * addTargetToSortList(ParseState *pstate, TargetEntry *tle, |