1 files changed, 102 insertions, 97 deletions
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index f532b26d374..d82573fd03d 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.170 2008/06/19 00:46:05 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.171 2008/07/31 22:47:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1454,16 +1454,23 @@ transformSortClause(ParseState *pstate,
  * transformDistinctClause -
  *	  transform a DISTINCT or DISTINCT ON clause
  *
- * Since we may need to add items to the query's sortClause list, that list
- * is passed by reference.	Likewise for the targetlist.
+ * Since we may need to add items to the query's targetlist, that list
+ * is passed by reference.
+ *
+ * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as
+ * possible into the distinctClause.  This avoids a possible need to re-sort,
+ * and allows the user to determine the equality semantics used by DISTINCT,
+ * should she be working with a datatype that has more than one btree equality
+ * operator.
  */
 List *
 transformDistinctClause(ParseState *pstate, List *distinctlist,
-						List **targetlist, List **sortClause)
+						List **targetlist, List *sortClause)
 {
 	List	   *result = NIL;
 	ListCell   *slitem;
 	ListCell   *dlitem;
+	ListCell   *tlitem;
 
 	/* No work if there was no DISTINCT clause */
 	if (distinctlist == NIL)
@@ -1474,25 +1481,21 @@ transformDistinctClause(ParseState *pstate, List *distinctlist,
 		/* We had SELECT DISTINCT */
 
 		/*
-		 * All non-resjunk elements from target list that are not already in
-		 * the sort list should be added to it.  (We don't really care what
-		 * order the DISTINCT fields are checked in, so we can leave the
-		 * user's ORDER BY spec alone, and just add additional sort keys to it
-		 * to ensure that all targetlist items get sorted.)
-		 */
-		*sortClause = addAllTargetsToSortList(pstate,
-											  *sortClause,
-											  *targetlist,
-											  true);
-
-		/*
-		 * Now, DISTINCT list consists of all non-resjunk sortlist items.
-		 * Actually, all the sortlist items had better be non-resjunk!
-		 * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item that
-		 * does not appear anywhere in the SELECT targetlist, and we can't
-		 * implement that with only one sorting pass...
+		 * The distinctClause should consist of all ORDER BY items followed
+		 * by all other non-resjunk targetlist items.  There must not be any
+		 * resjunk ORDER BY items --- that would imply that we are sorting
+		 * by a value that isn't necessarily unique within a DISTINCT group,
+		 * so the results wouldn't be well-defined.  This construction
+		 * ensures we follow the rule that sortClause and distinctClause match;
+		 * in fact the sortClause will always be a prefix of distinctClause.
+		 *
+		 * Note a corner case: the same TLE could be in the ORDER BY list
+		 * multiple times with different sortops.  We have to include it in
+		 * the distinctClause the same way to preserve the prefix property.
+		 * The net effect will be that the TLE value will be made unique
+		 * according to both sortops.
 		 */
-		foreach(slitem, *sortClause)
+		foreach(slitem, sortClause)
 		{
 			SortClause *scl = (SortClause *) lfirst(slitem);
 			TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
@@ -1504,71 +1507,103 @@ transformDistinctClause(ParseState *pstate, List *distinctlist,
 			else
 				result = lappend(result, copyObject(scl));
 		}
+
+		/*
+		 * Now add any remaining non-resjunk tlist items, using default
+		 * sorting semantics for their data types.
+		 */
+		foreach(tlitem, *targetlist)
+		{
+			TargetEntry *tle = (TargetEntry *) lfirst(tlitem);
+
+			if (tle->resjunk)
+				continue;		/* ignore junk */
+			if (targetIsInSortList(tle, InvalidOid, result))
+				continue;		/* already in list (with some semantics) */
+			result = addTargetToSortList(pstate, tle,
+										 result, *targetlist,
+										 SORTBY_DEFAULT,
+										 SORTBY_NULLS_DEFAULT,
+										 NIL, true);
+		}
 	}
 	else
 	{
 		/* We had SELECT DISTINCT ON (expr, ...) */
+		Bitmapset  *refnos = NULL;
+		int			sortgroupref;
+		bool		skipped_sortitem;
 
 		/*
-		 * If the user writes both DISTINCT ON and ORDER BY, then the two
-		 * expression lists must match (until one or the other runs out).
-		 * Otherwise the ORDER BY requires a different sort order than the
-		 * DISTINCT does, and we can't implement that with only one sort pass
-		 * (and if we do two passes, the results will be rather
-		 * unpredictable). However, it's OK to have more DISTINCT ON
-		 * expressions than ORDER BY expressions; we can just add the extra
-		 * DISTINCT values to the sort list, much as we did above for ordinary
-		 * DISTINCT fields.
-		 *
-		 * Actually, it'd be OK for the common prefixes of the two lists to
-		 * match in any order, but implementing that check seems like more
-		 * trouble than it's worth.
+		 * Add all the DISTINCT ON expressions to the tlist (if not already
+		 * present, they are added as resjunk items).  Assign sortgroupref
+		 * numbers to them, and form a bitmapset of these numbers.  (A
+		 * bitmapset is convenient here because we don't care about order
+		 * and we can discard duplicates.)
 		 */
-		ListCell   *nextsortlist = list_head(*sortClause);
-
 		foreach(dlitem, distinctlist)
 		{
+			Node   *dexpr = (Node *) lfirst(dlitem);
 			TargetEntry *tle;
 
-			tle = findTargetlistEntry(pstate, lfirst(dlitem),
+			tle = findTargetlistEntry(pstate, dexpr,
 									  targetlist, DISTINCT_ON_CLAUSE);
+			sortgroupref = assignSortGroupRef(tle, *targetlist);
+			refnos = bms_add_member(refnos, sortgroupref);
+		}
 
-			if (nextsortlist != NULL)
-			{
-				SortClause *scl = (SortClause *) lfirst(nextsortlist);
+		/*
+		 * If the user writes both DISTINCT ON and ORDER BY, adopt the
+		 * sorting semantics from ORDER BY items that match DISTINCT ON
+		 * items, and also adopt their column sort order.  We insist that
+		 * the distinctClause and sortClause match, so throw error if we
+		 * find the need to add any more distinctClause items after we've
+		 * skipped an ORDER BY item that wasn't in DISTINCT ON.
+		 */
+		skipped_sortitem = false;
+		foreach(slitem, sortClause)
+		{
+			SortClause *scl = (SortClause *) lfirst(slitem);
 
-				if (tle->ressortgroupref != scl->tleSortGroupRef)
+			if (bms_is_member(scl->tleSortGroupRef, refnos))
+			{
+				if (skipped_sortitem)
 					ereport(ERROR,
 							(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 							 errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
-				result = lappend(result, copyObject(scl));
-				nextsortlist = lnext(nextsortlist);
+				else
+					result = lappend(result, copyObject(scl));
 			}
 			else
 			{
-				*sortClause = addTargetToSortList(pstate, tle,
-												  *sortClause, *targetlist,
-												  SORTBY_DEFAULT,
-												  SORTBY_NULLS_DEFAULT,
-												  NIL, true);
+				skipped_sortitem = true;
+			}
+		}
 
-				/*
-				 * Probably, the tle should always have been added at the end
-				 * of the sort list ... but search to be safe.
-				 */
-				foreach(slitem, *sortClause)
-				{
-					SortClause *scl = (SortClause *) lfirst(slitem);
+		/*
+		 * Now add any remaining DISTINCT ON items, using default sorting
+		 * semantics for their data types.  (Note: this is pretty
+		 * questionable; if the ORDER BY list doesn't include all the DISTINCT
+		 * ON items and more besides, you certainly aren't using DISTINCT ON
+		 * in the intended way, and you probably aren't going to get
+		 * consistent results.  It might be better to throw an error or warning
+		 * here.  But historically we've allowed it, so keep doing so.)
+		 */
+		while ((sortgroupref = bms_first_member(refnos)) >= 0)
+		{
+			TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist);
 
-					if (tle->ressortgroupref == scl->tleSortGroupRef)
-					{
-						result = lappend(result, copyObject(scl));
-						break;
-					}
-				}
-				if (slitem == NULL)		/* should not happen */
-					elog(ERROR, "failed to add DISTINCT ON clause to target list");
-			}
+			if (targetIsInSortList(tle, InvalidOid, result))
+				continue;		/* already in list (with some semantics) */
+			if (skipped_sortitem)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
+			result = addTargetToSortList(pstate, tle,
+										 result, *targetlist,
+										 SORTBY_DEFAULT,
+										 SORTBY_NULLS_DEFAULT,
+										 NIL, true);
 		}
 	}
 
@@ -1576,38 +1611,8 @@ transformDistinctClause(ParseState *pstate, List *distinctlist,
 }
 
 /*
- * addAllTargetsToSortList
- *		Make sure all non-resjunk targets in the targetlist are in the
- *		ORDER BY list, adding the not-yet-sorted ones to the end of the list.
- *		This is typically used to help implement SELECT DISTINCT.
- *
- * See addTargetToSortList for info about pstate and resolveUnknown inputs.
- *
- * Returns the updated ORDER BY list.
- */
-List *
-addAllTargetsToSortList(ParseState *pstate, List *sortlist,
-						List *targetlist, bool resolveUnknown)
-{
-	ListCell   *l;
-
-	foreach(l, targetlist)
-	{
-		TargetEntry *tle = (TargetEntry *) lfirst(l);
-
-		if (!tle->resjunk)
-			sortlist = addTargetToSortList(pstate, tle,
-										   sortlist, targetlist,
-										   SORTBY_DEFAULT,
-										   SORTBY_NULLS_DEFAULT,
-										   NIL, resolveUnknown);
-	}
-	return sortlist;
-}
-
-/*
  * addTargetToSortList
- *		If the given targetlist entry isn't already in the ORDER BY list,
+ *		If the given targetlist entry isn't already in the SortClause list,
  *		add it to the end of the list, using the given sort ordering info.
  *
  * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
@@ -1615,7 +1620,7 @@ addAllTargetsToSortList(ParseState *pstate, List *sortlist,
  * pstate should be provided if resolveUnknown is TRUE, but can be NULL
  * otherwise.
  *
- * Returns the updated ORDER BY list.
+ * Returns the updated SortClause list.
  */
 List *
 addTargetToSortList(ParseState *pstate, TargetEntry *tle,