11 files changed, 197 insertions, 179 deletions
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 9a9aba2a417..ba18db0dd97 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.46 2008/04/09 01:00:46 momjian Exp $
+$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.47 2008/08/02 21:31:59 tgl Exp $
 
 Optimizer
 =========
@@ -563,8 +563,8 @@ competing Paths are equivalently sorted.
 
 Pathkeys are also useful to represent an ordering that we wish to achieve,
 since they are easily compared to the pathkeys of a potential candidate
-path.  So, SortClause lists are turned into pathkeys lists for use inside
-the optimizer.
+path.  So, SortGroupClause lists are turned into pathkeys lists for use
+inside the optimizer.
 
 Because we have to generate pathkeys lists from the sort clauses before
 we've finished EquivalenceClass merging, we cannot use the pointer-equality
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index c8335868133..b13371e00eb 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.171 2008/06/27 03:56:55 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.172 2008/08/02 21:31:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -963,11 +963,12 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
  *
  * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
  * refer to non-DISTINCT output columns, because that could change the set
- * of rows returned.  This condition is vacuous for DISTINCT, because then
- * there are no non-DISTINCT output columns, but unfortunately it's fairly
- * expensive to tell the difference between DISTINCT and DISTINCT ON in the
- * parsetree representation.  It's cheaper to just make sure all the Vars
- * in the qual refer to DISTINCT columns.
+ * of rows returned.  (This condition is vacuous for DISTINCT, because then
+ * there are no non-DISTINCT output columns, so we needn't check.  But note
+ * we are assuming that the qual can't distinguish values that the DISTINCT
+ * operator sees as equal.  This is a bit shaky but we have no way to test
+ * for the case, and it's unlikely enough that we shouldn't refuse the
+ * optimization just because it could theoretically happen.)
  *
  * 5. We must not push down any quals that refer to subselect outputs that
  * return sets, else we'd introduce functions-returning-sets into the
@@ -1030,8 +1031,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
 		Assert(tle != NULL);
 		Assert(!tle->resjunk);
 
-		/* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
-		if (subquery->distinctClause != NIL &&
+		/* If subquery uses DISTINCT ON, check point 4 */
+		if (subquery->hasDistinctOn &&
 			!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
 		{
 			/* non-DISTINCT column, so fail */
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b289ea6e65b..c4459e6e3eb 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.10 2008/03/31 16:59:26 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.11 2008/08/02 21:31:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -355,7 +355,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
  *	  class it is a member of; if none, build a new single-member
  *	  EquivalenceClass for it.
  *
- * sortref is the SortGroupRef of the originating SortClause, if any,
+ * sortref is the SortGroupRef of the originating SortGroupClause, if any,
  * or zero if not.
  *
  * This can be used safely both before and after EquivalenceClass merging;
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index f0141b23668..ccb23834de0 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.93 2008/01/09 20:42:28 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.94 2008/08/02 21:31:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -227,8 +227,8 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
  *	  a PathKey.  If canonicalize = true, the result is a "canonical"
  *	  PathKey, otherwise not.  (But note it might be redundant anyway.)
  *
- * If the PathKey is being generated from a SortClause, sortref should be
- * the SortClause's SortGroupRef; otherwise zero.
+ * If the PathKey is being generated from a SortGroupClause, sortref should be
+ * the SortGroupClause's SortGroupRef; otherwise zero.
  *
  * canonicalize should always be TRUE after EquivalenceClass merging has
  * been performed, but FALSE if we haven't done EquivalenceClass merging yet.
@@ -823,7 +823,7 @@ build_join_pathkeys(PlannerInfo *root,
 /*
  * make_pathkeys_for_sortclauses
  *		Generate a pathkeys list that represents the sort order specified
- *		by a list of SortClauses (GroupClauses will work too!)
+ *		by a list of SortGroupClauses
  *
  * If canonicalize is TRUE, the resulting PathKeys are all in canonical form;
  * otherwise not.  canonicalize should always be TRUE after EquivalenceClass
@@ -832,7 +832,7 @@ build_join_pathkeys(PlannerInfo *root,
  * be able to represent requested pathkeys before the equivalence classes have
  * been created for the query.)
  *
- * 'sortclauses' is a list of SortClause or GroupClause nodes
+ * 'sortclauses' is a list of SortGroupClause nodes
  * 'tlist' is the targetlist to find the referenced tlist entries in
  */
 List *
@@ -846,11 +846,12 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 
 	foreach(l, sortclauses)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(l);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		Expr	   *sortkey;
 		PathKey    *pathkey;
 
 		sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
+		Assert(OidIsValid(sortcl->sortop));
 		pathkey = make_pathkey_from_sortinfo(root,
 											 sortkey,
 											 sortcl->sortop,
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 0884cda6070..80b6ed2edfe 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.241 2008/06/27 03:56:55 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.242 2008/08/02 21:32:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -760,7 +760,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 			Oid			in_oper = lfirst_oid(l);
 			Oid			sortop;
 			TargetEntry *tle;
-			SortClause *sortcl;
+			SortGroupClause *sortcl;
 
 			sortop = get_ordering_op_for_equality_op(in_oper, false);
 			if (!OidIsValid(sortop))	/* shouldn't happen */
@@ -769,9 +769,10 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 			tle = get_tle_by_resno(subplan->targetlist,
 								   groupColIdx[groupColPos]);
 			Assert(tle != NULL);
-			sortcl = makeNode(SortClause);
+			sortcl = makeNode(SortGroupClause);
 			sortcl->tleSortGroupRef = assignSortGroupRef(tle,
 														 subplan->targetlist);
+			sortcl->eqop = in_oper;
 			sortcl->sortop = sortop;
 			sortcl->nulls_first = false;
 			sortList = lappend(sortList, sortcl);
@@ -2531,6 +2532,8 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first,
 {
 	int			i;
 
+	Assert(OidIsValid(sortOp));
+
 	for (i = 0; i < numCols; i++)
 	{
 		/*
@@ -2753,7 +2756,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys,
  * make_sort_from_sortclauses
  *	  Create sort plan to sort according to given sortclauses
  *
- *	  'sortcls' is a list of SortClauses
+ *	  'sortcls' is a list of SortGroupClauses
  *	  'lefttree' is the node which yields input tuples
  */
 Sort *
@@ -2778,7 +2781,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree)
 
 	foreach(l, sortcls)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(l);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, sub_tlist);
 
 		/*
@@ -2802,14 +2805,14 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree)
  * make_sort_from_groupcols
  *	  Create sort plan to sort based on grouping columns
  *
- * 'groupcls' is the list of GroupClauses
+ * 'groupcls' is the list of SortGroupClauses
  * 'grpColIdx' gives the column numbers to use
  *
  * This might look like it could be merged with make_sort_from_sortclauses,
  * but presently we *must* use the grpColIdx[] array to locate sort columns,
  * because the child plan's tlist is not marked with ressortgroupref info
  * appropriate to the grouping node.  So, only the sort ordering info
- * is used from the GroupClause entries.
+ * is used from the SortGroupClause entries.
  */
 Sort *
 make_sort_from_groupcols(PlannerInfo *root,
@@ -2837,7 +2840,7 @@ make_sort_from_groupcols(PlannerInfo *root,
 
 	foreach(l, groupcls)
 	{
-		GroupClause *grpcl = (GroupClause *) lfirst(l);
+		SortGroupClause *grpcl = (SortGroupClause *) lfirst(l);
 		TargetEntry *tle = get_tle_by_resno(sub_tlist, grpColIdx[grpno]);
 
 		/*
@@ -3038,7 +3041,7 @@ make_group(PlannerInfo *root,
 }
 
 /*
- * distinctList is a list of SortClauses, identifying the targetlist items
+ * distinctList is a list of SortGroupClauses, identifying the targetlist items
  * that should be considered by the Unique filter.	The input path must
  * already be sorted accordingly.
  */
@@ -3074,7 +3077,7 @@ make_unique(Plan *lefttree, List *distinctList)
 	plan->righttree = NULL;
 
 	/*
-	 * convert SortClause list into arrays of attr indexes and equality
+	 * convert SortGroupClause list into arrays of attr indexes and equality
 	 * operators, as wanted by executor
 	 */
 	Assert(numCols > 0);
@@ -3083,14 +3086,12 @@ make_unique(Plan *lefttree, List *distinctList)
 
 	foreach(slitem, distinctList)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(slitem);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
 
 		uniqColIdx[keyno] = tle->resno;
-		uniqOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop);
-		if (!OidIsValid(uniqOperators[keyno]))	/* shouldn't happen */
-			elog(ERROR, "could not find equality operator for ordering operator %u",
-				 sortcl->sortop);
+		uniqOperators[keyno] = sortcl->eqop;
+		Assert(OidIsValid(uniqOperators[keyno]));
 		keyno++;
 	}
 
@@ -3102,8 +3103,8 @@ make_unique(Plan *lefttree, List *distinctList)
 }
 
 /*
- * distinctList is a list of SortClauses, identifying the targetlist items
- * that should be considered by the SetOp filter.  The input path must
+ * distinctList is a list of SortGroupClauses, identifying the targetlist
+ * items that should be considered by the SetOp filter.  The input path must
  * already be sorted accordingly.
  */
 SetOp *
@@ -3140,7 +3141,7 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
 	plan->righttree = NULL;
 
 	/*
-	 * convert SortClause list into arrays of attr indexes and equality
+	 * convert SortGroupClause list into arrays of attr indexes and equality
 	 * operators, as wanted by executor
 	 */
 	Assert(numCols > 0);
@@ -3149,14 +3150,12 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
 
 	foreach(slitem, distinctList)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(slitem);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
 
 		dupColIdx[keyno] = tle->resno;
-		dupOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop);
-		if (!OidIsValid(dupOperators[keyno]))	/* shouldn't happen */
-			elog(ERROR, "could not find equality operator for ordering operator %u",
-				 sortcl->sortop);
+		dupOperators[keyno] = sortcl->eqop;
+		Assert(OidIsValid(dupOperators[keyno]));
 		keyno++;
 	}
 
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index af140d4acb5..a6b4b1df661 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.41 2008/07/10 02:14:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.42 2008/08/02 21:32:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -477,7 +477,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 	Plan	   *plan;
 	Plan	   *iplan;
 	TargetEntry *tle;
-	SortClause *sortcl;
+	SortGroupClause *sortcl;
 
 	/*
 	 * Generate a suitably modified query.	Much of the work here is probably
@@ -492,6 +492,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 	subparse->utilityStmt = NULL;
 	subparse->intoClause = NULL;
 	subparse->hasAggs = false;
+	subparse->hasDistinctOn = false;
 	subparse->groupClause = NIL;
 	subparse->havingQual = NULL;
 	subparse->distinctClause = NIL;
@@ -505,8 +506,12 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 	subparse->targetList = list_make1(tle);
 
 	/* set up the appropriate ORDER BY entry */
-	sortcl = makeNode(SortClause);
+	sortcl = makeNode(SortGroupClause);
 	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList);
+	sortcl->eqop = get_equality_op_for_ordering_op(info->aggsortop, NULL);
+	if (!OidIsValid(sortcl->eqop))		/* shouldn't happen */
+		elog(ERROR, "could not find equality operator for ordering operator %u",
+			 info->aggsortop);
 	sortcl->sortop = info->aggsortop;
 	sortcl->nulls_first = info->nulls_first;
 	subparse->sortClause = list_make1(sortcl);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 6ebbc5b7bc3..735a77ac4e1 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.235 2008/07/31 22:47:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,6 +67,7 @@ static bool is_dummy_plan(Plan *plan);
 static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
 				 int64 *offset_est, int64 *count_est);
+static void preprocess_groupclause(PlannerInfo *root);
 static Oid *extract_grouping_ops(List *groupClause);
 static bool choose_hashed_grouping(PlannerInfo *root,
 					   double tuple_fraction, double limit_tuples,
@@ -846,11 +847,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		Path	   *best_path;
 		long		numGroups = 0;
 		AggClauseCounts agg_counts;
-		int			numGroupCols = list_length(parse->groupClause);
+		int			numGroupCols;
 		bool		use_hashed_grouping = false;
 
 		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
 
+		/* Preprocess GROUP BY clause, if any */
+		if (parse->groupClause)
+			preprocess_groupclause(root);
+		numGroupCols = list_length(parse->groupClause);
+
 		/* Preprocess targetlist */
 		tlist = preprocess_targetlist(root, tlist);
 
@@ -1476,6 +1482,88 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
 	return tuple_fraction;
 }
 
+
+/*
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ *
+ * In principle it might be interesting to consider other orderings of the
+ * GROUP BY elements, which could match the sort ordering of other
+ * possible plans (eg an indexscan) and thereby reduce cost.  We don't
+ * bother with that, though.  Hashed grouping will frequently win anyway.
+ */
+static void
+preprocess_groupclause(PlannerInfo *root)
+{
+	Query	   *parse = root->parse;
+	List	   *new_groupclause;
+	bool		partial_match;
+	ListCell   *sl;
+	ListCell   *gl;
+
+	/* If no ORDER BY, nothing useful to do here anyway */
+	if (parse->sortClause == NIL)
+		return;
+
+	/*
+	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
+	 * items, but only as far as we can make a matching prefix.
+	 *
+	 * This code assumes that the sortClause contains no duplicate items.
+	 */
+	new_groupclause = NIL;
+	foreach(sl, parse->sortClause)
+	{
+		SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
+
+		foreach(gl, parse->groupClause)
+		{
+			SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+			if (equal(gc, sc))
+			{
+				new_groupclause = lappend(new_groupclause, gc);
+				break;
+			}
+		}
+		if (gl == NULL)
+			break;				/* no match, so stop scanning */
+	}
+
+	/* Did we match all of the ORDER BY list, or just some of it? */
+	partial_match = (sl != NULL);
+
+	/* If no match at all, no point in reordering GROUP BY */
+	if (new_groupclause == NIL)
+		return;
+
+	/*
+	 * Add any remaining GROUP BY items to the new list, but only if we
+	 * were able to make a complete match.  In other words, we only
+	 * rearrange the GROUP BY list if the result is that one list is a
+	 * prefix of the other --- otherwise there's no possibility of a
+	 * common sort.
+	 */
+	foreach(gl, parse->groupClause)
+	{
+		SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+		if (list_member_ptr(new_groupclause, gc))
+			continue;			/* it matched an ORDER BY item */
+		if (partial_match)
+			return;				/* give up, no common sort possible */
+		new_groupclause = lappend(new_groupclause, gc);
+	}
+
+	/* Success --- install the rearranged GROUP BY list */
+	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
+	parse->groupClause = new_groupclause;
+}
+
 /*
  * extract_grouping_ops - make an array of the equality operator OIDs
  *		for the GROUP BY clause
@@ -1492,12 +1580,10 @@ extract_grouping_ops(List *groupClause)
 
 	foreach(glitem, groupClause)
 	{
-		GroupClause *groupcl = (GroupClause *) lfirst(glitem);
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
 
-		groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
-		if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */
-			elog(ERROR, "could not find equality operator for ordering operator %u",
-				 groupcl->sortop);
+		groupOperators[colno] = groupcl->eqop;
+		Assert(OidIsValid(groupOperators[colno]));
 		colno++;
 	}
 
@@ -1738,7 +1824,7 @@ make_subplanTargetList(PlannerInfo *root,
 
 		foreach(gl, parse->groupClause)
 		{
-			GroupClause *grpcl = (GroupClause *) lfirst(gl);
+			SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
 			Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
 			TargetEntry *te = NULL;
 			ListCell   *sl;
@@ -1797,7 +1883,7 @@ locate_grouping_columns(PlannerInfo *root,
 
 	foreach(gl, root->parse->groupClause)
 	{
-		GroupClause *grpcl = (GroupClause *) lfirst(gl);
+		SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
 		Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
 		TargetEntry *te = NULL;
 		ListCell   *sl;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index b30cedee9fc..31ba005edb7 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.148 2008/07/31 22:47:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -98,7 +98,7 @@ static List *adjust_inherited_tlist(List *tlist,
  * zero means "all the tuples will be fetched".  Any LIMIT present at the
  * top level has already been factored into tuple_fraction.
  *
- * *sortClauses is an output argument: it is set to a list of SortClauses
+ * *sortClauses is an output argument: it is set to a list of SortGroupClauses
  * representing the result ordering of the topmost set operation.
  */
 Plan *
@@ -152,7 +152,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction,
  * junkOK: if true, child resjunk columns may be left in the result
  * flag: if >= 0, add a resjunk output column indicating value of flag
  * refnames_tlist: targetlist to take column names from
- * *sortClauses: receives list of SortClauses for result plan, if any
+ * *sortClauses: receives list of SortGroupClauses for result plan, if any
  *
  * We don't have to care about typmods here: the only allowed difference
  * between set-op input and output typmods is input is a specific typmod
@@ -678,8 +678,11 @@ generate_append_tlist(List *colTypes, bool flag,
 
 /*
  * generate_setop_sortlist
- *		Build a SortClause list enumerating all the non-resjunk tlist entries,
- *		using default ordering properties.
+ *		Build a SortGroupClause list enumerating all the non-resjunk
+ *		tlist entries, using default ordering properties.
+ *
+ * For now, we require all the items to be sortable.  Eventually we
+ * should implement hashing setops and allow hash-only datatypes.
  */
 static List *
 generate_setop_sortlist(List *targetlist)
@@ -692,11 +695,10 @@ generate_setop_sortlist(List *targetlist)
 		TargetEntry *tle = (TargetEntry *) lfirst(l);
 
 		if (!tle->resjunk)
-			sortlist = addTargetToSortList(NULL, tle,
-										   sortlist, targetlist,
-										   SORTBY_DEFAULT,
-										   SORTBY_NULLS_DEFAULT,
-										   NIL, false);
+			sortlist = addTargetToGroupList(NULL, tle,
+											sortlist, targetlist,
+											true, /* XXX fixme someday */
+											false);
 	}
 	return sortlist;
 }
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index ea924222792..04ef2224c91 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.259 2008/05/15 17:37:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -1335,85 +1335,6 @@ is_pseudo_constant_clause_relids(Node *clause, Relids relids)
 
 
 /*****************************************************************************
- *		Tests on clauses of queries
- *
- * Possibly this code should go someplace else, since this isn't quite the
- * same meaning of "clause" as is used elsewhere in this module.  But I can't
- * think of a better place for it...
- *****************************************************************************/
-
-/*
- * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is
- * not the same as the set of output columns.
- */
-bool
-has_distinct_on_clause(Query *query)
-{
-	ListCell   *l;
-
-	/* Is there a DISTINCT clause at all? */
-	if (query->distinctClause == NIL)
-		return false;
-
-	/*
-	 * If the DISTINCT list contains all the nonjunk targetlist items, and
-	 * nothing else (ie, no junk tlist items), then it's a simple DISTINCT,
-	 * else it's DISTINCT ON.  We do not require the lists to be in the same
-	 * order (since the parser may have adjusted the DISTINCT clause ordering
-	 * to agree with ORDER BY).  Furthermore, a non-DISTINCT junk tlist item
-	 * that is in the sortClause is also evidence of DISTINCT ON, since we
-	 * don't allow ORDER BY on junk tlist items when plain DISTINCT is used.
-	 *
-	 * This code assumes that the DISTINCT list is valid, ie, all its entries
-	 * match some entry of the tlist.
-	 */
-	foreach(l, query->targetList)
-	{
-		TargetEntry *tle = (TargetEntry *) lfirst(l);
-
-		if (tle->ressortgroupref == 0)
-		{
-			if (tle->resjunk)
-				continue;		/* we can ignore unsorted junk cols */
-			return true;		/* definitely not in DISTINCT list */
-		}
-		if (targetIsInSortList(tle, InvalidOid, query->distinctClause))
-		{
-			if (tle->resjunk)
-				return true;	/* junk TLE in DISTINCT means DISTINCT ON */
-			/* else this TLE is okay, keep looking */
-		}
-		else
-		{
-			/* This TLE is not in DISTINCT list */
-			if (!tle->resjunk)
-				return true;	/* non-junk, non-DISTINCT, so DISTINCT ON */
-			if (targetIsInSortList(tle, InvalidOid, query->sortClause))
-				return true;	/* sorted, non-distinct junk */
-			/* unsorted junk is okay, keep looking */
-		}
-	}
-	/* It's a simple DISTINCT */
-	return false;
-}
-
-/*
- * Test whether a query uses simple DISTINCT, ie, has a distinct-list that
- * is the same as the set of output columns.
- */
-bool
-has_distinct_clause(Query *query)
-{
-	/* Is there a DISTINCT clause at all? */
-	if (query->distinctClause == NIL)
-		return false;
-
-	/* It's DISTINCT if it's not DISTINCT ON */
-	return !has_distinct_on_clause(query);
-}
-
-
-/*****************************************************************************
  *																			 *
  *		General clause-manipulating routines								 *
  *																			 *
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index c5b6c2ed610..ad3c10ac27b 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.143 2008/04/21 20:54:15 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -935,8 +935,10 @@ translate_sub_tlist(List *tlist, int relid)
  * corresponding upper-level equality operators listed in opids would think
  * the values are distinct.  (Note: the opids entries could be cross-type
  * operators, and thus not exactly the equality operators that the subquery
- * would use itself.  We assume that the subquery is compatible if these
- * operators appear in the same btree opfamily as the ones the subquery uses.)
+ * would use itself.  We use equality_ops_are_compatible() to check
+ * compatibility.  That looks at btree or hash opfamily membership, and so
+ * should give trustworthy answers for all operators that we might need
+ * to deal with here.)
  */
 static bool
 query_is_distinct_for(Query *query, List *colnos, List *opids)
@@ -955,13 +957,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	{
 		foreach(l, query->distinctClause)
 		{
-			SortClause *scl = (SortClause *) lfirst(l);
-			TargetEntry *tle = get_sortgroupclause_tle(scl,
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
+			TargetEntry *tle = get_sortgroupclause_tle(sgc,
 													   query->targetList);
 
 			opid = distinct_col_search(tle->resno, colnos, opids);
 			if (!OidIsValid(opid) ||
-				!ops_in_same_btree_opfamily(opid, scl->sortop))
+				!equality_ops_are_compatible(opid, sgc->eqop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@@ -976,13 +978,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	{
 		foreach(l, query->groupClause)
 		{
-			GroupClause *grpcl = (GroupClause *) lfirst(l);
-			TargetEntry *tle = get_sortgroupclause_tle(grpcl,
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
+			TargetEntry *tle = get_sortgroupclause_tle(sgc,
 													   query->targetList);
 
 			opid = distinct_col_search(tle->resno, colnos, opids);
 			if (!OidIsValid(opid) ||
-				!ops_in_same_btree_opfamily(opid, grpcl->sortop))
+				!equality_ops_are_compatible(opid, sgc->eqop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@@ -1002,10 +1004,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
 	 * except with ALL.
 	 *
-	 * XXX this code knows that prepunion.c will adopt the default ordering
-	 * operator for each column datatype as the sortop.  It'd probably be
-	 * better if these operators were chosen at parse time and stored into the
-	 * parsetree, instead of leaving bits of the planner to decide semantics.
+	 * XXX this code knows that prepunion.c will adopt the default sort/group
+	 * operators for each column datatype to determine uniqueness.  It'd
+	 * probably be better if these operators were chosen at parse time and
+	 * stored into the parsetree, instead of leaving bits of the planner to
+	 * decide semantics.
 	 */
 	if (query->setOperations)
 	{
@@ -1020,14 +1023,20 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 			foreach(l, query->targetList)
 			{
 				TargetEntry *tle = (TargetEntry *) lfirst(l);
+				Oid		tle_eq_opr;
 
 				if (tle->resjunk)
 					continue;	/* ignore resjunk columns */
 
 				opid = distinct_col_search(tle->resno, colnos, opids);
-				if (!OidIsValid(opid) ||
-					!ops_in_same_btree_opfamily(opid,
-						   ordering_oper_opid(exprType((Node *) tle->expr))))
+				if (!OidIsValid(opid))
+					break;		/* exit early if no match */
+				/* check for compatible semantics */
+				get_sort_group_operators(exprType((Node *) tle->expr),
+										 false, false, false,
+										 NULL, &tle_eq_opr, NULL);
+				if (!OidIsValid(tle_eq_opr) ||
+					!equality_ops_are_compatible(opid, tle_eq_opr))
 					break;		/* exit early if no match */
 			}
 			if (l == NULL)		/* had matches for all? */
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 618d65925c7..fc0880ad56c 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.78 2008/01/01 19:45:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -156,49 +156,43 @@ get_sortgroupref_tle(Index sortref, List *targetList)
 
 /*
  * get_sortgroupclause_tle
- *		Find the targetlist entry matching the given SortClause
- *		(or GroupClause) by ressortgroupref, and return it.
- *
- * Because GroupClause is typedef'd as SortClause, either kind of
- * node can be passed without casting.
+ *		Find the targetlist entry matching the given SortGroupClause
+ *		by ressortgroupref, and return it.
  */
 TargetEntry *
-get_sortgroupclause_tle(SortClause *sortClause,
+get_sortgroupclause_tle(SortGroupClause *sgClause,
 						List *targetList)
 {
-	return get_sortgroupref_tle(sortClause->tleSortGroupRef, targetList);
+	return get_sortgroupref_tle(sgClause->tleSortGroupRef, targetList);
 }
 
 /*
  * get_sortgroupclause_expr
- *		Find the targetlist entry matching the given SortClause
- *		(or GroupClause) by ressortgroupref, and return its expression.
- *
- * Because GroupClause is typedef'd as SortClause, either kind of
- * node can be passed without casting.
+ *		Find the targetlist entry matching the given SortGroupClause
+ *		by ressortgroupref, and return its expression.
  */
 Node *
-get_sortgroupclause_expr(SortClause *sortClause, List *targetList)
+get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
 {
-	TargetEntry *tle = get_sortgroupclause_tle(sortClause, targetList);
+	TargetEntry *tle = get_sortgroupclause_tle(sgClause, targetList);
 
 	return (Node *) tle->expr;
 }
 
 /*
  * get_sortgrouplist_exprs
- *		Given a list of SortClauses (or GroupClauses), build a list
+ *		Given a list of SortGroupClauses, build a list
  *		of the referenced targetlist expressions.
  */
 List *
-get_sortgrouplist_exprs(List *sortClauses, List *targetList)
+get_sortgrouplist_exprs(List *sgClauses, List *targetList)
 {
 	List	   *result = NIL;
 	ListCell   *l;
 
-	foreach(l, sortClauses)
+	foreach(l, sgClauses)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(l);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		Node	   *sortexpr;
 
 		sortexpr = get_sortgroupclause_expr(sortcl, targetList);