5 files changed, 362 insertions, 170 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 5b7fde2533a..c818f0ddf10 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.239 2008/08/05 16:03:10 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.240 2008/08/07 01:11:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -68,10 +68,6 @@ static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
 				 int64 *offset_est, int64 *count_est);
 static void preprocess_groupclause(PlannerInfo *root);
-static Oid *extract_grouping_ops(List *groupClause);
-static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
-static bool grouping_is_sortable(List *groupClause);
-static bool grouping_is_hashable(List *groupClause);
 static bool choose_hashed_grouping(PlannerInfo *root,
 					   double tuple_fraction, double limit_tuples,
 					   Path *cheapest_path, Path *sorted_path,
@@ -784,10 +780,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
 		/*
 		 * If there's a top-level ORDER BY, assume we have to fetch all the
-		 * tuples.	This might seem too simplistic given all the hackery below
-		 * to possibly avoid the sort ... but a nonzero tuple_fraction is only
-		 * of use to plan_set_operations() when the setop is UNION ALL, and
-		 * the result of UNION ALL is always unsorted.
+		 * tuples.	This might be too simplistic given all the hackery below
+		 * to possibly avoid the sort; but the odds of accurate estimates
+		 * here are pretty low anyway.
 		 */
 		if (parse->sortClause)
 			tuple_fraction = 0.0;
@@ -818,7 +813,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		 */
 		Assert(parse->commandType == CMD_SELECT);
 
-		tlist = postprocess_setop_tlist(result_plan->targetlist, tlist);
+		tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
+										tlist);
 
 		/*
 		 * Can't handle FOR UPDATE/SHARE here (parser should have checked
@@ -1715,100 +1711,6 @@ preprocess_groupclause(PlannerInfo *root)
 }
 
 /*
- * extract_grouping_ops - make an array of the equality operator OIDs
- *		for a SortGroupClause list
- */
-static Oid *
-extract_grouping_ops(List *groupClause)
-{
-	int			numCols = list_length(groupClause);
-	int			colno = 0;
-	Oid		   *groupOperators;
-	ListCell   *glitem;
-
-	groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
-
-	foreach(glitem, groupClause)
-	{
-		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-
-		groupOperators[colno] = groupcl->eqop;
-		Assert(OidIsValid(groupOperators[colno]));
-		colno++;
-	}
-
-	return groupOperators;
-}
-
-/*
- * extract_grouping_cols - make an array of the grouping column resnos
- *		for a SortGroupClause list
- */
-static AttrNumber *
-extract_grouping_cols(List *groupClause, List *tlist)
-{
-	AttrNumber *grpColIdx;
-	int			numCols = list_length(groupClause);
-	int			colno = 0;
-	ListCell   *glitem;
-
-	grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
-
-	foreach(glitem, groupClause)
-	{
-		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-		TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
-
-		grpColIdx[colno++] = tle->resno;
-	}
-
-	return grpColIdx;
-}
-
-/*
- * grouping_is_sortable - is it possible to implement grouping list by sorting?
- *
- * This is easy since the parser will have included a sortop if one exists.
- */
-static bool
-grouping_is_sortable(List *groupClause)
-{
-	ListCell   *glitem;
-
-	foreach(glitem, groupClause)
-	{
-		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-
-		if (!OidIsValid(groupcl->sortop))
-			return false;
-	}
-	return true;
-}
-
-/*
- * grouping_is_hashable - is it possible to implement grouping list by hashing?
- *
- * We assume hashing is OK if the equality operators are marked oprcanhash.
- * (If there isn't actually a supporting hash function, the executor will
- * complain at runtime; but this is a misdeclaration of the operator, not
- * a system bug.)
- */
-static bool
-grouping_is_hashable(List *groupClause)
-{
-	ListCell   *glitem;
-
-	foreach(glitem, groupClause)
-	{
-		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-
-		if (!op_hashjoinable(groupcl->eqop))
-			return false;
-	}
-	return true;
-}
-
-/*
  * choose_hashed_grouping - should we use hashed grouping?
  *
  * Note: this is only applied when both alternatives are actually feasible.
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 31ba005edb7..750d59a9515 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.150 2008/08/07 01:11:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,8 +32,12 @@
 #include "access/heapam.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_type.h"
+#include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
 #include "optimizer/plancat.h"
 #include "optimizer/planmain.h"
 #include "optimizer/planner.h"
@@ -61,6 +65,13 @@ static List *recurse_union_children(Node *setOp, PlannerInfo *root,
 					   double tuple_fraction,
 					   SetOperationStmt *top_union,
 					   List *refnames_tlist);
+static Plan *make_union_unique(SetOperationStmt *op, Plan *plan,
+				  PlannerInfo *root, double tuple_fraction,
+				  List **sortClauses);
+static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
+					Plan *input_plan,
+					double tuple_fraction, double dNumDistinctRows,
+					const char *construct);
 static List *generate_setop_tlist(List *colTypes, int flag,
 					 Index varno,
 					 bool hack_constants,
@@ -69,7 +80,7 @@ static List *generate_setop_tlist(List *colTypes, int flag,
 static List *generate_append_tlist(List *colTypes, bool flag,
 					  List *input_plans,
 					  List *refnames_tlist);
-static List *generate_setop_sortlist(List *targetlist);
+static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
 static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
 						 Index rti);
 static void make_inh_translation_lists(Relation oldrelation,
@@ -99,7 +110,8 @@ static List *adjust_inherited_tlist(List *tlist,
  * top level has already been factored into tuple_fraction.
  *
  * *sortClauses is an output argument: it is set to a list of SortGroupClauses
- * representing the result ordering of the topmost set operation.
+ * representing the result ordering of the topmost set operation.  (This will
+ * be NIL if the output isn't ordered.)
  */
 Plan *
 plan_set_operations(PlannerInfo *root, double tuple_fraction,
@@ -287,8 +299,8 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
 	/*
 	 * If any of my children are identical UNION nodes (same op, all-flag, and
 	 * colTypes) then they can be merged into this node so that we generate
-	 * only one Append and Sort for the lot.  Recurse to find such nodes and
-	 * compute their children's plans.
+	 * only one Append and unique-ification for the lot.  Recurse to find such
+	 * nodes and compute their children's plans.
 	 */
 	planlist = list_concat(recurse_union_children(op->larg, root,
 												  tuple_fraction,
@@ -314,22 +326,12 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
 
 	/*
 	 * For UNION ALL, we just need the Append plan.  For UNION, need to add
-	 * Sort and Unique nodes to produce unique output.
+	 * node(s) to remove duplicates.
 	 */
-	if (!op->all)
-	{
-		List	   *sortList;
-
-		sortList = generate_setop_sortlist(tlist);
-		if (sortList)
-		{
-			plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan);
-			plan = (Plan *) make_unique(plan, sortList);
-		}
-		*sortClauses = sortList;
-	}
+	if (op->all)
+		*sortClauses = NIL;		/* result of UNION ALL is always unsorted */
 	else
-		*sortClauses = NIL;
+		plan = make_union_unique(op, plan, root, tuple_fraction, sortClauses);
 
 	return plan;
 }
@@ -346,7 +348,7 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
 			   *rplan,
 			   *plan;
 	List	   *tlist,
-			   *sortList,
+			   *groupList,
 			   *planlist,
 			   *child_sortclauses;
 	SetOpCmd	cmd;
@@ -381,19 +383,24 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
 	 */
 	plan = (Plan *) make_append(planlist, false, tlist);
 
-	/*
-	 * Sort the child results, then add a SetOp plan node to generate the
-	 * correct output.
-	 */
-	sortList = generate_setop_sortlist(tlist);
+	/* Identify the grouping semantics */
+	groupList = generate_setop_grouplist(op, tlist);
 
-	if (sortList == NIL)		/* nothing to sort on? */
+	/* punt if nothing to group on (can this happen?) */
+	if (groupList == NIL)
 	{
 		*sortClauses = NIL;
 		return plan;
 	}
 
-	plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan);
+	/*
+	 * Decide whether to hash or sort, and add a sort node if needed.
+	 */
+	plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
+
+	/*
+	 * Finally, add a SetOp plan node to generate the correct output.
+	 */
 	switch (op->op)
 	{
 		case SETOP_INTERSECT:
@@ -403,14 +410,13 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
 			cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
 			break;
 		default:
-			elog(ERROR, "unrecognized set op: %d",
-				 (int) op->op);
+			elog(ERROR, "unrecognized set op: %d", (int) op->op);
 			cmd = SETOPCMD_INTERSECT;	/* keep compiler quiet */
 			break;
 	}
-	plan = (Plan *) make_setop(cmd, plan, sortList, list_length(op->colTypes) + 1);
+	plan = (Plan *) make_setop(cmd, plan, groupList, list_length(op->colTypes) + 1);
 
-	*sortClauses = sortList;
+	*sortClauses = groupList;
 
 	return plan;
 }
@@ -466,6 +472,171 @@ recurse_union_children(Node *setOp, PlannerInfo *root,
 }
 
 /*
+ * Add nodes to the given plan tree to unique-ify the result of a UNION.
+ */
+static Plan *
+make_union_unique(SetOperationStmt *op, Plan *plan,
+				  PlannerInfo *root, double tuple_fraction,
+				  List **sortClauses)
+{
+	List	   *groupList;
+	double		dNumDistinctRows;
+	long		numDistinctRows;
+
+	/* Identify the grouping semantics */
+	groupList = generate_setop_grouplist(op, plan->targetlist);
+
+	/* punt if nothing to group on (can this happen?) */
+	if (groupList == NIL)
+	{
+		*sortClauses = NIL;
+		return plan;
+	}
+
+	/*
+	 * XXX for the moment, take the number of distinct groups as being the
+	 * total input size, ie, the worst case.  This is too conservative, but
+	 * we don't want to risk having the hashtable overrun memory; also,
+	 * it's not clear how to get a decent estimate of the true size.  One
+	 * should note as well the propensity of novices to write UNION rather
+	 * than UNION ALL even when they don't expect any duplicates...
+	 */
+	dNumDistinctRows = plan->plan_rows;
+
+	/* Also convert to long int --- but 'ware overflow! */
+	numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
+
+	/* Decide whether to hash or sort */
+	if (choose_hashed_setop(root, groupList, plan,
+							tuple_fraction, dNumDistinctRows,
+							"UNION"))
+	{
+		/* Hashed aggregate plan --- no sort needed */
+		plan = (Plan *) make_agg(root,
+								 plan->targetlist,
+								 NIL,
+								 AGG_HASHED,
+								 list_length(groupList),
+								 extract_grouping_cols(groupList,
+													   plan->targetlist),
+								 extract_grouping_ops(groupList),
+								 numDistinctRows,
+								 0,
+								 plan);
+		/* Hashed aggregation produces randomly-ordered results */
+		*sortClauses = NIL;
+	}
+	else
+	{
+		/* Sort and Unique */
+		plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
+		plan = (Plan *) make_unique(plan, groupList);
+		plan->plan_rows = dNumDistinctRows;
+		/* We know the sort order of the result */
+		*sortClauses = groupList;
+	}
+
+	return plan;
+}
+
+/*
+ * choose_hashed_setop - should we use hashing for a set operation?
+ */
+static bool
+choose_hashed_setop(PlannerInfo *root, List *groupClauses,
+					Plan *input_plan,
+					double tuple_fraction, double dNumDistinctRows,
+					const char *construct)
+{
+	int			numDistinctCols = list_length(groupClauses);
+	bool		can_sort;
+	bool		can_hash;
+	Size		hashentrysize;
+	List	   *needed_pathkeys;
+	Path		hashed_p;
+	Path		sorted_p;
+
+	/* Check whether the operators support sorting or hashing */
+	can_sort = grouping_is_sortable(groupClauses);
+	can_hash = grouping_is_hashable(groupClauses);
+	if (can_hash && can_sort)
+	{
+		/* we have a meaningful choice to make, continue ... */
+	}
+	else if (can_hash)
+		return true;
+	else if (can_sort)
+		return false;
+	else
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 /* translator: %s is UNION, INTERSECT, or EXCEPT */
+				 errmsg("could not implement %s", construct),
+				 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+
+	/* Prefer sorting when enable_hashagg is off */
+	if (!enable_hashagg)
+		return false;
+
+	/*
+	 * Don't do it if it doesn't look like the hashtable will fit into
+	 * work_mem.
+	 */
+	hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));
+
+	if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
+		return false;
+
+	/*
+	 * See if the estimated cost is no more than doing it the other way.
+	 *
+	 * We need to consider input_plan + hashagg versus input_plan + sort +
+	 * group.  Note that the actual result plan might involve a SetOp or
+	 * Unique node, not Agg or Group, but the cost estimates for Agg and Group
+	 * should be close enough for our purposes here.
+	 *
+	 * These path variables are dummies that just hold cost fields; we don't
+	 * make actual Paths for these steps.
+	 */
+	cost_agg(&hashed_p, root, AGG_HASHED, 0,
+			 numDistinctCols, dNumDistinctRows,
+			 input_plan->startup_cost, input_plan->total_cost,
+			 input_plan->plan_rows);
+
+	/*
+	 * Now for the sorted case.  Note that the input is *always* unsorted,
+	 * since it was made by appending unrelated sub-relations together.
+	 */
+	sorted_p.startup_cost = input_plan->startup_cost;
+	sorted_p.total_cost = input_plan->total_cost;
+	/* XXX this is more expensive than cost_sort really needs: */
+	needed_pathkeys = make_pathkeys_for_sortclauses(root,
+													groupClauses,
+													input_plan->targetlist,
+													true);
+	cost_sort(&sorted_p, root, needed_pathkeys, sorted_p.total_cost,
+			  input_plan->plan_rows, input_plan->plan_width, -1.0);
+	cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
+			   sorted_p.startup_cost, sorted_p.total_cost,
+			   input_plan->plan_rows);
+
+	/*
+	 * Now make the decision using the top-level tuple fraction.  First we
+	 * have to convert an absolute count (LIMIT) into fractional form.
+	 */
+	if (tuple_fraction >= 1.0)
+		tuple_fraction /= dNumDistinctRows;
+
+	if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+									  tuple_fraction) < 0)
+	{
+		/* Hashed is cheaper, so use it */
+		return true;
+	}
+	return false;
+}
+
+/*
  * Generate targetlist for a set-operation plan node
  *
  * colTypes: column datatypes for non-junk columns
@@ -677,30 +848,47 @@ generate_append_tlist(List *colTypes, bool flag,
 }
 
 /*
- * generate_setop_sortlist
- *		Build a SortGroupClause list enumerating all the non-resjunk
- *		tlist entries, using default ordering properties.
+ * generate_setop_grouplist
+ *		Build a SortGroupClause list defining the sort/grouping properties
+ *		of the setop's output columns.
  *
- * For now, we require all the items to be sortable.  Eventually we
- * should implement hashing setops and allow hash-only datatypes.
+ * Parse analysis already determined the properties and built a suitable
+ * list, except that the entries do not have sortgrouprefs set because
+ * the parser output representation doesn't include a tlist for each
+ * setop.  So what we need to do here is copy that list and install
+ * proper sortgrouprefs into it and into the targetlist.
  */
 static List *
-generate_setop_sortlist(List *targetlist)
+generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
 {
-	List	   *sortlist = NIL;
-	ListCell   *l;
+	List	   *grouplist = (List *) copyObject(op->groupClauses);
+	ListCell   *lg;
+	ListCell   *lt;
+	Index		refno = 1;
 
-	foreach(l, targetlist)
+	lg = list_head(grouplist);
+	foreach(lt, targetlist)
 	{
-		TargetEntry *tle = (TargetEntry *) lfirst(l);
+		TargetEntry *tle = (TargetEntry *) lfirst(lt);
+		SortGroupClause *sgc;
 
-		if (!tle->resjunk)
-			sortlist = addTargetToGroupList(NULL, tle,
-											sortlist, targetlist,
-											true, /* XXX fixme someday */
-											false);
+		/* tlist shouldn't have any sortgrouprefs yet */
+		Assert(tle->ressortgroupref == 0);
+
+		if (tle->resjunk)
+			continue;			/* ignore resjunk columns */
+
+		/* non-resjunk columns should have grouping clauses */
+		Assert(lg != NULL);
+		sgc = (SortGroupClause *) lfirst(lg);
+		lg = lnext(lg);
+		Assert(sgc->tleSortGroupRef == 0);
+
+		/* we could use assignSortGroupRef here, but seems a bit silly */
+		sgc->tleSortGroupRef = tle->ressortgroupref = refno++;
 	}
-	return sortlist;
+	Assert(lg == NULL);
+	return grouplist;
 }
 
 
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 04ef2224c91..858d4abcbd8 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.261 2008/08/07 01:11:50 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -3933,6 +3933,8 @@ expression_tree_walker(Node *node,
 					return true;
 				if (walker(setop->rarg, context))
 					return true;
+
+				/* groupClauses are deemed uninteresting */
 			}
 			break;
 		case T_InClauseInfo:
@@ -4535,6 +4537,7 @@ expression_tree_mutator(Node *node,
 				FLATCOPY(newnode, setop, SetOperationStmt);
 				MUTATE(newnode->larg, setop->larg, Node *);
 				MUTATE(newnode->rarg, setop->rarg, Node *);
+				/* We do not mutate groupClauses by default */
 				return (Node *) newnode;
 			}
 			break;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index ad3c10ac27b..655443e9efe 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.145 2008/08/07 01:11:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -24,7 +24,6 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "parser/parse_expr.h"
-#include "parser/parse_oper.h"
 #include "parser/parsetree.h"
 #include "utils/selfuncs.h"
 #include "utils/lsyscache.h"
@@ -1003,12 +1002,6 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	/*
 	 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
 	 * except with ALL.
-	 *
-	 * XXX this code knows that prepunion.c will adopt the default sort/group
-	 * operators for each column datatype to determine uniqueness.  It'd
-	 * probably be better if these operators were chosen at parse time and
-	 * stored into the parsetree, instead of leaving bits of the planner to
-	 * decide semantics.
 	 */
 	if (query->setOperations)
 	{
@@ -1019,24 +1012,26 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 
 		if (!topop->all)
 		{
+			ListCell   *lg;
+
 			/* We're good if all the nonjunk output columns are in colnos */
+			lg = list_head(topop->groupClauses);
 			foreach(l, query->targetList)
 			{
 				TargetEntry *tle = (TargetEntry *) lfirst(l);
-				Oid		tle_eq_opr;
+				SortGroupClause *sgc;
 
 				if (tle->resjunk)
 					continue;	/* ignore resjunk columns */
 
+				/* non-resjunk columns should have grouping clauses */
+				Assert(lg != NULL);
+				sgc = (SortGroupClause *) lfirst(lg);
+				lg = lnext(lg);
+
 				opid = distinct_col_search(tle->resno, colnos, opids);
-				if (!OidIsValid(opid))
-					break;		/* exit early if no match */
-				/* check for compatible semantics */
-				get_sort_group_operators(exprType((Node *) tle->expr),
-										 false, false, false,
-										 NULL, &tle_eq_opr, NULL);
-				if (!OidIsValid(tle_eq_opr) ||
-					!equality_ops_are_compatible(opid, tle_eq_opr))
+				if (!OidIsValid(opid) ||
+					!equality_ops_are_compatible(opid, sgc->eqop))
 					break;		/* exit early if no match */
 			}
 			if (l == NULL)		/* had matches for all? */
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index fc0880ad56c..a2c627fd4d5 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.80 2008/08/07 01:11:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -18,6 +18,7 @@
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "parser/parse_expr.h"
+#include "utils/lsyscache.h"
 
 
 /*****************************************************************************
@@ -202,6 +203,109 @@ get_sortgrouplist_exprs(List *sgClauses, List *targetList)
 }
 
 
+/*****************************************************************************
+ *		Functions to extract data from a list of SortGroupClauses
+ *
+ * These don't really belong in tlist.c, but they are sort of related to the
+ * functions just above, and they don't seem to deserve their own file.
+ *****************************************************************************/
+
+/*
+ * extract_grouping_ops - make an array of the equality operator OIDs
+ *		for a SortGroupClause list
+ */
+Oid *
+extract_grouping_ops(List *groupClause)
+{
+	int			numCols = list_length(groupClause);
+	int			colno = 0;
+	Oid		   *groupOperators;
+	ListCell   *glitem;
+
+	groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+
+	foreach(glitem, groupClause)
+	{
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+		groupOperators[colno] = groupcl->eqop;
+		Assert(OidIsValid(groupOperators[colno]));
+		colno++;
+	}
+
+	return groupOperators;
+}
+
+/*
+ * extract_grouping_cols - make an array of the grouping column resnos
+ *		for a SortGroupClause list
+ */
+AttrNumber *
+extract_grouping_cols(List *groupClause, List *tlist)
+{
+	AttrNumber *grpColIdx;
+	int			numCols = list_length(groupClause);
+	int			colno = 0;
+	ListCell   *glitem;
+
+	grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+
+	foreach(glitem, groupClause)
+	{
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+		TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
+
+		grpColIdx[colno++] = tle->resno;
+	}
+
+	return grpColIdx;
+}
+
+/*
+ * grouping_is_sortable - is it possible to implement grouping list by sorting?
+ *
+ * This is easy since the parser will have included a sortop if one exists.
+ */
+bool
+grouping_is_sortable(List *groupClause)
+{
+	ListCell   *glitem;
+
+	foreach(glitem, groupClause)
+	{
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+		if (!OidIsValid(groupcl->sortop))
+			return false;
+	}
+	return true;
+}
+
+/*
+ * grouping_is_hashable - is it possible to implement grouping list by hashing?
+ *
+ * We assume hashing is OK if the equality operators are marked oprcanhash.
+ * (If there isn't actually a supporting hash function, the executor will
+ * complain at runtime; but this is a misdeclaration of the operator, not
+ * a system bug.)
+ */
+bool
+grouping_is_hashable(List *groupClause)
+{
+	ListCell   *glitem;
+
+	foreach(glitem, groupClause)
+	{
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+		if (!op_hashjoinable(groupcl->eqop))
+			return false;
+	}
+	return true;
+}
+
+
+
 /*
  * Does tlist have same output datatypes as listed in colTypes?
  *