Teach the system how to use hashing for UNION. (INTERSECT/EXCEPT will follow,

but seem like a separate patch since most of the remaining work is on the executor side.) I took the opportunity to push selection of the grouping operators for set operations into the parser where it belongs. Otherwise this is just a small exercise in making prepunion.c consider both alternatives. As with the recent DISTINCT patch, this means we can UNION on datatypes that can hash but not sort, and it means that UNION without ORDER BY is no longer certain to produce sorted output.
author: Tom Lane <tgl@sss.pgh.pa.us> 2008-08-07 01:11:52 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2008-08-07 01:11:52 +0000
commit: 2d1d96b1cea8f67a095e8f28372af4081605f681 (patch)
tree: 91be573dfa6eacbe8a4421d700af3fadc3d1bda8 /src/backend/optimizer
parent: 3d40d5e70ebe21b7d52467987bffad8aea16f29b (diff)
download: postgresql-2d1d96b1cea8f67a095e8f28372af4081605f681.tar.gz
postgresql-2d1d96b1cea8f67a095e8f28372af4081605f681.zip
5 files changed, 362 insertions, 170 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 5b7fde2533a..c818f0ddf10 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.239 2008/08/05 16:03:10 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.240 2008/08/07 01:11:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -68,10 +68,6 @@ static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
 				 int64 *offset_est, int64 *count_est);
 static void preprocess_groupclause(PlannerInfo *root);
-static Oid *extract_grouping_ops(List *groupClause);
-static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
-static bool grouping_is_sortable(List *groupClause);
-static bool grouping_is_hashable(List *groupClause);
 static bool choose_hashed_grouping(PlannerInfo *root,
 					   double tuple_fraction, double limit_tuples,
 					   Path *cheapest_path, Path *sorted_path,
@@ -784,10 +780,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
 		/*
 		 * If there's a top-level ORDER BY, assume we have to fetch all the
-		 * tuples.	This might seem too simplistic given all the hackery below
-		 * to possibly avoid the sort ... but a nonzero tuple_fraction is only
-		 * of use to plan_set_operations() when the setop is UNION ALL, and
-		 * the result of UNION ALL is always unsorted.
+		 * tuples.	This might be too simplistic given all the hackery below
+		 * to possibly avoid the sort; but the odds of accurate estimates
+		 * here are pretty low anyway.
 		 */
 		if (parse->sortClause)
 			tuple_fraction = 0.0;
@@ -818,7 +813,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		 */
 		Assert(parse->commandType == CMD_SELECT);
 
-		tlist = postprocess_setop_tlist(result_plan->targetlist, tlist);
+		tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
+										tlist);
 
 		/*
 		 * Can't handle FOR UPDATE/SHARE here (parser should have checked
@@ -1715,100 +1711,6 @@ preprocess_groupclause(PlannerInfo *root)
 }
 
 /*
- * extract_grouping_ops - make an array of the equality operator OIDs
- *		for a SortGroupClause list
- */
-static Oid *
-extract_grouping_ops(List *groupClause)
-{
-	int			numCols = list_length(groupClause);
-	int			colno = 0;
-	Oid		   *groupOperators;
-	ListCell   *glitem;
-
-	groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
-
-	foreach(glitem, groupClause)
-	{
-		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-
-		groupOperators[colno] = groupcl->eqop;
-		Assert(OidIsValid(groupOperators[colno]));
-		colno++;
-	}
-
-	return groupOperators;
-}
-
-/*
- * extract_grouping_cols - make an array of the grouping column resnos
- *		for a SortGroupClause list
- */
-static AttrNumber *
-extract_grouping_cols(List *groupClause, List *tlist)
-{
-	AttrNumber *grpColIdx;
-	int			numCols = list_length(groupClause);
-	int			colno = 0;
-	ListCell   *glitem;
-
-	grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
-
-	foreach(glitem, groupClause)
-	{
-		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-		TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
-
-		grpColIdx[colno++] = tle->resno;
-	}
-
-	return grpColIdx;
-}
-
-/*
- * grouping_is_sortable - is it possible to implement grouping list by sorting?
- *
- * This is easy since the parser will have included a sortop if one exists.
- */
-static bool
-grouping_is_sortable(List *groupClause)
-{
-	ListCell   *glitem;
-
-	foreach(glitem, groupClause)
-	{
-		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-
-		if (!OidIsValid(groupcl->sortop))
-			return false;
-	}
-	return true;
-}
-
-/*
- * grouping_is_hashable - is it possible to implement grouping list by hashing?
- *
- * We assume hashing is OK if the equality operators are marked oprcanhash.
- * (If there isn't actually a supporting hash function, the executor will
- * complain at runtime; but this is a misdeclaration of the operator, not
- * a system bug.)
- */
-static bool
-grouping_is_hashable(List *groupClause)
-{
-	ListCell   *glitem;
-
-	foreach(glitem, groupClause)
-	{
-		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-
-		if (!op_hashjoinable(groupcl->eqop))
-			return false;
-	}
-	return true;
-}
-
-/*
  * choose_hashed_grouping - should we use hashed grouping?
  *
  * Note: this is only applied when both alternatives are actually feasible.
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 31ba005edb7..750d59a9515 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.150 2008/08/07 01:11:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,8 +32,12 @@
 #include "access/heapam.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_type.h"
+#include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
 #include "optimizer/plancat.h"
 #include "optimizer/planmain.h"
 #include "optimizer/planner.h"
@@ -61,6 +65,13 @@ static List *recurse_union_children(Node *setOp, PlannerInfo *root,
 					   double tuple_fraction,
 					   SetOperationStmt *top_union,
 					   List *refnames_tlist);
+static Plan *make_union_unique(SetOperationStmt *op, Plan *plan,
+				  PlannerInfo *root, double tuple_fraction,
+				  List **sortClauses);
+static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
+					Plan *input_plan,
+					double tuple_fraction, double dNumDistinctRows,
+					const char *construct);
 static List *generate_setop_tlist(List *colTypes, int flag,
 					 Index varno,
 					 bool hack_constants,
@@ -69,7 +80,7 @@ static List *generate_setop_tlist(List *colTypes, int flag,
 static List *generate_append_tlist(List *colTypes, bool flag,
 					  List *input_plans,
 					  List *refnames_tlist);
-static List *generate_setop_sortlist(List *targetlist);
+static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
 static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
 						 Index rti);
 static void make_inh_translation_lists(Relation oldrelation,
@@ -99,7 +110,8 @@ static List *adjust_inherited_tlist(List *tlist,
  * top level has already been factored into tuple_fraction.
  *
  * *sortClauses is an output argument: it is set to a list of SortGroupClauses
- * representing the result ordering of the topmost set operation.
+ * representing the result ordering of the topmost set operation.  (This will
+ * be NIL if the output isn't ordered.)
  */
 Plan *
 plan_set_operations(PlannerInfo *root, double tuple_fraction,
@@ -287,8 +299,8 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
 	/*
 	 * If any of my children are identical UNION nodes (same op, all-flag, and
 	 * colTypes) then they can be merged into this node so that we generate
-	 * only one Append and Sort for the lot.  Recurse to find such nodes and
-	 * compute their children's plans.
+	 * only one Append and unique-ification for the lot.  Recurse to find such
+	 * nodes and compute their children's plans.
 	 */
 	planlist = list_concat(recurse_union_children(op->larg, root,
 												  tuple_fraction,
@@ -314,22 +326,12 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
 
 	/*
 	 * For UNION ALL, we just need the Append plan.  For UNION, need to add
-	 * Sort and Unique nodes to produce unique output.
+	 * node(s) to remove duplicates.
 	 */
-	if (!op->all)
-	{
-		List	   *sortList;
-
-		sortList = generate_setop_sortlist(tlist);
-		if (sortList)
-		{
-			plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan);
-			plan = (Plan *) make_unique(plan, sortList);
-		}
-		*sortClauses = sortList;
-	}
+	if (op->all)
+		*sortClauses = NIL;		/* result of UNION ALL is always unsorted */
 	else
-		*sortClauses = NIL;
+		plan = make_union_unique(op, plan, root, tuple_fraction, sortClauses);
 
 	return plan;
 }
@@ -346,7 +348,7 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
 			   *rplan,
 			   *plan;
 	List	   *tlist,
-			   *sortList,
+			   *groupList,
 			   *planlist,
 			   *child_sortclauses;
 	SetOpCmd	cmd;
@@ -381,19 +383,24 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
 	 */
 	plan = (Plan *) make_append(planlist, false, tlist);
 
-	/*
-	 * Sort the child results, then add a SetOp plan node to generate the
-	 * correct output.
-	 */
-	sortList = generate_setop_sortlist(tlist);
+	/* Identify the grouping semantics */
+	groupList = generate_setop_grouplist(op, tlist);
 
-	if (sortList == NIL)		/* nothing to sort on? */
+	/* punt if nothing to group on (can this happen?) */
+	if (groupList == NIL)
 	{
 		*sortClauses = NIL;
 		return plan;
 	}
 
-	plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan);
+	/*
+	 * Decide whether to hash or sort, and add a sort node if needed.
+	 */
+	plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
+
+	/*
+	 * Finally, add a SetOp plan node to generate the correct output.
+	 */
 	switch (op->op)
 	{
 		case SETOP_INTERSECT:
@@ -403,14 +410,13 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
 			cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
 			break;
 		default:
-			elog(ERROR, "unrecognized set op: %d",
-				 (int) op->op);
+			elog(ERROR, "unrecognized set op: %d", (int) op->op);
 			cmd = SETOPCMD_INTERSECT;	/* keep compiler quiet */
 			break;
 	}
-	plan = (Plan *) make_setop(cmd, plan, sortList, list_length(op->colTypes) + 1);
+	plan = (Plan *) make_setop(cmd, plan, groupList, list_length(op->colTypes) + 1);
 
-	*sortClauses = sortList;
+	*sortClauses = groupList;
 
 	return plan;
 }
@@ -466,6 +472,171 @@ recurse_union_children(Node *setOp, PlannerInfo *root,
 }
 
 /*
+ * Add nodes to the given plan tree to unique-ify the result of a UNION.
+ */
+static Plan *
+make_union_unique(SetOperationStmt *op, Plan *plan,
+				  PlannerInfo *root, double tuple_fraction,
+				  List **sortClauses)
+{
+	List	   *groupList;
+	double		dNumDistinctRows;
+	long		numDistinctRows;
+
+	/* Identify the grouping semantics */
+	groupList = generate_setop_grouplist(op, plan->targetlist);
+
+	/* punt if nothing to group on (can this happen?) */
+	if (groupList == NIL)
+	{
+		*sortClauses = NIL;
+		return plan;
+	}
+
+	/*
+	 * XXX for the moment, take the number of distinct groups as being the
+	 * total input size, ie, the worst case.  This is too conservative, but
+	 * we don't want to risk having the hashtable overrun memory; also,
+	 * it's not clear how to get a decent estimate of the true size.  One
+	 * should note as well the propensity of novices to write UNION rather
+	 * than UNION ALL even when they don't expect any duplicates...
+	 */
+	dNumDistinctRows = plan->plan_rows;
+
+	/* Also convert to long int --- but 'ware overflow! */
+	numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
+
+	/* Decide whether to hash or sort */
+	if (choose_hashed_setop(root, groupList, plan,
+							tuple_fraction, dNumDistinctRows,
+							"UNION"))
+	{
+		/* Hashed aggregate plan --- no sort needed */
+		plan = (Plan *) make_agg(root,
+								 plan->targetlist,
+								 NIL,
+								 AGG_HASHED,
+								 list_length(groupList),
+								 extract_grouping_cols(groupList,
+													   plan->targetlist),
+								 extract_grouping_ops(groupList),
+								 numDistinctRows,
+								 0,
+								 plan);
+		/* Hashed aggregation produces randomly-ordered results */
+		*sortClauses = NIL;
+	}
+	else
+	{
+		/* Sort and Unique */
+		plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
+		plan = (Plan *) make_unique(plan, groupList);
+		plan->plan_rows = dNumDistinctRows;
+		/* We know the sort order of the result */
+		*sortClauses = groupList;
+	}
+
+	return plan;
+}
+
+/*
+ * choose_hashed_setop - should we use hashing for a set operation?
+ */
+static bool
+choose_hashed_setop(PlannerInfo *root, List *groupClauses,
+					Plan *input_plan,
+					double tuple_fraction, double dNumDistinctRows,
+					const char *construct)
+{
+	int			numDistinctCols = list_length(groupClauses);
+	bool		can_sort;
+	bool		can_hash;
+	Size		hashentrysize;
+	List	   *needed_pathkeys;
+	Path		hashed_p;
+	Path		sorted_p;
+
+	/* Check whether the operators support sorting or hashing */
+	can_sort = grouping_is_sortable(groupClauses);
+	can_hash = grouping_is_hashable(groupClauses);
+	if (can_hash && can_sort)
+	{
+		/* we have a meaningful choice to make, continue ... */
+	}
+	else if (can_hash)
+		return true;
+	else if (can_sort)
+		return false;
+	else
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 /* translator: %s is UNION, INTERSECT, or EXCEPT */
+				 errmsg("could not implement %s", construct),
+				 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+
+	/* Prefer sorting when enable_hashagg is off */
+	if (!enable_hashagg)
+		return false;
+
+	/*
+	 * Don't do it if it doesn't look like the hashtable will fit into
+	 * work_mem.
+	 */
+	hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));
+
+	if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
+		return false;
+
+	/*
+	 * See if the estimated cost is no more than doing it the other way.
+	 *
+	 * We need to consider input_plan + hashagg versus input_plan + sort +
+	 * group.  Note that the actual result plan might involve a SetOp or
+	 * Unique node, not Agg or Group, but the cost estimates for Agg and Group
+	 * should be close enough for our purposes here.
+	 *
+	 * These path variables are dummies that just hold cost fields; we don't
+	 * make actual Paths for these steps.
+	 */
+	cost_agg(&hashed_p, root, AGG_HASHED, 0,
+			 numDistinctCols, dNumDistinctRows,
+			 input_plan->startup_cost, input_plan->total_cost,
+			 input_plan->plan_rows);
+
+	/*
+	 * Now for the sorted case.  Note that the input is *always* unsorted,
+	 * since it was made by appending unrelated sub-relations together.
+	 */
+	sorted_p.startup_cost = input_plan->startup_cost;
+	sorted_p.total_cost = input_plan->total_cost;
+	/* XXX this is more expensive than cost_sort really needs: */
+	needed_pathkeys = make_pathkeys_for_sortclauses(root,
+													groupClauses,
+													input_plan->targetlist,
+													true);
+	cost_sort(&sorted_p, root, needed_pathkeys, sorted_p.total_cost,
+			  input_plan->plan_rows, input_plan->plan_width, -1.0);
+	cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
+			   sorted_p.startup_cost, sorted_p.total_cost,
+			   input_plan->plan_rows);
+
+	/*
+	 * Now make the decision using the top-level tuple fraction.  First we
+	 * have to convert an absolute count (LIMIT) into fractional form.
+	 */
+	if (tuple_fraction >= 1.0)
+		tuple_fraction /= dNumDistinctRows;
+
+	if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+									  tuple_fraction) < 0)
+	{
+		/* Hashed is cheaper, so use it */
+		return true;
+	}
+	return false;
+}
+
+/*
  * Generate targetlist for a set-operation plan node
  *
  * colTypes: column datatypes for non-junk columns
@@ -677,30 +848,47 @@ generate_append_tlist(List *colTypes, bool flag,
 }
 
 /*
- * generate_setop_sortlist
- *		Build a SortGroupClause list enumerating all the non-resjunk
- *		tlist entries, using default ordering properties.
+ * generate_setop_grouplist
+ *		Build a SortGroupClause list defining the sort/grouping properties
+ *		of the setop's output columns.
  *
- * For now, we require all the items to be sortable.  Eventually we
- * should implement hashing setops and allow hash-only datatypes.
+ * Parse analysis already determined the properties and built a suitable
+ * list, except that the entries do not have sortgrouprefs set because
+ * the parser output representation doesn't include a tlist for each
+ * setop.  So what we need to do here is copy that list and install
+ * proper sortgrouprefs into it and into the targetlist.
  */
 static List *
-generate_setop_sortlist(List *targetlist)
+generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
 {
-	List	   *sortlist = NIL;
-	ListCell   *l;
+	List	   *grouplist = (List *) copyObject(op->groupClauses);
+	ListCell   *lg;
+	ListCell   *lt;
+	Index		refno = 1;
 
-	foreach(l, targetlist)
+	lg = list_head(grouplist);
+	foreach(lt, targetlist)
 	{
-		TargetEntry *tle = (TargetEntry *) lfirst(l);
+		TargetEntry *tle = (TargetEntry *) lfirst(lt);
+		SortGroupClause *sgc;
 
-		if (!tle->resjunk)
-			sortlist = addTargetToGroupList(NULL, tle,
-											sortlist, targetlist,
-											true, /* XXX fixme someday */
-											false);
+		/* tlist shouldn't have any sortgrouprefs yet */
+		Assert(tle->ressortgroupref == 0);
+
+		if (tle->resjunk)
+			continue;			/* ignore resjunk columns */
+
+		/* non-resjunk columns should have grouping clauses */
+		Assert(lg != NULL);
+		sgc = (SortGroupClause *) lfirst(lg);
+		lg = lnext(lg);
+		Assert(sgc->tleSortGroupRef == 0);
+
+		/* we could use assignSortGroupRef here, but seems a bit silly */
+		sgc->tleSortGroupRef = tle->ressortgroupref = refno++;
 	}
-	return sortlist;
+	Assert(lg == NULL);
+	return grouplist;
 }
 
 
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 04ef2224c91..858d4abcbd8 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.261 2008/08/07 01:11:50 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -3933,6 +3933,8 @@ expression_tree_walker(Node *node,
 					return true;
 				if (walker(setop->rarg, context))
 					return true;
+
+				/* groupClauses are deemed uninteresting */
 			}
 			break;
 		case T_InClauseInfo:
@@ -4535,6 +4537,7 @@ expression_tree_mutator(Node *node,
 				FLATCOPY(newnode, setop, SetOperationStmt);
 				MUTATE(newnode->larg, setop->larg, Node *);
 				MUTATE(newnode->rarg, setop->rarg, Node *);
+				/* We do not mutate groupClauses by default */
 				return (Node *) newnode;
 			}
 			break;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index ad3c10ac27b..655443e9efe 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.145 2008/08/07 01:11:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -24,7 +24,6 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "parser/parse_expr.h"
-#include "parser/parse_oper.h"
 #include "parser/parsetree.h"
 #include "utils/selfuncs.h"
 #include "utils/lsyscache.h"
@@ -1003,12 +1002,6 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	/*
 	 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
 	 * except with ALL.
-	 *
-	 * XXX this code knows that prepunion.c will adopt the default sort/group
-	 * operators for each column datatype to determine uniqueness.  It'd
-	 * probably be better if these operators were chosen at parse time and
-	 * stored into the parsetree, instead of leaving bits of the planner to
-	 * decide semantics.
 	 */
 	if (query->setOperations)
 	{
@@ -1019,24 +1012,26 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 
 		if (!topop->all)
 		{
+			ListCell   *lg;
+
 			/* We're good if all the nonjunk output columns are in colnos */
+			lg = list_head(topop->groupClauses);
 			foreach(l, query->targetList)
 			{
 				TargetEntry *tle = (TargetEntry *) lfirst(l);
-				Oid		tle_eq_opr;
+				SortGroupClause *sgc;
 
 				if (tle->resjunk)
 					continue;	/* ignore resjunk columns */
 
+				/* non-resjunk columns should have grouping clauses */
+				Assert(lg != NULL);
+				sgc = (SortGroupClause *) lfirst(lg);
+				lg = lnext(lg);
+
 				opid = distinct_col_search(tle->resno, colnos, opids);
-				if (!OidIsValid(opid))
-					break;		/* exit early if no match */
-				/* check for compatible semantics */
-				get_sort_group_operators(exprType((Node *) tle->expr),
-										 false, false, false,
-										 NULL, &tle_eq_opr, NULL);
-				if (!OidIsValid(tle_eq_opr) ||
-					!equality_ops_are_compatible(opid, tle_eq_opr))
+				if (!OidIsValid(opid) ||
+					!equality_ops_are_compatible(opid, sgc->eqop))
 					break;		/* exit early if no match */
 			}
 			if (l == NULL)		/* had matches for all? */
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index fc0880ad56c..a2c627fd4d5 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.80 2008/08/07 01:11:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -18,6 +18,7 @@
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "parser/parse_expr.h"
+#include "utils/lsyscache.h"
 
 
 /*****************************************************************************
@@ -202,6 +203,109 @@ get_sortgrouplist_exprs(List *sgClauses, List *targetList)
 }
 
 
+/*****************************************************************************
+ *		Functions to extract data from a list of SortGroupClauses
+ *
+ * These don't really belong in tlist.c, but they are sort of related to the
+ * functions just above, and they don't seem to deserve their own file.
+ *****************************************************************************/
+
+/*
+ * extract_grouping_ops - make an array of the equality operator OIDs
+ *		for a SortGroupClause list
+ */
+Oid *
+extract_grouping_ops(List *groupClause)
+{
+	int			numCols = list_length(groupClause);
+	int			colno = 0;
+	Oid		   *groupOperators;
+	ListCell   *glitem;
+
+	groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+
+	foreach(glitem, groupClause)
+	{
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+		groupOperators[colno] = groupcl->eqop;
+		Assert(OidIsValid(groupOperators[colno]));
+		colno++;
+	}
+
+	return groupOperators;
+}
+
+/*
+ * extract_grouping_cols - make an array of the grouping column resnos
+ *		for a SortGroupClause list
+ */
+AttrNumber *
+extract_grouping_cols(List *groupClause, List *tlist)
+{
+	AttrNumber *grpColIdx;
+	int			numCols = list_length(groupClause);
+	int			colno = 0;
+	ListCell   *glitem;
+
+	grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+
+	foreach(glitem, groupClause)
+	{
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+		TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
+
+		grpColIdx[colno++] = tle->resno;
+	}
+
+	return grpColIdx;
+}
+
+/*
+ * grouping_is_sortable - is it possible to implement grouping list by sorting?
+ *
+ * This is easy since the parser will have included a sortop if one exists.
+ */
+bool
+grouping_is_sortable(List *groupClause)
+{
+	ListCell   *glitem;
+
+	foreach(glitem, groupClause)
+	{
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+		if (!OidIsValid(groupcl->sortop))
+			return false;
+	}
+	return true;
+}
+
+/*
+ * grouping_is_hashable - is it possible to implement grouping list by hashing?
+ *
+ * We assume hashing is OK if the equality operators are marked oprcanhash.
+ * (If there isn't actually a supporting hash function, the executor will
+ * complain at runtime; but this is a misdeclaration of the operator, not
+ * a system bug.)
+ */
+bool
+grouping_is_hashable(List *groupClause)
+{
+	ListCell   *glitem;
+
+	foreach(glitem, groupClause)
+	{
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+		if (!op_hashjoinable(groupcl->eqop))
+			return false;
+	}
+	return true;
+}
+
+
+
 /*
  * Does tlist have same output datatypes as listed in colTypes?
  *
author	Tom Lane <tgl@sss.pgh.pa.us>	2008-08-07 01:11:52 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2008-08-07 01:11:52 +0000
commit	2d1d96b1cea8f67a095e8f28372af4081605f681 (patch)
tree	91be573dfa6eacbe8a4421d700af3fadc3d1bda8 /src/backend/optimizer
parent	3d40d5e70ebe21b7d52467987bffad8aea16f29b (diff)
download	postgresql-2d1d96b1cea8f67a095e8f28372af4081605f681.tar.gz postgresql-2d1d96b1cea8f67a095e8f28372af4081605f681.zip