1 files changed, 609 insertions, 252 deletions
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 471acc4b3ec..ef35da6ade6 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -122,12 +122,19 @@
  *	  specific).
  *
  *	  Where more complex grouping sets are used, we break them down into
- *	  "phases", where each phase has a different sort order.  During each
- *	  phase but the last, the input tuples are additionally stored in a
- *	  tuplesort which is keyed to the next phase's sort order; during each
- *	  phase but the first, the input tuples are drawn from the previously
- *	  sorted data.  (The sorting of the data for the first phase is handled by
- *	  the planner, as it might be satisfied by underlying nodes.)
+ *	  "phases", where each phase has a different sort order (except phase 0
+ *	  which is reserved for hashing).  During each phase but the last, the
+ *	  input tuples are additionally stored in a tuplesort which is keyed to the
+ *	  next phase's sort order; during each phase but the first, the input
+ *	  tuples are drawn from the previously sorted data.  (The sorting of the
+ *	  data for the first phase is handled by the planner, as it might be
+ *	  satisfied by underlying nodes.)
+ *
+ *	  Hashing can be mixed with sorted grouping.  To do this, we have an
+ *	  AGG_MIXED strategy that populates the hashtables during the first sorted
+ *	  phase, and switches to reading them out after completing all sort phases.
+ *	  We can also support AGG_HASHED with multiple hash tables and no sorting
+ *	  at all.
  *
  *	  From the perspective of aggregate transition and final functions, the
  *	  only issue regarding grouping sets is this: a single call site (flinfo)
@@ -139,7 +146,54 @@
  *	  sensitive to the grouping set for which the aggregate function is
  *	  currently being called.
  *
- *	  TODO: AGG_HASHED doesn't support multiple grouping sets yet.
+ *	  Plan structure:
+ *
+ *	  What we get from the planner is actually one "real" Agg node which is
+ *	  part of the plan tree proper, but which optionally has an additional list
+ *	  of Agg nodes hung off the side via the "chain" field.  This is because an
+ *	  Agg node happens to be a convenient representation of all the data we
+ *	  need for grouping sets.
+ *
+ *	  For many purposes, we treat the "real" node as if it were just the first
+ *	  node in the chain.  The chain must be ordered such that hashed entries
+ *	  come before sorted/plain entries; the real node is marked AGG_MIXED if
+ *	  there are both types present (in which case the real node describes one
+ *	  of the hashed groupings, other AGG_HASHED nodes may optionally follow in
+ *	  the chain, followed in turn by AGG_SORTED or (one) AGG_PLAIN node).  If
+ *	  the real node is marked AGG_HASHED or AGG_SORTED, then all the chained
+ *	  nodes must be of the same type; if it is AGG_PLAIN, there can be no
+ *	  chained nodes.
+ *
+ *	  We collect all hashed nodes into a single "phase", numbered 0, and create
+ *	  a sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node.
+ *	  Phase 0 is allocated even if there are no hashes, but remains unused in
+ *	  that case.
+ *
+ *	  AGG_HASHED nodes actually refer to only a single grouping set each,
+ *	  because for each hashed grouping we need a separate grpColIdx and
+ *	  numGroups estimate.  AGG_SORTED nodes represent a "rollup", a list of
+ *	  grouping sets that share a sort order.  Each AGG_SORTED node other than
+ *	  the first one has an associated Sort node which describes the sort order
+ *	  to be used; the first sorted node takes its input from the outer subtree,
+ *	  which the planner has already arranged to provide ordered data.
+ *
+ *	  Memory and ExprContext usage:
+ *
+ *	  Because we're accumulating aggregate values across input rows, we need to
+ *	  use more memory contexts than just simple input/output tuple contexts.
+ *	  In fact, for a rollup, we need a separate context for each grouping set
+ *	  so that we can reset the inner (finer-grained) aggregates on their group
+ *	  boundaries while continuing to accumulate values for outer
+ *	  (coarser-grained) groupings.  On top of this, we might be simultaneously
+ *	  populating hashtables; however, we only need one context for all the
+ *	  hashtables.
+ *
+ *	  So we create an array, aggcontexts, with an ExprContext for each grouping
+ *	  set in the largest rollup that we're going to process, and use the
+ *	  per-tuple memory context of those ExprContexts to store the aggregate
+ *	  transition values.  hashcontext is the single context created to support
+ *	  all hash tables.
+ *
  *
  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -432,6 +486,7 @@ typedef struct AggStatePerGroupData
  */
 typedef struct AggStatePerPhaseData
 {
+	AggStrategy aggstrategy;	/* strategy for this phase */
 	int			numsets;		/* number of grouping sets (or 0) */
 	int		   *gset_lengths;	/* lengths of grouping sets */
 	Bitmapset **grouped_cols;	/* column groupings for rollup */
@@ -440,7 +495,30 @@ typedef struct AggStatePerPhaseData
 	Sort	   *sortnode;		/* Sort node for input ordering for phase */
 }	AggStatePerPhaseData;
 
+/*
+ * AggStatePerHashData - per-hashtable state
+ *
+ * When doing grouping sets with hashing, we have one of these for each
+ * grouping set. (When doing hashing without grouping sets, we have just one of
+ * them.)
+ */
+typedef struct AggStatePerHashData
+{
+	TupleHashTable hashtable;	/* hash table with one entry per group */
+	TupleHashIterator hashiter; /* for iterating through hash table */
+	TupleTableSlot *hashslot;	/* slot for loading hash table */
+	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
+	FmgrInfo   *eqfunctions;	/* per-grouping-field equality fns */
+	int			numCols;		/* number of hash key columns */
+	int			numhashGrpCols; /* number of columns in hash table */
+	int			largestGrpColIdx;		/* largest col required for hashing */
+	AttrNumber *hashGrpColIdxInput;		/* hash col indices in input slot */
+	AttrNumber *hashGrpColIdxHash;		/* indices in hashtbl tuples */
+	Agg		   *aggnode;		/* original Agg node, for numGroups etc. */
+} AggStatePerHashData;
+
 
+static void select_current_set(AggState *aggstate, int setno, bool is_hash);
 static void initialize_phase(AggState *aggstate, int newphase);
 static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
 static void initialize_aggregates(AggState *aggstate,
@@ -449,7 +527,8 @@ static void initialize_aggregates(AggState *aggstate,
 static void advance_transition_function(AggState *aggstate,
 							AggStatePerTrans pertrans,
 							AggStatePerGroup pergroupstate);
-static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup,
+				   AggStatePerGroup *pergroups);
 static void advance_combine_function(AggState *aggstate,
 						 AggStatePerTrans pertrans,
 						 AggStatePerGroup pergroupstate);
@@ -473,14 +552,13 @@ static void prepare_projection_slot(AggState *aggstate,
 						int currentSet);
 static void finalize_aggregates(AggState *aggstate,
 					AggStatePerAgg peragg,
-					AggStatePerGroup pergroup,
-					int currentSet);
+					AggStatePerGroup pergroup);
 static TupleTableSlot *project_aggregates(AggState *aggstate);
 static Bitmapset *find_unaggregated_cols(AggState *aggstate);
 static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
 static void build_hash_table(AggState *aggstate);
-static TupleHashEntryData *lookup_hash_entry(AggState *aggstate,
-				  TupleTableSlot *inputslot);
+static TupleHashEntryData *lookup_hash_entry(AggState *aggstate);
+static AggStatePerGroup *lookup_hash_entries(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -501,13 +579,31 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
 
 
 /*
- * Switch to phase "newphase", which must either be 0 (to reset) or
+ * Select the current grouping set; affects current_set and
+ * curaggcontext.
+ */
+static void
+select_current_set(AggState *aggstate, int setno, bool is_hash)
+{
+	if (is_hash)
+		aggstate->curaggcontext = aggstate->hashcontext;
+	else
+		aggstate->curaggcontext = aggstate->aggcontexts[setno];
+
+	aggstate->current_set = setno;
+}
+
+/*
+ * Switch to phase "newphase", which must either be 0 or 1 (to reset) or
  * current_phase + 1. Juggle the tuplesorts accordingly.
+ *
+ * Phase 0 is for hashing, which we currently handle last in the AGG_MIXED
+ * case, so when entering phase 0, all we need to do is drop open sorts.
  */
 static void
 initialize_phase(AggState *aggstate, int newphase)
 {
-	Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+	Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
 
 	/*
 	 * Whatever the previous state, we're now done with whatever input
@@ -519,7 +615,7 @@ initialize_phase(AggState *aggstate, int newphase)
 		aggstate->sort_in = NULL;
 	}
 
-	if (newphase == 0)
+	if (newphase <= 1)
 	{
 		/*
 		 * Discard any existing output tuplesort.
@@ -546,7 +642,7 @@ initialize_phase(AggState *aggstate, int newphase)
 	 * If this isn't the last phase, we need to sort appropriately for the
 	 * next phase in sequence.
 	 */
-	if (newphase < aggstate->numphases - 1)
+	if (newphase > 0 && newphase < aggstate->numphases - 1)
 	{
 		Sort	   *sortnode = aggstate->phases[newphase + 1].sortnode;
 		PlanState  *outerNode = outerPlanState(aggstate);
@@ -567,7 +663,7 @@ initialize_phase(AggState *aggstate, int newphase)
 }
 
 /*
- * Fetch a tuple from either the outer plan (for phase 0) or from the sorter
+ * Fetch a tuple from either the outer plan (for phase 1) or from the sorter
  * populated by the previous phase.  Copy it to the sorter for the next phase
  * if any.
  */
@@ -595,8 +691,8 @@ fetch_input_tuple(AggState *aggstate)
 /*
  * (Re)Initialize an individual aggregate.
  *
- * This function handles only one grouping set (already set in
- * aggstate->current_set).
+ * This function handles only one grouping set, already set in
+ * aggstate->current_set.
  *
  * When called, CurrentMemoryContext should be the per-query context.
  */
@@ -653,7 +749,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
 		MemoryContext oldContext;
 
 		oldContext = MemoryContextSwitchTo(
-		aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+							 aggstate->curaggcontext->ecxt_per_tuple_memory);
 		pergroupstate->transValue = datumCopy(pertrans->initValue,
 											  pertrans->transtypeByVal,
 											  pertrans->transtypeLen);
@@ -676,8 +772,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
  *
  * If there are multiple grouping sets, we initialize only the first numReset
  * of them (the grouping sets are ordered so that the most specific one, which
- * is reset most often, is first). As a convenience, if numReset is < 1, we
- * reinitialize all sets.
+ * is reset most often, is first). As a convenience, if numReset is 0, we
+ * reinitialize all sets. numReset is -1 to initialize a hashtable entry, in
+ * which case the caller must have used select_current_set appropriately.
  *
  * When called, CurrentMemoryContext should be the per-query context.
  */
@@ -689,25 +786,37 @@ initialize_aggregates(AggState *aggstate,
 	int			transno;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
 	int			setno = 0;
+	int			numTrans = aggstate->numtrans;
 	AggStatePerTrans transstates = aggstate->pertrans;
 
-	if (numReset < 1)
+	if (numReset == 0)
 		numReset = numGroupingSets;
 
-	for (transno = 0; transno < aggstate->numtrans; transno++)
+	for (transno = 0; transno < numTrans; transno++)
 	{
 		AggStatePerTrans pertrans = &transstates[transno];
 
-		for (setno = 0; setno < numReset; setno++)
+		if (numReset < 0)
 		{
 			AggStatePerGroup pergroupstate;
 
-			pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
-
-			aggstate->current_set = setno;
+			pergroupstate = &pergroup[transno];
 
 			initialize_aggregate(aggstate, pertrans, pergroupstate);
 		}
+		else
+		{
+			for (setno = 0; setno < numReset; setno++)
+			{
+				AggStatePerGroup pergroupstate;
+
+				pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+				select_current_set(aggstate, setno, false);
+
+				initialize_aggregate(aggstate, pertrans, pergroupstate);
+			}
+		}
 	}
 }
 
@@ -757,7 +866,7 @@ advance_transition_function(AggState *aggstate,
 			 * do not need to pfree the old transValue, since it's NULL.
 			 */
 			oldContext = MemoryContextSwitchTo(
-											   aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+							 aggstate->curaggcontext->ecxt_per_tuple_memory);
 			pergroupstate->transValue = datumCopy(fcinfo->arg[1],
 												  pertrans->transtypeByVal,
 												  pertrans->transtypeLen);
@@ -807,7 +916,7 @@ advance_transition_function(AggState *aggstate,
 	{
 		if (!fcinfo->isnull)
 		{
-			MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+			MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
 			if (DatumIsReadWriteExpandedObject(newVal,
 											   false,
 											   pertrans->transtypeLen) &&
@@ -838,17 +947,21 @@ advance_transition_function(AggState *aggstate,
 /*
  * Advance each aggregate transition state for one input tuple.  The input
  * tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
- * accessible to ExecEvalExpr.  pergroup is the array of per-group structs to
- * use (this might be in a hashtable entry).
+ * accessible to ExecEvalExpr.
+ *
+ * We have two sets of transition states to handle: one for sorted aggregation
+ * and one for hashed; we do them both here, to avoid multiple evaluation of
+ * the inputs.
  *
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static void
-advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups)
 {
 	int			transno;
 	int			setno = 0;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
+	int			numHashes = aggstate->num_hashes;
 	int			numTrans = aggstate->numtrans;
 	TupleTableSlot *slot = aggstate->evalslot;
 
@@ -880,6 +993,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 		{
 			/* DISTINCT and/or ORDER BY case */
 			Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff));
+			Assert(!pergroups);
 
 			/*
 			 * If the transfn is strict, we want to check for nullity before
@@ -940,13 +1054,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 				fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff];
 			}
 
-			for (setno = 0; setno < numGroupingSets; setno++)
+			if (pergroup)
 			{
-				AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
+				/* advance transition states for ordered grouping */
+
+				for (setno = 0; setno < numGroupingSets; setno++)
+				{
+					AggStatePerGroup pergroupstate;
 
-				aggstate->current_set = setno;
+					select_current_set(aggstate, setno, false);
 
-				advance_transition_function(aggstate, pertrans, pergroupstate);
+					pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+					advance_transition_function(aggstate, pertrans, pergroupstate);
+				}
+			}
+
+			if (pergroups)
+			{
+				/* advance transition states for hashed grouping */
+
+				for (setno = 0; setno < numHashes; setno++)
+				{
+					AggStatePerGroup pergroupstate;
+
+					select_current_set(aggstate, setno, true);
+
+					pergroupstate = &pergroups[setno][transno];
+
+					advance_transition_function(aggstate, pertrans, pergroupstate);
+				}
 			}
 		}
 	}
@@ -967,7 +1104,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 	TupleTableSlot *slot;
 
 	/* combine not supported with grouping sets */
-	Assert(aggstate->phase->numsets == 0);
+	Assert(aggstate->phase->numsets <= 1);
 
 	/* compute input for all aggregates */
 	slot = ExecProject(aggstate->evalproj);
@@ -1060,7 +1197,7 @@ advance_combine_function(AggState *aggstate,
 			if (!pertrans->transtypeByVal)
 			{
 				oldContext = MemoryContextSwitchTo(
-												   aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+							 aggstate->curaggcontext->ecxt_per_tuple_memory);
 				pergroupstate->transValue = datumCopy(fcinfo->arg[1],
 													pertrans->transtypeByVal,
 													  pertrans->transtypeLen);
@@ -1105,7 +1242,7 @@ advance_combine_function(AggState *aggstate,
 	{
 		if (!fcinfo->isnull)
 		{
-			MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+			MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
 			if (DatumIsReadWriteExpandedObject(newVal,
 											   false,
 											   pertrans->transtypeLen) &&
@@ -1559,15 +1696,16 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
 /*
  * Compute the final value of all aggregates for one group.
  *
- * This function handles only one grouping set at a time.
+ * This function handles only one grouping set at a time, which the caller must
+ * have selected.  It's also the caller's responsibility to adjust the supplied
+ * pergroup parameter to point to the current set's transvalues.
  *
  * Results are stored in the output econtext aggvalues/aggnulls.
  */
 static void
 finalize_aggregates(AggState *aggstate,
 					AggStatePerAgg peraggs,
-					AggStatePerGroup pergroup,
-					int currentSet)
+					AggStatePerGroup pergroup)
 {
 	ExprContext *econtext = aggstate->ss.ps.ps_ExprContext;
 	Datum	   *aggvalues = econtext->ecxt_aggvalues;
@@ -1575,11 +1713,6 @@ finalize_aggregates(AggState *aggstate,
 	int			aggno;
 	int			transno;
 
-	Assert(currentSet == 0 ||
-		   ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
-
-	aggstate->current_set = currentSet;
-
 	/*
 	 * If there were any DISTINCT and/or ORDER BY aggregates, sort their
 	 * inputs and run the transition functions.
@@ -1589,11 +1722,12 @@ finalize_aggregates(AggState *aggstate,
 		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
 		AggStatePerGroup pergroupstate;
 
-		pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+		pergroupstate = &pergroup[transno];
 
 		if (pertrans->numSortCols > 0)
 		{
-			Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+			Assert(aggstate->aggstrategy != AGG_HASHED &&
+				   aggstate->aggstrategy != AGG_MIXED);
 
 			if (pertrans->numInputs == 1)
 				process_ordered_aggregate_single(aggstate,
@@ -1615,7 +1749,7 @@ finalize_aggregates(AggState *aggstate,
 		int			transno = peragg->transno;
 		AggStatePerGroup pergroupstate;
 
-		pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+		pergroupstate = &pergroup[transno];
 
 		if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit))
 			finalize_partialaggregate(aggstate, peragg, pergroupstate,
@@ -1697,7 +1831,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
 }
 
 /*
- * Initialize the hash table to empty.
+ * Initialize the hash table(s) to empty.
  *
  * To implement hashed aggregation, we need a hashtable that stores a
  * representative tuple and an array of AggStatePerGroup structs for each
@@ -1705,29 +1839,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
  * GROUP BY columns.  The per-group data is allocated in lookup_hash_entry(),
  * for each entry.
  *
- * The hash table always lives in the aggcontext memory context.
+ * We have a separate hashtable and associated perhash data structure for each
+ * grouping set for which we're doing hashing.
+ *
+ * The hash tables always live in the hashcontext's per-tuple memory context
+ * (there is only one of these for all tables together, since they are all
+ * reset at the same time).
  */
 static void
 build_hash_table(AggState *aggstate)
 {
-	Agg		   *node = (Agg *) aggstate->ss.ps.plan;
 	MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
 	Size		additionalsize;
+	int			i;
 
-	Assert(node->aggstrategy == AGG_HASHED);
-	Assert(node->numGroups > 0);
+	Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED);
 
-	additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData);
+	additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData);
 
-	aggstate->hashtable = BuildTupleHashTable(node->numCols,
-											  aggstate->hashGrpColIdxHash,
-											  aggstate->phase->eqfunctions,
-											  aggstate->hashfunctions,
-											  node->numGroups,
-											  additionalsize,
-							 aggstate->aggcontexts[0]->ecxt_per_tuple_memory,
-											  tmpmem,
+	for (i = 0; i < aggstate->num_hashes; ++i)
+	{
+		AggStatePerHash perhash = &aggstate->perhash[i];
+
+		Assert(perhash->aggnode->numGroups > 0);
+
+		perhash->hashtable = BuildTupleHashTable(perhash->numCols,
+												 perhash->hashGrpColIdxHash,
+												 perhash->eqfunctions,
+												 perhash->hashfunctions,
+												 perhash->aggnode->numGroups,
+												 additionalsize,
+								aggstate->hashcontext->ecxt_per_tuple_memory,
+												 tmpmem,
 								  DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+	}
 }
 
 /*
@@ -1750,72 +1895,98 @@ build_hash_table(AggState *aggstate)
  * the array is preserved over ExecReScanAgg, so we allocate it in the
  * per-query context (unlike the hash table itself).
  */
-static List *
+static void
 find_hash_columns(AggState *aggstate)
 {
-	Agg		   *node = (Agg *) aggstate->ss.ps.plan;
-	Bitmapset  *colnos;
-	List	   *collist;
-	TupleDesc	hashDesc;
+	Bitmapset  *base_colnos;
 	List	   *outerTlist = outerPlanState(aggstate)->plan->targetlist;
-	List		*hashTlist = NIL;
-	int			i;
-
-	aggstate->largestGrpColIdx = 0;
+	int			numHashes = aggstate->num_hashes;
+	int			j;
 
 	/* Find Vars that will be needed in tlist and qual */
-	colnos = find_unaggregated_cols(aggstate);
-	/* Add in all the grouping columns */
-	for (i = 0; i < node->numCols; i++)
-		colnos = bms_add_member(colnos, node->grpColIdx[i]);
-	/* Convert to list, using lcons so largest element ends up first */
-	collist = NIL;
+	base_colnos = find_unaggregated_cols(aggstate);
 
-	aggstate->hashGrpColIdxInput =
-		palloc(bms_num_members(colnos) * sizeof(AttrNumber));
-	aggstate->hashGrpColIdxHash =
-		palloc(node->numCols * sizeof(AttrNumber));
-
-	/*
-	 * First build mapping for columns directly hashed. These are the first,
-	 * because they'll be accessed when computing hash values and comparing
-	 * tuples for exact matches. We also build simple mapping for
-	 * execGrouping, so it knows where to find the to-be-hashed / compared
-	 * columns in the input.
-	 */
-	for (i = 0; i < node->numCols; i++)
+	for (j = 0; j < numHashes; ++j)
 	{
-		aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i];
-		aggstate->hashGrpColIdxHash[i] = i + 1;
-		aggstate->numhashGrpCols++;
-		/* delete already mapped columns */
-		bms_del_member(colnos, node->grpColIdx[i]);
-	}
+		AggStatePerHash perhash = &aggstate->perhash[j];
+		Bitmapset  *colnos = bms_copy(base_colnos);
+		AttrNumber *grpColIdx = perhash->aggnode->grpColIdx;
+		List	   *hashTlist = NIL;
+		TupleDesc	hashDesc;
+		int			i;
 
-	/* and add the remaining columns */
-	while ((i = bms_first_member(colnos)) >= 0)
-	{
-		aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i;
-		aggstate->numhashGrpCols++;
-	}
+		perhash->largestGrpColIdx = 0;
 
-	/* and build a tuple descriptor for the hashtable */
-	for (i = 0; i < aggstate->numhashGrpCols; i++)
-	{
-		int			varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+		/*
+		 * If we're doing grouping sets, then some Vars might be referenced in
+		 * tlist/qual for the benefit of other grouping sets, but not needed
+		 * when hashing; i.e. prepare_projection_slot will null them out, so
+		 * there'd be no point storing them.  Use prepare_projection_slot's
+		 * logic to determine which.
+		 */
+		if (aggstate->phases[0].grouped_cols)
+		{
+			Bitmapset  *grouped_cols = aggstate->phases[0].grouped_cols[j];
+			ListCell   *lc;
 
-		hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
-		aggstate->largestGrpColIdx =
-			Max(varNumber + 1, aggstate->largestGrpColIdx);
-	}
+			foreach(lc, aggstate->all_grouped_cols)
+			{
+				int			attnum = lfirst_int(lc);
 
-	hashDesc = ExecTypeFromTL(hashTlist, false);
-	ExecSetSlotDescriptor(aggstate->hashslot, hashDesc);
+				if (!bms_is_member(attnum, grouped_cols))
+					colnos = bms_del_member(colnos, attnum);
+			}
+		}
+		/* Add in all the grouping columns */
+		for (i = 0; i < perhash->numCols; i++)
+			colnos = bms_add_member(colnos, grpColIdx[i]);
 
-	list_free(hashTlist);
-	bms_free(colnos);
+		perhash->hashGrpColIdxInput =
+			palloc(bms_num_members(colnos) * sizeof(AttrNumber));
+		perhash->hashGrpColIdxHash =
+			palloc(perhash->numCols * sizeof(AttrNumber));
+
+		/*
+		 * First build mapping for columns directly hashed. These are the
+		 * first, because they'll be accessed when computing hash values and
+		 * comparing tuples for exact matches. We also build simple mapping
+		 * for execGrouping, so it knows where to find the to-be-hashed /
+		 * compared columns in the input.
+		 */
+		for (i = 0; i < perhash->numCols; i++)
+		{
+			perhash->hashGrpColIdxInput[i] = grpColIdx[i];
+			perhash->hashGrpColIdxHash[i] = i + 1;
+			perhash->numhashGrpCols++;
+			/* delete already mapped columns */
+			bms_del_member(colnos, grpColIdx[i]);
+		}
 
-	return collist;
+		/* and add the remaining columns */
+		while ((i = bms_first_member(colnos)) >= 0)
+		{
+			perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i;
+			perhash->numhashGrpCols++;
+		}
+
+		/* and build a tuple descriptor for the hashtable */
+		for (i = 0; i < perhash->numhashGrpCols; i++)
+		{
+			int			varNumber = perhash->hashGrpColIdxInput[i] - 1;
+
+			hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
+			perhash->largestGrpColIdx =
+				Max(varNumber + 1, perhash->largestGrpColIdx);
+		}
+
+		hashDesc = ExecTypeFromTL(hashTlist, false);
+		ExecSetSlotDescriptor(perhash->hashslot, hashDesc);
+
+		list_free(hashTlist);
+		bms_free(colnos);
+	}
+
+	bms_free(base_colnos);
 }
 
 /*
@@ -1840,26 +2011,30 @@ hash_agg_entry_size(int numAggs)
 }
 
 /*
- * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * Find or create a hashtable entry for the tuple group containing the current
+ * tuple (already set in tmpcontext's outertuple slot), in the current grouping
+ * set (which the caller must have selected - note that initialize_aggregate
+ * depends on this).
  *
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static TupleHashEntryData *
-lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
+lookup_hash_entry(AggState *aggstate)
 {
-	TupleTableSlot *hashslot = aggstate->hashslot;
+	TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
+	AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
+	TupleTableSlot *hashslot = perhash->hashslot;
 	TupleHashEntryData *entry;
 	bool		isnew;
-	int i;
+	int			i;
 
 	/* transfer just the needed columns into hashslot */
-	slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx);
+	slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
 	ExecClearTuple(hashslot);
 
-	for (i = 0; i < aggstate->numhashGrpCols; i++)
+	for (i = 0; i < perhash->numhashGrpCols; i++)
 	{
-		int			varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+		int			varNumber = perhash->hashGrpColIdxInput[i] - 1;
 
 		hashslot->tts_values[i] = inputslot->tts_values[varNumber];
 		hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
@@ -1867,22 +2042,44 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 	ExecStoreVirtualTuple(hashslot);
 
 	/* find or create the hashtable entry using the filtered tuple */
-	entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew);
+	entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew);
 
 	if (isnew)
 	{
 		entry->additional = (AggStatePerGroup)
-			MemoryContextAlloc(aggstate->hashtable->tablecxt,
+			MemoryContextAlloc(perhash->hashtable->tablecxt,
 						  sizeof(AggStatePerGroupData) * aggstate->numtrans);
 		/* initialize aggregates for new tuple group */
 		initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional,
-							  0);
+							  -1);
 	}
 
 	return entry;
 }
 
 /*
+ * Look up hash entries for the current tuple in all hashed grouping sets,
+ * returning an array of pergroup pointers suitable for advance_aggregates.
+ *
+ * Be aware that lookup_hash_entry can reset the tmpcontext.
+ */
+static AggStatePerGroup *
+lookup_hash_entries(AggState *aggstate)
+{
+	int			numHashes = aggstate->num_hashes;
+	AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+	int			setno;
+
+	for (setno = 0; setno < numHashes; setno++)
+	{
+		select_current_set(aggstate, setno, true);
+		pergroup[setno] = lookup_hash_entry(aggstate)->additional;
+	}
+
+	return pergroup;
+}
+
+/*
  * ExecAgg -
  *
  *	  ExecAgg receives tuples from its outer subplan and aggregates over
@@ -1898,19 +2095,22 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 TupleTableSlot *
 ExecAgg(AggState *node)
 {
-	TupleTableSlot *result;
+	TupleTableSlot *result = NULL;
 
 	if (!node->agg_done)
 	{
 		/* Dispatch based on strategy */
-		switch (node->phase->aggnode->aggstrategy)
+		switch (node->phase->aggstrategy)
 		{
 			case AGG_HASHED:
 				if (!node->table_filled)
 					agg_fill_hash_table(node);
+				/* FALLTHROUGH */
+			case AGG_MIXED:
 				result = agg_retrieve_hash_table(node);
 				break;
-			default:
+			case AGG_PLAIN:
+			case AGG_SORTED:
 				result = agg_retrieve_direct(node);
 				break;
 		}
@@ -1933,6 +2133,7 @@ agg_retrieve_direct(AggState *aggstate)
 	ExprContext *tmpcontext;
 	AggStatePerAgg peragg;
 	AggStatePerGroup pergroup;
+	AggStatePerGroup *hash_pergroups = NULL;
 	TupleTableSlot *outerslot;
 	TupleTableSlot *firstSlot;
 	TupleTableSlot *result;
@@ -2019,6 +2220,19 @@ agg_retrieve_direct(AggState *aggstate)
 				node = aggstate->phase->aggnode;
 				numReset = numGroupingSets;
 			}
+			else if (aggstate->aggstrategy == AGG_MIXED)
+			{
+				/*
+				 * Mixed mode; we've output all the grouped stuff and have
+				 * full hashtables, so switch to outputting those.
+				 */
+				initialize_phase(aggstate, 0);
+				aggstate->table_filled = true;
+				ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+									   &aggstate->perhash[0].hashiter);
+				select_current_set(aggstate, 0, true);
+				return agg_retrieve_hash_table(aggstate);
+			}
 			else
 			{
 				aggstate->agg_done = true;
@@ -2055,7 +2269,7 @@ agg_retrieve_direct(AggState *aggstate)
 		 *----------
 		 */
 		if (aggstate->input_done ||
-			(node->aggstrategy == AGG_SORTED &&
+			(node->aggstrategy != AGG_PLAIN &&
 			 aggstate->projected_set != -1 &&
 			 aggstate->projected_set < (numGroupingSets - 1) &&
 			 nextSetSize > 0 &&
@@ -2168,10 +2382,22 @@ agg_retrieve_direct(AggState *aggstate)
 				 */
 				for (;;)
 				{
+					/*
+					 * During phase 1 only of a mixed agg, we need to update
+					 * hashtables as well in advance_aggregates.
+					 */
+					if (aggstate->aggstrategy == AGG_MIXED &&
+						aggstate->current_phase == 1)
+					{
+						hash_pergroups = lookup_hash_entries(aggstate);
+					}
+					else
+						hash_pergroups = NULL;
+
 					if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
 						combine_aggregates(aggstate, pergroup);
 					else
-						advance_aggregates(aggstate, pergroup);
+						advance_aggregates(aggstate, pergroup, hash_pergroups);
 
 					/* Reset per-input-tuple context after each tuple */
 					ResetExprContext(tmpcontext);
@@ -2198,7 +2424,7 @@ agg_retrieve_direct(AggState *aggstate)
 					 * If we are grouping, check whether we've crossed a group
 					 * boundary.
 					 */
-					if (node->aggstrategy == AGG_SORTED)
+					if (node->aggstrategy != AGG_PLAIN)
 					{
 						if (!execTuplesMatch(firstSlot,
 											 outerslot,
@@ -2231,7 +2457,11 @@ agg_retrieve_direct(AggState *aggstate)
 
 		prepare_projection_slot(aggstate, econtext->ecxt_outertuple, currentSet);
 
-		finalize_aggregates(aggstate, peragg, pergroup, currentSet);
+		select_current_set(aggstate, currentSet, false);
+
+		finalize_aggregates(aggstate,
+							peragg,
+							pergroup + (currentSet * aggstate->numtrans));
 
 		/*
 		 * If there's no row to project right now, we must continue rather
@@ -2247,21 +2477,13 @@ agg_retrieve_direct(AggState *aggstate)
 }
 
 /*
- * ExecAgg for hashed case: phase 1, read input and build hash table
+ * ExecAgg for hashed case: read input and build hash table
  */
 static void
 agg_fill_hash_table(AggState *aggstate)
 {
-	ExprContext *tmpcontext;
-	TupleHashEntryData *entry;
 	TupleTableSlot *outerslot;
-
-	/*
-	 * get state info from node
-	 *
-	 * tmpcontext is the per-input-tuple expression context
-	 */
-	tmpcontext = aggstate->tmpcontext;
+	ExprContext *tmpcontext = aggstate->tmpcontext;
 
 	/*
 	 * Process each outer-plan tuple, and then fetch the next one, until we
@@ -2269,32 +2491,40 @@ agg_fill_hash_table(AggState *aggstate)
 	 */
 	for (;;)
 	{
+		AggStatePerGroup *pergroups;
+
 		outerslot = fetch_input_tuple(aggstate);
 		if (TupIsNull(outerslot))
 			break;
-		/* set up for advance_aggregates call */
+
+		/* set up for lookup_hash_entries and advance_aggregates */
 		tmpcontext->ecxt_outertuple = outerslot;
 
-		/* Find or build hashtable entry for this tuple's group */
-		entry = lookup_hash_entry(aggstate, outerslot);
+		/* Find or build hashtable entries */
+		pergroups = lookup_hash_entries(aggstate);
 
 		/* Advance the aggregates */
 		if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
-			combine_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+			combine_aggregates(aggstate, pergroups[0]);
 		else
-			advance_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+			advance_aggregates(aggstate, NULL, pergroups);
 
-		/* Reset per-input-tuple context after each tuple */
-		ResetExprContext(tmpcontext);
+		/*
+		 * Reset per-input-tuple context after each tuple, but note that the
+		 * hash lookups do this too
+		 */
+		ResetExprContext(aggstate->tmpcontext);
 	}
 
 	aggstate->table_filled = true;
-	/* Initialize to walk the hash table */
-	ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+	/* Initialize to walk the first hash table */
+	select_current_set(aggstate, 0, true);
+	ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+						   &aggstate->perhash[0].hashiter);
 }
 
 /*
- * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ * ExecAgg for hashed case: retrieving groups from hash table
  */
 static TupleTableSlot *
 agg_retrieve_hash_table(AggState *aggstate)
@@ -2305,17 +2535,22 @@ agg_retrieve_hash_table(AggState *aggstate)
 	TupleHashEntryData *entry;
 	TupleTableSlot *firstSlot;
 	TupleTableSlot *result;
-	TupleTableSlot *hashslot;
+	AggStatePerHash perhash;
 
 	/*
-	 * get state info from node
+	 * get state info from node.
+	 *
+	 * econtext is the per-output-tuple expression context.
 	 */
-	/* econtext is the per-output-tuple expression context */
 	econtext = aggstate->ss.ps.ps_ExprContext;
 	peragg = aggstate->peragg;
 	firstSlot = aggstate->ss.ss_ScanTupleSlot;
-	hashslot = aggstate->hashslot;
 
+	/*
+	 * Note that perhash (and therefore anything accessed through it) can
+	 * change inside the loop, as we change between grouping sets.
+	 */
+	perhash = &aggstate->perhash[aggstate->current_set];
 
 	/*
 	 * We loop retrieving groups until we find one satisfying
@@ -2323,17 +2558,37 @@ agg_retrieve_hash_table(AggState *aggstate)
 	 */
 	while (!aggstate->agg_done)
 	{
-		int i;
+		TupleTableSlot *hashslot = perhash->hashslot;
+		int			i;
 
 		/*
 		 * Find the next entry in the hash table
 		 */
-		entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter);
+		entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter);
 		if (entry == NULL)
 		{
-			/* No more entries in hashtable, so done */
-			aggstate->agg_done = TRUE;
-			return NULL;
+			int			nextset = aggstate->current_set + 1;
+
+			if (nextset < aggstate->num_hashes)
+			{
+				/*
+				 * Switch to next grouping set, reinitialize, and restart the
+				 * loop.
+				 */
+				select_current_set(aggstate, nextset, true);
+
+				perhash = &aggstate->perhash[aggstate->current_set];
+
+				ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter);
+
+				continue;
+			}
+			else
+			{
+				/* No more hashtables, so done */
+				aggstate->agg_done = TRUE;
+				return NULL;
+			}
 		}
 
 		/*
@@ -2356,9 +2611,9 @@ agg_retrieve_hash_table(AggState *aggstate)
 		memset(firstSlot->tts_isnull, true,
 			   firstSlot->tts_tupleDescriptor->natts * sizeof(bool));
 
-		for (i = 0; i < aggstate->numhashGrpCols; i++)
+		for (i = 0; i < perhash->numhashGrpCols; i++)
 		{
-			int			varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+			int			varNumber = perhash->hashGrpColIdxInput[i] - 1;
 
 			firstSlot->tts_values[varNumber] = hashslot->tts_values[i];
 			firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i];
@@ -2367,14 +2622,18 @@ agg_retrieve_hash_table(AggState *aggstate)
 
 		pergroup = (AggStatePerGroup) entry->additional;
 
-		finalize_aggregates(aggstate, peragg, pergroup, 0);
-
 		/*
 		 * Use the representative input tuple for any references to
 		 * non-aggregated input columns in the qual and tlist.
 		 */
 		econtext->ecxt_outertuple = firstSlot;
 
+		prepare_projection_slot(aggstate,
+								econtext->ecxt_outertuple,
+								aggstate->current_set);
+
+		finalize_aggregates(aggstate, peragg, pergroup);
+
 		result = project_aggregates(aggstate);
 		if (result)
 			return result;
@@ -2388,7 +2647,8 @@ agg_retrieve_hash_table(AggState *aggstate)
  * ExecInitAgg
  *
  *	Creates the run-time information for the agg node produced by the
- *	planner and initializes its outer subtree
+ *	planner and initializes its outer subtree.
+ *
  * -----------------
  */
 AggState *
@@ -2403,14 +2663,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 				transno,
 				aggno;
 	int			phase;
+	int			phaseidx;
 	List	   *combined_inputeval;
 	ListCell   *l;
 	Bitmapset  *all_grouped_cols = NULL;
 	int			numGroupingSets = 1;
 	int			numPhases;
+	int			numHashes;
 	int			column_offset;
 	int			i = 0;
 	int			j = 0;
+	bool		use_hashing = (node->aggstrategy == AGG_HASHED ||
+							   node->aggstrategy == AGG_MIXED);
 
 	/* check for unsupported flags */
 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -2425,9 +2689,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	aggstate->aggs = NIL;
 	aggstate->numaggs = 0;
 	aggstate->numtrans = 0;
+	aggstate->aggstrategy = node->aggstrategy;
 	aggstate->aggsplit = node->aggsplit;
 	aggstate->maxsets = 0;
-	aggstate->hashfunctions = NULL;
 	aggstate->projected_set = -1;
 	aggstate->current_set = 0;
 	aggstate->peragg = NULL;
@@ -2437,18 +2701,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	aggstate->agg_done = false;
 	aggstate->pergroup = NULL;
 	aggstate->grp_firstTuple = NULL;
-	aggstate->hashtable = NULL;
 	aggstate->sort_in = NULL;
 	aggstate->sort_out = NULL;
 
 	/*
+	 * phases[0] always exists, but is dummy in sorted/plain mode
+	 */
+	numPhases = (use_hashing ? 1 : 2);
+	numHashes = (use_hashing ? 1 : 0);
+
+	/*
 	 * Calculate the maximum number of grouping sets in any phase; this
-	 * determines the size of some allocations.
+	 * determines the size of some allocations.  Also calculate the number of
+	 * phases, since all hashed/mixed nodes contribute to only a single phase.
 	 */
 	if (node->groupingSets)
 	{
-		Assert(node->aggstrategy != AGG_HASHED);
-
 		numGroupingSets = list_length(node->groupingSets);
 
 		foreach(l, node->chain)
@@ -2457,22 +2725,32 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 			numGroupingSets = Max(numGroupingSets,
 								  list_length(agg->groupingSets));
+
+			/*
+			 * additional AGG_HASHED aggs become part of phase 0, but all
+			 * others add an extra phase.
+			 */
+			if (agg->aggstrategy != AGG_HASHED)
+				++numPhases;
+			else
+				++numHashes;
 		}
 	}
 
 	aggstate->maxsets = numGroupingSets;
-	aggstate->numphases = numPhases = 1 + list_length(node->chain);
+	aggstate->numphases = numPhases;
 
 	aggstate->aggcontexts = (ExprContext **)
 		palloc0(sizeof(ExprContext *) * numGroupingSets);
 
 	/*
 	 * Create expression contexts.  We need three or more, one for
-	 * per-input-tuple processing, one for per-output-tuple processing, and
-	 * one for each grouping set.  The per-tuple memory context of the
-	 * per-grouping-set ExprContexts (aggcontexts) replaces the standalone
-	 * memory context formerly used to hold transition values.  We cheat a
-	 * little by using ExecAssignExprContext() to build all of them.
+	 * per-input-tuple processing, one for per-output-tuple processing, one
+	 * for all the hashtables, and one for each grouping set.  The per-tuple
+	 * memory context of the per-grouping-set ExprContexts (aggcontexts)
+	 * replaces the standalone memory context formerly used to hold transition
+	 * values.  We cheat a little by using ExecAssignExprContext() to build
+	 * all of them.
 	 *
 	 * NOTE: the details of what is stored in aggcontexts and what is stored
 	 * in the regular per-query memory context are driven by a simple
@@ -2488,14 +2766,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
 	}
 
+	if (use_hashing)
+	{
+		ExecAssignExprContext(estate, &aggstate->ss.ps);
+		aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
+	}
+
 	ExecAssignExprContext(estate, &aggstate->ss.ps);
 
 	/*
-	 * tuple table initialization
+	 * tuple table initialization.
+	 *
+	 * For hashtables, we create some additional slots below.
 	 */
 	ExecInitScanTupleSlot(estate, &aggstate->ss);
 	ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
-	aggstate->hashslot = ExecInitExtraTupleSlot(estate);
 	aggstate->sort_slot = ExecInitExtraTupleSlot(estate);
 
 	/*
@@ -2559,19 +2844,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 * For each phase, prepare grouping set data and fmgr lookup data for
 	 * compare functions.  Accumulate all_grouped_cols in passing.
 	 */
-
 	aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
 
-	for (phase = 0; phase < numPhases; ++phase)
+	aggstate->num_hashes = numHashes;
+	if (numHashes)
+	{
+		aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
+		aggstate->phases[0].numsets = 0;
+		aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
+		aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
+	}
+
+	phase = 0;
+	for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
 	{
-		AggStatePerPhase phasedata = &aggstate->phases[phase];
 		Agg		   *aggnode;
 		Sort	   *sortnode;
-		int			num_sets;
 
-		if (phase > 0)
+		if (phaseidx > 0)
 		{
-			aggnode = castNode(Agg, list_nth(node->chain, phase - 1));
+			aggnode = castNode(Agg, list_nth(node->chain, phaseidx - 1));
 			sortnode = castNode(Sort, aggnode->plan.lefttree);
 		}
 		else
@@ -2580,53 +2872,91 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			sortnode = NULL;
 		}
 
-		phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
+		Assert(phase <= 1 || sortnode);
 
-		if (num_sets)
+		if (aggnode->aggstrategy == AGG_HASHED
+			|| aggnode->aggstrategy == AGG_MIXED)
 		{
-			phasedata->gset_lengths = palloc(num_sets * sizeof(int));
-			phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
+			AggStatePerPhase phasedata = &aggstate->phases[0];
+			AggStatePerHash perhash;
+			Bitmapset  *cols = NULL;
 
-			i = 0;
-			foreach(l, aggnode->groupingSets)
-			{
-				int			current_length = list_length(lfirst(l));
-				Bitmapset  *cols = NULL;
+			Assert(phase == 0);
+			i = phasedata->numsets++;
+			perhash = &aggstate->perhash[i];
 
-				/* planner forces this to be correct */
-				for (j = 0; j < current_length; ++j)
-					cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+			/* phase 0 always points to the "real" Agg in the hash case */
+			phasedata->aggnode = node;
+			phasedata->aggstrategy = node->aggstrategy;
 
-				phasedata->grouped_cols[i] = cols;
-				phasedata->gset_lengths[i] = current_length;
-				++i;
-			}
+			/* but the actual Agg node representing this hash is saved here */
+			perhash->aggnode = aggnode;
+
+			phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
+
+			for (j = 0; j < aggnode->numCols; ++j)
+				cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+			phasedata->grouped_cols[i] = cols;
 
-			all_grouped_cols = bms_add_members(all_grouped_cols,
-											   phasedata->grouped_cols[0]);
+			all_grouped_cols = bms_add_members(all_grouped_cols, cols);
+			continue;
 		}
 		else
 		{
-			Assert(phase == 0);
+			AggStatePerPhase phasedata = &aggstate->phases[++phase];
+			int			num_sets;
 
-			phasedata->gset_lengths = NULL;
-			phasedata->grouped_cols = NULL;
-		}
+			phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
 
-		/*
-		 * If we are grouping, precompute fmgr lookup data for inner loop.
-		 */
-		if (aggnode->aggstrategy == AGG_SORTED)
-		{
-			Assert(aggnode->numCols > 0);
+			if (num_sets)
+			{
+				phasedata->gset_lengths = palloc(num_sets * sizeof(int));
+				phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
 
-			phasedata->eqfunctions =
-				execTuplesMatchPrepare(aggnode->numCols,
-									   aggnode->grpOperators);
-		}
+				i = 0;
+				foreach(l, aggnode->groupingSets)
+				{
+					int			current_length = list_length(lfirst(l));
+					Bitmapset  *cols = NULL;
+
+					/* planner forces this to be correct */
+					for (j = 0; j < current_length; ++j)
+						cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+					phasedata->grouped_cols[i] = cols;
+					phasedata->gset_lengths[i] = current_length;
+
+					++i;
+				}
+
+				all_grouped_cols = bms_add_members(all_grouped_cols,
+												 phasedata->grouped_cols[0]);
+			}
+			else
+			{
+				Assert(phaseidx == 0);
+
+				phasedata->gset_lengths = NULL;
+				phasedata->grouped_cols = NULL;
+			}
+
+			/*
+			 * If we are grouping, precompute fmgr lookup data for inner loop.
+			 */
+			if (aggnode->aggstrategy == AGG_SORTED)
+			{
+				Assert(aggnode->numCols > 0);
+
+				phasedata->eqfunctions =
+					execTuplesMatchPrepare(aggnode->numCols,
+										   aggnode->grpOperators);
+			}
 
-		phasedata->aggnode = aggnode;
-		phasedata->sortnode = sortnode;
+			phasedata->aggnode = aggnode;
+			phasedata->aggstrategy = aggnode->aggstrategy;
+			phasedata->sortnode = sortnode;
+		}
 	}
 
 	/*
@@ -2637,13 +2967,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
 
 	/*
-	 * Initialize current phase-dependent values to initial phase
-	 */
-
-	aggstate->current_phase = 0;
-	initialize_phase(aggstate, 0);
-
-	/*
 	 * Set up aggregate-result storage in the output expr context, and also
 	 * allocate my private per-agg working storage
 	 */
@@ -2657,23 +2980,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	aggstate->peragg = peraggs;
 	aggstate->pertrans = pertransstates;
 
-
 	/*
 	 * Hashing can only appear in the initial phase.
 	 */
-	if (node->aggstrategy == AGG_HASHED)
+	if (use_hashing)
 	{
-		find_hash_columns(aggstate);
+		for (i = 0; i < numHashes; ++i)
+		{
+			aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate);
+
+			execTuplesHashPrepare(aggstate->perhash[i].numCols,
+								  aggstate->perhash[i].aggnode->grpOperators,
+								  &aggstate->perhash[i].eqfunctions,
+								  &aggstate->perhash[i].hashfunctions);
+		}
 
-		execTuplesHashPrepare(node->numCols,
-							  node->grpOperators,
-							  &aggstate->phases[0].eqfunctions,
-							  &aggstate->hashfunctions);
+		/* this is an array of pointers, not structures */
+		aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes);
 
+		find_hash_columns(aggstate);
 		build_hash_table(aggstate);
 		aggstate->table_filled = false;
 	}
-	else
+
+	if (node->aggstrategy != AGG_HASHED)
 	{
 		AggStatePerGroup pergroup;
 
@@ -2684,6 +3014,25 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		aggstate->pergroup = pergroup;
 	}
 
+	/*
+	 * Initialize current phase-dependent values to initial phase. The initial
+	 * phase is 1 (first sort pass) for all strategies that use sorting (if
+	 * hashing is being done too, then phase 0 is processed last); but if only
+	 * hashing is being done, then phase 0 is all there is.
+	 */
+	if (node->aggstrategy == AGG_HASHED)
+	{
+		aggstate->current_phase = 0;
+		initialize_phase(aggstate, 0);
+		select_current_set(aggstate, 0, true);
+	}
+	else
+	{
+		aggstate->current_phase = 1;
+		initialize_phase(aggstate, 1);
+		select_current_set(aggstate, 0, false);
+	}
+
 	/* -----------------
 	 * Perform lookups of aggregate function info, and initialize the
 	 * unchanging fields of the per-agg and per-trans data.
@@ -3261,7 +3610,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
 		 * We don't implement DISTINCT or ORDER BY aggs in the HASHED case
 		 * (yet)
 		 */
-		Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+		Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED);
 
 		/* If we have only one input, we need its len/byval info. */
 		if (numInputs == 1)
@@ -3510,6 +3859,8 @@ ExecEndAgg(AggState *node)
 	/* And ensure any agg shutdown callbacks have been called */
 	for (setno = 0; setno < numGroupingSets; setno++)
 		ReScanExprContext(node->aggcontexts[setno]);
+	if (node->hashcontext)
+		ReScanExprContext(node->hashcontext);
 
 	/*
 	 * We don't actually free any ExprContexts here (see comment in
@@ -3537,7 +3888,7 @@ ExecReScanAgg(AggState *node)
 
 	node->agg_done = false;
 
-	if (aggnode->aggstrategy == AGG_HASHED)
+	if (node->aggstrategy == AGG_HASHED)
 	{
 		/*
 		 * In the hashed case, if we haven't yet built the hash table then we
@@ -3557,7 +3908,9 @@ ExecReScanAgg(AggState *node)
 		if (outerPlan->chgParam == NULL &&
 			!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
 		{
-			ResetTupleHashIterator(node->hashtable, &node->hashiter);
+			ResetTupleHashIterator(node->perhash[0].hashtable,
+								   &node->perhash[0].hashiter);
+			select_current_set(node, 0, true);
 			return;
 		}
 	}
@@ -3582,11 +3935,7 @@ ExecReScanAgg(AggState *node)
 	 * ExecReScan already did it. But we do need to reset our per-grouping-set
 	 * contexts, which may have transvalues stored in them. (We use rescan
 	 * rather than just reset because transfns may have registered callbacks
-	 * that need to be run now.)
-	 *
-	 * Note that with AGG_HASHED, the hash table is allocated in a sub-context
-	 * of the aggcontext. This used to be an issue, but now, resetting a
-	 * context automatically deletes sub-contexts too.
+	 * that need to be run now.) For the AGG_HASHED case, see below.
 	 */
 
 	for (setno = 0; setno < numGroupingSets; setno++)
@@ -3606,13 +3955,21 @@ ExecReScanAgg(AggState *node)
 	MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
 	MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
 
-	if (aggnode->aggstrategy == AGG_HASHED)
+	/*
+	 * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of
+	 * the hashcontext. This used to be an issue, but now, resetting a context
+	 * automatically deletes sub-contexts too.
+	 */
+	if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED)
 	{
+		ReScanExprContext(node->hashcontext);
 		/* Rebuild an empty hash table */
 		build_hash_table(node);
 		node->table_filled = false;
+		/* iterator will be reset when the table is filled */
 	}
-	else
+
+	if (node->aggstrategy != AGG_HASHED)
 	{
 		/*
 		 * Reset the per-group state (in particular, mark transvalues null)
@@ -3620,8 +3977,8 @@ ExecReScanAgg(AggState *node)
 		MemSet(node->pergroup, 0,
 			 sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets);
 
-		/* reset to phase 0 */
-		initialize_phase(node, 0);
+		/* reset to phase 1 */
+		initialize_phase(node, 1);
 
 		node->input_done = false;
 		node->projected_set = -1;
@@ -3662,7 +4019,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
 		if (aggcontext)
 		{
 			AggState   *aggstate = ((AggState *) fcinfo->context);
-			ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+			ExprContext *cxt = aggstate->curaggcontext;
 
 			*aggcontext = cxt->ecxt_per_tuple_memory;
 		}
@@ -3751,7 +4108,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo,
 	if (fcinfo->context && IsA(fcinfo->context, AggState))
 	{
 		AggState   *aggstate = (AggState *) fcinfo->context;
-		ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+		ExprContext *cxt = aggstate->curaggcontext;
 
 		RegisterExprContextCallback(cxt, func, arg);