Disk-based Hash Aggregation.

While performing hash aggregation, track memory usage when adding new groups to a hash table. If the memory usage exceeds work_mem, enter "spill mode".

In spill mode, new groups are not created in the hash table(s), but existing groups continue to be advanced if input tuples match. Tuples that would cause a new group to be created are instead spilled to a logical tape to be processed later.

The tuples are spilled in a partitioned fashion. When all tuples from the outer plan are processed (either by advancing the group or spilling the tuple), finalize and emit the groups from the hash table. Then, create new batches of work from the spilled partitions, and select one of the saved batches and process it (possibly spilling recursively).

Author: Jeff Davis
Reviewed-by: Tomas Vondra, Adam Lee, Justin Pryzby, Taylor Vesely, Melanie Plageman
Discussion: https://postgr.es/m/507ac540ec7c20136364b5272acbcd4574aa76ef.camel@j-davis.com
author: Jeff Davis <jdavis@postgresql.org> 2020-03-18 15:42:02 -0700
committer: Jeff Davis <jdavis@postgresql.org> 2020-03-18 15:42:02 -0700
commit: 1f39bce021540fde00990af55b4432c55ef4b3c7 (patch)
tree: c2403fb61234d93408b23350a82ad429b3625af3 /src/include
parent: e00912e11a9ec2d29274ed8a6465e81385906dc2 (diff)
download: postgresql-1f39bce021540fde00990af55b4432c55ef4b3c7.tar.gz
postgresql-1f39bce021540fde00990af55b4432c55ef4b3c7.zip
3 files changed, 32 insertions, 2 deletions
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 264916f9a92..a5b8a004d1e 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -280,6 +280,11 @@ typedef struct AggStatePerPhaseData
 	Sort	   *sortnode;		/* Sort node for input ordering for phase */
 
 	ExprState  *evaltrans;		/* evaluation of transition functions  */
+
+	/* cached variants of the compiled expression */
+	ExprState  *evaltrans_cache
+				[2]		/* 0: outerops; 1: TTSOpsMinimalTuple */
+				[2];	/* 0: no NULL check; 1: with NULL check */
 }			AggStatePerPhaseData;
 
 /*
@@ -311,5 +316,8 @@ extern void ExecReScanAgg(AggState *node);
 
 extern Size hash_agg_entry_size(int numAggs, Size tupleWidth,
 								Size transitionSpace);
+extern void hash_agg_set_limits(double hashentrysize, uint64 input_groups,
+								int used_bits, Size *mem_limit,
+								uint64 *ngroups_limit, int *num_partitions);
 
 #endif							/* NODEAGG_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index cd3ddf781f1..3d27d50f090 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2079,12 +2079,32 @@ typedef struct AggState
 	/* these fields are used in AGG_HASHED and AGG_MIXED modes: */
 	bool		table_filled;	/* hash table filled yet? */
 	int			num_hashes;
+	MemoryContext	hash_metacxt;	/* memory for hash table itself */
+	struct HashTapeInfo *hash_tapeinfo; /* metadata for spill tapes */
+	struct HashAggSpill *hash_spills; /* HashAggSpill for each grouping set,
+										 exists only during first pass */
+	TupleTableSlot *hash_spill_slot; /* slot for reading from spill files */
+	List	   *hash_batches;	/* hash batches remaining to be processed */
+	bool		hash_ever_spilled;	/* ever spilled during this execution? */
+	bool		hash_spill_mode;	/* we hit a limit during the current batch
+									   and we must not create new groups */
+	Size		hash_mem_limit;	/* limit before spilling hash table */
+	uint64		hash_ngroups_limit;	/* limit before spilling hash table */
+	int			hash_planned_partitions; /* number of partitions planned
+											for first pass */
+	double		hashentrysize;	/* estimate revised during execution */
+	Size		hash_mem_peak;	/* peak hash table memory usage */
+	uint64		hash_ngroups_current;	/* number of groups currently in
+										   memory in all hash tables */
+	uint64		hash_disk_used; /* kB of disk space used */
+	int			hash_batches_used;	/* batches used during entire execution */
+
 	AggStatePerHash perhash;	/* array of per-hashtable data */
 	AggStatePerGroup *hash_pergroup;	/* grouping set indexed array of
 										 * per-group pointers */
 
 	/* support for evaluation of agg input expressions: */
-#define FIELDNO_AGGSTATE_ALL_PERGROUPS 34
+#define FIELDNO_AGGSTATE_ALL_PERGROUPS 49
 	AggStatePerGroup *all_pergroups;	/* array of first ->pergroups, than
 										 * ->hash_pergroup */
 	ProjectionInfo *combinedproj;	/* projection machinery */
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index cb012ba1980..735ba096503 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -54,6 +54,8 @@ extern PGDLLIMPORT bool enable_bitmapscan;
 extern PGDLLIMPORT bool enable_tidscan;
 extern PGDLLIMPORT bool enable_sort;
 extern PGDLLIMPORT bool enable_hashagg;
+extern PGDLLIMPORT bool enable_hashagg_disk;
+extern PGDLLIMPORT bool enable_groupingsets_hash_disk;
 extern PGDLLIMPORT bool enable_nestloop;
 extern PGDLLIMPORT bool enable_material;
 extern PGDLLIMPORT bool enable_mergejoin;
@@ -114,7 +116,7 @@ extern void cost_agg(Path *path, PlannerInfo *root,
 					 int numGroupCols, double numGroups,
 					 List *quals,
 					 Cost input_startup_cost, Cost input_total_cost,
-					 double input_tuples);
+					 double input_tuples, double input_width);
 extern void cost_windowagg(Path *path, PlannerInfo *root,
 						   List *windowFuncs, int numPartCols, int numOrderCols,
 						   Cost input_startup_cost, Cost input_total_cost,
author	Jeff Davis <jdavis@postgresql.org>	2020-03-18 15:42:02 -0700
committer	Jeff Davis <jdavis@postgresql.org>	2020-03-18 15:42:02 -0700
commit	1f39bce021540fde00990af55b4432c55ef4b3c7 (patch)
tree	c2403fb61234d93408b23350a82ad429b3625af3 /src/include
parent	e00912e11a9ec2d29274ed8a6465e81385906dc2 (diff)
download	postgresql-1f39bce021540fde00990af55b4432c55ef4b3c7.tar.gz postgresql-1f39bce021540fde00990af55b4432c55ef4b3c7.zip