path: root/src/backend/optimizer
author     Tom Lane <tgl@sss.pgh.pa.us>    2021-07-25 14:02:27 -0400
committer  Tom Lane <tgl@sss.pgh.pa.us>    2021-07-25 14:02:27 -0400
commit     b154ee63bb659ce280d486db6bbbe77ddec105c5 (patch)
tree       666939cf45003f052b415ca6526bea56a174a439 /src/backend/optimizer
parent     3d0a4636aa4c976e971c05c77e162fc70c61f40b (diff)
Get rid of artificial restriction on hash table sizes on Windows.
The point of introducing the hash_mem_multiplier GUC was to let users reproduce the old behavior of hash aggregation, i.e. that it could use more than work_mem at need.  However, the implementation failed to get the job done on Win64, where work_mem is clamped to 2GB to protect various places that calculate memory sizes using "long int".  As written, the same clamp was applied to hash_mem.  This resulted in severe performance regressions for queries requiring a bit more than 2GB for hash aggregation, as they now spill to disk and there's no way to stop that.

Getting rid of the work_mem restriction seems like a good idea, but it's a big job and could not conceivably be back-patched.  However, there's only a fairly small number of places that are concerned with the hash_mem value, and it turns out to be possible to remove the restriction there without too much code churn or any ABI breaks.  So, let's do that for now to fix the regression, and leave the larger task for another day.

This patch does introduce a bit more infrastructure that should help with the larger task, namely pg_bitutils.h support for working with size_t values.

Per gripe from Laurent Hasson.  Back-patch to v13 where the behavior change came in.

Discussion: https://postgr.es/m/997817.1627074924@sss.pgh.pa.us
Discussion: https://postgr.es/m/MN2PR15MB25601E80A9B6D1BA6F592B1985E39@MN2PR15MB2560.namprd15.prod.outlook.com
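The new accessor itself is not part of this diff, since the diffstat below is limited to src/backend/optimizer.  As a hedged sketch only (not the committed implementation), a byte-valued, size_t-returning limit built from the existing work_mem and hash_mem_multiplier GUCs, using the Min() macro and SIZE_MAX, could look like:

    /*
     * Sketch of a get_hash_memory_limit()-style accessor: compute the hash
     * memory budget in bytes and return it as size_t, so it is not clamped
     * to 2GB by "long int" arithmetic on Win64.
     */
    size_t
    get_hash_memory_limit(void)
    {
        double      mem_limit;

        /* work_mem is in kilobytes; do the scaling in double arithmetic */
        mem_limit = (double) work_mem * hash_mem_multiplier * 1024.0;

        /* clamp to what a size_t can represent */
        mem_limit = Min(mem_limit, (double) SIZE_MAX);

        return (size_t) mem_limit;
    }

Doing the multiplication in double and clamping to SIZE_MAX is what sidesteps the overflow that forced the old 2GB clamp.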
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r--  src/backend/optimizer/path/costsize.c    7
-rw-r--r--  src/backend/optimizer/plan/planner.c     6
-rw-r--r--  src/backend/optimizer/plan/subselect.c   6
-rw-r--r--  src/backend/optimizer/prep/prepunion.c   8
-rw-r--r--  src/backend/optimizer/util/pathnode.c    3
5 files changed, 11 insertions, 19 deletions
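The hunks that follow all make the same mechanical change: each caller drops its local "int hash_mem = get_hash_mem();" and the "hash_mem * 1024L" kilobyte-to-byte conversion, and compares its size estimate directly against the byte-valued limit.  Schematically (a simplified before/after illustration, with estimated_bytes standing in for each caller's own estimate):

    /* before: kilobytes in an int, widened with a "long" multiply */
    int     hash_mem = get_hash_mem();

    if (estimated_bytes > hash_mem * 1024L)
        return false;       /* too big to hash */

    /* after: compare against the size_t byte limit directly */
    if (estimated_bytes > get_hash_memory_limit())
        return false;       /* too big to hash */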
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b54cf34a8e1..30c8595f761 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2438,7 +2438,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
Cost total_cost;
/* available cache space */
- hash_mem_bytes = get_hash_mem() * 1024L;
+ hash_mem_bytes = get_hash_memory_limit();
/*
* Set the number of bytes each cache entry should consume in the cache.
@@ -3860,7 +3860,6 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
Cost run_cost = workspace->run_cost;
int numbuckets = workspace->numbuckets;
int numbatches = workspace->numbatches;
- int hash_mem;
Cost cpu_per_tuple;
QualCost hash_qual_cost;
QualCost qp_qual_cost;
@@ -3986,10 +3985,8 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
* that way, so it will be unable to drive the batch size below hash_mem
* when this is true.)
*/
- hash_mem = get_hash_mem();
if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
- inner_path->pathtarget->width) >
- (hash_mem * 1024L))
+ inner_path->pathtarget->width) > get_hash_memory_limit())
startup_cost += disable_cost;
/*
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1868c4eff47..86816ffe19d 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3668,7 +3668,7 @@ consider_groupingsets_paths(PlannerInfo *root,
double dNumGroups)
{
Query *parse = root->parse;
- int hash_mem = get_hash_mem();
+ Size hash_mem_limit = get_hash_memory_limit();
/*
* If we're not being offered sorted input, then only consider plans that
@@ -3734,7 +3734,7 @@ consider_groupingsets_paths(PlannerInfo *root,
* with. Override hash_mem in that case; otherwise, we'll rely on the
* sorted-input case to generate usable mixed paths.
*/
- if (hashsize > hash_mem * 1024L && gd->rollups)
+ if (hashsize > hash_mem_limit && gd->rollups)
return; /* nope, won't fit */
/*
@@ -3853,7 +3853,7 @@ consider_groupingsets_paths(PlannerInfo *root,
{
List *rollups = NIL;
List *hash_sets = list_copy(gd->unsortable_sets);
- double availspace = (hash_mem * 1024.0);
+ double availspace = hash_mem_limit;
ListCell *lc;
/*
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index b5a61f39335..c9f7a09d102 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -724,7 +724,6 @@ static bool
subplan_is_hashable(Plan *plan)
{
double subquery_size;
- int hash_mem = get_hash_mem();
/*
* The estimated size of the subquery result must fit in hash_mem. (Note:
@@ -734,7 +733,7 @@ subplan_is_hashable(Plan *plan)
*/
subquery_size = plan->plan_rows *
(MAXALIGN(plan->plan_width) + MAXALIGN(SizeofHeapTupleHeader));
- if (subquery_size > hash_mem * 1024L)
+ if (subquery_size > get_hash_memory_limit())
return false;
return true;
@@ -749,7 +748,6 @@ static bool
subpath_is_hashable(Path *path)
{
double subquery_size;
- int hash_mem = get_hash_mem();
/*
* The estimated size of the subquery result must fit in hash_mem. (Note:
@@ -759,7 +757,7 @@ subpath_is_hashable(Path *path)
*/
subquery_size = path->rows *
(MAXALIGN(path->pathtarget->width) + MAXALIGN(SizeofHeapTupleHeader));
- if (subquery_size > hash_mem * 1024L)
+ if (subquery_size > get_hash_memory_limit())
return false;
return true;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 037dfaacfd4..e9256a2d4d2 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -1019,7 +1019,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
const char *construct)
{
int numGroupCols = list_length(groupClauses);
- int hash_mem = get_hash_mem();
+ Size hash_mem_limit = get_hash_memory_limit();
bool can_sort;
bool can_hash;
Size hashentrysize;
@@ -1055,13 +1055,11 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
*/
hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
- if (hashentrysize * dNumGroups > hash_mem * 1024L)
+ if (hashentrysize * dNumGroups > hash_mem_limit)
return false;
/*
- * See if the estimated cost is no more than doing it the other way. We
- * deliberately give the hash case more memory when hash_mem exceeds
- * standard work mem (i.e. when hash_mem_multiplier exceeds 1.0).
+ * See if the estimated cost is no more than doing it the other way.
*
* We need to consider input_plan + hashagg versus input_plan + sort +
* group. Note that the actual result plan might involve a SetOp or
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 0c94cbe767a..41cbf328c46 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1794,9 +1794,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
* planner.c).
*/
int hashentrysize = subpath->pathtarget->width + 64;
- int hash_mem = get_hash_mem();
- if (hashentrysize * pathnode->path.rows > hash_mem * 1024L)
+ if (hashentrysize * pathnode->path.rows > get_hash_memory_limit())
{
/*
* We should not try to hash. Hack the SpecialJoinInfo to