path: root/src/backend/executor/nodeAgg.c
author     Jeff Davis <jdavis@postgresql.org>    2020-03-23 13:56:28 -0700
committer  Jeff Davis <jdavis@postgresql.org>    2020-03-23 15:43:07 -0700
commit     64fe6022790920ef6edee475bfa162a961f148d8 (patch)
tree       74feb997972baf2ce5cf8b2f80ff6eaa912ac755 /src/backend/executor/nodeAgg.c
parent     0830d21f5b01064837dc8bd910ab31a5b7a1101a (diff)
download   postgresql-64fe6022790920ef6edee475bfa162a961f148d8.tar.gz
           postgresql-64fe6022790920ef6edee475bfa162a961f148d8.zip
Fixes for Disk-based Hash Aggregation.
Justin Pryzby raised a couple of issues with commit 1f39bce0; this commit fixes them. It also tweaks how the size of a hash entry and the number of buckets are estimated when calling BuildTupleHashTableExt().

Discussion: https://www.postgresql.org/message-id/20200319064222.GR26184@telsasoft.com
Diffstat (limited to 'src/backend/executor/nodeAgg.c')
-rw-r--r--  src/backend/executor/nodeAgg.c  |  19
1 file changed, 7 insertions(+), 12 deletions(-)
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 44c159ab2a3..fbc0480fc64 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1873,17 +1873,12 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
             aggstate->hash_disk_used = disk_used;
     }
 
-    /*
-     * Update hashentrysize estimate based on contents. Don't include meta_mem
-     * in the memory used, because empty buckets would inflate the per-entry
-     * cost. An underestimate of the per-entry size is better than an
-     * overestimate, because an overestimate could compound with each level of
-     * recursion.
-     */
+    /* update hashentrysize estimate based on contents */
     if (aggstate->hash_ngroups_current > 0)
     {
         aggstate->hashentrysize =
-            hash_mem / (double)aggstate->hash_ngroups_current;
+            sizeof(TupleHashEntryData) +
+            (hash_mem / (double)aggstate->hash_ngroups_current);
     }
 }
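The first hunk changes how nodeAgg.c re-estimates the per-entry size from the hash table's contents. As a minimal standalone sketch (not PostgreSQL code; the function name, the 24-byte stand-in for sizeof(TupleHashEntryData), and the sample numbers are assumptions for illustration), the new estimate adds the fixed hash-entry overhead to the measured per-group memory rather than reporting the per-group memory alone:

#include <stdio.h>

/* assumed stand-in for sizeof(TupleHashEntryData); illustration only */
#define ENTRY_OVERHEAD 24.0

/* hypothetical helper mirroring the patched estimate in hash_agg_update_metrics() */
static double
estimate_hashentrysize(double hash_mem_used, double ngroups)
{
    if (ngroups <= 0)
        return ENTRY_OVERHEAD;  /* nothing measured yet */

    /* fixed per-entry overhead plus average memory actually used per group */
    return ENTRY_OVERHEAD + hash_mem_used / ngroups;
}

int
main(void)
{
    /* e.g. 4 MB of group-key and transition-state memory over 50000 groups */
    printf("%.1f bytes per entry\n",
           estimate_hashentrysize(4.0 * 1024 * 1024, 50000));
    return 0;
}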
@@ -1899,10 +1894,10 @@ hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory)
     max_nbuckets = memory / hashentrysize;
 
     /*
-     * Leave room for slop to avoid a case where the initial hash table size
-     * exceeds the memory limit (though that may still happen in edge cases).
+     * Underestimating is better than overestimating. Too many buckets crowd
+     * out space for group keys and transition state values.
      */
-    max_nbuckets *= 0.75;
+    max_nbuckets >>= 1;
 
     if (nbuckets > max_nbuckets)
         nbuckets = max_nbuckets;
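The second hunk halves the cap on the initial bucket count instead of taking 75% of it. A hedged sketch of the resulting logic (simplified; the surrounding function body and the other limits the executor applies are omitted, and the inputs in main() are plain example values rather than executor state):

#include <stdio.h>

/* simplified sketch of the bucket-count clamp after the patch */
static long
choose_num_buckets(double hashentrysize, long ngroups, size_t memory)
{
    long    nbuckets = ngroups;
    long    max_nbuckets = (long) (memory / hashentrysize);

    /* underestimate rather than overestimate: too many buckets would crowd
     * out space for group keys and transition state values */
    max_nbuckets >>= 1;

    if (nbuckets > max_nbuckets)
        nbuckets = max_nbuckets;
    if (nbuckets < 1)
        nbuckets = 1;           /* never ask for zero buckets */
    return nbuckets;
}

int
main(void)
{
    /* with ~108-byte entries and a 4 MB budget, the cap works out to 19418 */
    printf("%ld buckets\n",
           choose_num_buckets(108.0, 100000, 4 * 1024 * 1024));
    return 0;
}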
@@ -3548,7 +3543,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
          * reasonable.
          */
         for (i = 0; i < aggstate->num_hashes; i++)
-            totalGroups = aggstate->perhash[i].aggnode->numGroups;
+            totalGroups += aggstate->perhash[i].aggnode->numGroups;
 
         hash_agg_set_limits(aggstate->hashentrysize, totalGroups, 0,
                             &aggstate->hash_mem_limit,
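The final hunk is a one-character fix in ExecInitAgg(): the loop over grouping sets was assigning instead of accumulating, so only the last grouping set's estimate reached hash_agg_set_limits(). A small self-contained illustration of the corrected accumulation (the array below is a made-up stand-in for the perhash[i].aggnode->numGroups estimates):

#include <stdio.h>

int
main(void)
{
    /* made-up per-grouping-set group-count estimates (stand-ins for
     * aggstate->perhash[i].aggnode->numGroups) */
    double  numGroups[] = {1000.0, 250.0, 40.0};
    double  totalGroups = 0;

    for (int i = 0; i < 3; i++)
        totalGroups += numGroups[i];    /* "=" here would keep only 40 */

    /* the total (1290), not just the last set's estimate, should drive
     * the hash memory limits */
    printf("totalGroups = %.0f\n", totalGroups);
    return 0;
}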