| author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-12-29 22:29:03 +0000 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-12-29 22:29:03 +0000 |
| commit | 629df5f4895048249806170765358089708e7e83 | |
| tree | 7f7f2652e3cb8cdfd5ba5e2835b84c57aea2127f /src | |
| parent | b37d6373f0b2fc9f9779b2722f3a6095645cc9d3 | |
Adjust hash table sizing algorithm to avoid integer overflow in
ExecHashJoinGetBatch(). Fixes core dump on large hash joins, as in
example from Rae Stiening.
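
The fix clamps the computed batch count so that nbatch * totalbuckets can never exceed INT_MAX; clamping at INT_MAX / totalbuckets rather than INT_MAX is what keeps the later bucket-to-batch arithmetic within int range. A minimal standalone sketch of that clamp follows; `clamp_batches` and the sample sizes are illustrative, not PostgreSQL source:

```c
#include <limits.h>
#include <math.h>
#include <stdio.h>

/*
 * Sketch of the patch's clamp: convert a floating-point batch estimate
 * to int while guaranteeing nbatch * totalbuckets <= INT_MAX, so later
 * integer arithmetic on bucket/batch numbers cannot overflow.
 */
static int
clamp_batches(double dtmp, int totalbuckets)
{
	int		nbatch;

	if (dtmp < INT_MAX / totalbuckets)
		nbatch = (int) dtmp;
	else
		nbatch = INT_MAX / totalbuckets;
	if (nbatch <= 0)
		nbatch = 1;
	return nbatch;
}

int
main(void)
{
	/* Illustrative sizes: ~1 TB inner relation vs. a 1 MB hashtable. */
	double	inner_rel_bytes = 1e12;
	long	hash_table_bytes = 1024 * 1024L;
	int		totalbuckets = 10000;
	double	dtmp;

	dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
				(double) hash_table_bytes);
	printf("raw estimate %.0f -> clamped nbatch %d\n",
		   dtmp, clamp_batches(dtmp, totalbuckets));
	return 0;
}
```

With these sizes the raw estimate is 953675 batches, well above INT_MAX / 10000 = 214748, so the clamp engages.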
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/executor/nodeHash.c | 36 |
1 file changed, 23 insertions, 13 deletions
```diff
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 8bb5bde84c0..4ac8aecd2d0 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
- * $Id: nodeHash.c,v 1.66 2002/09/04 20:31:18 momjian Exp $
+ * $Id: nodeHash.c,v 1.66.2.1 2002/12/29 22:29:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -342,7 +343,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
 	int			tupsize;
 	double		inner_rel_bytes;
-	double		hash_table_bytes;
+	long		hash_table_bytes;
+	double		dtmp;
 	int			nbatch;
 	int			nbuckets;
 	int			totalbuckets;
@@ -360,20 +362,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
 	/*
-	 * Target hashtable size is SortMem kilobytes, but not less than
-	 * sqrt(estimated inner rel size), so as to avoid horrible
-	 * performance.
+	 * Target in-memory hashtable size is SortMem kilobytes.
 	 */
-	hash_table_bytes = sqrt(inner_rel_bytes);
-	if (hash_table_bytes < (SortMem * 1024L))
-		hash_table_bytes = SortMem * 1024L;
+	hash_table_bytes = SortMem * 1024L;
 
 	/*
 	 * Count the number of hash buckets we want for the whole relation,
 	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
-	 * bucket!).
+	 * bucket!).  It has to fit in an int, however.
 	 */
-	totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	if (dtmp < INT_MAX)
+		totalbuckets = (int) dtmp;
+	else
+		totalbuckets = INT_MAX;
+	if (totalbuckets <= 0)
+		totalbuckets = 1;
 
 	/*
 	 * Count the number of buckets we think will actually fit in the
@@ -407,10 +411,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 	 * that nbatch doesn't have to have anything to do with the ratio
 	 * totalbuckets/nbuckets; in fact, it is the number of groups we
 	 * will use for the part of the data that doesn't fall into the
-	 * first nbuckets hash buckets.
+	 * first nbuckets hash buckets.  We try to set it to make all the
+	 * batches the same size.  But we have to keep nbatch small
+	 * enough to avoid integer overflow in ExecHashJoinGetBatch().
 	 */
-	nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-						hash_table_bytes);
+	dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+				hash_table_bytes);
+	if (dtmp < INT_MAX / totalbuckets)
+		nbatch = (int) dtmp;
+	else
+		nbatch = INT_MAX / totalbuckets;
 	if (nbatch <= 0)
 		nbatch = 1;
 	}
```
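
To see why the guard is written as `dtmp < INT_MAX / totalbuckets`, note that ExecHashJoinGetBatch() maps a virtual bucket number to a batch number with integer arithmetic whose intermediate product can approach nbatch * totalbuckets. A toy demonstration of the wraparound; the product shown here is assumed for illustration and is not quoted from nodeHashjoin.c:

```c
#include <limits.h>
#include <stdio.h>

int
main(void)
{
	int		totalbuckets = 100000;
	int		nbatch = 500000;			/* unclamped: nbatch * totalbuckets > INT_MAX */
	int		bucketno = totalbuckets - 1;	/* largest virtual bucket number */

	/*
	 * Hypothetical bucket-to-batch product.  Computed exactly in 64 bits
	 * here only to show what 32-bit int arithmetic would truncate it to:
	 * the truncated value is negative, and a negative batch number used
	 * as an index is the kind of corruption behind the reported core dump.
	 */
	long long	exact = (long long) bucketno * nbatch;
	int			wrapped = (int) exact;

	printf("exact product:       %lld\n", exact);
	printf("wrapped to int:      %d\n", wrapped);
	printf("safe nbatch ceiling: %d (INT_MAX / totalbuckets)\n",
		   INT_MAX / totalbuckets);
	return 0;
}
```

On a typical 32-bit-int platform this prints an exact product of 49999500000 truncated to -1540107552, while the patched sizing code would never let nbatch exceed 21474 for this bucket count.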