about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/access/hash/hashfunc.c 27
-rw-r--r-- src/backend/executor/nodeHash.c 13
-rw-r--r-- src/include/access/hash.h 3
3 files changed, 36 insertions, 7 deletions
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index 1e2d779a14c..1f9813277a3 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.48 2006/10/04 00:29:48 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.48.2.1 2007/06/01 15:58:01 tgl Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
@@ -267,6 +267,31 @@ hash_any(register const unsigned char *k, register int keylen)
/* case 0: nothing left to add */
}
mix(a, b, c);
+
+ /* report the result */
+ return UInt32GetDatum(c);
+}
+
+/*
+ * hash_uint32() -- hash a 32-bit value
+ *
+ * This has the same result (at least on little-endian machines) as
+ * hash_any(&k, sizeof(uint32)), but is faster and doesn't force the
+ * caller to store k into memory.  Returns the mixed value c as a Datum.
+ */
+Datum
+hash_uint32(uint32 k)
+{
+ register uint32 a,
+ b,
+ c;
+
+ a = 0x9e3779b9 + k; /* the key is folded into a only */
+ b = 0x9e3779b9; /* same starting constant as a, without the key */
+ c = 3923095 + (uint32) sizeof(uint32); /* constant plus the key size in bytes */
+
+ mix(a, b, c); /* single mix() call; mix() is defined outside this hunk */
+
/* report the result */
return UInt32GetDatum(c);
}
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 171cfb58f28..b36832b8384 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.107 2006/07/14 04:44:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.107.2.1 2007/06/01 15:58:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -24,6 +24,7 @@
#include <math.h>
#include <limits.h>
+#include "access/hash.h"
#include "executor/execdebug.h"
#include "executor/hashjoin.h"
#include "executor/instrument.h"
@@ -719,9 +720,11 @@ ExecHashGetHashValue(HashJoinTable hashtable,
* chains), and must only cause the batch number to remain the same or
* increase. Our algorithm is
* bucketno = hashvalue MOD nbuckets
- * batchno = (hashvalue DIV nbuckets) MOD nbatch
- * where nbuckets should preferably be prime so that all bits of the
- * hash value can affect both bucketno and batchno.
+ * batchno = hash_uint32(hashvalue) MOD nbatch
+ * which gives reasonably independent bucket and batch numbers in the face
+ * of some rather poorly-implemented hash functions in hashfunc.c. (This
+ * will change in PG 8.3.)
+ *
* nbuckets doesn't change over the course of the join.
*
* nbatch is always a power of 2; we increase it only by doubling it. This
@@ -740,7 +743,7 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
{
*bucketno = hashvalue % nbuckets;
/* since nbatch is a power of 2, can do MOD by masking */
- *batchno = (hashvalue / nbuckets) & (nbatch - 1);
+ *batchno = hash_uint32(hashvalue) & (nbatch - 1);
}
else
{
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 016e51d09f9..4b8ce788928 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.73.2.1 2007/04/19 20:24:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.73.2.2 2007/06/01 15:58:02 tgl Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
@@ -262,6 +262,7 @@ extern Datum hashname(PG_FUNCTION_ARGS);
extern Datum hashtext(PG_FUNCTION_ARGS);
extern Datum hashvarlena(PG_FUNCTION_ARGS);
extern Datum hash_any(register const unsigned char *k, register int keylen);
+extern Datum hash_uint32(uint32 k);
/* private routines */