Change hash indexes to store only the hash code rather than the whole indexed

value. This means that hash index lookups are always lossy and have to be rechecked when the heap is visited; however, the gain in index compactness outweighs this when the indexed values are wide. Also, we only need to perform datatype comparisons when the hash codes match exactly, rather than for every entry in the hash bucket; so it could also win for datatypes that have expensive comparison functions. A small additional win is gained by keeping hash index pages sorted by hash code and using binary search to reduce the number of index tuples we have to look at. Xiao Meng This commit also incorporates Zdenek Kotala's patch to isolate hash metapages and hash bitmaps a bit better from the page header datastructures.
author: Tom Lane <tgl@sss.pgh.pa.us> 2008-09-15 18:43:41 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2008-09-15 18:43:41 +0000
commit: 4adc2f72a4ccd6e55e594aca837f09130a6af62b (patch)
tree: 6da4349e66c02ce2d76fe9600ff7ac8aeee741cb /src/backend/utils
parent: 440b3384b0741199b4f56a8aac773ecd16aba137 (diff)
download: postgresql-4adc2f72a4ccd6e55e594aca837f09130a6af62b.tar.gz
postgresql-4adc2f72a4ccd6e55e594aca837f09130a6af62b.zip
1 files changed, 7 insertions, 20 deletions
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 775840da185..29a076e1384 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -91,7 +91,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.86 2008/08/01 13:16:09 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.87 2008/09/15 18:43:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -101,7 +101,6 @@
 #include <limits.h>
 
 #include "access/genam.h"
-#include "access/hash.h"
 #include "access/nbtree.h"
 #include "catalog/pg_amop.h"
 #include "catalog/pg_operator.h"
@@ -353,7 +352,6 @@ struct Tuplesortstate
 	bool		enforceUnique;	/* complain if we find duplicate tuples */
 
 	/* These are specific to the index_hash subcase: */
-	FmgrInfo   *hash_proc;		/* call info for the hash function */
 	uint32		hash_mask;		/* mask for sortable part of hash code */
 
 	/*
@@ -689,13 +687,6 @@ tuplesort_begin_index_hash(Relation indexRel,
 
 	state->indexRel = indexRel;
 
-	/*
-	 * We look up the index column's hash function just once, to avoid
-	 * chewing lots of cycles in repeated index_getprocinfo calls.  This
-	 * assumes that our caller holds the index relation open throughout the
-	 * sort, else the pointer obtained here might cease to be valid.
-	 */
-	state->hash_proc = index_getprocinfo(indexRel, 1, HASHPROC);
 	state->hash_mask = hash_mask;
 
 	MemoryContextSwitchTo(oldcontext);
@@ -2821,11 +2812,6 @@ static int
 comparetup_index_hash(const SortTuple *a, const SortTuple *b,
 					  Tuplesortstate *state)
 {
-	/*
-	 * It's slightly annoying to redo the hash function each time, although
-	 * most hash functions ought to be cheap.  Is it worth having a variant
-	 * tuple storage format so we can store the hash code?
-	 */
 	uint32		hash1;
 	uint32		hash2;
 	IndexTuple	tuple1;
@@ -2834,13 +2820,14 @@ comparetup_index_hash(const SortTuple *a, const SortTuple *b,
 	/* Allow interrupting long sorts */
 	CHECK_FOR_INTERRUPTS();
 
-	/* Compute hash codes and mask off bits we don't want to sort by */
+	/*
+	 * Fetch hash keys and mask off bits we don't want to sort by.
+	 * We know that the first column of the index tuple is the hash key.
+	 */
 	Assert(!a->isnull1);
-	hash1 = DatumGetUInt32(FunctionCall1(state->hash_proc, a->datum1))
-		& state->hash_mask;
+	hash1 = DatumGetUInt32(a->datum1) & state->hash_mask;
 	Assert(!b->isnull1);
-	hash2 = DatumGetUInt32(FunctionCall1(state->hash_proc, b->datum1))
-		& state->hash_mask;
+	hash2 = DatumGetUInt32(b->datum1) & state->hash_mask;
 
 	if (hash1 > hash2)
 		return 1;
author	Tom Lane <tgl@sss.pgh.pa.us>	2008-09-15 18:43:41 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2008-09-15 18:43:41 +0000
commit	4adc2f72a4ccd6e55e594aca837f09130a6af62b (patch)
tree	6da4349e66c02ce2d76fe9600ff7ac8aeee741cb /src/backend/utils
parent	440b3384b0741199b4f56a8aac773ecd16aba137 (diff)
download	postgresql-4adc2f72a4ccd6e55e594aca837f09130a6af62b.tar.gz postgresql-4adc2f72a4ccd6e55e594aca837f09130a6af62b.zip