author     Tom Lane <tgl@sss.pgh.pa.us>  2016-07-16 15:30:15 -0400
committer  Tom Lane <tgl@sss.pgh.pa.us>  2016-07-16 15:30:15 -0400
commit     9563d5b5e4c75e676d73a45546bd47b77c2bd739 (patch)
tree       59b0aacc876ef9710b3f7379d4fee5915f6bfedc /src
parent     278148907a971ec7fa4ffb24248103d8012155d2 (diff)
Add regression test case exercising the sorting path for hash index build.
We've broken this code path at least twice in the past, so it's prudent to have a test case that covers it.  To allow exercising the code path without creating a very large (and slow to run) test case, redefine the sort threshold to be bounded by maintenance_work_mem as well as the number of available buffers.

While at it, fix an ancient oversight that when building a temp index, the number of available buffers is not NBuffers but NLocBuffer.  Also, if assertions are enabled, apply a direct test that the sort actually does return the tuples in the expected order.

Peter Geoghegan

Patch: <CAM3SWZTBAo4hjbBd780+MrOKiKp_TMo1N3A0Rw9_im8gbD7fQA@mail.gmail.com>
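As an illustration of why the 1MB setting in the new test reliably triggers the sorted build, here is a minimal standalone sketch of the threshold arithmetic the patch introduces, assuming the default BLCKSZ of 8192 bytes; the NBuffers value below is made up for the example:

#include <stdio.h>

#define BLCKSZ 8192                     /* default PostgreSQL page size */
#define Min(x, y) ((x) < (y) ? (x) : (y))

int
main(void)
{
    long maintenance_work_mem = 1024;   /* in kB: SET maintenance_work_mem = '1MB' */
    long NBuffers = 16384;              /* hypothetical 128MB of shared_buffers */
    long sort_threshold;

    /* same formula as the patch: memory budget expressed in pages */
    sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;

    /* bound it by the buffers actually available (NLocBuffer for temp indexes) */
    sort_threshold = Min(sort_threshold, NBuffers);

    printf("sort if num_buckets >= %ld\n", sort_threshold);    /* prints 128 */
    return 0;
}

At 1MB the threshold works out to 1048576 / 8192 = 128 pages, far below any plausible shared_buffers setting, so the tenk1 index built with fillfactor 10 comfortably exceeds it and the tuplesort path runs even in a small regression database.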
Diffstat (limited to 'src')
-rw-r--r--  src/backend/access/hash/hash.c               | 20
-rw-r--r--  src/backend/access/hash/hashsort.c           | 23
-rw-r--r--  src/test/regress/expected/create_index.out   |  7
-rw-r--r--  src/test/regress/sql/create_index.sql        |  7
4 files changed, 51 insertions(+), 6 deletions(-)
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 19695ee1022..30c82e191c6 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -22,6 +22,7 @@
#include "access/relscan.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
+#include "miscadmin.h"
#include "optimizer/plancat.h"
#include "utils/index_selfuncs.h"
#include "utils/rel.h"
@@ -97,6 +98,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
double reltuples;
double allvisfrac;
uint32 num_buckets;
+ long sort_threshold;
HashBuildState buildstate;
/*
@@ -120,12 +122,24 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* then we'll thrash horribly. To prevent that scenario, we can sort the
* tuples by (expected) bucket number. However, such a sort is useless
* overhead when the index does fit in RAM. We choose to sort if the
- * initial index size exceeds NBuffers.
+ * initial index size exceeds maintenance_work_mem, or the number of
+ * buffers usable for the index, whichever is less. (Limiting by the
+ * number of buffers should reduce thrashing between PG buffers and kernel
+ * buffers, which seems useful even if no physical I/O results. Limiting
+ * by maintenance_work_mem is useful to allow easy testing of the sort
+ * code path, and may be useful to DBAs as an additional control knob.)
*
* NOTE: this test will need adjustment if a bucket is ever different from
- * one page.
+ * one page. Also, "initial index size" accounting does not include the
+ * metapage, nor the first bitmap page.
*/
- if (num_buckets >= (uint32) NBuffers)
+ sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
+ if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+ sort_threshold = Min(sort_threshold, NBuffers);
+ else
+ sort_threshold = Min(sort_threshold, NLocBuffer);
+
+ if (num_buckets >= (uint32) sort_threshold)
buildstate.spool = _h_spoolinit(heap, index, num_buckets);
else
buildstate.spool = NULL;
diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c
index 51b9f3f792a..8938ab5b247 100644
--- a/src/backend/access/hash/hashsort.c
+++ b/src/backend/access/hash/hashsort.c
@@ -37,6 +37,7 @@ struct HSpool
{
Tuplesortstate *sortstate; /* state data for tuplesort.c */
Relation index;
+ uint32 hash_mask; /* bitmask for hash codes */
};
@@ -47,7 +48,6 @@ HSpool *
_h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
{
HSpool *hspool = (HSpool *) palloc0(sizeof(HSpool));
- uint32 hash_mask;
hspool->index = index;
@@ -60,7 +60,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
* we could just compute num_buckets - 1. We prefer not to assume that
* here, though.
*/
- hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
+ hspool->hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
/*
* We size the sort area as maintenance_work_mem rather than work_mem to
@@ -69,7 +69,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
*/
hspool->sortstate = tuplesort_begin_index_hash(heap,
index,
- hash_mask,
+ hspool->hash_mask,
maintenance_work_mem,
false);
@@ -105,12 +105,29 @@ _h_indexbuild(HSpool *hspool)
{
IndexTuple itup;
bool should_free;
+#ifdef USE_ASSERT_CHECKING
+ uint32 hashkey = 0;
+#endif
tuplesort_performsort(hspool->sortstate);
while ((itup = tuplesort_getindextuple(hspool->sortstate,
true, &should_free)) != NULL)
{
+ /*
+ * Technically, it isn't critical that hash keys be found in sorted
+ * order, since this sorting is only used to increase locality of
+ * access as a performance optimization. It still seems like a good
+ * idea to test tuplesort.c's handling of hash index tuple sorts
+ * through an assertion, though.
+ */
+#ifdef USE_ASSERT_CHECKING
+ uint32 lasthashkey = hashkey;
+
+ hashkey = _hash_get_indextuple_hashkey(itup) & hspool->hash_mask;
+ Assert(hashkey >= lasthashkey);
+#endif
+
_hash_doinsert(hspool->index, itup);
if (should_free)
pfree(itup);
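The ordering invariant that the new assertion enforces can be tried outside the backend as well. Below is a minimal standalone sketch of the check _h_indexbuild() now performs under USE_ASSERT_CHECKING, using made-up hash values and a 128-bucket mask; it only demonstrates that masked hash keys must come back from the sort in nondecreasing order:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* stand-ins for _hash_get_indextuple_hashkey() results, in the
     * order the tuplesort returned them */
    uint32_t sorted_hashes[] = {0x00000005, 0x12345687, 0xdeadbeef};
    uint32_t hash_mask = (((uint32_t) 1) << 7) - 1;     /* 128 buckets */
    uint32_t hashkey = 0;
    size_t   i;

    for (i = 0; i < sizeof(sorted_hashes) / sizeof(sorted_hashes[0]); i++)
    {
        uint32_t lasthashkey = hashkey;

        hashkey = sorted_hashes[i] & hash_mask;
        assert(hashkey >= lasthashkey);     /* bucket numbers never go backward */
        printf("bucket %u\n", (unsigned) hashkey);
    }
    return 0;
}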
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 97cc49e6235..0be5cf2dbea 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2346,6 +2346,13 @@ CREATE UNLOGGED TABLE unlogged_hash_table (id int4);
CREATE INDEX unlogged_hash_index ON unlogged_hash_table USING hash (id int4_ops);
DROP TABLE unlogged_hash_table;
-- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
+-- Test hash index build tuplesorting. Force hash tuplesort using low
+-- maintenance_work_mem setting and fillfactor:
+SET maintenance_work_mem = '1MB';
+CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
+WARNING: hash indexes are not WAL-logged and their use is discouraged
+DROP INDEX hash_tuplesort_idx;
+RESET maintenance_work_mem;
--
-- Test functional index
--
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 7c582ea810e..7e0bd84ff7f 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -690,6 +690,13 @@ DROP TABLE unlogged_hash_table;
-- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
+-- Test hash index build tuplesorting. Force hash tuplesort using low
+-- maintenance_work_mem setting and fillfactor:
+SET maintenance_work_mem = '1MB';
+CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
+DROP INDEX hash_tuplesort_idx;
+RESET maintenance_work_mem;
+
--
-- Test functional index