Diffstat (limited to 'src/backend/access/hash/hashovfl.c')
-rw-r--r--  src/backend/access/hash/hashovfl.c  78
1 file changed, 52 insertions(+), 26 deletions(-)
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 828c5279865..4fe0301c75d 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.52 2006/03/31 23:32:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.53 2006/11/19 21:33:22 tgl Exp $
*
* NOTES
* Overflow pages look like ordinary relation pages.
@@ -20,7 +20,7 @@
#include "access/hash.h"
-static BlockNumber _hash_getovflpage(Relation rel, Buffer metabuf);
+static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
static uint32 _hash_firstfreebit(uint32 map);
@@ -99,18 +99,14 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
Buffer
_hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
{
- BlockNumber ovflblkno;
Buffer ovflbuf;
Page page;
Page ovflpage;
HashPageOpaque pageopaque;
HashPageOpaque ovflopaque;
- /* allocate an empty overflow page */
- ovflblkno = _hash_getovflpage(rel, metabuf);
-
- /* lock the overflow page */
- ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
+ /* allocate and lock an empty overflow page */
+ ovflbuf = _hash_getovflpage(rel, metabuf);
ovflpage = BufferGetPage(ovflbuf);
/*
@@ -150,7 +146,7 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
MarkBufferDirty(ovflbuf);
/* logically chain overflow page to previous page */
- pageopaque->hasho_nextblkno = ovflblkno;
+ pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
_hash_wrtbuf(rel, buf);
return ovflbuf;
@@ -159,16 +155,18 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
/*
* _hash_getovflpage()
*
- * Find an available overflow page and return its block number.
+ * Find an available overflow page and return it. The returned buffer
+ * is pinned and write-locked, but its contents are not initialized.
*
* The caller must hold a pin, but no lock, on the metapage buffer.
- * The buffer is returned in the same state.
+ * That buffer is left in the same state at exit.
*/
-static BlockNumber
+static Buffer
_hash_getovflpage(Relation rel, Buffer metabuf)
{
HashMetaPage metap;
Buffer mapbuf = 0;
+ Buffer newbuf;
BlockNumber blkno;
uint32 orig_firstfree;
uint32 splitnum;
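The comment above redefines _hash_getovflpage()'s contract: the caller now gets back a pinned, write-locked but still uninitialized buffer rather than a bare block number. As a rough sketch of how the sole caller, _hash_addovflpage(), consumes that contract (simplified and partly reconstructed from surrounding code this diff does not show; tail-page locking and metapage handling are elided):

    /* sketch only, assuming access/hash.h; not a verbatim quote of the patched file */
    ovflbuf = _hash_getovflpage(rel, metabuf);    /* already pinned + write-locked */
    ovflpage = BufferGetPage(ovflbuf);

    /* the allocator does not initialize the contents, so the caller must */
    _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
    ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);  /* back-link to old tail */
    ovflopaque->hasho_nextblkno = InvalidBlockNumber;
    ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
    ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
    MarkBufferDirty(ovflbuf);

    /* and the old tail page is pointed at the new one (see the earlier hunk) */
    pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);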
@@ -243,11 +241,10 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
}
- /* No Free Page Found - have to allocate a new page */
- bit = metap->hashm_spares[splitnum];
- metap->hashm_spares[splitnum]++;
-
- /* Check if we need to allocate a new bitmap page */
+ /*
+ * No free pages --- have to extend the relation to add an overflow page.
+ * First, check to see if we have to add a new bitmap page too.
+ */
if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
{
/*
@@ -258,23 +255,40 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
* marked "in use". Subsequent pages do not exist yet, but it is
* convenient to pre-mark them as "in use" too.
*/
- _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
-
bit = metap->hashm_spares[splitnum];
+ _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
metap->hashm_spares[splitnum]++;
}
else
{
/*
- * Nothing to do here; since the page was past the last used page, we
- * know its bitmap bit was preinitialized to "in use".
+ * Nothing to do here; since the page will be past the last used page,
+ * we know its bitmap bit was preinitialized to "in use".
*/
}
/* Calculate address of the new overflow page */
+ bit = metap->hashm_spares[splitnum];
blkno = bitno_to_blkno(metap, bit);
/*
+ * We have to fetch the page with P_NEW to ensure smgr's idea of the
+ * relation length stays in sync with ours. XXX It's annoying to do this
+ * with metapage write lock held; would be better to use a lock that
+ * doesn't block incoming searches. Best way to fix it would be to stop
+ * maintaining hashm_spares[hashm_ovflpoint] and rely entirely on the
+ * smgr relation length to track where new overflow pages come from;
+ * then we could release the metapage before we do the smgrextend.
+ * FIXME later (not in beta...)
+ */
+ newbuf = _hash_getbuf(rel, P_NEW, HASH_WRITE);
+ if (BufferGetBlockNumber(newbuf) != blkno)
+ elog(ERROR, "unexpected hash relation size: %u, should be %u",
+ BufferGetBlockNumber(newbuf), blkno);
+
+ metap->hashm_spares[splitnum]++;
+
+ /*
* Adjust hashm_firstfree to avoid redundant searches. But don't risk
* changing it if someone moved it while we were searching bitmap pages.
*/
@@ -284,7 +298,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
/* Write updated metapage and release lock, but not pin */
_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
- return blkno;
+ return newbuf;
found:
/* convert bit to bit number within page */
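To make the reordering in the allocation branch above concrete: when the last bitmap page is full, the new bitmap page must claim the next overflow-bit number first, and only then does the new overflow page claim the one after it, with hashm_spares bumped once for each. A small standalone sketch of that accounting follows; the constant and the numbers are made up purely for illustration and do not reflect real hash-index page sizes:

    #include <stdio.h>

    #define TOY_BMPGSZ_BIT 1024          /* bits per bitmap page; made-up value */

    int
    main(void)
    {
        unsigned int spares = 1024;      /* next unassigned overflow-bit number */
        unsigned int last_bit = 1023;    /* last bit the free-space search examined */
        unsigned int ovfl_bit;

        if (last_bit == TOY_BMPGSZ_BIT - 1)
        {
            /* current bitmap page is full: the new bitmap page itself takes
             * the next bit number (the reordered "bit = hashm_spares" line) */
            unsigned int bitmap_bit = spares++;

            printf("new bitmap page gets bit %u\n", bitmap_bit);
        }

        /* only then does the overflow page get its bit; the real code turns it
         * into a block number with bitno_to_blkno(), fetches the page with
         * P_NEW, cross-checks the block number, and bumps spares once more */
        ovfl_bit = spares++;
        printf("new overflow page gets bit %u\n", ovfl_bit);
        return 0;
    }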
@@ -300,7 +314,7 @@ found:
/* convert bit to absolute bit number */
bit += (i << BMPG_SHIFT(metap));
- /* Calculate address of the new overflow page */
+ /* Calculate address of the recycled overflow page */
blkno = bitno_to_blkno(metap, bit);
/*
@@ -320,7 +334,8 @@ found:
_hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
}
- return blkno;
+ /* Fetch and return the recycled page */
+ return _hash_getbuf(rel, blkno, HASH_WRITE);
}
/*
@@ -388,7 +403,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
prevblkno = ovflopaque->hasho_prevblkno;
bucket = ovflopaque->hasho_bucket;
- /* Zero the page for debugging's sake; then write and release it */
+ /*
+ * Zero the page for debugging's sake; then write and release it.
+ * (Note: if we failed to zero the page here, we'd have problems
+ * with the Assert in _hash_pageinit() when the page is reused.)
+ */
MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
_hash_wrtbuf(rel, ovflbuf);
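The new comment above alludes to an assertion in _hash_pageinit() that the page being initialized still looks brand-new. A toy, standalone illustration of why the MemSet matters when the freed page is later recycled; the struct and the "is new" test below are stand-ins for the real bufpage.h machinery, not quotes of it:

    #include <string.h>
    #include <assert.h>

    #define TOY_PAGE_SIZE 8192

    typedef struct
    {
        unsigned short pd_lower;     /* start of free space */
        unsigned short pd_upper;     /* end of free space; 0 on a never-used page */
        char           filler[TOY_PAGE_SIZE - 2 * sizeof(unsigned short)];
    } ToyPage;

    static int
    toy_page_is_new(const ToyPage *page)
    {
        /* models the "this page has never been initialized" check */
        return page->pd_upper == 0;
    }

    int
    main(void)
    {
        ToyPage page;

        /* pretend this was a live overflow page before being freed */
        page.pd_lower = 24;
        page.pd_upper = 4096;

        /* without the MemSet in _hash_freeovflpage, re-initializing the
         * recycled page would trip the assertion below */
        memset(&page, 0, sizeof(page));
        assert(toy_page_is_new(&page));
        return 0;
    }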
@@ -488,12 +507,19 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
/*
* It is okay to write-lock the new bitmap page while holding metapage
* write lock, because no one else could be contending for the new page.
+ * Also, the metapage lock makes it safe to extend the index using P_NEW,
+ * which we want to do to ensure the smgr's idea of the relation size
+ * stays in step with ours.
*
* There is some loss of concurrency in possibly doing I/O for the new
* page while holding the metapage lock, but this path is taken so seldom
* that it's not worth worrying about.
*/
- buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ buf = _hash_getbuf(rel, P_NEW, HASH_WRITE);
+ if (BufferGetBlockNumber(buf) != blkno)
+ elog(ERROR, "unexpected hash relation size: %u, should be %u",
+ BufferGetBlockNumber(buf), blkno);
+
pg = BufferGetPage(buf);
/* initialize the page */
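The extend-with-P_NEW-and-verify idiom now appears twice in this patch: once for the new overflow page and once here for the new bitmap page. It could be factored into a small helper along the following lines; the function below is hypothetical, not part of the patch, though it uses only backend calls that appear in the hunks above:

    #include "postgres.h"
    #include "access/hash.h"

    /*
     * Hypothetical helper: extend the index by one page while the caller's
     * metapage lock prevents anyone else from extending it concurrently, and
     * cross-check that smgr's idea of the relation length matches the block
     * number the metapage arithmetic predicted.
     */
    static Buffer
    hash_extend_checked(Relation rel, BlockNumber expected_blkno)
    {
        Buffer      buf = _hash_getbuf(rel, P_NEW, HASH_WRITE);

        if (BufferGetBlockNumber(buf) != expected_blkno)
            elog(ERROR, "unexpected hash relation size: %u, should be %u",
                 BufferGetBlockNumber(buf), expected_blkno);
        return buf;
    }

Within this commit the check is simply written out at both call sites, which keeps the patch minimal at the cost of a little duplication.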