Diffstat (limited to 'src/backend/access/hash/hashovfl.c')
-rw-r--r--   src/backend/access/hash/hashovfl.c   78
1 files changed, 52 insertions, 26 deletions
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 828c5279865..4fe0301c75d 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.52 2006/03/31 23:32:05 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.53 2006/11/19 21:33:22 tgl Exp $
  *
  * NOTES
  *	  Overflow pages look like ordinary relation pages.
@@ -20,7 +20,7 @@
 #include "access/hash.h"
 
 
-static BlockNumber _hash_getovflpage(Relation rel, Buffer metabuf);
+static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
 static uint32 _hash_firstfreebit(uint32 map);
 
 
@@ -99,18 +99,14 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
 Buffer
 _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 {
-	BlockNumber ovflblkno;
 	Buffer		ovflbuf;
 	Page		page;
 	Page		ovflpage;
 	HashPageOpaque pageopaque;
 	HashPageOpaque ovflopaque;
 
-	/* allocate an empty overflow page */
-	ovflblkno = _hash_getovflpage(rel, metabuf);
-
-	/* lock the overflow page */
-	ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
+	/* allocate and lock an empty overflow page */
+	ovflbuf = _hash_getovflpage(rel, metabuf);
 	ovflpage = BufferGetPage(ovflbuf);
 
 	/*
@@ -150,7 +146,7 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 	MarkBufferDirty(ovflbuf);
 
 	/* logically chain overflow page to previous page */
-	pageopaque->hasho_nextblkno = ovflblkno;
+	pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
 	_hash_wrtbuf(rel, buf);
 
 	return ovflbuf;
@@ -159,16 +155,18 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 /*
  *	_hash_getovflpage()
  *
- *	Find an available overflow page and return its block number.
+ *	Find an available overflow page and return it.  The returned buffer
+ *	is pinned and write-locked, but its contents are not initialized.
  *
  *	The caller must hold a pin, but no lock, on the metapage buffer.
- *	The buffer is returned in the same state.
+ *	That buffer is left in the same state at exit.
  */
-static BlockNumber
+static Buffer
 _hash_getovflpage(Relation rel, Buffer metabuf)
 {
 	HashMetaPage metap;
 	Buffer		mapbuf = 0;
+	Buffer		newbuf;
 	BlockNumber blkno;
 	uint32		orig_firstfree;
 	uint32		splitnum;
@@ -243,11 +241,10 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 		_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
 	}
 
-	/* No Free Page Found - have to allocate a new page */
-	bit = metap->hashm_spares[splitnum];
-	metap->hashm_spares[splitnum]++;
-
-	/* Check if we need to allocate a new bitmap page */
+	/*
+	 * No free pages --- have to extend the relation to add an overflow page.
+	 * First, check to see if we have to add a new bitmap page too.
+	 */
 	if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
 	{
 		/*
@@ -258,23 +255,40 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 		 * marked "in use".  Subsequent pages do not exist yet, but it is
 		 * convenient to pre-mark them as "in use" too.
 		 */
-		_hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
-		bit = metap->hashm_spares[splitnum];
+		_hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
 		metap->hashm_spares[splitnum]++;
 	}
 	else
 	{
 		/*
-		 * Nothing to do here; since the page was past the last used page, we
-		 * know its bitmap bit was preinitialized to "in use".
+		 * Nothing to do here; since the page will be past the last used page,
+		 * we know its bitmap bit was preinitialized to "in use".
 		 */
 	}
 
 	/* Calculate address of the new overflow page */
+	bit = metap->hashm_spares[splitnum];
 	blkno = bitno_to_blkno(metap, bit);
 
 	/*
+	 * We have to fetch the page with P_NEW to ensure smgr's idea of the
+	 * relation length stays in sync with ours.  XXX It's annoying to do this
+	 * with metapage write lock held; would be better to use a lock that
+	 * doesn't block incoming searches.  Best way to fix it would be to stop
+	 * maintaining hashm_spares[hashm_ovflpoint] and rely entirely on the
+	 * smgr relation length to track where new overflow pages come from;
+	 * then we could release the metapage before we do the smgrextend.
+	 * FIXME later (not in beta...)
+	 */
+	newbuf = _hash_getbuf(rel, P_NEW, HASH_WRITE);
+	if (BufferGetBlockNumber(newbuf) != blkno)
+		elog(ERROR, "unexpected hash relation size: %u, should be %u",
+			 BufferGetBlockNumber(newbuf), blkno);
+
+	metap->hashm_spares[splitnum]++;
+
+	/*
 	 * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
 	 * changing it if someone moved it while we were searching bitmap pages.
 	 */
@@ -284,7 +298,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 	/* Write updated metapage and release lock, but not pin */
 	_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
 
-	return blkno;
+	return newbuf;
 
 found:
 	/* convert bit to bit number within page */
@@ -300,7 +314,7 @@ found:
 	/* convert bit to absolute bit number */
 	bit += (i << BMPG_SHIFT(metap));
 
-	/* Calculate address of the new overflow page */
+	/* Calculate address of the recycled overflow page */
 	blkno = bitno_to_blkno(metap, bit);
 
 	/*
@@ -320,7 +334,8 @@ found:
 		_hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
 	}
 
-	return blkno;
+	/* Fetch and return the recycled page */
+	return _hash_getbuf(rel, blkno, HASH_WRITE);
 }
 
 /*
@@ -388,7 +403,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 	prevblkno = ovflopaque->hasho_prevblkno;
 	bucket = ovflopaque->hasho_bucket;
 
-	/* Zero the page for debugging's sake; then write and release it */
+	/*
+	 * Zero the page for debugging's sake; then write and release it.
+	 * (Note: if we failed to zero the page here, we'd have problems
+	 * with the Assert in _hash_pageinit() when the page is reused.)
+	 */
 	MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
 	_hash_wrtbuf(rel, ovflbuf);
 
@@ -488,12 +507,19 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
 	/*
 	 * It is okay to write-lock the new bitmap page while holding metapage
 	 * write lock, because no one else could be contending for the new page.
+	 * Also, the metapage lock makes it safe to extend the index using P_NEW,
+	 * which we want to do to ensure the smgr's idea of the relation size
+	 * stays in step with ours.
 	 *
 	 * There is some loss of concurrency in possibly doing I/O for the new
 	 * page while holding the metapage lock, but this path is taken so seldom
 	 * that it's not worth worrying about.
 	 */
-	buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+	buf = _hash_getbuf(rel, P_NEW, HASH_WRITE);
+	if (BufferGetBlockNumber(buf) != blkno)
+		elog(ERROR, "unexpected hash relation size: %u, should be %u",
+			 BufferGetBlockNumber(buf), blkno);
+
 	pg = BufferGetPage(buf);
 
 	/* initialize the page */
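The heart of this change is the allocation pattern in _hash_getovflpage() and _hash_initbitmap(): derive the expected block number from the metapage bookkeeping (hashm_spares[] via bitno_to_blkno()), extend the relation with P_NEW while the metapage lock is held, and raise an error if the physical relation length disagrees. The following standalone C program is only a toy model of that invariant, written outside the backend; toy_relation, toy_extend() and bitno_to_blkno_model() are invented stand-ins for smgr and bitno_to_blkno(), not PostgreSQL APIs.

/*
 * Toy model of the rule this patch enforces: the overflow page's
 * bitmap-derived block number must equal the physical end of the relation
 * at the moment the file is extended.  All names here are illustrative
 * inventions, not PostgreSQL internals.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

typedef uint32_t BlockNumber;

/* Stand-in for smgr's idea of the relation: just a page counter. */
typedef struct toy_relation
{
	BlockNumber nblocks;		/* current physical length in pages */
} toy_relation;

/* Model of "fetch with P_NEW": append one page and return its number. */
static BlockNumber
toy_extend(toy_relation *rel)
{
	return rel->nblocks++;
}

/*
 * Model of bitno_to_blkno(): translate the logical overflow-page ordinal
 * kept in the spares[] counter into a block number.  Here the mapping is
 * a simple offset past the metapage and primary bucket pages.
 */
static BlockNumber
bitno_to_blkno_model(BlockNumber first_ovfl_blkno, uint32_t bit)
{
	return first_ovfl_blkno + bit;
}

int
main(void)
{
	toy_relation rel = {.nblocks = 10};	/* metapage + 9 bucket pages, say */
	BlockNumber first_ovfl_blkno = 10;	/* overflow space starts here */
	uint32_t	spares = 0;				/* analogue of hashm_spares[splitnum] */

	for (int i = 0; i < 3; i++)
	{
		/* expected address, derived from the metapage bookkeeping */
		BlockNumber expected = bitno_to_blkno_model(first_ovfl_blkno, spares);

		/* physical extension, analogue of _hash_getbuf(rel, P_NEW, ...) */
		BlockNumber actual = toy_extend(&rel);

		if (actual != expected)
		{
			/* the cross-check the patch adds via elog(ERROR, ...) */
			fprintf(stderr, "unexpected hash relation size: %u, should be %u\n",
					(unsigned) actual, (unsigned) expected);
			return EXIT_FAILURE;
		}
		spares++;				/* bump the counter only after extending */
		printf("allocated overflow page at block %u\n", (unsigned) actual);
	}
	return EXIT_SUCCESS;
}

In the patched backend code the same check is the added elog(ERROR, "unexpected hash relation size: ...") call; the XXX/FIXME comment in the diff notes that relying entirely on the smgr relation length (and dropping hashm_spares[hashm_ovflpoint]) would allow the metapage lock to be released before the extension, at the cost of a larger rework.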