Diffstat (limited to 'src/backend/access/nbtree/nbtpage.c')
-rw-r--r--    src/backend/access/nbtree/nbtpage.c    145
1 file changed, 126 insertions, 19 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 75628e0eb98..70bac0052fc 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -32,6 +32,7 @@
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
+#include "utils/memdebug.h"
 #include "utils/snapmgr.h"
 
 static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
@@ -198,8 +199,8 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
 	}
 
 	/* trade in our read lock for a write lock */
-	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-	LockBuffer(metabuf, BT_WRITE);
+	_bt_unlockbuf(rel, metabuf);
+	_bt_lockbuf(rel, metabuf, BT_WRITE);
 
 	START_CRIT_SECTION();
 
@@ -340,8 +341,8 @@ _bt_getroot(Relation rel, int access)
 		}
 
 		/* trade in our read lock for a write lock */
-		LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-		LockBuffer(metabuf, BT_WRITE);
+		_bt_unlockbuf(rel, metabuf);
+		_bt_lockbuf(rel, metabuf, BT_WRITE);
 
 		/*
 		 * Race condition: if someone else initialized the metadata between
@@ -434,8 +435,8 @@ _bt_getroot(Relation rel, int access)
 		 * else accessing the new root page while it's unlocked, since no one
 		 * else knows where it is yet.
 		 */
-		LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
-		LockBuffer(rootbuf, BT_READ);
+		_bt_unlockbuf(rel, rootbuf);
+		_bt_lockbuf(rel, rootbuf, BT_READ);
 
 		/* okay, metadata is correct, release lock on it without caching */
 		_bt_relbuf(rel, metabuf);
@@ -783,10 +784,20 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedX
  *		blkno == P_NEW means to get an unallocated index page.  The page
  *		will be initialized before returning it.
  *
+ *		The general rule in nbtree is that it's never okay to access a
+ *		page without holding both a buffer pin and a buffer lock on
+ *		the page's buffer.
+ *
  *		When this routine returns, the appropriate lock is set on the
  *		requested buffer and its reference count has been incremented
  *		(ie, the buffer is "locked and pinned").  Also, we apply
- *		_bt_checkpage to sanity-check the page (except in P_NEW case).
+ *		_bt_checkpage to sanity-check the page (except in P_NEW case),
+ *		and perform Valgrind client requests that help Valgrind detect
+ *		unsafe page accesses.
+ *
+ *		Note: raw LockBuffer() calls are disallowed in nbtree; all
+ *		buffer lock requests need to go through wrapper functions such
+ *		as _bt_lockbuf().
  */
 Buffer
 _bt_getbuf(Relation rel, BlockNumber blkno, int access)
@@ -797,7 +808,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 	{
 		/* Read an existing block of the relation */
 		buf = ReadBuffer(rel, blkno);
-		LockBuffer(buf, access);
+		_bt_lockbuf(rel, buf, access);
 		_bt_checkpage(rel, buf);
 	}
 	else
@@ -837,7 +848,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 			if (blkno == InvalidBlockNumber)
 				break;
 			buf = ReadBuffer(rel, blkno);
-			if (ConditionalLockBuffer(buf))
+			if (_bt_conditionallockbuf(rel, buf))
 			{
 				page = BufferGetPage(buf);
 				if (_bt_page_recyclable(page))
@@ -890,7 +901,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 		buf = ReadBuffer(rel, P_NEW);
 
 		/* Acquire buffer lock on new page */
-		LockBuffer(buf, BT_WRITE);
+		_bt_lockbuf(rel, buf, BT_WRITE);
 
 		/*
 		 * Release the file-extension lock; it's now OK for someone else to
@@ -931,9 +942,10 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
 	Assert(blkno != P_NEW);
 	if (BufferIsValid(obuf))
-		LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
+		_bt_unlockbuf(rel, obuf);
 	buf = ReleaseAndReadBuffer(obuf, rel, blkno);
-	LockBuffer(buf, access);
+	_bt_lockbuf(rel, buf, access);
+
 	_bt_checkpage(rel, buf);
 	return buf;
 }
@@ -946,7 +958,102 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
 void
 _bt_relbuf(Relation rel, Buffer buf)
 {
-	UnlockReleaseBuffer(buf);
+	_bt_unlockbuf(rel, buf);
+	ReleaseBuffer(buf);
+}
+
+/*
+ * _bt_lockbuf() -- lock a pinned buffer.
+ *
+ * Lock is acquired without acquiring another pin.  This is like a raw
+ * LockBuffer() call, but performs extra steps needed by Valgrind.
+ *
+ * Note: Caller may need to call _bt_checkpage() with buf when pin on buf
+ * wasn't originally acquired in _bt_getbuf() or _bt_relandgetbuf().
+ */
+void
+_bt_lockbuf(Relation rel, Buffer buf, int access)
+{
+	/* LockBuffer() asserts that pin is held by this backend */
+	LockBuffer(buf, access);
+
+	/*
+	 * It doesn't matter that _bt_unlockbuf() won't get called in the
+	 * event of an nbtree error (e.g. a unique violation error).  That
+	 * won't cause Valgrind false positives.
+	 *
+	 * The nbtree client requests are superimposed on top of the
+	 * bufmgr.c buffer pin client requests.  In the event of an nbtree
+	 * error the buffer will certainly get marked as defined when the
+	 * backend once again acquires its first pin on the buffer.  (Of
+	 * course, if the backend never touches the buffer again then it
+	 * doesn't matter that it remains non-accessible to Valgrind.)
+	 *
+	 * Note: When an IndexTuple C pointer gets computed using an
+	 * ItemId read from a page while a lock was held, the C pointer
+	 * becomes unsafe to dereference forever as soon as the lock is
+	 * released.  Valgrind can only detect cases where the pointer
+	 * gets dereferenced with no _current_ lock/pin held, though.
+	 */
+	if (!RelationUsesLocalBuffers(rel))
+		VALGRIND_MAKE_MEM_DEFINED(BufferGetPage(buf), BLCKSZ);
+}
+
+/*
+ * _bt_unlockbuf() -- unlock a pinned buffer.
+ */
+void
+_bt_unlockbuf(Relation rel, Buffer buf)
+{
+	/*
+	 * Buffer is pinned and locked, which means that it is expected to be
+	 * defined and addressable.  Check that proactively.
+	 */
+	VALGRIND_CHECK_MEM_IS_DEFINED(BufferGetPage(buf), BLCKSZ);
+
+	/* LockBuffer() asserts that pin is held by this backend */
+	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+
+	if (!RelationUsesLocalBuffers(rel))
+		VALGRIND_MAKE_MEM_NOACCESS(BufferGetPage(buf), BLCKSZ);
+}
+
+/*
+ * _bt_conditionallockbuf() -- conditionally BT_WRITE lock pinned
+ * buffer.
+ *
+ * Note: Caller may need to call _bt_checkpage() with buf when pin on buf
+ * wasn't originally acquired in _bt_getbuf() or _bt_relandgetbuf().
+ */
+bool
+_bt_conditionallockbuf(Relation rel, Buffer buf)
+{
+	/* ConditionalLockBuffer() asserts that pin is held by this backend */
+	if (!ConditionalLockBuffer(buf))
+		return false;
+
+	if (!RelationUsesLocalBuffers(rel))
+		VALGRIND_MAKE_MEM_DEFINED(BufferGetPage(buf), BLCKSZ);
+
+	return true;
+}
+
+/*
+ * _bt_upgradelockbufcleanup() -- upgrade lock to super-exclusive/cleanup
+ * lock.
+ */
+void
+_bt_upgradelockbufcleanup(Relation rel, Buffer buf)
+{
+	/*
+	 * Buffer is pinned and locked, which means that it is expected to be
+	 * defined and addressable.  Check that proactively.
+	 */
+	VALGRIND_CHECK_MEM_IS_DEFINED(BufferGetPage(buf), BLCKSZ);
+
+	/* LockBuffer() asserts that pin is held by this backend */
+	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+	LockBufferForCleanup(buf);
 }
 
 /*
@@ -1580,7 +1687,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
 			 * To avoid deadlocks, we'd better drop the leaf page lock
 			 * before going further.
 			 */
-			LockBuffer(leafbuf, BUFFER_LOCK_UNLOCK);
+			_bt_unlockbuf(rel, leafbuf);
 
 			/*
 			 * Check that the left sibling of leafbuf (if any) is not
@@ -1616,7 +1723,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
 			 * (Page deletion can cope with the stack being to the left of
 			 * leafbuf, but not to the right of leafbuf.)
 			 */
-			LockBuffer(leafbuf, BT_WRITE);
+			_bt_lockbuf(rel, leafbuf, BT_WRITE);
 			continue;
 		}
@@ -1970,7 +2077,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
 	leafleftsib = opaque->btpo_prev;
 	leafrightsib = opaque->btpo_next;
 
-	LockBuffer(leafbuf, BUFFER_LOCK_UNLOCK);
+	_bt_unlockbuf(rel, leafbuf);
 
 	/*
 	 * Check here, as calling loops will have locks held, preventing
@@ -2005,7 +2112,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
 	 * To avoid deadlocks, we'd better drop the target page lock before
 	 * going further.
 	 */
-	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+	_bt_unlockbuf(rel, buf);
 }
 
 /*
@@ -2022,7 +2129,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
 	 * table.)
 	 */
 	if (target != leafblkno)
-		LockBuffer(leafbuf, BT_WRITE);
+		_bt_lockbuf(rel, leafbuf, BT_WRITE);
 	if (leftsib != P_NONE)
 	{
 		lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
@@ -2072,7 +2179,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
 	 * rather than a superexclusive lock, since no scan will stop on an empty
 	 * page.
 	 */
-	LockBuffer(buf, BT_WRITE);
+	_bt_lockbuf(rel, buf, BT_WRITE);
 
 	page = BufferGetPage(buf);
 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
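
The core of the change is the pair of Valgrind client requests in _bt_lockbuf() and _bt_unlockbuf(): a shared buffer's page is marked NOACCESS whenever the backend holds only a pin, and is marked DEFINED again once a buffer lock is taken, so Memcheck reports any page access made without a lock. Below is a minimal standalone sketch of that pattern, not PostgreSQL code: the page_lock()/page_unlock() helpers and the 8192-byte PAGE_SIZE are hypothetical stand-ins for _bt_lockbuf()/_bt_unlockbuf() and BLCKSZ, and it calls valgrind/memcheck.h directly, whereas the patch goes through PostgreSQL's utils/memdebug.h wrappers (which compile to no-ops unless USE_VALGRIND is defined).

/* demo.c -- compile with: cc -g demo.c -o demo && valgrind ./demo */
#include <stdlib.h>
#include <string.h>
#include <valgrind/memcheck.h>

#define PAGE_SIZE 8192			/* stand-in for PostgreSQL's BLCKSZ */

/*
 * Hypothetical helpers standing in for _bt_lockbuf()/_bt_unlockbuf();
 * real lock acquisition is omitted -- only the Valgrind bookkeeping
 * applied by the patch is shown.
 */
static void
page_lock(char *page)
{
	/* page is accessible again for as long as the "lock" is held */
	VALGRIND_MAKE_MEM_DEFINED(page, PAGE_SIZE);
}

static void
page_unlock(char *page)
{
	/* page must not be touched again until page_lock() is called */
	VALGRIND_MAKE_MEM_NOACCESS(page, PAGE_SIZE);
}

int
main(void)
{
	char	   *page = malloc(PAGE_SIZE);

	memset(page, 0, PAGE_SIZE);	/* "initialize" the page */

	page_lock(page);
	page[0] = 1;				/* OK: accessed while "locked" */
	page_unlock(page);

	/*
	 * Uncommenting the line below makes Memcheck report an invalid
	 * write, which is how unsafe nbtree page accesses get caught.
	 */
	/* page[1] = 2; */

	free(page);
	return 0;
}

One design note, hedged: the !RelationUsesLocalBuffers(rel) guard skips the client requests for temporary relations, presumably because their pages live in backend-local buffers rather than shared buffers, and the bufmgr.c pin-level client requests that these nbtree requests are superimposed on cover only shared buffers.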