Diffstat (limited to 'src/backend/access/nbtree/nbtpage.c')
 -rw-r--r--  src/backend/access/nbtree/nbtpage.c  145
 1 file changed, 126 insertions(+), 19 deletions(-)
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 75628e0eb98..70bac0052fc 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -32,6 +32,7 @@
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
+#include "utils/memdebug.h"
#include "utils/snapmgr.h"
static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
@@ -198,8 +199,8 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
}
/* trade in our read lock for a write lock */
- LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
- LockBuffer(metabuf, BT_WRITE);
+ _bt_unlockbuf(rel, metabuf);
+ _bt_lockbuf(rel, metabuf, BT_WRITE);
START_CRIT_SECTION();
@@ -340,8 +341,8 @@ _bt_getroot(Relation rel, int access)
}
/* trade in our read lock for a write lock */
- LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
- LockBuffer(metabuf, BT_WRITE);
+ _bt_unlockbuf(rel, metabuf);
+ _bt_lockbuf(rel, metabuf, BT_WRITE);
/*
* Race condition: if someone else initialized the metadata between
@@ -434,8 +435,8 @@ _bt_getroot(Relation rel, int access)
* else accessing the new root page while it's unlocked, since no one
* else knows where it is yet.
*/
- LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
- LockBuffer(rootbuf, BT_READ);
+ _bt_unlockbuf(rel, rootbuf);
+ _bt_lockbuf(rel, rootbuf, BT_READ);
/* okay, metadata is correct, release lock on it without caching */
_bt_relbuf(rel, metabuf);
@@ -783,10 +784,20 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedX
* blkno == P_NEW means to get an unallocated index page. The page
* will be initialized before returning it.
*
+ * The general rule in nbtree is that it's never okay to access a
+ * page without holding both a buffer pin and a buffer lock on
+ * the page's buffer.
+ *
* When this routine returns, the appropriate lock is set on the
* requested buffer and its reference count has been incremented
* (ie, the buffer is "locked and pinned"). Also, we apply
- * _bt_checkpage to sanity-check the page (except in P_NEW case).
+ * _bt_checkpage to sanity-check the page (except in P_NEW case),
+ * and perform Valgrind client requests that help Valgrind detect
+ * unsafe page accesses.
+ *
+ * Note: raw LockBuffer() calls are disallowed in nbtree; all
+ * buffer lock requests need to go through wrapper functions such
+ * as _bt_lockbuf().
*/
Buffer
_bt_getbuf(Relation rel, BlockNumber blkno, int access)
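The header comment above states the locking discipline without showing it. As a rough illustration only (not part of this patch, assuming the usual nbtree includes; example_read_page is a hypothetical name), a reader following the rule looks something like this:

/*
 * Hypothetical sketch: every page access happens while the buffer is
 * both pinned and locked, and the lock always goes through the nbtree
 * wrappers rather than a raw LockBuffer() call.
 */
static void
example_read_page(Relation rel, BlockNumber blkno)
{
	Buffer		buf;
	Page		page;
	BTPageOpaque opaque;

	/* _bt_getbuf() pins, read-locks, and sanity-checks the page */
	buf = _bt_getbuf(rel, blkno, BT_READ);
	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);

	/* ... examine page and opaque while still pinned and locked ... */
	(void) opaque;

	/* _bt_relbuf() unlocks via _bt_unlockbuf(), then drops the pin */
	_bt_relbuf(rel, buf);
}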
@@ -797,7 +808,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
{
/* Read an existing block of the relation */
buf = ReadBuffer(rel, blkno);
- LockBuffer(buf, access);
+ _bt_lockbuf(rel, buf, access);
_bt_checkpage(rel, buf);
}
else
@@ -837,7 +848,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
if (blkno == InvalidBlockNumber)
break;
buf = ReadBuffer(rel, blkno);
- if (ConditionalLockBuffer(buf))
+ if (_bt_conditionallockbuf(rel, buf))
{
page = BufferGetPage(buf);
if (_bt_page_recyclable(page))
@@ -890,7 +901,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
buf = ReadBuffer(rel, P_NEW);
/* Acquire buffer lock on new page */
- LockBuffer(buf, BT_WRITE);
+ _bt_lockbuf(rel, buf, BT_WRITE);
/*
* Release the file-extension lock; it's now OK for someone else to
@@ -931,9 +942,10 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Assert(blkno != P_NEW);
if (BufferIsValid(obuf))
- LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
+ _bt_unlockbuf(rel, obuf);
buf = ReleaseAndReadBuffer(obuf, rel, blkno);
- LockBuffer(buf, access);
+ _bt_lockbuf(rel, buf, access);
+
_bt_checkpage(rel, buf);
return buf;
}
@@ -946,7 +958,102 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
void
_bt_relbuf(Relation rel, Buffer buf)
{
- UnlockReleaseBuffer(buf);
+ _bt_unlockbuf(rel, buf);
+ ReleaseBuffer(buf);
+}
+
+/*
+ * _bt_lockbuf() -- lock a pinned buffer.
+ *
+ * Lock is acquired without acquiring another pin. This is like a raw
+ * LockBuffer() call, but performs extra steps needed by Valgrind.
+ *
+ * Note: Caller may need to call _bt_checkpage() with buf when pin on buf
+ * wasn't originally acquired in _bt_getbuf() or _bt_relandgetbuf().
+ */
+void
+_bt_lockbuf(Relation rel, Buffer buf, int access)
+{
+ /* LockBuffer() asserts that pin is held by this backend */
+ LockBuffer(buf, access);
+
+ /*
+ * It doesn't matter that _bt_unlockbuf() won't get called in the
+ * event of an nbtree error (e.g. a unique violation error). That
+ * won't cause Valgrind false positives.
+ *
+ * The nbtree client requests are superimposed on top of the
+ * bufmgr.c buffer pin client requests. In the event of an nbtree
+ * error the buffer will certainly get marked as defined when the
+ * backend once again acquires its first pin on the buffer. (Of
+ * course, if the backend never touches the buffer again then it
+ * doesn't matter that it remains non-accessible to Valgrind.)
+ *
+ * Note: When an IndexTuple C pointer gets computed using an
+ * ItemId read from a page while a lock was held, the C pointer
+ * becomes unsafe to dereference forever as soon as the lock is
+ * released. Valgrind can only detect cases where the pointer
+ * gets dereferenced with no _current_ lock/pin held, though.
+ */
+ if (!RelationUsesLocalBuffers(rel))
+ VALGRIND_MAKE_MEM_DEFINED(BufferGetPage(buf), BLCKSZ);
+}
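The note at the end of this comment is easiest to see with a concrete fragment. The sketch below is hypothetical (example_stale_pointer is not part of the patch): once _bt_unlockbuf() marks the whole page NOACCESS, a later dereference of a pointer computed into the page while the lock was held gets reported by Valgrind, provided no other lock or pin has made the buffer accessible again in the meantime.

/*
 * Hypothetical sketch of the bug class the client requests catch.
 */
static void
example_stale_pointer(Relation rel, Buffer buf)
{
	Page		page;
	ItemId		itemid;
	IndexTuple	itup;

	_bt_lockbuf(rel, buf, BT_READ);
	page = BufferGetPage(buf);
	itemid = PageGetItemId(page, FirstOffsetNumber);
	itup = (IndexTuple) PageGetItem(page, itemid);
	_bt_unlockbuf(rel, buf);

	/*
	 * WRONG: itup still points into the shared buffer, which Valgrind
	 * now treats as NOACCESS; dereferencing it here would be flagged.
	 */
	(void) itup;
}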
+
+/*
+ * _bt_unlockbuf() -- unlock a pinned buffer.
+ */
+void
+_bt_unlockbuf(Relation rel, Buffer buf)
+{
+ /*
+ * Buffer is pinned and locked, which means that it is expected to be
+ * defined and addressable. Check that proactively.
+ */
+ VALGRIND_CHECK_MEM_IS_DEFINED(BufferGetPage(buf), BLCKSZ);
+
+ /* LockBuffer() asserts that pin is held by this backend */
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+
+ if (!RelationUsesLocalBuffers(rel))
+ VALGRIND_MAKE_MEM_NOACCESS(BufferGetPage(buf), BLCKSZ);
+}
+
+/*
+ * _bt_conditionallockbuf() -- conditionally BT_WRITE lock pinned
+ * buffer.
+ *
+ * Note: Caller may need to call _bt_checkpage() with buf when pin on buf
+ * wasn't originally acquired in _bt_getbuf() or _bt_relandgetbuf().
+ */
+bool
+_bt_conditionallockbuf(Relation rel, Buffer buf)
+{
+ /* ConditionalLockBuffer() asserts that pin is held by this backend */
+ if (!ConditionalLockBuffer(buf))
+ return false;
+
+ if (!RelationUsesLocalBuffers(rel))
+ VALGRIND_MAKE_MEM_DEFINED(BufferGetPage(buf), BLCKSZ);
+
+ return true;
+}
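A usage sketch (hypothetical, mirroring the FSM-recycling hunk in _bt_getbuf() earlier in this diff; example_try_recycle is an invented name): callers that must not wait simply try the lock and skip the page when it is busy.

static bool
example_try_recycle(Relation rel, Buffer buf)
{
	/* take the write lock only if it is immediately available */
	if (!_bt_conditionallockbuf(rel, buf))
	{
		ReleaseBuffer(buf);
		return false;			/* page is busy; caller tries another one */
	}

	/* pinned and write-locked, so it is safe to examine the page */
	if (_bt_page_recyclable(BufferGetPage(buf)))
		return true;			/* caller reuses the page, keeping the lock */

	_bt_relbuf(rel, buf);
	return false;
}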
+
+/*
+ * _bt_upgradelockbufcleanup() -- upgrade lock to super-exclusive/cleanup
+ * lock.
+ */
+void
+_bt_upgradelockbufcleanup(Relation rel, Buffer buf)
+{
+ /*
+ * Buffer is pinned and locked, which means that it is expected to be
+ * defined and addressable. Check that proactively.
+ */
+ VALGRIND_CHECK_MEM_IS_DEFINED(BufferGetPage(buf), BLCKSZ);
+
+ /* LockBuffer() asserts that pin is held by this backend */
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ LockBufferForCleanup(buf);
}
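A brief usage sketch (hypothetical, not part of the patch; example_cleanup_upgrade is an invented name). LockBufferForCleanup() expects the caller to hold only a pin and then waits until that pin is the only one, which is why the ordinary lock is released first.

static void
example_cleanup_upgrade(Relation rel, Buffer buf)
{
	_bt_lockbuf(rel, buf, BT_WRITE);

	/* ... decide that a super-exclusive (cleanup) lock is needed ... */

	/* drops the write lock, then blocks until ours is the only pin */
	_bt_upgradelockbufcleanup(rel, buf);

	/* ... remove items no concurrent scan can be positioned on ... */

	_bt_relbuf(rel, buf);
}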
/*
@@ -1580,7 +1687,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
* To avoid deadlocks, we'd better drop the leaf page lock
* before going further.
*/
- LockBuffer(leafbuf, BUFFER_LOCK_UNLOCK);
+ _bt_unlockbuf(rel, leafbuf);
/*
* Check that the left sibling of leafbuf (if any) is not
@@ -1616,7 +1723,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
* (Page deletion can cope with the stack being to the left of
* leafbuf, but not to the right of leafbuf.)
*/
- LockBuffer(leafbuf, BT_WRITE);
+ _bt_lockbuf(rel, leafbuf, BT_WRITE);
continue;
}
@@ -1970,7 +2077,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
leafleftsib = opaque->btpo_prev;
leafrightsib = opaque->btpo_next;
- LockBuffer(leafbuf, BUFFER_LOCK_UNLOCK);
+ _bt_unlockbuf(rel, leafbuf);
/*
* Check here, as calling loops will have locks held, preventing
@@ -2005,7 +2112,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
* To avoid deadlocks, we'd better drop the target page lock before
* going further.
*/
- LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ _bt_unlockbuf(rel, buf);
}
/*
@@ -2022,7 +2129,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
* table.)
*/
if (target != leafblkno)
- LockBuffer(leafbuf, BT_WRITE);
+ _bt_lockbuf(rel, leafbuf, BT_WRITE);
if (leftsib != P_NONE)
{
lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
@@ -2072,7 +2179,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
* rather than a superexclusive lock, since no scan will stop on an empty
* page.
*/
- LockBuffer(buf, BT_WRITE);
+ _bt_lockbuf(rel, buf, BT_WRITE);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);