aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author	Jeff Davis <jdavis@postgresql.org>	2023-10-23 17:17:46 -0700
committer	Jeff Davis <jdavis@postgresql.org>	2023-10-23 17:17:46 -0700
commit00d7fb5e2e39198387ae00af8dd18b787b6a4d63 (patch)
treef22d1985878091acbf82af156b0f4c5ca0ce4a73
parentbefe9451fbf5a4b49e80f17917d8c6d8b5edad26 (diff)
downloadpostgresql-00d7fb5e2e39198387ae00af8dd18b787b6a4d63.tar.gz
postgresql-00d7fb5e2e39198387ae00af8dd18b787b6a4d63.zip
Assert that buffers are marked dirty before XLogRegisterBuffer().

Enforce the rule from transam/README in XLogRegisterBuffer(), and
update callers to follow the rule.

Hash indexes sometimes register clean pages as a part of the locking
protocol, so provide a REGBUF_NO_CHANGE flag to support that use.

Discussion: https://postgr.es/m/c84114f8-c7f1-5b57-f85a-3adc31e1a904@iki.fi
Reviewed-by: Heikki Linnakangas
-rw-r--r--src/backend/access/gin/ginbtree.c14
-rw-r--r--src/backend/access/gin/gindatapage.c6
-rw-r--r--src/backend/access/gin/ginentrypage.c3
-rw-r--r--src/backend/access/gin/ginfast.c5
-rw-r--r--src/backend/access/hash/hash.c11
-rw-r--r--src/backend/access/hash/hashovfl.c21
-rw-r--r--src/backend/access/transam/xloginsert.c16
-rw-r--r--src/backend/storage/buffer/bufmgr.c59
-rw-r--r--src/include/access/xloginsert.h1
-rw-r--r--src/include/storage/bufmgr.h2
10 files changed, 118 insertions(+), 20 deletions(-)
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 06e97a7fbb8..fc694b40f14 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -387,24 +387,22 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
START_CRIT_SECTION();
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
- {
XLogBeginInsert();
- XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
- if (BufferIsValid(childbuf))
- XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
- }
- /* Perform the page update, and register any extra WAL data */
+ /*
+ * Perform the page update, dirty and register stack->buffer, and
+ * register any extra WAL data.
+ */
btree->execPlaceToPage(btree, stack->buffer, stack,
insertdata, updateblkno, ptp_workspace);
- MarkBufferDirty(stack->buffer);
-
/* An insert to an internal page finishes the split of the child. */
if (BufferIsValid(childbuf))
{
GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
MarkBufferDirty(childbuf);
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
+ XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
}
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 344e1c5e6bd..24942730852 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -721,9 +721,12 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
/* Apply changes to page */
dataPlaceToPageLeafRecompress(buf, leaf);
+ MarkBufferDirty(buf);
+
/* If needed, register WAL data built by computeLeafRecompressWALData */
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
}
}
@@ -1155,6 +1158,8 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
+ MarkBufferDirty(buf);
+
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
@@ -1167,6 +1172,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
data.offset = off;
data.newitem = *pitem;
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, (char *) &data,
sizeof(ginxlogInsertDataInternal));
}
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 5a8c0eb98d0..379496735ff 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -571,6 +571,8 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
+ MarkBufferDirty(buf);
+
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
@@ -583,6 +585,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
data.isDelete = insertData->isDelete;
data.offset = off;
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, (char *) &data,
offsetof(ginxlogInsertEntry, tuple));
XLogRegisterBufData(0, (char *) insertData->entry,
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index eb6c5548318..c8fe7c78a7a 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -397,6 +397,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
}
Assert((ptr - collectordata) <= collector->sumsize);
+
+ MarkBufferDirty(buffer);
+
if (needWal)
{
XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
@@ -404,8 +407,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
}
metadata->tailFreeSize = PageGetExactFreeSpace(page);
-
- MarkBufferDirty(buffer);
}
/*
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index fc5d97f606e..7a025f33cfe 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -824,11 +824,16 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
XLogRegisterData((char *) &xlrec, SizeOfHashDelete);
/*
- * bucket buffer needs to be registered to ensure that we can
- * acquire a cleanup lock on it during replay.
+ * bucket buffer was not changed, but still needs to be
+ * registered to ensure that we can acquire a cleanup lock on
+ * it during replay.
*/
if (!xlrec.is_primary_bucket_page)
- XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
+ {
+ uint8 flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
+
+ XLogRegisterBuffer(0, bucket_buf, flags);
+ }
XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
XLogRegisterBufData(1, (char *) deletable,
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 39bb2cb9f61..9d1ff20b922 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -658,11 +658,15 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage);
/*
- * bucket buffer needs to be registered to ensure that we can acquire
- * a cleanup lock on it during replay.
+ * bucket buffer was not changed, but still needs to be registered to
+ * ensure that we can acquire a cleanup lock on it during replay.
*/
if (!xlrec.is_prim_bucket_same_wrt)
- XLogRegisterBuffer(0, bucketbuf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
+ {
+ uint8 flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
+
+ XLogRegisterBuffer(0, bucketbuf, flags);
+ }
XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
if (xlrec.ntups > 0)
@@ -960,11 +964,16 @@ readpage:
XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents);
/*
- * bucket buffer needs to be registered to ensure that
- * we can acquire a cleanup lock on it during replay.
+ * bucket buffer was not changed, but still needs to
+ * be registered to ensure that we can acquire a
+ * cleanup lock on it during replay.
*/
if (!xlrec.is_prim_bucket_same_wrt)
- XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
+ {
+ int flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
+
+ XLogRegisterBuffer(0, bucket_buf, flags);
+ }
XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
XLogRegisterBufData(1, (char *) itup_offsets,
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 588626424e6..e4aaa551a04 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -248,6 +248,20 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE))));
Assert(begininsert_called);
+ /*
+ * Ordinarily, buffer should be exclusive-locked and marked dirty before
+ * we get here, otherwise we could end up violating one of the rules in
+ * access/transam/README.
+ *
+ * Some callers intentionally register a clean page and never update that
+ * page's LSN; in that case they can pass the flag REGBUF_NO_CHANGE to
+ * bypass these checks.
+ */
+#ifdef USE_ASSERT_CHECKING
+ if (!(flags & REGBUF_NO_CHANGE))
+ Assert(BufferIsExclusiveLocked(buffer) && BufferIsDirty(buffer));
+#endif
+
if (block_id >= max_registered_block_id)
{
if (block_id >= max_registered_buffers)
@@ -1313,8 +1327,8 @@ log_newpage_range(Relation rel, ForkNumber forknum,
START_CRIT_SECTION();
for (i = 0; i < nbufs; i++)
{
- XLogRegisterBuffer(i, bufpack[i], flags);
MarkBufferDirty(bufpack[i]);
+ XLogRegisterBuffer(i, bufpack[i], flags);
}
recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 8b96759b84f..21a29f9081a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2099,6 +2099,65 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
}
/*
+ * BufferIsExclusiveLocked
+ *
+ * Checks if buffer is exclusive-locked.
+ *
+ * Buffer must be pinned.
+ */
+bool
+BufferIsExclusiveLocked(Buffer buffer)
+{
+ BufferDesc *bufHdr;
+
+ if (BufferIsLocal(buffer))
+ {
+ int bufid = -buffer - 1;
+
+ bufHdr = GetLocalBufferDescriptor(bufid);
+ }
+ else
+ {
+ bufHdr = GetBufferDescriptor(buffer - 1);
+ }
+
+ Assert(BufferIsPinned(buffer));
+ return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
+ LW_EXCLUSIVE);
+}
+
+/*
+ * BufferIsDirty
+ *
+ * Checks if buffer is already dirty.
+ *
+ * Buffer must be pinned and exclusive-locked. (Without an exclusive lock,
+ * the result may be stale before it's returned.)
+ */
+bool
+BufferIsDirty(Buffer buffer)
+{
+ BufferDesc *bufHdr;
+
+ if (BufferIsLocal(buffer))
+ {
+ int bufid = -buffer - 1;
+
+ bufHdr = GetLocalBufferDescriptor(bufid);
+ }
+ else
+ {
+ bufHdr = GetBufferDescriptor(buffer - 1);
+ }
+
+ Assert(BufferIsPinned(buffer));
+ Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
+ LW_EXCLUSIVE));
+
+ return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
+}
+
+/*
* MarkBufferDirty
*
* Marks buffer contents as dirty (actual write happens later).
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index 31785dc578f..cace8674976 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -37,6 +37,7 @@
* will be skipped) */
#define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image
* is taken */
+#define REGBUF_NO_CHANGE 0x20 /* intentionally register clean buffer */
/* prototypes for public functions in xloginsert.c: */
extern void XLogBeginInsert(void);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index d89021f9187..9241f868611 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -179,6 +179,8 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
bool permanent);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
+extern bool BufferIsExclusiveLocked(Buffer buffer);
+extern bool BufferIsDirty(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
extern void IncrBufferRefCount(Buffer buffer);
extern void CheckBufferIsPinnedOnce(Buffer buffer);