diff options
author | Jeff Davis <jdavis@postgresql.org> | 2023-10-23 17:17:46 -0700 |
---|---|---|
committer | Jeff Davis <jdavis@postgresql.org> | 2023-10-23 17:17:46 -0700 |
commit | 00d7fb5e2e39198387ae00af8dd18b787b6a4d63 (patch) | |
tree | f22d1985878091acbf82af156b0f4c5ca0ce4a73 | |
parent | befe9451fbf5a4b49e80f17917d8c6d8b5edad26 (diff) | |
download | postgresql-00d7fb5e2e39198387ae00af8dd18b787b6a4d63.tar.gz postgresql-00d7fb5e2e39198387ae00af8dd18b787b6a4d63.zip |
Assert that buffers are marked dirty before XLogRegisterBuffer().
Enforce the rule from transam/README in XLogRegisterBuffer(), and
update callers to follow the rule.
Hash indexes sometimes register clean pages as a part of the locking
protocol, so provide a REGBUF_NO_CHANGE flag to support that use.
Discussion: https://postgr.es/m/c84114f8-c7f1-5b57-f85a-3adc31e1a904@iki.fi
Reviewed-by: Heikki Linnakangas
-rw-r--r-- | src/backend/access/gin/ginbtree.c | 14 | ||||
-rw-r--r-- | src/backend/access/gin/gindatapage.c | 6 | ||||
-rw-r--r-- | src/backend/access/gin/ginentrypage.c | 3 | ||||
-rw-r--r-- | src/backend/access/gin/ginfast.c | 5 | ||||
-rw-r--r-- | src/backend/access/hash/hash.c | 11 | ||||
-rw-r--r-- | src/backend/access/hash/hashovfl.c | 21 | ||||
-rw-r--r-- | src/backend/access/transam/xloginsert.c | 16 | ||||
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 59 | ||||
-rw-r--r-- | src/include/access/xloginsert.h | 1 | ||||
-rw-r--r-- | src/include/storage/bufmgr.h | 2 |
10 files changed, 118 insertions(+), 20 deletions(-)
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 06e97a7fbb8..fc694b40f14 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -387,24 +387,22 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, START_CRIT_SECTION(); if (RelationNeedsWAL(btree->index) && !btree->isBuild) - { XLogBeginInsert(); - XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD); - if (BufferIsValid(childbuf)) - XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD); - } - /* Perform the page update, and register any extra WAL data */ + /* + * Perform the page update, dirty and register stack->buffer, and + * register any extra WAL data. + */ btree->execPlaceToPage(btree, stack->buffer, stack, insertdata, updateblkno, ptp_workspace); - MarkBufferDirty(stack->buffer); - /* An insert to an internal page finishes the split of the child. */ if (BufferIsValid(childbuf)) { GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; MarkBufferDirty(childbuf); + if (RelationNeedsWAL(btree->index) && !btree->isBuild) + XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD); } if (RelationNeedsWAL(btree->index) && !btree->isBuild) diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index 344e1c5e6bd..24942730852 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -721,9 +721,12 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, /* Apply changes to page */ dataPlaceToPageLeafRecompress(buf, leaf); + MarkBufferDirty(buf); + /* If needed, register WAL data built by computeLeafRecompressWALData */ if (RelationNeedsWAL(btree->index) && !btree->isBuild) { + XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen); } } @@ -1155,6 +1158,8 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, pitem = (PostingItem *) insertdata; GinDataPageAddPostingItem(page, pitem, 
off); + MarkBufferDirty(buf); + if (RelationNeedsWAL(btree->index) && !btree->isBuild) { /* @@ -1167,6 +1172,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, data.offset = off; data.newitem = *pitem; + XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) &data, sizeof(ginxlogInsertDataInternal)); } diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index 5a8c0eb98d0..379496735ff 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -571,6 +571,8 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); + MarkBufferDirty(buf); + if (RelationNeedsWAL(btree->index) && !btree->isBuild) { /* @@ -583,6 +585,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, data.isDelete = insertData->isDelete; data.offset = off; + XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) &data, offsetof(ginxlogInsertEntry, tuple)); XLogRegisterBufData(0, (char *) insertData->entry, diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index eb6c5548318..c8fe7c78a7a 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -397,6 +397,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) } Assert((ptr - collectordata) <= collector->sumsize); + + MarkBufferDirty(buffer); + if (needWal) { XLogRegisterBuffer(1, buffer, REGBUF_STANDARD); @@ -404,8 +407,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) } metadata->tailFreeSize = PageGetExactFreeSpace(page); - - MarkBufferDirty(buffer); } /* diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index fc5d97f606e..7a025f33cfe 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -824,11 
+824,16 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, XLogRegisterData((char *) &xlrec, SizeOfHashDelete); /* - * bucket buffer needs to be registered to ensure that we can - * acquire a cleanup lock on it during replay. + * bucket buffer was not changed, but still needs to be + * registered to ensure that we can acquire a cleanup lock on + * it during replay. */ if (!xlrec.is_primary_bucket_page) - XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE); + { + uint8 flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE; + + XLogRegisterBuffer(0, bucket_buf, flags); + } XLogRegisterBuffer(1, buf, REGBUF_STANDARD); XLogRegisterBufData(1, (char *) deletable, diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index 39bb2cb9f61..9d1ff20b922 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -658,11 +658,15 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage); /* - * bucket buffer needs to be registered to ensure that we can acquire - * a cleanup lock on it during replay. + * bucket buffer was not changed, but still needs to be registered to + * ensure that we can acquire a cleanup lock on it during replay. */ if (!xlrec.is_prim_bucket_same_wrt) - XLogRegisterBuffer(0, bucketbuf, REGBUF_STANDARD | REGBUF_NO_IMAGE); + { + uint8 flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE; + + XLogRegisterBuffer(0, bucketbuf, flags); + } XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD); if (xlrec.ntups > 0) @@ -960,11 +964,16 @@ readpage: XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents); /* - * bucket buffer needs to be registered to ensure that - * we can acquire a cleanup lock on it during replay. + * bucket buffer was not changed, but still needs to + * be registered to ensure that we can acquire a + * cleanup lock on it during replay. 
*/ if (!xlrec.is_prim_bucket_same_wrt) - XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE); + { + int flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE; + + XLogRegisterBuffer(0, bucket_buf, flags); + } XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD); XLogRegisterBufData(1, (char *) itup_offsets, diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 588626424e6..e4aaa551a04 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -248,6 +248,20 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags) Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE)))); Assert(begininsert_called); + /* + * Ordinarily, buffer should be exclusive-locked and marked dirty before + * we get here, otherwise we could end up violating one of the rules in + * access/transam/README. + * + * Some callers intentionally register a clean page and never update that + * page's LSN; in that case they can pass the flag REGBUF_NO_CHANGE to + * bypass these checks. + */ +#ifdef USE_ASSERT_CHECKING + if (!(flags & REGBUF_NO_CHANGE)) + Assert(BufferIsExclusiveLocked(buffer) && BufferIsDirty(buffer)); +#endif + if (block_id >= max_registered_block_id) { if (block_id >= max_registered_buffers) @@ -1313,8 +1327,8 @@ log_newpage_range(Relation rel, ForkNumber forknum, START_CRIT_SECTION(); for (i = 0; i < nbufs; i++) { - XLogRegisterBuffer(i, bufpack[i], flags); MarkBufferDirty(bufpack[i]); + XLogRegisterBuffer(i, bufpack[i], flags); } recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 8b96759b84f..21a29f9081a 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -2099,6 +2099,65 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, } /* + * BufferIsExclusiveLocked + * + * Checks if buffer is exclusive-locked. + * + * Buffer must be pinned. 
+ */ +bool +BufferIsExclusiveLocked(Buffer buffer) +{ + BufferDesc *bufHdr; + + if (BufferIsLocal(buffer)) + { + int bufid = -buffer - 1; + + bufHdr = GetLocalBufferDescriptor(bufid); + } + else + { + bufHdr = GetBufferDescriptor(buffer - 1); + } + + Assert(BufferIsPinned(buffer)); + return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr), + LW_EXCLUSIVE); +} + +/* + * BufferIsDirty + * + * Checks if buffer is already dirty. + * + * Buffer must be pinned and exclusive-locked. (Without an exclusive lock, + * the result may be stale before it's returned.) + */ +bool +BufferIsDirty(Buffer buffer) +{ + BufferDesc *bufHdr; + + if (BufferIsLocal(buffer)) + { + int bufid = -buffer - 1; + + bufHdr = GetLocalBufferDescriptor(bufid); + } + else + { + bufHdr = GetBufferDescriptor(buffer - 1); + } + + Assert(BufferIsPinned(buffer)); + Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr), + LW_EXCLUSIVE)); + + return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY; +} + +/* * MarkBufferDirty * * Marks buffer contents as dirty (actual write happens later). 
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index 31785dc578f..cace8674976 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -37,6 +37,7 @@ * will be skipped) */ #define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image * is taken */ +#define REGBUF_NO_CHANGE 0x20 /* intentionally register clean buffer */ /* prototypes for public functions in xloginsert.c: */ extern void XLogBeginInsert(void); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index d89021f9187..9241f868611 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -179,6 +179,8 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, bool permanent); extern void ReleaseBuffer(Buffer buffer); extern void UnlockReleaseBuffer(Buffer buffer); +extern bool BufferIsExclusiveLocked(Buffer buffer); +extern bool BufferIsDirty(Buffer buffer); extern void MarkBufferDirty(Buffer buffer); extern void IncrBufferRefCount(Buffer buffer); extern void CheckBufferIsPinnedOnce(Buffer buffer); |