aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/buffer/bufmgr.c
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2014-11-13 19:47:44 +0200
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2014-11-13 20:02:37 +0200
commit81c45081960f39351c38cd53554bb3788af54023 (patch)
treeebb3af10b826276e1a74ef38a4487aea0a0ef78e /src/backend/storage/buffer/bufmgr.c
parent35fed51626328a3ff54adae4749bef956e1e1099 (diff)
downloadpostgresql-81c45081960f39351c38cd53554bb3788af54023.tar.gz
postgresql-81c45081960f39351c38cd53554bb3788af54023.zip
Fix race condition between hot standby and restoring a full-page image.
There was a window in RestoreBackupBlock where a page would be zeroed out, but not yet locked. If a backend pinned and locked the page in that window, it saw the zeroed page instead of the old page or new page contents, which could lead to missing rows in a result set, or errors. To fix, replace RBM_ZERO with RBM_ZERO_AND_LOCK, which atomically pins, zeroes, and locks the page, if it's not in the buffer cache already. In stable branches, the old RBM_ZERO constant is renamed to RBM_DO_NOT_USE, to avoid breaking any 3rd party extensions that might use RBM_ZERO. More importantly, this avoids renumbering the other enum values, which would cause even bigger confusion in extensions that use ReadBufferExtended, but haven't been recompiled. Backpatch to all supported versions; this has been racy since hot standby was introduced.
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r--src/backend/storage/buffer/bufmgr.c41
1 files changed, 37 insertions, 4 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index cbfe05e2b5c..9b62aecd917 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -499,14 +499,19 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
* valid, the page is zeroed instead of throwing an error. This is intended
* for non-critical data, where the caller is prepared to repair errors.
*
- * In RBM_ZERO mode, if the page isn't in buffer cache already, it's filled
- * with zeros instead of reading it from disk. Useful when the caller is
- * going to fill the page from scratch, since this saves I/O and avoids
+ * In RBM_ZERO_AND_LOCK mode, if the page isn't in buffer cache already, it's
+ * filled with zeros instead of reading it from disk. Useful when the caller
+ * is going to fill the page from scratch, since this saves I/O and avoids
* unnecessary failure if the page-on-disk has corrupt page headers.
+ * The page is returned locked to ensure that the caller has a chance to
+ * initialize the page before it's made visible to others.
* Caution: do not use this mode to read a page that is beyond the relation's
* current physical EOF; that is likely to cause problems in md.c when
* the page is modified and written out. P_NEW is OK, though.
*
+ * RBM_ZERO_AND_CLEANUP_LOCK is the same as RBM_ZERO_AND_LOCK, but acquires
+ * a cleanup-strength lock on the page.
+ *
* RBM_NORMAL_NO_LOG mode is treated the same as RBM_NORMAL here.
*
* If strategy is not NULL, a nondefault buffer access strategy is used.
@@ -648,6 +653,18 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
isExtend,
found);
+ /*
+ * In RBM_ZERO_AND_LOCK mode the caller expects the page to
+ * be locked on return.
+ */
+ if (!isLocalBuf)
+ {
+ if (mode == RBM_ZERO_AND_LOCK)
+ LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
+ else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
+ LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
+ }
+
return BufferDescriptorGetBuffer(bufHdr);
}
@@ -729,7 +746,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* Read in the page, unless the caller intends to overwrite it and
* just wants us to allocate a buffer.
*/
- if (mode == RBM_ZERO)
+ if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
MemSet((char *) bufBlock, 0, BLCKSZ);
else
{
@@ -771,6 +788,22 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
}
}
+ /*
+ * In RBM_ZERO_AND_LOCK mode, grab the buffer content lock before marking
+ * the page as valid, to make sure that no other backend sees the zeroed
+ * page before the caller has had a chance to initialize it.
+ *
+ * Since no-one else can be looking at the page contents yet, there is no
+ * difference between an exclusive lock and a cleanup-strength lock.
+ * (Note that we cannot use LockBuffer() of LockBufferForCleanup() here,
+ * because they assert that the buffer is already valid.)
+ */
+ if ((mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) &&
+ !isLocalBuf)
+ {
+ LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
+ }
+
if (isLocalBuf)
{
/* Only need to adjust flags */