From 5f1d931cf3fd3653c3f11835b4aa1dc04edb63ad Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Thu, 13 Nov 2014 19:47:44 +0200
Subject: Fix race condition between hot standby and restoring a full-page image.

There was a window in RestoreBackupBlock where a page would be zeroed out,
but not yet locked. If a backend pinned and locked the page in that window,
it saw the zeroed page instead of the old page or new page contents, which
could lead to missing rows in a result set, or errors.

To fix, replace RBM_ZERO with RBM_ZERO_AND_LOCK, which atomically pins,
zeroes, and locks the page, if it's not in the buffer cache already.

In stable branches, the old RBM_ZERO constant is renamed to RBM_DO_NOT_USE,
to avoid breaking any 3rd party extensions that might use RBM_ZERO. More
importantly, this avoids renumbering the other enum values, which would
cause even bigger confusion in extensions that use ReadBufferExtended, but
haven't been recompiled.

Backpatch to all supported versions; this has been racy since hot standby
was introduced.
---
 src/backend/access/transam/xlogutils.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'src/backend/access/transam/xlogutils.c')

diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 3593a4e384d..546e3ded250 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -234,7 +234,8 @@ XLogCheckInvalidPages(void)
  * The returned buffer is exclusively-locked.
  *
  * For historical reasons, instead of a ReadBufferMode argument, this only
- * supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
+ * supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
+ * modes.
  */
 Buffer
 XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
@@ -242,8 +243,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
 	Buffer buf;
 
 	buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
-								 init ? RBM_ZERO : RBM_NORMAL);
-	if (BufferIsValid(buf))
+								 init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
+	if (BufferIsValid(buf) && !init)
 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
 	return buf;
@@ -262,8 +263,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
  * dropped or truncated. If we don't see evidence of that later in the WAL
  * sequence, we'll complain at the end of WAL replay.)
  *
- * In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the
- * relation is extended with all-zeroes pages up to the given block number.
+ * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
+ * with all-zeroes pages up to the given block number.
  *
  * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
  * exist, and we don't check for all-zeroes. Thus, no log entry is made
@@ -317,7 +318,11 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
 			do
 			{
 				if (buffer != InvalidBuffer)
+				{
+					if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
+						LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 					ReleaseBuffer(buffer);
+				}
 				buffer = ReadBufferWithoutRelcache(rnode, forknum,
 												   P_NEW, mode, NULL);
 			}
@@ -325,6 +330,8 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
 			/* Handle the corner case that P_NEW returns non-consecutive pages */
 			if (BufferGetBlockNumber(buffer) != blkno)
 			{
+				if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
+					LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 				ReleaseBuffer(buffer);
 				buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
 												   mode, NULL);
-- 
cgit v1.2.3