diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2014-11-13 19:47:44 +0200 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2014-11-13 20:02:09 +0200 |
commit | 8fc23a9ed0a040d039431ef79b1bf166395ed180 (patch) | |
tree | 8e9b0b6fba1a54727dc186c831b0f615224f3fe4 /src/backend/access/transam/xlogutils.c | |
parent | 955b4ba7f6948359efcef1c47035d382f937e9f3 (diff) | |
download | postgresql-8fc23a9ed0a040d039431ef79b1bf166395ed180.tar.gz postgresql-8fc23a9ed0a040d039431ef79b1bf166395ed180.zip |
Fix race condition between hot standby and restoring a full-page image.
There was a window in RestoreBackupBlock where a page would be zeroed out,
but not yet locked. If a backend pinned and locked the page in that window,
it saw the zeroed page instead of the old page or new page contents, which
could lead to missing rows in a result set, or errors.
To fix, replace RBM_ZERO with RBM_ZERO_AND_LOCK, which atomically pins,
zeroes, and locks the page, if it's not in the buffer cache already.
In stable branches, the old RBM_ZERO constant is renamed to RBM_DO_NOT_USE,
to avoid breaking any 3rd party extensions that might use RBM_ZERO. More
importantly, this avoids renumbering the other enum values, which would
cause even bigger confusion in extensions that use ReadBufferExtended, but
haven't been recompiled.
Backpatch to all supported versions; this has been racy since hot standby
was introduced.
Diffstat (limited to 'src/backend/access/transam/xlogutils.c')
-rw-r--r-- | src/backend/access/transam/xlogutils.c | 17 |
1 files changed, 12 insertions, 5 deletions
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index b7829ff4c6d..9c8dce9532a 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -257,7 +257,8 @@ XLogCheckInvalidPages(void) * The returned buffer is exclusively-locked. * * For historical reasons, instead of a ReadBufferMode argument, this only - * supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes. + * supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false) + * modes. */ Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) @@ -265,8 +266,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) Buffer buf; buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, - init ? RBM_ZERO : RBM_NORMAL); - if (BufferIsValid(buf)) + init ? RBM_ZERO_AND_LOCK : RBM_NORMAL); + if (BufferIsValid(buf) && !init) LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); return buf; @@ -285,8 +286,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) * dropped or truncated. If we don't see evidence of that later in the WAL * sequence, we'll complain at the end of WAL replay.) * - * In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the - * relation is extended with all-zeroes pages up to the given block number. + * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended + * with all-zeroes pages up to the given block number. * * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't * exist, and we don't check for all-zeroes. Thus, no log entry is made @@ -340,7 +341,11 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, do { if (buffer != InvalidBuffer) + { + if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); + } buffer = ReadBufferWithoutRelcache(rnode, forknum, P_NEW, mode, NULL); } @@ -348,6 +353,8 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, /* Handle the corner case that P_NEW returns non-consecutive pages */ if (BufferGetBlockNumber(buffer) != blkno) { + if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, mode, NULL); |