diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2014-11-13 19:47:44 +0200 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2014-11-13 20:01:09 +0200 |
commit | 5f1d931cf3fd3653c3f11835b4aa1dc04edb63ad (patch) | |
tree | 1ef36169ba54a211bc23af400b6109032dfc18c6 /src/backend/access/transam/xlogutils.c | |
parent | 4ddd9e72ffaab50aa755f54e5feffc30a0850930 (diff) | |
download | postgresql-5f1d931cf3fd3653c3f11835b4aa1dc04edb63ad.tar.gz postgresql-5f1d931cf3fd3653c3f11835b4aa1dc04edb63ad.zip |
Fix race condition between hot standby and restoring a full-page image.
There was a window in RestoreBackupBlock where a page would be zeroed out,
but not yet locked. If a backend pinned and locked the page in that window,
it saw the zeroed page instead of the old page or new page contents, which
could lead to missing rows in a result set, or errors.
To fix, replace RBM_ZERO with RBM_ZERO_AND_LOCK, which atomically pins,
zeroes, and locks the page, if it's not in the buffer cache already.
In stable branches, the old RBM_ZERO constant is renamed to RBM_DO_NOT_USE,
to avoid breaking any 3rd party extensions that might use RBM_ZERO. More
importantly, this avoids renumbering the other enum values, which would
cause even bigger confusion in extensions that use ReadBufferExtended, but
haven't been recompiled.
Backpatch to all supported versions; this has been racy since hot standby
was introduced.
Diffstat (limited to 'src/backend/access/transam/xlogutils.c')
-rw-r--r-- | src/backend/access/transam/xlogutils.c | 17 |
1 file changed, 12 insertions, 5 deletions
```diff
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 3593a4e384d..546e3ded250 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -234,7 +234,8 @@ XLogCheckInvalidPages(void)
  * The returned buffer is exclusively-locked.
  *
  * For historical reasons, instead of a ReadBufferMode argument, this only
- * supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
+ * supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
+ * modes.
  */
 Buffer
 XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
@@ -242,8 +243,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
 	Buffer		buf;
 
 	buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
-								 init ? RBM_ZERO : RBM_NORMAL);
-	if (BufferIsValid(buf))
+								 init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
+	if (BufferIsValid(buf) && !init)
 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
 	return buf;
@@ -262,8 +263,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
  * dropped or truncated. If we don't see evidence of that later in the WAL
  * sequence, we'll complain at the end of WAL replay.)
  *
- * In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the
- * relation is extended with all-zeroes pages up to the given block number.
+ * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
+ * with all-zeroes pages up to the given block number.
  *
  * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
  * exist, and we don't check for all-zeroes.  Thus, no log entry is made
@@ -317,7 +318,11 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
 		do
 		{
 			if (buffer != InvalidBuffer)
+			{
+				if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
+					LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 				ReleaseBuffer(buffer);
+			}
 			buffer = ReadBufferWithoutRelcache(rnode, forknum,
 											   P_NEW, mode, NULL);
 		}
@@ -325,6 +330,8 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
 		/* Handle the corner case that P_NEW returns non-consecutive pages */
 		if (BufferGetBlockNumber(buffer) != blkno)
 		{
+			if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
+				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 			ReleaseBuffer(buffer);
 			buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
 											   mode, NULL);
```