aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2012-12-11 15:57:24 +0200
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2012-12-11 18:55:44 +0200
commit: 8b6b374b39d992adea42f703baf28a19909ef747 (patch)
tree: 4c5021ee36c27f09d774298757da782995d29284
parent: 5dd1c287c2866213a753495551dd75d9c18edbcb (diff)
downloadpostgresql-8b6b374b39d992adea42f703baf28a19909ef747.tar.gz
postgresql-8b6b374b39d992adea42f703baf28a19909ef747.zip
Consistency check should compare last record replayed, not last record read.
EndRecPtr is the last record that we've read, but not necessarily yet replayed. CheckRecoveryConsistency should compare minRecoveryPoint with the last replayed record instead. This caused recovery to think it's reached consistency too early. Now that we do the check in CheckRecoveryConsistency correctly, we have to move the call of that function to after redoing a record. The current place, after reading a record but before replaying it, is wrong. In particular, if there are no more records after the one ending at minRecoveryPoint, we don't enter hot standby until one extra record is generated and read by the standby, and CheckRecoveryConsistency is called. These two bugs conspired to make the code appear to work correctly, except for the small window between reading the last record that reaches minRecoveryPoint, and replaying it. In passing, rename recoveryLastRecPtr, which is the last record replayed, to lastReplayedEndRecPtr. This makes it slightly less easily confused with replayEndRecPtr, which is the last record read that we're about to replay. Original report from Kyotaro HORIGUCHI, further diagnosis by Fujii Masao. Backpatch to 9.0, where Hot Standby subtly changed the test from "minRecoveryPoint < EndRecPtr" to "minRecoveryPoint <= EndRecPtr". The former works because, where the test is performed, we have always read one more record than we've replayed.
-rw-r--r-- src/backend/access/transam/xlog.c 33
1 files changed, 19 insertions, 14 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 368a0255f59..0694128e1ed 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -440,10 +440,14 @@ typedef struct XLogCtlData
XLogRecPtr lastCheckPointRecPtr;
CheckPoint lastCheckPoint;
- /* end+1 of the last record replayed (or being replayed) */
+ /*
+ * lastReplayedEndRecPtr points to end+1 of the last record successfully
+ * replayed. When we're currently replaying a record, ie. in a redo
+ * function, replayEndRecPtr points to the end+1 of the record being
+ * replayed, otherwise it's equal to lastReplayedEndRecPtr.
+ */
+ XLogRecPtr lastReplayedEndRecPtr;
XLogRecPtr replayEndRecPtr;
- /* end+1 of the last record replayed */
- XLogRecPtr recoveryLastRecPtr;
/* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
TimestampTz recoveryLastXTime;
/* Are we requested to pause recovery? */
@@ -6493,7 +6497,7 @@ StartupXLOG(void)
}
/*
- * Initialize shared replayEndRecPtr, recoveryLastRecPtr, and
+ * Initialize shared replayEndRecPtr, lastReplayedEndRecPtr, and
* recoveryLastXTime.
*
* This is slightly confusing if we're starting from an online
@@ -6506,7 +6510,7 @@ StartupXLOG(void)
*/
SpinLockAcquire(&xlogctl->info_lck);
xlogctl->replayEndRecPtr = ReadRecPtr;
- xlogctl->recoveryLastRecPtr = EndRecPtr;
+ xlogctl->lastReplayedEndRecPtr = EndRecPtr;
xlogctl->recoveryLastXTime = 0;
xlogctl->recoveryPause = false;
SpinLockRelease(&xlogctl->info_lck);
@@ -6596,9 +6600,6 @@ StartupXLOG(void)
/* Handle interrupt signals of startup process */
HandleStartupProcInterrupts();
- /* Allow read-only connections if we're consistent now */
- CheckRecoveryConsistency();
-
/*
* Pause WAL replay, if requested by a hot-standby session via
* SetRecoveryPause().
@@ -6669,16 +6670,19 @@ StartupXLOG(void)
error_context_stack = errcontext.previous;
/*
- * Update shared recoveryLastRecPtr after this record has been
- * replayed.
+ * Update lastReplayedEndRecPtr after this record has been
+ * successfully replayed.
*/
SpinLockAcquire(&xlogctl->info_lck);
- xlogctl->recoveryLastRecPtr = EndRecPtr;
+ xlogctl->lastReplayedEndRecPtr = EndRecPtr;
SpinLockRelease(&xlogctl->info_lck);
/* Remember this record as the last-applied one */
LastRec = ReadRecPtr;
+ /* Allow read-only connections if we're consistent now */
+ CheckRecoveryConsistency();
+
/* Exit loop if we reached inclusive recovery target */
if (!recoveryContinue)
break;
@@ -7032,13 +7036,14 @@ CheckRecoveryConsistency(void)
* Have we passed our safe starting point?
*/
if (!reachedConsistency &&
- XLByteLE(minRecoveryPoint, EndRecPtr) &&
+ XLByteLE(minRecoveryPoint, XLogCtl->lastReplayedEndRecPtr) &&
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
{
reachedConsistency = true;
ereport(LOG,
(errmsg("consistent recovery state reached at %X/%X",
- EndRecPtr.xlogid, EndRecPtr.xrecoff)));
+ XLogCtl->lastReplayedEndRecPtr.xlogid,
+ XLogCtl->lastReplayedEndRecPtr.xrecoff)));
}
/*
@@ -9676,7 +9681,7 @@ GetXLogReplayRecPtr(void)
XLogRecPtr recptr;
SpinLockAcquire(&xlogctl->info_lck);
- recptr = xlogctl->recoveryLastRecPtr;
+ recptr = xlogctl->lastReplayedEndRecPtr;
SpinLockRelease(&xlogctl->info_lck);
return recptr;