diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2012-12-20 14:23:31 +0200 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2012-12-20 14:39:04 +0200 |
commit | af275a12dfeecd621ed9899a9382f26a68310263 (patch) | |
tree | 71752c7e4e3ed62acd0568b65ab135518ff76c4e /src/backend/access/transam/xlog.c | |
parent | 1a11d4609efaae39d9b7472fb965bca1c0aeda01 (diff) | |
download | postgresql-af275a12dfeecd621ed9899a9382f26a68310263.tar.gz postgresql-af275a12dfeecd621ed9899a9382f26a68310263.zip |
Follow TLI of last replayed record, not recovery target TLI, in walsenders.
Most of the time, the last replayed record comes from the recovery target
timeline, but there is a corner case where it makes a difference. When
the startup process scans for a new timeline, and decides to change recovery
target timeline, there is a window where the recovery target TLI has already
been bumped, but there are no WAL segments from the new timeline in pg_xlog
yet. For example, if we have just replayed up to point 0/30002D8, on
timeline 1, there is a WAL file called 000000010000000000000003 in pg_xlog
that contains the WAL up to that point. When recovery switches recovery
target timeline to 2, a walsender can immediately try to read WAL from
0/30002D8, from timeline 2, so it will try to open WAL file
000000020000000000000003. However, that doesn't exist yet - the startup
process hasn't copied that file from the archive yet nor has the walreceiver
streamed it yet, so walsender fails with error "requested WAL segment
000000020000000000000003 has already been removed". That's harmless, in that
the standby will try to reconnect later and by that time the segment is
already created, but error messages that should be ignored are not good.
To fix that, have walsender track the TLI of the last replayed record,
instead of the recovery target timeline. That way walsender will not try to
read anything from timeline 2, until the WAL segment has been created and at
least one record has been replayed from it. The recovery target timeline is
now xlog.c's internal affair, it doesn't need to be exposed in shared memory
anymore.
This fixes the error reported by Thom Brown. depesz reported the same error message,
but I'm not sure if this fixes his scenario.
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- | src/backend/access/transam/xlog.c | 81 |
1 files changed, 27 insertions, 54 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index d7e83a937c9..d808607ecdb 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -453,6 +453,7 @@ typedef struct XLogCtlData * replayed, otherwise it's equal to lastReplayedEndRecPtr. */ XLogRecPtr lastReplayedEndRecPtr; + TimeLineID lastReplayedTLI; XLogRecPtr replayEndRecPtr; TimeLineID replayEndTLI; /* timestamp of last COMMIT/ABORT record replayed (or being replayed) */ @@ -3829,7 +3830,6 @@ rescanLatestTimeLine(void) TimeLineID newtarget; TimeLineHistoryEntry *currentTle = NULL; /* use volatile pointer to prevent code rearrangement */ - volatile XLogCtlData *xlogctl = XLogCtl; newtarget = findNewestTimeLine(recoveryTargetTLI); if (newtarget == recoveryTargetTLI) @@ -3888,20 +3888,10 @@ rescanLatestTimeLine(void) list_free_deep(expectedTLEs); expectedTLEs = newExpectedTLEs; - SpinLockAcquire(&xlogctl->info_lck); - xlogctl->RecoveryTargetTLI = recoveryTargetTLI; - SpinLockRelease(&xlogctl->info_lck); - ereport(LOG, (errmsg("new target timeline is %u", recoveryTargetTLI))); - /* - * Wake up any walsenders to notice that we have a new target timeline. - */ - if (AllowCascadeReplication()) - WalSndWakeup(); - return true; } @@ -5389,11 +5379,9 @@ StartupXLOG(void) ControlFile->minRecoveryPointTLI))); /* - * Save the selected recovery target timeline ID and - * archive_cleanup_command in shared memory so that other processes can - * see them + * Save archive_cleanup_command in shared memory so that other processes + * can see it. */ - XLogCtl->RecoveryTargetTLI = recoveryTargetTLI; strncpy(XLogCtl->archiveCleanupCommand, archiveCleanupCommand ? 
archiveCleanupCommand : "", sizeof(XLogCtl->archiveCleanupCommand)); @@ -5770,6 +5758,7 @@ StartupXLOG(void) xlogctl->replayEndRecPtr = ReadRecPtr; xlogctl->replayEndTLI = ThisTimeLineID; xlogctl->lastReplayedEndRecPtr = EndRecPtr; + xlogctl->lastReplayedTLI = ThisTimeLineID; xlogctl->recoveryLastXTime = 0; xlogctl->currentChunkStartTime = 0; xlogctl->recoveryPause = false; @@ -5837,6 +5826,7 @@ StartupXLOG(void) */ do { + bool switchedTLI = false; #ifdef WAL_DEBUG if (XLOG_DEBUG || (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) || @@ -5942,6 +5932,7 @@ StartupXLOG(void) /* Following WAL records should be run with new TLI */ ThisTimeLineID = newTLI; + switchedTLI = true; } } @@ -5974,6 +5965,7 @@ StartupXLOG(void) */ SpinLockAcquire(&xlogctl->info_lck); xlogctl->lastReplayedEndRecPtr = EndRecPtr; + xlogctl->lastReplayedTLI = ThisTimeLineID; SpinLockRelease(&xlogctl->info_lck); /* Remember this record as the last-applied one */ @@ -5982,6 +5974,13 @@ StartupXLOG(void) /* Allow read-only connections if we're consistent now */ CheckRecoveryConsistency(); + /* + * If this record was a timeline switch, wake up any + * walsenders to notice that we are on a new timeline. 
+ */ + if (switchedTLI && AllowCascadeReplication()) + WalSndWakeup(); + /* Exit loop if we reached inclusive recovery target */ if (!recoveryContinue) break; @@ -6823,23 +6822,6 @@ GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch) } /* - * GetRecoveryTargetTLI - get the current recovery target timeline ID - */ -TimeLineID -GetRecoveryTargetTLI(void) -{ - /* use volatile pointer to prevent code rearrangement */ - volatile XLogCtlData *xlogctl = XLogCtl; - TimeLineID result; - - SpinLockAcquire(&xlogctl->info_lck); - result = xlogctl->RecoveryTargetTLI; - SpinLockRelease(&xlogctl->info_lck); - - return result; -} - -/* * This must be called ONCE during postmaster or standalone-backend shutdown */ void @@ -7642,10 +7624,16 @@ CreateRestartPoint(int flags) */ if (_logSegNo) { + XLogRecPtr receivePtr; + XLogRecPtr replayPtr; XLogRecPtr endptr; - /* Get the current (or recent) end of xlog */ - endptr = GetStandbyFlushRecPtr(); + /* + * Get the current end of xlog replayed or received, whichever is later. + */ + receivePtr = GetWalRcvWriteRecPtr(NULL, NULL); + replayPtr = GetXLogReplayRecPtr(NULL); + endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr; KeepLogSeg(endptr, &_logSegNo); _logSegNo--; @@ -9109,39 +9097,24 @@ do_pg_abort_backup(void) * Exported to allow WALReceiver to read the pointer directly. */ XLogRecPtr -GetXLogReplayRecPtr(void) +GetXLogReplayRecPtr(TimeLineID *replayTLI) { /* use volatile pointer to prevent code rearrangement */ volatile XLogCtlData *xlogctl = XLogCtl; XLogRecPtr recptr; + TimeLineID tli; SpinLockAcquire(&xlogctl->info_lck); recptr = xlogctl->lastReplayedEndRecPtr; + tli = xlogctl->lastReplayedTLI; SpinLockRelease(&xlogctl->info_lck); + if (replayTLI) + *replayTLI = tli; return recptr; } /* - * Get current standby flush position, ie, the last WAL position - * known to be fsync'd to disk in standby. 
- */ -XLogRecPtr -GetStandbyFlushRecPtr(void) -{ - XLogRecPtr receivePtr; - XLogRecPtr replayPtr; - - receivePtr = GetWalRcvWriteRecPtr(NULL, NULL); - replayPtr = GetXLogReplayRecPtr(); - - if (XLByteLT(receivePtr, replayPtr)) - return replayPtr; - else - return receivePtr; -} - -/* * Get latest WAL insert pointer */ XLogRecPtr |