diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2012-12-13 19:00:00 +0200 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2012-12-13 19:17:32 +0200 |
commit | abfd192b1b5ba5216ac4b1f31dcd553106304b19 (patch) | |
tree | 9dc145a8f72c500e06ccc779a2d54784ff1681c1 /src/backend/access | |
parent | 527668717a660e67c2a6cfd4e85f7a513f99f6f2 (diff) | |
download | postgresql-abfd192b1b5ba5216ac4b1f31dcd553106304b19.tar.gz postgresql-abfd192b1b5ba5216ac4b1f31dcd553106304b19.zip |
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary didn't exactly match the standby's. The
situation where they don't match arises when you have a master and two
standbys, and you promote one of the standbys to become the new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline-related logic in streaming replication
now. First of all, when a standby connects to the primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command, TIMELINE_HISTORY,
and stored in the standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way, although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the
walsender back into the state where it accepts a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/transam/timeline.c | 83 | ||||
-rw-r--r-- | src/backend/access/transam/xlog.c | 55 | ||||
-rw-r--r-- | src/backend/access/transam/xlogfuncs.c | 4 |
3 files changed, 115 insertions, 27 deletions
diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c index 0681944ae5e..b33d230c701 100644 --- a/src/backend/access/transam/timeline.c +++ b/src/backend/access/transam/timeline.c @@ -411,6 +411,89 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, } /* + * Writes a history file for given timeline and contents. + * + * Currently this is only used in the walreceiver process, and so there are + * no locking considerations. But we should be just as tense as XLogFileInit + * to avoid emplacing a bogus file. + */ +void +writeTimeLineHistoryFile(TimeLineID tli, char *content, int size) +{ + char path[MAXPGPATH]; + char tmppath[MAXPGPATH]; + int fd; + + /* + * Write into a temp file name. + */ + snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid()); + + unlink(tmppath); + + /* do not use get_sync_bit() here --- want to fsync only at end of fill */ + fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL, + S_IRUSR | S_IWUSR); + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", tmppath))); + + errno = 0; + if ((int) write(fd, content, size) != size) + { + int save_errno = errno; + + /* + * If we fail to make the file, delete it to release disk space + */ + unlink(tmppath); + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); + } + + if (pg_fsync(fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", tmppath))); + + if (CloseTransientFile(fd)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", tmppath))); + + + /* + * Now move the completed history file into place with its final name. 
+ */ + TLHistoryFilePath(path, tli); + + /* + * Prefer link() to rename() here just to be really sure that we don't + * overwrite an existing logfile. However, there shouldn't be one, so + * rename() is an acceptable substitute except for the truly paranoid. + */ +#if HAVE_WORKING_LINK + if (link(tmppath, path) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not link file \"%s\" to \"%s\": %m", + tmppath, path))); + unlink(tmppath); +#else + if (rename(tmppath, path) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + tmppath, path))); +#endif +} + +/* * Returns true if 'expectedTLEs' contains a timeline with id 'tli' */ bool diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 2d4b62aa840..2deb7e5d89b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -153,6 +153,7 @@ static XLogRecPtr LastRec; /* Local copy of WalRcv->receivedUpto */ static XLogRecPtr receivedUpto = 0; +static TimeLineID receiveTLI = 0; /* * During recovery, lastFullPageWrites keeps track of full_page_writes that @@ -6366,6 +6367,12 @@ StartupXLOG(void) xlogctl->SharedRecoveryInProgress = false; SpinLockRelease(&xlogctl->info_lck); } + + /* + * If there were cascading standby servers connected to us, nudge any + * wal sender processes to notice that we've been promoted. + */ + WalSndWakeup(); } /* @@ -7626,7 +7633,7 @@ CreateRestartPoint(int flags) XLogRecPtr endptr; /* Get the current (or recent) end of xlog */ - endptr = GetStandbyFlushRecPtr(NULL); + endptr = GetStandbyFlushRecPtr(); KeepLogSeg(endptr, &_logSegNo); _logSegNo--; @@ -9087,13 +9094,10 @@ do_pg_abort_backup(void) /* * Get latest redo apply position. * - * Optionally, returns the current recovery target timeline. Callers not - * interested in that may pass NULL for targetTLI. - * * Exported to allow WALReceiver to read the pointer directly. 
*/ XLogRecPtr -GetXLogReplayRecPtr(TimeLineID *targetTLI) +GetXLogReplayRecPtr(void) { /* use volatile pointer to prevent code rearrangement */ volatile XLogCtlData *xlogctl = XLogCtl; @@ -9101,8 +9105,6 @@ GetXLogReplayRecPtr(TimeLineID *targetTLI) SpinLockAcquire(&xlogctl->info_lck); recptr = xlogctl->lastReplayedEndRecPtr; - if (targetTLI) - *targetTLI = xlogctl->RecoveryTargetTLI; SpinLockRelease(&xlogctl->info_lck); return recptr; @@ -9111,18 +9113,15 @@ GetXLogReplayRecPtr(TimeLineID *targetTLI) /* * Get current standby flush position, ie, the last WAL position * known to be fsync'd to disk in standby. - * - * If 'targetTLI' is not NULL, it's set to the current recovery target - * timeline. */ XLogRecPtr -GetStandbyFlushRecPtr(TimeLineID *targetTLI) +GetStandbyFlushRecPtr(void) { XLogRecPtr receivePtr; XLogRecPtr replayPtr; - receivePtr = GetWalRcvWriteRecPtr(NULL); - replayPtr = GetXLogReplayRecPtr(targetTLI); + receivePtr = GetWalRcvWriteRecPtr(NULL, NULL); + replayPtr = GetXLogReplayRecPtr(); if (XLByteLT(receivePtr, replayPtr)) return replayPtr; @@ -9611,7 +9610,10 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, * archive and pg_xlog before failover. */ if (CheckForStandbyTrigger()) + { + ShutdownWalRcv(); return false; + } /* * If primary_conninfo is set, launch walreceiver to try to @@ -9626,8 +9628,14 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, if (PrimaryConnInfo) { XLogRecPtr ptr = fetching_ckpt ? RedoStartLSN : RecPtr; - - RequestXLogStreaming(ptr, PrimaryConnInfo); + TimeLineID tli = tliOfPointInHistory(ptr, expectedTLEs); + + if (curFileTLI > 0 && tli < curFileTLI) + elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u", + (uint32) (ptr >> 32), (uint32) ptr, + tli, curFileTLI); + curFileTLI = tli; + RequestXLogStreaming(curFileTLI, ptr, PrimaryConnInfo); } /* * Move to XLOG_FROM_STREAM state in either case. 
We'll get @@ -9653,10 +9661,10 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, */ /* * Before we leave XLOG_FROM_STREAM state, make sure that - * walreceiver is not running, so that it won't overwrite - * any WAL that we restore from archive. + * walreceiver is not active, so that it won't overwrite + * WAL that we restore from archive. */ - if (WalRcvInProgress()) + if (WalRcvStreaming()) ShutdownWalRcv(); /* @@ -9749,7 +9757,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, /* * Check if WAL receiver is still active. */ - if (!WalRcvInProgress()) + if (!WalRcvStreaming()) { lastSourceFailed = true; break; @@ -9772,8 +9780,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, { XLogRecPtr latestChunkStart; - receivedUpto = GetWalRcvWriteRecPtr(&latestChunkStart); - if (XLByteLT(RecPtr, receivedUpto)) + receivedUpto = GetWalRcvWriteRecPtr(&latestChunkStart, &receiveTLI); + if (XLByteLT(RecPtr, receivedUpto) && receiveTLI == curFileTLI) { havedata = true; if (!XLByteLT(RecPtr, latestChunkStart)) @@ -9888,8 +9896,7 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr) /* * Check to see whether the user-specified trigger file exists and whether a - * promote request has arrived. If either condition holds, request postmaster - * to shut down walreceiver, wait for it to exit, and return true. + * promote request has arrived. If either condition holds, return true. 
*/ static bool CheckForStandbyTrigger(void) @@ -9904,7 +9911,6 @@ CheckForStandbyTrigger(void) { ereport(LOG, (errmsg("received promote request"))); - ShutdownWalRcv(); ResetPromoteTriggered(); triggered = true; return true; @@ -9917,7 +9923,6 @@ CheckForStandbyTrigger(void) { ereport(LOG, (errmsg("trigger file found: %s", TriggerFile))); - ShutdownWalRcv(); unlink(TriggerFile); triggered = true; return true; diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 40c0bd67b57..e91bdc3f4af 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -226,7 +226,7 @@ pg_last_xlog_receive_location(PG_FUNCTION_ARGS) XLogRecPtr recptr; char location[MAXFNAMELEN]; - recptr = GetWalRcvWriteRecPtr(NULL); + recptr = GetWalRcvWriteRecPtr(NULL, NULL); if (recptr == 0) PG_RETURN_NULL(); @@ -248,7 +248,7 @@ pg_last_xlog_replay_location(PG_FUNCTION_ARGS) XLogRecPtr recptr; char location[MAXFNAMELEN]; - recptr = GetXLogReplayRecPtr(NULL); + recptr = GetXLogReplayRecPtr(); if (recptr == 0) PG_RETURN_NULL(); |