aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/transam/Makefile3
-rw-r--r--src/backend/access/transam/timeline.c378
-rw-r--r--src/backend/access/transam/xlog.c988
-rw-r--r--src/backend/access/transam/xlogarchive.c572
-rw-r--r--src/include/access/timeline.h23
-rw-r--r--src/include/access/xlog_internal.h23
6 files changed, 1058 insertions, 929 deletions
diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index f82f10e989e..700cfd85c08 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -13,7 +13,8 @@ top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = clog.o transam.o varsup.o xact.o rmgr.o slru.o subtrans.o multixact.o \
- twophase.o twophase_rmgr.o xlog.o xlogfuncs.o xlogutils.o
+ timeline.o twophase.o twophase_rmgr.o xlog.o xlogarchive.o xlogfuncs.o \
+ xlogutils.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
new file mode 100644
index 00000000000..89d31c5a1bf
--- /dev/null
+++ b/src/backend/access/transam/timeline.c
@@ -0,0 +1,378 @@
+/*-------------------------------------------------------------------------
+ *
+ * timeline.c
+ * Functions for reading and writing timeline history files.
+ *
+ * A timeline history file lists the timeline switches in the ancestry of a
+ * timeline, in a simple text format. They are archived along with the WAL
+ * segments.
+ *
+ * The files are named like "<tli>.history". For example, if the
+ * database starts up and switches to timeline 5, while processing WAL
+ * segment 000000030000002A00000006 (the old timeline was 3), the timeline
+ * history file would be called "00000005.history".
+ *
+ * Each line in the file represents a timeline switch:
+ *
+ * <parentTLI> <xlogfname> <reason>
+ *
+ * parentTLI ID of the parent timeline
+ * xlogfname filename of the WAL segment where the switch happened
+ * reason human-readable explanation of why the timeline was changed
+ *
+ * The fields are separated by tabs. Lines beginning with # are comments, and
+ * are ignored. Empty lines are also ignored.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/timeline.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "access/timeline.h"
+#include "access/xlog_internal.h"
+#include "access/xlogdefs.h"
+#include "storage/fd.h"
+
+/*
+ * Try to read a timeline's history file.
+ *
+ * targetTLI: ID of the timeline whose ancestry is wanted
+ *
+ * If successful, return the list of component TLIs (the given TLI followed by
+ * its ancestor TLIs, newest first). If we can't find the history file, or
+ * targetTLI is 1, assume that the timeline has no parents, and return a list
+ * of just the specified timeline ID.
+ *
+ * Every other problem is reported as FATAL: failing to open the file for a
+ * reason other than ENOENT, a line that does not start with a number, a
+ * timeline ID that does not fit in TimeLineID, or timeline IDs that are not
+ * strictly increasing and smaller than targetTLI.
+ */
+List *
+readTimeLineHistory(TimeLineID targetTLI)
+{
+	List	   *result;
+	char		path[MAXPGPATH];
+	char		histfname[MAXFNAMELEN];
+	char		fline[MAXPGPATH];
+	FILE	   *fd;
+
+	/* Timeline 1 does not have a history file, so no need to check */
+	if (targetTLI == 1)
+		return list_make1_int((int) targetTLI);
+
+	if (InArchiveRecovery)
+	{
+		TLHistoryFileName(histfname, targetTLI);
+		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+	}
+	else
+		TLHistoryFilePath(path, targetTLI);
+
+	fd = AllocateFile(path, "r");
+	if (fd == NULL)
+	{
+		if (errno != ENOENT)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not open file \"%s\": %m", path)));
+		/* Not there, so assume no parents */
+		return list_make1_int((int) targetTLI);
+	}
+
+	result = NIL;
+
+	/*
+	 * Parse the file...
+	 */
+	while (fgets(fline, sizeof(fline), fd) != NULL)
+	{
+		char	   *ptr;
+		char	   *endptr;
+		unsigned long rawtli;
+		TimeLineID	tli;
+
+		/* skip leading whitespace and check for # comment */
+		for (ptr = fline; *ptr; ptr++)
+		{
+			if (!isspace((unsigned char) *ptr))
+				break;
+		}
+		if (*ptr == '\0' || *ptr == '#')
+			continue;
+
+		/* expect a numeric timeline ID as first field of line */
+		errno = 0;
+		rawtli = strtoul(ptr, &endptr, 0);
+		if (endptr == ptr)
+			ereport(FATAL,
+					(errmsg("syntax error in history file: %s", fline),
+					 errhint("Expected a numeric timeline ID.")));
+
+		/*
+		 * Reject numbers that overflowed strtoul() or that would change
+		 * value when narrowed to TimeLineID.  Without this, an oversized
+		 * entry would silently turn into some unrelated timeline ID and
+		 * could even satisfy the ordering check below.
+		 */
+		tli = (TimeLineID) rawtli;
+		if (errno == ERANGE || (unsigned long) tli != rawtli)
+			ereport(FATAL,
+					(errmsg("invalid data in history file: %s", fline),
+					 errhint("Timeline ID is out of range.")));
+
+		if (result &&
+			tli <= (TimeLineID) linitial_int(result))
+			ereport(FATAL,
+					(errmsg("invalid data in history file: %s", fline),
+					 errhint("Timeline IDs must be in increasing sequence.")));
+
+		/* Build list with newest item first */
+		result = lcons_int((int) tli, result);
+
+		/* we ignore the remainder of each line */
+	}
+
+	FreeFile(fd);
+
+	/* the timeline we were asked about must be newer than all its ancestors */
+	if (result &&
+		targetTLI <= (TimeLineID) linitial_int(result))
+		ereport(FATAL,
+				(errmsg("invalid data in history file \"%s\"", path),
+				 errhint("Timeline IDs must be less than child timeline's ID.")));
+
+	result = lcons_int((int) targetTLI, result);
+
+	ereport(DEBUG3,
+			(errmsg_internal("history of timeline %u is %s",
+							 targetTLI, nodeToString(result))));
+
+	return result;
+}
+
+/*
+ * Probe whether a timeline history file exists for the given timeline ID.
+ *
+ * Returns true if the file could be opened for reading, false if it does not
+ * exist (or probeTLI is 1, which never has a history file). Any other failure
+ * to open the file is reported as FATAL.
+ */
+bool
+existsTimeLineHistory(TimeLineID probeTLI)
+{
+	char		histpath[MAXPGPATH];
+	FILE	   *fp;
+
+	/* Timeline 1 does not have a history file, so no need to check */
+	if (probeTLI == 1)
+		return false;
+
+	/* Work out where to look: the local path, or a copy fetched from archive */
+	if (!InArchiveRecovery)
+	{
+		TLHistoryFilePath(histpath, probeTLI);
+	}
+	else
+	{
+		char		histfname[MAXFNAMELEN];
+
+		TLHistoryFileName(histfname, probeTLI);
+		RestoreArchivedFile(histpath, histfname, "RECOVERYHISTORY", 0);
+	}
+
+	fp = AllocateFile(histpath, "r");
+	if (fp == NULL)
+	{
+		/* a missing file is the expected "no" answer; anything else is fatal */
+		if (errno != ENOENT)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not open file \"%s\": %m", histpath)));
+		return false;
+	}
+
+	FreeFile(fp);
+	return true;
+}
+
+/*
+ * Find the newest existing timeline, assuming that startTLI exists.
+ *
+ * Returns the last timeline ID, counting up from startTLI, for which a
+ * history file can still be found.
+ *
+ * Note: while this is somewhat heuristic, it does positively guarantee
+ * that (result + 1) is not a known timeline, and therefore it should
+ * be safe to assign that ID to a new timeline.
+ */
+TimeLineID
+findNewestTimeLine(TimeLineID startTLI)
+{
+	TimeLineID	newestTLI = startTLI;
+
+	/*
+	 * The algorithm is just to probe for the existence of timeline history
+	 * files, stopping at the first ID that has none. XXX is it useful to
+	 * allow gaps in the sequence?
+	 */
+	while (existsTimeLineHistory(newestTLI + 1))
+		newestTLI++;
+
+	return newestTLI;
+}
+
+/*
+ * Create a new timeline history file.
+ *
+ * newTLI: ID of the new timeline
+ * parentTLI: ID of its immediate parent
+ * endTLI, endLogSegNo: timeline ID and segment number of the last used WAL
+ * file (turned into a file name with XLogFileName), for annotation purposes
+ * reason: human-readable explanation of why the timeline was switched
+ *
+ * The new file consists of the parent's history file, if one can be found,
+ * copied verbatim, followed by one line "<parentTLI>\t<xlogfname>\t<reason>"
+ * describing this switch. It is built under a temporary name in XLOGDIR,
+ * fsync'd and closed, and only then linked or renamed to its final name.
+ *
+ * Failures to create, read, write, fsync, close or link/rename the files are
+ * reported with ereport(ERROR).
+ *
+ * Currently this is only used at the end of recovery, and so there are no
+ * locking considerations. But we should be just as tense as XLogFileInit to
+ * avoid emplacing a bogus file.
+ *
+ * NOTE(review): only the write-failure paths unlink the temp file before
+ * reporting; the read, fsync, close and link/rename failure paths leave it
+ * behind, and no error path closes fd or srcfd first. Presumably acceptable
+ * because the caller does not continue after the error -- confirm.
+ */
+void
+writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
+ TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason)
+{
+ char path[MAXPGPATH];
+ char tmppath[MAXPGPATH];
+ char histfname[MAXFNAMELEN];
+ char xlogfname[MAXFNAMELEN];
+ char buffer[BLCKSZ]; /* copy buffer, reused below to format the new line */
+ int srcfd; /* parent's history file; stays negative if there is none */
+ int fd; /* the temp file being written */
+ int nbytes;
+
+ Assert(newTLI > parentTLI); /* else bad selection of newTLI */
+
+ /*
+ * Write into a temp file name. The name is derived from our PID, and any
+ * leftover file of that name is removed first so that the O_EXCL create
+ * below starts from a fresh file.
+ */
+ snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
+
+ unlink(tmppath);
+
+ /* do not use get_sync_bit() here --- want to fsync only at end of fill */
+ fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m", tmppath)));
+
+ /*
+ * If a history file exists for the parent, copy it verbatim. During
+ * archive recovery RestoreArchivedFile fills in the path to read from;
+ * otherwise the normal history file path is used.
+ */
+ if (InArchiveRecovery)
+ {
+ TLHistoryFileName(histfname, parentTLI);
+ RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+ }
+ else
+ TLHistoryFilePath(path, parentTLI);
+
+ srcfd = BasicOpenFile(path, O_RDONLY, 0);
+ if (srcfd < 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", path)));
+ /* Not there, so assume parent has no parents */
+ }
+ else
+ {
+ for (;;)
+ {
+ errno = 0; /* so a stale errno can't be mistaken for a read error */
+ nbytes = (int) read(srcfd, buffer, sizeof(buffer));
+ if (nbytes < 0 || errno != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", path)));
+ if (nbytes == 0) /* end of parent file */
+ break;
+ errno = 0; /* lets us detect a short write that set no errno */
+ if ((int) write(fd, buffer, nbytes) != nbytes)
+ {
+ int save_errno = errno; /* unlink() below may clobber errno */
+
+ /*
+ * If we fail to make the file, delete it to release disk
+ * space
+ */
+ unlink(tmppath);
+
+ /*
+ * if write didn't set errno, assume problem is no disk space
+ */
+ errno = save_errno ? save_errno : ENOSPC;
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write to file \"%s\": %m", tmppath)));
+ }
+ }
+ close(srcfd); /* srcfd keeps its non-negative value; tested again below */
+ }
+
+ /*
+ * Append one line with the details of this timeline split.
+ *
+ * If we did have a parent file, insert an extra newline just in case the
+ * parent file failed to end with one. (srcfd < 0 means no parent file was
+ * copied.)
+ */
+ XLogFileName(xlogfname, endTLI, endLogSegNo);
+
+ snprintf(buffer, sizeof(buffer),
+ "%s%u\t%s\t%s\n",
+ (srcfd < 0) ? "" : "\n",
+ parentTLI,
+ xlogfname,
+ reason);
+
+ nbytes = strlen(buffer);
+ errno = 0;
+ if ((int) write(fd, buffer, nbytes) != nbytes)
+ {
+ int save_errno = errno; /* unlink() below may clobber errno */
+
+ /*
+ * If we fail to make the file, delete it to release disk space
+ */
+ unlink(tmppath);
+ /* if write didn't set errno, assume problem is no disk space */
+ errno = save_errno ? save_errno : ENOSPC;
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write to file \"%s\": %m", tmppath)));
+ }
+
+ if (pg_fsync(fd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m", tmppath)));
+
+ if (close(fd))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m", tmppath)));
+
+
+ /*
+ * Now move the completed history file into place with its final name.
+ * "path" is reused here for the new timeline's history file path.
+ */
+ TLHistoryFilePath(path, newTLI);
+
+ /*
+ * Prefer link() to rename() here just to be really sure that we don't
+ * overwrite an existing logfile. However, there shouldn't be one, so
+ * rename() is an acceptable substitute except for the truly paranoid.
+ *
+ * In the link() case the temp name is removed afterwards, leaving only
+ * the final name.
+ */
+#if HAVE_WORKING_LINK
+ if (link(tmppath, path) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not link file \"%s\" to \"%s\": %m",
+ tmppath, path)));
+ unlink(tmppath);
+#else
+ if (rename(tmppath, path) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ tmppath, path)));
+#endif
+}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ff56c26ab4c..b3519b04b33 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -26,6 +26,7 @@
#include "access/clog.h"
#include "access/multixact.h"
#include "access/subtrans.h"
+#include "access/timeline.h"
#include "access/transam.h"
#include "access/tuptoaster.h"
#include "access/twophase.h"
@@ -190,14 +191,14 @@ static bool LocalHotStandbyActive = false;
*/
static int LocalXLogInsertAllowed = -1;
-/* Are we recovering using offline XLOG archives? */
-static bool InArchiveRecovery = false;
+/* Are we recovering using offline XLOG archives? (only valid in the startup process) */
+bool InArchiveRecovery = false;
/* Was the last xlog file restored from archive, or local? */
static bool restoredFromArchive = false;
/* options taken from recovery.conf for archive recovery */
-static char *recoveryRestoreCommand = NULL;
+char *recoveryRestoreCommand = NULL;
static char *recoveryEndCommand = NULL;
static char *archiveCleanupCommand = NULL;
static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
@@ -208,7 +209,7 @@ static TimestampTz recoveryTargetTime;
static char *recoveryTargetName;
/* options taken from recovery.conf for XLOG streaming */
-static bool StandbyMode = false;
+bool StandbyMode = false;
static char *PrimaryConnInfo = NULL;
static char *TriggerFile = NULL;
@@ -604,11 +605,6 @@ typedef struct xl_restore_point
} xl_restore_point;
-static void XLogArchiveNotify(const char *xlog);
-static void XLogArchiveNotifySeg(XLogSegNo segno);
-static bool XLogArchiveCheckDone(const char *xlog);
-static bool XLogArchiveIsBusy(const char *xlog);
-static void XLogArchiveCleanup(const char *xlog);
static void readRecoveryCommandFile(void);
static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
@@ -636,10 +632,6 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
bool randAccess);
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
static void XLogFileClose(void);
-static bool RestoreArchivedFile(char *path, const char *xlogfname,
- const char *recovername, off_t expectedSize);
-static void ExecuteRecoveryCommand(char *command, char *commandName,
- bool failOnerror);
static void PreallocXlogFiles(XLogRecPtr endptr);
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr);
static void UpdateLastRemovedPtr(char *filename);
@@ -652,12 +644,7 @@ static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode);
static bool ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record,
int emode, bool randAccess);
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
-static List *readTimeLineHistory(TimeLineID targetTLI);
-static bool existsTimeLineHistory(TimeLineID probeTLI);
static bool rescanLatestTimeLine(void);
-static TimeLineID findNewestTimeLine(TimeLineID startTLI);
-static void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
- TimeLineID endTLI, XLogSegNo endLogSegNo);
static void WriteControlFile(void);
static void ReadControlFile(void);
static char *str_time(pg_time_t tnow);
@@ -1279,168 +1266,6 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites,
}
/*
- * XLogArchiveNotify
- *
- * Create an archive notification file
- *
- * The name of the notification file is the message that will be picked up
- * by the archiver, e.g. we write 0000000100000001000000C6.ready
- * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6,
- * then when complete, rename it to 0000000100000001000000C6.done
- */
-static void
-XLogArchiveNotify(const char *xlog)
-{
- char archiveStatusPath[MAXPGPATH];
- FILE *fd;
-
- /* insert an otherwise empty file called <XLOG>.ready */
- StatusFilePath(archiveStatusPath, xlog, ".ready");
- fd = AllocateFile(archiveStatusPath, "w");
- if (fd == NULL)
- {
- ereport(LOG,
- (errcode_for_file_access(),
- errmsg("could not create archive status file \"%s\": %m",
- archiveStatusPath)));
- return;
- }
- if (FreeFile(fd))
- {
- ereport(LOG,
- (errcode_for_file_access(),
- errmsg("could not write archive status file \"%s\": %m",
- archiveStatusPath)));
- return;
- }
-
- /* Notify archiver that it's got something to do */
- if (IsUnderPostmaster)
- SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
-}
-
-/*
- * Convenience routine to notify using segment number representation of filename
- */
-static void
-XLogArchiveNotifySeg(XLogSegNo segno)
-{
- char xlog[MAXFNAMELEN];
-
- XLogFileName(xlog, ThisTimeLineID, segno);
- XLogArchiveNotify(xlog);
-}
-
-/*
- * XLogArchiveCheckDone
- *
- * This is called when we are ready to delete or recycle an old XLOG segment
- * file or backup history file. If it is okay to delete it then return true.
- * If it is not time to delete it, make sure a .ready file exists, and return
- * false.
- *
- * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists,
- * then return false; else create <XLOG>.ready and return false.
- *
- * The reason we do things this way is so that if the original attempt to
- * create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
- */
-static bool
-XLogArchiveCheckDone(const char *xlog)
-{
- char archiveStatusPath[MAXPGPATH];
- struct stat stat_buf;
-
- /* Always deletable if archiving is off */
- if (!XLogArchivingActive())
- return true;
-
- /* First check for .done --- this means archiver is done with it */
- StatusFilePath(archiveStatusPath, xlog, ".done");
- if (stat(archiveStatusPath, &stat_buf) == 0)
- return true;
-
- /* check for .ready --- this means archiver is still busy with it */
- StatusFilePath(archiveStatusPath, xlog, ".ready");
- if (stat(archiveStatusPath, &stat_buf) == 0)
- return false;
-
- /* Race condition --- maybe archiver just finished, so recheck */
- StatusFilePath(archiveStatusPath, xlog, ".done");
- if (stat(archiveStatusPath, &stat_buf) == 0)
- return true;
-
- /* Retry creation of the .ready file */
- XLogArchiveNotify(xlog);
- return false;
-}
-
-/*
- * XLogArchiveIsBusy
- *
- * Check to see if an XLOG segment file is still unarchived.
- * This is almost but not quite the inverse of XLogArchiveCheckDone: in
- * the first place we aren't chartered to recreate the .ready file, and
- * in the second place we should consider that if the file is already gone
- * then it's not busy. (This check is needed to handle the race condition
- * that a checkpoint already deleted the no-longer-needed file.)
- */
-static bool
-XLogArchiveIsBusy(const char *xlog)
-{
- char archiveStatusPath[MAXPGPATH];
- struct stat stat_buf;
-
- /* First check for .done --- this means archiver is done with it */
- StatusFilePath(archiveStatusPath, xlog, ".done");
- if (stat(archiveStatusPath, &stat_buf) == 0)
- return false;
-
- /* check for .ready --- this means archiver is still busy with it */
- StatusFilePath(archiveStatusPath, xlog, ".ready");
- if (stat(archiveStatusPath, &stat_buf) == 0)
- return true;
-
- /* Race condition --- maybe archiver just finished, so recheck */
- StatusFilePath(archiveStatusPath, xlog, ".done");
- if (stat(archiveStatusPath, &stat_buf) == 0)
- return false;
-
- /*
- * Check to see if the WAL file has been removed by checkpoint, which
- * implies it has already been archived, and explains why we can't see a
- * status file for it.
- */
- snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog);
- if (stat(archiveStatusPath, &stat_buf) != 0 &&
- errno == ENOENT)
- return false;
-
- return true;
-}
-
-/*
- * XLogArchiveCleanup
- *
- * Cleanup archive notification file(s) for a particular xlog segment
- */
-static void
-XLogArchiveCleanup(const char *xlog)
-{
- char archiveStatusPath[MAXPGPATH];
-
- /* Remove the .done file */
- StatusFilePath(archiveStatusPath, xlog, ".done");
- unlink(archiveStatusPath);
- /* should we complain about failure? */
-
- /* Remove the .ready file if present --- normally it shouldn't be */
- StatusFilePath(archiveStatusPath, xlog, ".ready");
- unlink(archiveStatusPath);
- /* should we complain about failure? */
-}
-
-/*
* Advance the Insert state to the next buffer page, writing out the next
* buffer if it still contains unwritten data.
*
@@ -2955,386 +2780,6 @@ XLogFileClose(void)
}
/*
- * Attempt to retrieve the specified file from off-line archival storage.
- * If successful, fill "path" with its complete path (note that this will be
- * a temp file name that doesn't follow the normal naming convention), and
- * return TRUE.
- *
- * If not successful, fill "path" with the name of the normal on-line file
- * (which may or may not actually exist, but we'll try to use it), and return
- * FALSE.
- *
- * For fixed-size files, the caller may pass the expected size as an
- * additional crosscheck on successful recovery. If the file size is not
- * known, set expectedSize = 0.
- */
-static bool
-RestoreArchivedFile(char *path, const char *xlogfname,
- const char *recovername, off_t expectedSize)
-{
- char xlogpath[MAXPGPATH];
- char xlogRestoreCmd[MAXPGPATH];
- char lastRestartPointFname[MAXPGPATH];
- char *dp;
- char *endp;
- const char *sp;
- int rc;
- bool signaled;
- struct stat stat_buf;
- XLogSegNo restartSegNo;
-
- /* In standby mode, restore_command might not be supplied */
- if (recoveryRestoreCommand == NULL)
- goto not_available;
-
- /*
- * When doing archive recovery, we always prefer an archived log file even
- * if a file of the same name exists in XLOGDIR. The reason is that the
- * file in XLOGDIR could be an old, un-filled or partly-filled version
- * that was copied and restored as part of backing up $PGDATA.
- *
- * We could try to optimize this slightly by checking the local copy
- * lastchange timestamp against the archived copy, but we have no API to
- * do this, nor can we guarantee that the lastchange timestamp was
- * preserved correctly when we copied to archive. Our aim is robustness,
- * so we elect not to do this.
- *
- * If we cannot obtain the log file from the archive, however, we will try
- * to use the XLOGDIR file if it exists. This is so that we can make use
- * of log segments that weren't yet transferred to the archive.
- *
- * Notice that we don't actually overwrite any files when we copy back
- * from archive because the recoveryRestoreCommand may inadvertently
- * restore inappropriate xlogs, or they may be corrupt, so we may wish to
- * fallback to the segments remaining in current XLOGDIR later. The
- * copy-from-archive filename is always the same, ensuring that we don't
- * run out of disk space on long recoveries.
- */
- snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
-
- /*
- * Make sure there is no existing file named recovername.
- */
- if (stat(xlogpath, &stat_buf) != 0)
- {
- if (errno != ENOENT)
- ereport(FATAL,
- (errcode_for_file_access(),
- errmsg("could not stat file \"%s\": %m",
- xlogpath)));
- }
- else
- {
- if (unlink(xlogpath) != 0)
- ereport(FATAL,
- (errcode_for_file_access(),
- errmsg("could not remove file \"%s\": %m",
- xlogpath)));
- }
-
- /*
- * Calculate the archive file cutoff point for use during log shipping
- * replication. All files earlier than this point can be deleted from the
- * archive, though there is no requirement to do so.
- *
- * We initialise this with the filename of an InvalidXLogRecPtr, which
- * will prevent the deletion of any WAL files from the archive because of
- * the alphabetic sorting property of WAL filenames.
- *
- * Once we have successfully located the redo pointer of the checkpoint
- * from which we start recovery we never request a file prior to the redo
- * pointer of the last restartpoint. When redo begins we know that we have
- * successfully located it, so there is no need for additional status
- * flags to signify the point when we can begin deleting WAL files from
- * the archive.
- */
- if (InRedo)
- {
- XLByteToSeg(ControlFile->checkPointCopy.redo, restartSegNo);
- XLogFileName(lastRestartPointFname,
- ControlFile->checkPointCopy.ThisTimeLineID,
- restartSegNo);
- /* we shouldn't need anything earlier than last restart point */
- Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
- }
- else
- XLogFileName(lastRestartPointFname, 0, 0L);
-
- /*
- * construct the command to be executed
- */
- dp = xlogRestoreCmd;
- endp = xlogRestoreCmd + MAXPGPATH - 1;
- *endp = '\0';
-
- for (sp = recoveryRestoreCommand; *sp; sp++)
- {
- if (*sp == '%')
- {
- switch (sp[1])
- {
- case 'p':
- /* %p: relative path of target file */
- sp++;
- StrNCpy(dp, xlogpath, endp - dp);
- make_native_path(dp);
- dp += strlen(dp);
- break;
- case 'f':
- /* %f: filename of desired file */
- sp++;
- StrNCpy(dp, xlogfname, endp - dp);
- dp += strlen(dp);
- break;
- case 'r':
- /* %r: filename of last restartpoint */
- sp++;
- StrNCpy(dp, lastRestartPointFname, endp - dp);
- dp += strlen(dp);
- break;
- case '%':
- /* convert %% to a single % */
- sp++;
- if (dp < endp)
- *dp++ = *sp;
- break;
- default:
- /* otherwise treat the % as not special */
- if (dp < endp)
- *dp++ = *sp;
- break;
- }
- }
- else
- {
- if (dp < endp)
- *dp++ = *sp;
- }
- }
- *dp = '\0';
-
- ereport(DEBUG3,
- (errmsg_internal("executing restore command \"%s\"",
- xlogRestoreCmd)));
-
- /*
- * Check signals before restore command and reset afterwards.
- */
- PreRestoreCommand();
-
- /*
- * Copy xlog from archival storage to XLOGDIR
- */
- rc = system(xlogRestoreCmd);
-
- PostRestoreCommand();
-
- if (rc == 0)
- {
- /*
- * command apparently succeeded, but let's make sure the file is
- * really there now and has the correct size.
- */
- if (stat(xlogpath, &stat_buf) == 0)
- {
- if (expectedSize > 0 && stat_buf.st_size != expectedSize)
- {
- int elevel;
-
- /*
- * If we find a partial file in standby mode, we assume it's
- * because it's just being copied to the archive, and keep
- * trying.
- *
- * Otherwise treat a wrong-sized file as FATAL to ensure the
- * DBA would notice it, but is that too strong? We could try
- * to plow ahead with a local copy of the file ... but the
- * problem is that there probably isn't one, and we'd
- * incorrectly conclude we've reached the end of WAL and we're
- * done recovering ...
- */
- if (StandbyMode && stat_buf.st_size < expectedSize)
- elevel = DEBUG1;
- else
- elevel = FATAL;
- ereport(elevel,
- (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
- xlogfname,
- (unsigned long) stat_buf.st_size,
- (unsigned long) expectedSize)));
- return false;
- }
- else
- {
- ereport(LOG,
- (errmsg("restored log file \"%s\" from archive",
- xlogfname)));
- strcpy(path, xlogpath);
- return true;
- }
- }
- else
- {
- /* stat failed */
- if (errno != ENOENT)
- ereport(FATAL,
- (errcode_for_file_access(),
- errmsg("could not stat file \"%s\": %m",
- xlogpath)));
- }
- }
-
- /*
- * Remember, we rollforward UNTIL the restore fails so failure here is
- * just part of the process... that makes it difficult to determine
- * whether the restore failed because there isn't an archive to restore,
- * or because the administrator has specified the restore program
- * incorrectly. We have to assume the former.
- *
- * However, if the failure was due to any sort of signal, it's best to
- * punt and abort recovery. (If we "return false" here, upper levels will
- * assume that recovery is complete and start up the database!) It's
- * essential to abort on child SIGINT and SIGQUIT, because per spec
- * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
- * those it's a good bet we should have gotten it too.
- *
- * On SIGTERM, assume we have received a fast shutdown request, and exit
- * cleanly. It's pure chance whether we receive the SIGTERM first, or the
- * child process. If we receive it first, the signal handler will call
- * proc_exit, otherwise we do it here. If we or the child process received
- * SIGTERM for any other reason than a fast shutdown request, postmaster
- * will perform an immediate shutdown when it sees us exiting
- * unexpectedly.
- *
- * Per the Single Unix Spec, shells report exit status > 128 when a called
- * command died on a signal. Also, 126 and 127 are used to report
- * problems such as an unfindable command; treat those as fatal errors
- * too.
- */
- if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
- proc_exit(1);
-
- signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
-
- ereport(signaled ? FATAL : DEBUG2,
- (errmsg("could not restore file \"%s\" from archive: return code %d",
- xlogfname, rc)));
-
-not_available:
-
- /*
- * if an archived file is not available, there might still be a version of
- * this file in XLOGDIR, so return that as the filename to open.
- *
- * In many recovery scenarios we expect this to fail also, but if so that
- * just means we've reached the end of WAL.
- */
- snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
- return false;
-}
-
-/*
- * Attempt to execute an external shell command during recovery.
- *
- * 'command' is the shell command to be executed, 'commandName' is a
- * human-readable name describing the command emitted in the logs. If
- * 'failOnSignal' is true and the command is killed by a signal, a FATAL
- * error is thrown. Otherwise a WARNING is emitted.
- *
- * This is currently used for recovery_end_command and archive_cleanup_command.
- */
-static void
-ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
-{
- char xlogRecoveryCmd[MAXPGPATH];
- char lastRestartPointFname[MAXPGPATH];
- char *dp;
- char *endp;
- const char *sp;
- int rc;
- bool signaled;
- XLogSegNo restartSegNo;
-
- Assert(command && commandName);
-
- /*
- * Calculate the archive file cutoff point for use during log shipping
- * replication. All files earlier than this point can be deleted from the
- * archive, though there is no requirement to do so.
- */
- LWLockAcquire(ControlFileLock, LW_SHARED);
- XLByteToSeg(ControlFile->checkPointCopy.redo, restartSegNo);
- XLogFileName(lastRestartPointFname,
- ControlFile->checkPointCopy.ThisTimeLineID,
- restartSegNo);
- LWLockRelease(ControlFileLock);
-
- /*
- * construct the command to be executed
- */
- dp = xlogRecoveryCmd;
- endp = xlogRecoveryCmd + MAXPGPATH - 1;
- *endp = '\0';
-
- for (sp = command; *sp; sp++)
- {
- if (*sp == '%')
- {
- switch (sp[1])
- {
- case 'r':
- /* %r: filename of last restartpoint */
- sp++;
- StrNCpy(dp, lastRestartPointFname, endp - dp);
- dp += strlen(dp);
- break;
- case '%':
- /* convert %% to a single % */
- sp++;
- if (dp < endp)
- *dp++ = *sp;
- break;
- default:
- /* otherwise treat the % as not special */
- if (dp < endp)
- *dp++ = *sp;
- break;
- }
- }
- else
- {
- if (dp < endp)
- *dp++ = *sp;
- }
- }
- *dp = '\0';
-
- ereport(DEBUG3,
- (errmsg_internal("executing %s \"%s\"", commandName, command)));
-
- /*
- * execute the constructed command
- */
- rc = system(xlogRecoveryCmd);
- if (rc != 0)
- {
- /*
- * If the failure was due to any sort of signal, it's best to punt and
- * abort recovery. See also detailed comments on signals in
- * RestoreArchivedFile().
- */
- signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
-
- ereport((signaled && failOnSignal) ? FATAL : WARNING,
- /*------
- translator: First %s represents a recovery.conf parameter name like
- "recovery_end_command", and the 2nd is the value of that parameter. */
- (errmsg("%s \"%s\": return code %d", commandName,
- command, rc)));
- }
-}
-
-/*
* Preallocate log files beyond the specified log endpoint.
*
* XXX this is currently extremely conservative, since it forces only one
@@ -4305,140 +3750,6 @@ ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record, int emode,
}
/*
- * Try to read a timeline's history file.
- *
- * If successful, return the list of component TLIs (the given TLI followed by
- * its ancestor TLIs). If we can't find the history file, assume that the
- * timeline has no parents, and return a list of just the specified timeline
- * ID.
- */
-static List *
-readTimeLineHistory(TimeLineID targetTLI)
-{
- List *result;
- char path[MAXPGPATH];
- char histfname[MAXFNAMELEN];
- char fline[MAXPGPATH];
- FILE *fd;
-
- /* Timeline 1 does not have a history file, so no need to check */
- if (targetTLI == 1)
- return list_make1_int((int) targetTLI);
-
- if (InArchiveRecovery)
- {
- TLHistoryFileName(histfname, targetTLI);
- RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
- }
- else
- TLHistoryFilePath(path, targetTLI);
-
- fd = AllocateFile(path, "r");
- if (fd == NULL)
- {
- if (errno != ENOENT)
- ereport(FATAL,
- (errcode_for_file_access(),
- errmsg("could not open file \"%s\": %m", path)));
- /* Not there, so assume no parents */
- return list_make1_int((int) targetTLI);
- }
-
- result = NIL;
-
- /*
- * Parse the file...
- */
- while (fgets(fline, sizeof(fline), fd) != NULL)
- {
- /* skip leading whitespace and check for # comment */
- char *ptr;
- char *endptr;
- TimeLineID tli;
-
- for (ptr = fline; *ptr; ptr++)
- {
- if (!isspace((unsigned char) *ptr))
- break;
- }
- if (*ptr == '\0' || *ptr == '#')
- continue;
-
- /* expect a numeric timeline ID as first field of line */
- tli = (TimeLineID) strtoul(ptr, &endptr, 0);
- if (endptr == ptr)
- ereport(FATAL,
- (errmsg("syntax error in history file: %s", fline),
- errhint("Expected a numeric timeline ID.")));
-
- if (result &&
- tli <= (TimeLineID) linitial_int(result))
- ereport(FATAL,
- (errmsg("invalid data in history file: %s", fline),
- errhint("Timeline IDs must be in increasing sequence.")));
-
- /* Build list with newest item first */
- result = lcons_int((int) tli, result);
-
- /* we ignore the remainder of each line */
- }
-
- FreeFile(fd);
-
- if (result &&
- targetTLI <= (TimeLineID) linitial_int(result))
- ereport(FATAL,
- (errmsg("invalid data in history file \"%s\"", path),
- errhint("Timeline IDs must be less than child timeline's ID.")));
-
- result = lcons_int((int) targetTLI, result);
-
- ereport(DEBUG3,
- (errmsg_internal("history of timeline %u is %s",
- targetTLI, nodeToString(result))));
-
- return result;
-}
-
-/*
- * Probe whether a timeline history file exists for the given timeline ID
- */
-static bool
-existsTimeLineHistory(TimeLineID probeTLI)
-{
- char path[MAXPGPATH];
- char histfname[MAXFNAMELEN];
- FILE *fd;
-
- /* Timeline 1 does not have a history file, so no need to check */
- if (probeTLI == 1)
- return false;
-
- if (InArchiveRecovery)
- {
- TLHistoryFileName(histfname, probeTLI);
- RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
- }
- else
- TLHistoryFilePath(path, probeTLI);
-
- fd = AllocateFile(path, "r");
- if (fd != NULL)
- {
- FreeFile(fd);
- return true;
- }
- else
- {
- if (errno != ENOENT)
- ereport(FATAL,
- (errcode_for_file_access(),
- errmsg("could not open file \"%s\": %m", path)));
- return false;
- }
-}
-
-/*
* Scan for new timelines that might have appeared in the archive since we
* started recovery.
*
@@ -4502,239 +3813,6 @@ rescanLatestTimeLine(void)
}
/*
- * Find the newest existing timeline, assuming that startTLI exists.
- *
- * Note: while this is somewhat heuristic, it does positively guarantee
- * that (result + 1) is not a known timeline, and therefore it should
- * be safe to assign that ID to a new timeline.
- */
-static TimeLineID
-findNewestTimeLine(TimeLineID startTLI)
-{
- TimeLineID newestTLI;
- TimeLineID probeTLI;
-
- /*
- * The algorithm is just to probe for the existence of timeline history
- * files. XXX is it useful to allow gaps in the sequence?
- */
- newestTLI = startTLI;
-
- for (probeTLI = startTLI + 1;; probeTLI++)
- {
- if (existsTimeLineHistory(probeTLI))
- {
- newestTLI = probeTLI; /* probeTLI exists */
- }
- else
- {
- /* doesn't exist, assume we're done */
- break;
- }
- }
-
- return newestTLI;
-}
-
-/*
- * Create a new timeline history file.
- *
- * newTLI: ID of the new timeline
- * parentTLI: ID of its immediate parent
- * endTLI et al: ID of the last used WAL file, for annotation purposes
- *
- * Currently this is only used during recovery, and so there are no locking
- * considerations. But we should be just as tense as XLogFileInit to avoid
- * emplacing a bogus file.
- */
-static void
-writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
- TimeLineID endTLI, XLogSegNo endLogSegNo)
-{
- char path[MAXPGPATH];
- char tmppath[MAXPGPATH];
- char histfname[MAXFNAMELEN];
- char xlogfname[MAXFNAMELEN];
- char buffer[BLCKSZ];
- int srcfd;
- int fd;
- int nbytes;
-
- Assert(newTLI > parentTLI); /* else bad selection of newTLI */
-
- /*
- * Write into a temp file name.
- */
- snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
-
- unlink(tmppath);
-
- /* do not use get_sync_bit() here --- want to fsync only at end of fill */
- fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
- S_IRUSR | S_IWUSR);
- if (fd < 0)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not create file \"%s\": %m", tmppath)));
-
- /*
- * If a history file exists for the parent, copy it verbatim
- */
- if (InArchiveRecovery)
- {
- TLHistoryFileName(histfname, parentTLI);
- RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
- }
- else
- TLHistoryFilePath(path, parentTLI);
-
- srcfd = BasicOpenFile(path, O_RDONLY, 0);
- if (srcfd < 0)
- {
- if (errno != ENOENT)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not open file \"%s\": %m", path)));
- /* Not there, so assume parent has no parents */
- }
- else
- {
- for (;;)
- {
- errno = 0;
- nbytes = (int) read(srcfd, buffer, sizeof(buffer));
- if (nbytes < 0 || errno != 0)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not read file \"%s\": %m", path)));
- if (nbytes == 0)
- break;
- errno = 0;
- if ((int) write(fd, buffer, nbytes) != nbytes)
- {
- int save_errno = errno;
-
- /*
- * If we fail to make the file, delete it to release disk
- * space
- */
- unlink(tmppath);
-
- /*
- * if write didn't set errno, assume problem is no disk space
- */
- errno = save_errno ? save_errno : ENOSPC;
-
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not write to file \"%s\": %m", tmppath)));
- }
- }
- close(srcfd);
- }
-
- /*
- * Append one line with the details of this timeline split.
- *
- * If we did have a parent file, insert an extra newline just in case the
- * parent file failed to end with one.
- */
- XLogFileName(xlogfname, endTLI, endLogSegNo);
-
- /*
- * Write comment to history file to explain why and where timeline
- * changed. Comment varies according to the recovery target used.
- */
- if (recoveryTarget == RECOVERY_TARGET_XID)
- snprintf(buffer, sizeof(buffer),
- "%s%u\t%s\t%s transaction %u\n",
- (srcfd < 0) ? "" : "\n",
- parentTLI,
- xlogfname,
- recoveryStopAfter ? "after" : "before",
- recoveryStopXid);
- else if (recoveryTarget == RECOVERY_TARGET_TIME)
- snprintf(buffer, sizeof(buffer),
- "%s%u\t%s\t%s %s\n",
- (srcfd < 0) ? "" : "\n",
- parentTLI,
- xlogfname,
- recoveryStopAfter ? "after" : "before",
- timestamptz_to_str(recoveryStopTime));
- else if (recoveryTarget == RECOVERY_TARGET_NAME)
- snprintf(buffer, sizeof(buffer),
- "%s%u\t%s\tat restore point \"%s\"\n",
- (srcfd < 0) ? "" : "\n",
- parentTLI,
- xlogfname,
- recoveryStopName);
- else
- snprintf(buffer, sizeof(buffer),
- "%s%u\t%s\tno recovery target specified\n",
- (srcfd < 0) ? "" : "\n",
- parentTLI,
- xlogfname);
-
- nbytes = strlen(buffer);
- errno = 0;
- if ((int) write(fd, buffer, nbytes) != nbytes)
- {
- int save_errno = errno;
-
- /*
- * If we fail to make the file, delete it to release disk space
- */
- unlink(tmppath);
- /* if write didn't set errno, assume problem is no disk space */
- errno = save_errno ? save_errno : ENOSPC;
-
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not write to file \"%s\": %m", tmppath)));
- }
-
- if (pg_fsync(fd) != 0)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not fsync file \"%s\": %m", tmppath)));
-
- if (close(fd))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not close file \"%s\": %m", tmppath)));
-
-
- /*
- * Now move the completed history file into place with its final name.
- */
- TLHistoryFilePath(path, newTLI);
-
- /*
- * Prefer link() to rename() here just to be really sure that we don't
- * overwrite an existing logfile. However, there shouldn't be one, so
- * rename() is an acceptable substitute except for the truly paranoid.
- */
-#if HAVE_WORKING_LINK
- if (link(tmppath, path) < 0)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not link file \"%s\" to \"%s\": %m",
- tmppath, path)));
- unlink(tmppath);
-#else
- if (rename(tmppath, path) < 0)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not rename file \"%s\" to \"%s\": %m",
- tmppath, path)));
-#endif
-
- /* The history file can be archived immediately. */
- TLHistoryFileName(histfname, newTLI);
- XLogArchiveNotify(histfname);
-}
-
-/*
* I/O routines for pg_control
*
* *ControlFile is a buffer in shared memory that holds an image of the
@@ -6869,11 +5947,35 @@ StartupXLOG(void)
*/
if (InArchiveRecovery)
{
+ char reason[200];
+
ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1;
ereport(LOG,
(errmsg("selected new timeline ID: %u", ThisTimeLineID)));
+
+		/*
+		 * Reason text for the history file: explains why and where timeline
+		 * changed, per recovery target used. No trailing newline in any case.
+		 */
+		if (recoveryTarget == RECOVERY_TARGET_XID)
+			snprintf(reason, sizeof(reason),
+					 "%s transaction %u",
+					 recoveryStopAfter ? "after" : "before",
+					 recoveryStopXid);
+		else if (recoveryTarget == RECOVERY_TARGET_TIME)
+			snprintf(reason, sizeof(reason),
+					 "%s %s",
+					 recoveryStopAfter ? "after" : "before",
+					 timestamptz_to_str(recoveryStopTime));
+		else if (recoveryTarget == RECOVERY_TARGET_NAME)
+			snprintf(reason, sizeof(reason),
+					 "at restore point \"%s\"",
+					 recoveryStopName);
+		else
+			snprintf(reason, sizeof(reason), "no recovery target specified");
+
writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
- curFileTLI, endLogSegNo);
+ curFileTLI, endLogSegNo, reason);
}
/* Save the selected TimeLineID in shared memory, too */
@@ -9919,6 +9021,36 @@ GetXLogWriteRecPtr(void)
}
/*
+ * Returns the redo pointer of the last restartpoint. This is the oldest
+ * point in WAL that we still need, if we have to restart recovery. Returns
+ * InvalidXLogRecPtr if we don't reliably know that point yet, that is,
+ * before we have started WAL redo.
+ *
+ * This function only works in the startup process, and only while we are
+ * in WAL redo. It's important to not return a value before redo has started,
+ * to avoid deleting WAL files that we might still need, but there's no
+ * fundamental reason why this couldn't return a valid value after redo has
+ * finished, or in other processes. This is enough for the current usage,
+ * however.
+ */
+void
+GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
+{
+	/* Outside of WAL redo the restartpoint is not reliably known: say so. */
+	if (!InRedo)
+	{
+		*oldtli = 0;
+		*oldrecptr = InvalidXLogRecPtr;
+		return;
+	}
+	/* Copy both fields out of pg_control while holding the lock (shared). */
+	LWLockAcquire(ControlFileLock, LW_SHARED);
+	*oldtli = ControlFile->checkPointCopy.ThisTimeLineID;
+	*oldrecptr = ControlFile->checkPointCopy.redo;
+	LWLockRelease(ControlFileLock);
+}
+
+/*
* read_backup_label: check to see if a backup_label file is present
*
* If we see a backup_label during recovery, we assume that we are recovering
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
new file mode 100644
index 00000000000..e72795783fc
--- /dev/null
+++ b/src/backend/access/transam/xlogarchive.c
@@ -0,0 +1,572 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogarchive.c
+ * Functions for archiving WAL files and restoring from the archive.
+ *
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/xlogarchive.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "access/xlog_internal.h"
+#include "miscadmin.h"
+#include "postmaster/startup.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/pmsignal.h"
+
+/*
+ * Attempt to retrieve the file "xlogfname" from off-line archival storage by
+ * running restore_command, which is asked to write it to XLOGDIR/recovername.
+ * If successful, fill "path" with that temporary location (note that it
+ * doesn't follow the normal naming convention), and return TRUE.
+ *
+ * If not successful, fill "path" with the name of the normal on-line file
+ * (which may or may not actually exist, but we'll try to use it), and return
+ * FALSE.
+ *
+ * For fixed-size files, the caller may pass the expected size as an
+ * additional crosscheck on successful recovery. If the file size is not
+ * known, set expectedSize = 0.
+ */
+bool
+RestoreArchivedFile(char *path, const char *xlogfname,
+					const char *recovername, off_t expectedSize)
+{
+	char		xlogpath[MAXPGPATH];
+	char		xlogRestoreCmd[MAXPGPATH];
+	char		lastRestartPointFname[MAXPGPATH];
+	char	   *dp;
+	char	   *endp;
+	const char *sp;
+	int			rc;
+	bool		signaled;
+	struct stat stat_buf;
+	XLogSegNo	restartSegNo;
+	XLogRecPtr	restartRedoPtr;
+	TimeLineID	restartTli;
+
+	/* In standby mode, restore_command might not be supplied */
+	if (recoveryRestoreCommand == NULL)
+		goto not_available;
+
+	/*
+	 * When doing archive recovery, we always prefer an archived log file even
+	 * if a file of the same name exists in XLOGDIR.  The reason is that the
+	 * file in XLOGDIR could be an old, un-filled or partly-filled version
+	 * that was copied and restored as part of backing up $PGDATA.
+	 *
+	 * We could try to optimize this slightly by checking the local copy
+	 * lastchange timestamp against the archived copy, but we have no API to
+	 * do this, nor can we guarantee that the lastchange timestamp was
+	 * preserved correctly when we copied to archive. Our aim is robustness,
+	 * so we elect not to do this.
+	 *
+	 * If we cannot obtain the log file from the archive, however, we will try
+	 * to use the XLOGDIR file if it exists.  This is so that we can make use
+	 * of log segments that weren't yet transferred to the archive.
+	 *
+	 * Notice that we don't actually overwrite any files when we copy back
+	 * from archive because the restore_command may inadvertently
+	 * restore inappropriate xlogs, or they may be corrupt, so we may wish to
+	 * fallback to the segments remaining in current XLOGDIR later. The
+	 * copy-from-archive filename is always the same, ensuring that we don't
+	 * run out of disk space on long recoveries.
+	 */
+	snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
+
+	/*
+	 * Make sure there is no existing file named recovername.
+	 */
+	if (stat(xlogpath, &stat_buf) != 0)
+	{
+		if (errno != ENOENT)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not stat file \"%s\": %m",
+							xlogpath)));
+	}
+	else
+	{
+		if (unlink(xlogpath) != 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not remove file \"%s\": %m",
+							xlogpath)));
+	}
+
+	/*
+	 * Calculate the archive file cutoff point for use during log shipping
+	 * replication. All files earlier than this point can be deleted from the
+	 * archive, though there is no requirement to do so.
+	 *
+	 * We initialise this with the filename of an InvalidXLogRecPtr, which
+	 * will prevent the deletion of any WAL files from the archive because of
+	 * the alphabetic sorting property of WAL filenames.
+	 *
+	 * Once we have successfully located the redo pointer of the checkpoint
+	 * from which we start recovery we never request a file prior to the redo
+	 * pointer of the last restartpoint. When redo begins we know that we have
+	 * successfully located it, so there is no need for additional status
+	 * flags to signify the point when we can begin deleting WAL files from
+	 * the archive.
+	 */
+	GetOldestRestartPoint(&restartRedoPtr, &restartTli);
+	if (!XLogRecPtrIsInvalid(restartRedoPtr))
+	{
+		XLByteToSeg(restartRedoPtr, restartSegNo);
+		XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
+		/* we shouldn't need anything earlier than last restart point */
+		Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
+	}
+	else
+		XLogFileName(lastRestartPointFname, 0, 0L);
+
+	/*
+	 * construct the command to be executed
+	 */
+	dp = xlogRestoreCmd;
+	endp = xlogRestoreCmd + MAXPGPATH - 1;
+	*endp = '\0';
+
+	for (sp = recoveryRestoreCommand; *sp; sp++)
+	{
+		if (*sp == '%')
+		{
+			switch (sp[1])
+			{
+				case 'p':
+					/* %p: relative path of target file */
+					sp++;
+					StrNCpy(dp, xlogpath, endp - dp);
+					make_native_path(dp);
+					dp += strlen(dp);
+					break;
+				case 'f':
+					/* %f: filename of desired file */
+					sp++;
+					StrNCpy(dp, xlogfname, endp - dp);
+					dp += strlen(dp);
+					break;
+				case 'r':
+					/* %r: filename of last restartpoint */
+					sp++;
+					StrNCpy(dp, lastRestartPointFname, endp - dp);
+					dp += strlen(dp);
+					break;
+				case '%':
+					/* convert %% to a single % */
+					sp++;
+					if (dp < endp)
+						*dp++ = *sp;
+					break;
+				default:
+					/* otherwise treat the % as not special */
+					if (dp < endp)
+						*dp++ = *sp;
+					break;
+			}
+		}
+		else
+		{
+			if (dp < endp)
+				*dp++ = *sp;
+		}
+	}
+	*dp = '\0';
+
+	ereport(DEBUG3,
+			(errmsg_internal("executing restore command \"%s\"",
+							 xlogRestoreCmd)));
+
+	/*
+	 * Check signals before restore command and reset afterwards.
+	 */
+	PreRestoreCommand();
+
+	/*
+	 * Copy xlog from archival storage to XLOGDIR
+	 */
+	rc = system(xlogRestoreCmd);
+
+	PostRestoreCommand();
+
+	if (rc == 0)
+	{
+		/*
+		 * command apparently succeeded, but let's make sure the file is
+		 * really there now and has the correct size.
+		 */
+		if (stat(xlogpath, &stat_buf) == 0)
+		{
+			if (expectedSize > 0 && stat_buf.st_size != expectedSize)
+			{
+				int			elevel;
+
+				/*
+				 * If we find a partial file in standby mode, we assume it's
+				 * because it's just being copied to the archive, and keep
+				 * trying; leave via not_available so "path" is still filled
+				 * in, as promised in the header comment.
+				 *
+				 * Otherwise treat a wrong-sized file as FATAL to ensure the
+				 * DBA would notice it, but is that too strong? We could try
+				 * to plow ahead with a local copy of the file ... but there
+				 * probably isn't one, and we'd incorrectly conclude we've
+				 * reached the end of WAL and we're done recovering ...
+				 */
+				if (StandbyMode && stat_buf.st_size < expectedSize)
+					elevel = DEBUG1;
+				else
+					elevel = FATAL;
+				ereport(elevel,
+						(errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
+								xlogfname,
+								(unsigned long) stat_buf.st_size,
+								(unsigned long) expectedSize)));
+				goto not_available;
+			}
+			else
+			{
+				ereport(LOG,
+						(errmsg("restored log file \"%s\" from archive",
+								xlogfname)));
+				strcpy(path, xlogpath);
+				return true;
+			}
+		}
+		else
+		{
+			/* stat failed */
+			if (errno != ENOENT)
+				ereport(FATAL,
+						(errcode_for_file_access(),
+						 errmsg("could not stat file \"%s\": %m",
+								xlogpath)));
+		}
+	}
+
+	/*
+	 * Remember, we rollforward UNTIL the restore fails so failure here is
+	 * just part of the process... that makes it difficult to determine
+	 * whether the restore failed because there isn't an archive to restore,
+	 * or because the administrator has specified the restore program
+	 * incorrectly.  We have to assume the former.
+	 *
+	 * However, if the failure was due to any sort of signal, it's best to
+	 * punt and abort recovery.  (If we "return false" here, upper levels will
+	 * assume that recovery is complete and start up the database!) It's
+	 * essential to abort on child SIGINT and SIGQUIT, because per spec
+	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
+	 * those it's a good bet we should have gotten it too.
+	 *
+	 * On SIGTERM, assume we have received a fast shutdown request, and exit
+	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
+	 * child process. If we receive it first, the signal handler will call
+	 * proc_exit, otherwise we do it here. If we or the child process received
+	 * SIGTERM for any other reason than a fast shutdown request, postmaster
+	 * will perform an immediate shutdown when it sees us exiting
+	 * unexpectedly.
+	 *
+	 * Per the Single Unix Spec, shells report exit status > 128 when a called
+	 * command died on a signal.  Also, 126 and 127 are used to report
+	 * problems such as an unfindable command; treat those as fatal errors
+	 * too.
+	 */
+	if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
+		proc_exit(1);
+
+	signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
+
+	ereport(signaled ? FATAL : DEBUG2,
+		(errmsg("could not restore file \"%s\" from archive: return code %d",
+				xlogfname, rc)));
+
+not_available:
+
+	/*
+	 * if an archived file is not available, there might still be a version of
+	 * this file in XLOGDIR, so return that as the filename to open.
+	 *
+	 * In many recovery scenarios we expect this to fail also, but if so that
+	 * just means we've reached the end of WAL.
+	 */
+	snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
+	return false;
+}
+
+/*
+ * Attempt to execute an external shell command during recovery.
+ *
+ * 'command' is the shell command to be executed, 'commandName' is a
+ * human-readable name describing the command emitted in the logs. If
+ * 'failOnSignal' is true and the command is killed by a signal, a FATAL
+ * error is thrown. Otherwise a WARNING is emitted.
+ *
+ * This is currently used for recovery_end_command and archive_cleanup_command.
+ */
+void
+ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
+{
+	char		xlogRecoveryCmd[MAXPGPATH];
+	char		lastRestartPointFname[MAXPGPATH];
+	char	   *dp;
+	char	   *endp;
+	const char *sp;
+	int			rc;
+	bool		signaled;
+	XLogSegNo	restartSegNo;
+	XLogRecPtr	restartRedoPtr;
+	TimeLineID	restartTli;
+
+	Assert(command && commandName);
+
+	/*
+	 * Calculate the archive file cutoff point for use during log shipping
+	 * replication. All files earlier than this point can be deleted from the
+	 * archive, though there is no requirement to do so.  No locking is needed
+	 * here: GetOldestRestartPoint() takes and releases ControlFileLock itself.
+	 */
+	GetOldestRestartPoint(&restartRedoPtr, &restartTli);
+	XLByteToSeg(restartRedoPtr, restartSegNo);
+	XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
+
+	/*
+	 * construct the command to be executed
+	 */
+	dp = xlogRecoveryCmd;
+	endp = xlogRecoveryCmd + MAXPGPATH - 1;
+	*endp = '\0';
+
+	for (sp = command; *sp; sp++)
+	{
+		if (*sp == '%')
+		{
+			switch (sp[1])
+			{
+				case 'r':
+					/* %r: filename of last restartpoint */
+					sp++;
+					StrNCpy(dp, lastRestartPointFname, endp - dp);
+					dp += strlen(dp);
+					break;
+				case '%':
+					/* convert %% to a single % */
+					sp++;
+					if (dp < endp)
+						*dp++ = *sp;
+					break;
+				default:
+					/* otherwise treat the % as not special */
+					if (dp < endp)
+						*dp++ = *sp;
+					break;
+			}
+		}
+		else
+		{
+			if (dp < endp)
+				*dp++ = *sp;
+		}
+	}
+	*dp = '\0';
+
+	ereport(DEBUG3,
+			(errmsg_internal("executing %s \"%s\"", commandName, command)));
+
+	/*
+	 * execute the constructed command
+	 */
+	rc = system(xlogRecoveryCmd);
+	if (rc != 0)
+	{
+		/*
+		 * If the failure was due to any sort of signal, it's best to punt and
+		 * abort recovery. See also detailed comments on signals in
+		 * RestoreArchivedFile().
+		 */
+		signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
+
+		ereport((signaled && failOnSignal) ? FATAL : WARNING,
+		/*------
+		   translator: First %s represents a recovery.conf parameter name like
+		  "recovery_end_command", and the 2nd is the value of that parameter. */
+				(errmsg("%s \"%s\": return code %d", commandName,
+						command, rc)));
+	}
+}
+
+
+/*
+ * XLogArchiveNotify
+ *
+ * Ask the archiver to archive a file
+ *
+ * The request is an otherwise empty status file whose name is the message,
+ * e.g. we write 0000000100000001000000C6.ready and the archiver then knows
+ * to archive XLOGDIR/0000000100000001000000C6, and when complete, renames
+ * the status file to 0000000100000001000000C6.done
+ */
+void
+XLogArchiveNotify(const char *xlog)
+{
+	char		readyPath[MAXPGPATH];
+	FILE	   *fp;
+
+	/* open <XLOG>.ready for writing; nothing is ever written into it */
+	StatusFilePath(readyPath, xlog, ".ready");
+	if ((fp = AllocateFile(readyPath, "w")) == NULL)
+	{
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not create archive status file \"%s\": %m",
+						readyPath)));
+		return;
+	}
+
+	if (FreeFile(fp) != 0)
+	{
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not write archive status file \"%s\": %m",
+						readyPath)));
+		return;
+	}
+
+	/* Both steps worked: wake the archiver, if we run under a postmaster */
+	if (IsUnderPostmaster)
+		SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
+}
+
+/*
+ * Same as XLogArchiveNotify, for segment 'segno' of timeline ThisTimeLineID
+ */
+void
+XLogArchiveNotifySeg(XLogSegNo segno)
+{
+	char		segfname[MAXFNAMELEN];
+
+	XLogFileName(segfname, ThisTimeLineID, segno);
+	XLogArchiveNotify(segfname);
+}
+
+/*
+ * XLogArchiveCheckDone
+ *
+ * Called when an old XLOG segment file or backup history file is about to be
+ * deleted or recycled. Returns true if that is okay, i.e. archiving is off
+ * or <XLOG>.done exists. Otherwise returns false, after making sure that a
+ * <XLOG>.ready file exists: it is created here when neither status file is
+ * found.
+ *
+ * Re-creating <XLOG>.ready here means that if the original attempt to create
+ * it failed, the request is retried during subsequent checkpoints.
+ */
+bool
+XLogArchiveCheckDone(const char *xlog)
+{
+	char		statusPath[MAXPGPATH];
+	struct stat st;
+	bool		done;
+
+	/* With archiving off, every file is deletable */
+	if (!XLogArchivingActive())
+		return true;
+
+	/* A .done file means the archiver has finished with this file */
+	StatusFilePath(statusPath, xlog, ".done");
+	done = (stat(statusPath, &st) == 0);
+	if (!done)
+	{
+		/* A .ready file means the archiver is still busy with it */
+		StatusFilePath(statusPath, xlog, ".ready");
+		if (stat(statusPath, &st) == 0)
+			return false;
+
+		/* Race condition --- the archiver may just have finished: recheck */
+		StatusFilePath(statusPath, xlog, ".done");
+		done = (stat(statusPath, &st) == 0);
+	}
+
+	/* Neither status file was found: retry creation of the .ready file */
+	if (!done)
+		XLogArchiveNotify(xlog);
+	return done;
+}
+
+/*
+ * XLogArchiveIsBusy
+ *
+ * Report whether an XLOG segment file is still unarchived.
+ *
+ * This is almost but not quite the inverse of XLogArchiveCheckDone. First,
+ * we aren't chartered to recreate the .ready file, so nothing is created
+ * here. Second, a file that is already gone is considered not busy; that
+ * check handles the race condition that a checkpoint already deleted the
+ * no-longer-needed file. Note that the archiving-enabled setting is not
+ * consulted.
+ */
+bool
+XLogArchiveIsBusy(const char *xlog)
+{
+	char		probePath[MAXPGPATH];
+	struct stat st;
+
+	/* .done present --- the archiver is done with the file */
+	StatusFilePath(probePath, xlog, ".done");
+	if (stat(probePath, &st) == 0)
+		return false;
+
+	/* .ready present --- the archiver is still busy with the file */
+	StatusFilePath(probePath, xlog, ".ready");
+	if (stat(probePath, &st) == 0)
+		return true;
+
+	/* Race condition --- the archiver may just have finished: recheck */
+	StatusFilePath(probePath, xlog, ".done");
+	if (stat(probePath, &st) == 0)
+		return false;
+
+	/*
+	 * No status file at all. If the WAL file itself is missing (ENOENT), a
+	 * checkpoint has removed it, which implies it has already been archived.
+	 * If it exists, or stat fails for any other reason, report it as busy.
+	 */
+	snprintf(probePath, MAXPGPATH, XLOGDIR "/%s", xlog);
+	if (stat(probePath, &st) == 0)
+		return true;
+	return errno != ENOENT;
+}
+
+/*
+ * XLogArchiveCleanup
+ *
+ * Remove the archive status files (.done and .ready) of one xlog segment
+ */
+void
+XLogArchiveCleanup(const char *xlog)
+{
+	char		statusFile[MAXPGPATH];
+
+	/* unlink() results are not checked below; should we complain? */
+
+	/* First the .done file */
+	StatusFilePath(statusFile, xlog, ".done");
+	unlink(statusFile);
+
+	/* Then the .ready file, if present --- normally it shouldn't be */
+	StatusFilePath(statusFile, xlog, ".ready");
+	unlink(statusFile);
+}
diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h
new file mode 100644
index 00000000000..f2a7658bc45
--- /dev/null
+++ b/src/include/access/timeline.h
@@ -0,0 +1,23 @@
+/*
+ * timeline.h
+ *
+ * Functions for reading and writing timeline history files.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/timeline.h
+ */
+#ifndef TIMELINE_H
+#define TIMELINE_H
+
+#include "access/xlogdefs.h"
+#include "nodes/pg_list.h"
+
+extern List *readTimeLineHistory(TimeLineID targetTLI);
+extern bool existsTimeLineHistory(TimeLineID probeTLI);
+extern TimeLineID findNewestTimeLine(TimeLineID startTLI);
+extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
+ TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason);
+
+#endif /* TIMELINE_H */
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index b5bfb7b4072..2c66b2feb8a 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -229,6 +229,29 @@ extern const RmgrData RmgrTable[];
extern pg_time_t GetLastSegSwitchTime(void);
extern XLogRecPtr RequestXLogSwitch(void);
+extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli);
+
+/*
+ * Exported for the functions in timeline.c and xlogarchive.c. Only valid
+ * in the startup process.
+ */
+extern bool InArchiveRecovery;
+extern bool StandbyMode;
+extern char *recoveryRestoreCommand;
+
+/*
+ * Prototypes for functions in xlogarchive.c
+ */
+extern bool RestoreArchivedFile(char *path, const char *xlogfname,
+ const char *recovername, off_t expectedSize);
+extern void ExecuteRecoveryCommand(char *command, char *commandName,
+					   bool failOnSignal);
+extern void XLogArchiveNotify(const char *xlog);
+extern void XLogArchiveNotifySeg(XLogSegNo segno);
+extern bool XLogArchiveCheckDone(const char *xlog);
+extern bool XLogArchiveIsBusy(const char *xlog);
+extern void XLogArchiveCleanup(const char *xlog);
+
/*
* These aren't in xlog.h because I'd rather not include fmgr.h there.
*/