aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/xlogarchive.c
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2012-10-02 13:37:19 +0300
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2012-10-02 13:37:19 +0300
commitd5497b95f3ca2fc50c6eef46d3394ab6e6855956 (patch)
treeec2c8b40f5a99f399720406987e7c31f187d2c5a /src/backend/access/transam/xlogarchive.c
parent0899556e92958712cc31953497b82657cc706f94 (diff)
downloadpostgresql-d5497b95f3ca2fc50c6eef46d3394ab6e6855956.tar.gz
postgresql-d5497b95f3ca2fc50c6eef46d3394ab6e6855956.zip
Split off functions related to timeline history files and XLOG archiving.
This is just refactoring, to make the functions accessible outside xlog.c. A followup patch will make use of that, to allow fetching timeline history files over streaming replication.
Diffstat (limited to 'src/backend/access/transam/xlogarchive.c')
-rw-r--r--src/backend/access/transam/xlogarchive.c572
1 files changed, 572 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
new file mode 100644
index 00000000000..e72795783fc
--- /dev/null
+++ b/src/backend/access/transam/xlogarchive.c
@@ -0,0 +1,572 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogarchive.c
+ * Functions for archiving WAL files and restoring from the archive.
+ *
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/xlogarchive.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "access/xlog_internal.h"
+#include "miscadmin.h"
+#include "postmaster/startup.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/pmsignal.h"
+
+/*
+ * Attempt to retrieve the specified file from off-line archival storage.
+ * If successful, fill "path" with its complete path (note that this will be
+ * a temp file name that doesn't follow the normal naming convention), and
+ * return TRUE.
+ *
+ * If not successful, fill "path" with the name of the normal on-line file
+ * (which may or may not actually exist, but we'll try to use it), and return
+ * FALSE.
+ *
+ * For fixed-size files, the caller may pass the expected size as an
+ * additional crosscheck on successful recovery. If the file size is not
+ * known, set expectedSize = 0.
+ */
+bool
+RestoreArchivedFile(char *path, const char *xlogfname,
+ const char *recovername, off_t expectedSize)
+{
+ char xlogpath[MAXPGPATH];
+ char xlogRestoreCmd[MAXPGPATH];
+ char lastRestartPointFname[MAXPGPATH];
+ char *dp;
+ char *endp;
+ const char *sp;
+ int rc;
+ bool signaled;
+ struct stat stat_buf;
+ XLogSegNo restartSegNo;
+ XLogRecPtr restartRedoPtr;
+ TimeLineID restartTli;
+
+ /* In standby mode, restore_command might not be supplied */
+ if (recoveryRestoreCommand == NULL)
+ goto not_available;
+
+ /*
+ * When doing archive recovery, we always prefer an archived log file even
+ * if a file of the same name exists in XLOGDIR. The reason is that the
+ * file in XLOGDIR could be an old, un-filled or partly-filled version
+ * that was copied and restored as part of backing up $PGDATA.
+ *
+ * We could try to optimize this slightly by checking the local copy
+ * lastchange timestamp against the archived copy, but we have no API to
+ * do this, nor can we guarantee that the lastchange timestamp was
+ * preserved correctly when we copied to archive. Our aim is robustness,
+ * so we elect not to do this.
+ *
+ * If we cannot obtain the log file from the archive, however, we will try
+ * to use the XLOGDIR file if it exists. This is so that we can make use
+ * of log segments that weren't yet transferred to the archive.
+ *
+ * Notice that we don't actually overwrite any files when we copy back
+ * from archive because the restore_command may inadvertently
+ * restore inappropriate xlogs, or they may be corrupt, so we may wish to
+ * fallback to the segments remaining in current XLOGDIR later. The
+ * copy-from-archive filename is always the same, ensuring that we don't
+ * run out of disk space on long recoveries.
+ */
+ snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
+
+ /*
+ * Make sure there is no existing file named recovername.
+ */
+ if (stat(xlogpath, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ xlogpath)));
+ }
+ else
+ {
+ if (unlink(xlogpath) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not remove file \"%s\": %m",
+ xlogpath)));
+ }
+
+ /*
+ * Calculate the archive file cutoff point for use during log shipping
+ * replication. All files earlier than this point can be deleted from the
+ * archive, though there is no requirement to do so.
+ *
+ * We initialise this with the filename of an InvalidXLogRecPtr, which
+ * will prevent the deletion of any WAL files from the archive because of
+ * the alphabetic sorting property of WAL filenames.
+ *
+ * Once we have successfully located the redo pointer of the checkpoint
+ * from which we start recovery we never request a file prior to the redo
+ * pointer of the last restartpoint. When redo begins we know that we have
+ * successfully located it, so there is no need for additional status
+ * flags to signify the point when we can begin deleting WAL files from
+ * the archive.
+ */
+ GetOldestRestartPoint(&restartRedoPtr, &restartTli);
+ if (!XLogRecPtrIsInvalid(restartRedoPtr))
+ {
+ XLByteToSeg(restartRedoPtr, restartSegNo);
+ XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
+ /* we shouldn't need anything earlier than last restart point */
+ Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
+ }
+ else
+ XLogFileName(lastRestartPointFname, 0, 0L);
+
+ /*
+ * construct the command to be executed
+ */
+ dp = xlogRestoreCmd;
+ endp = xlogRestoreCmd + MAXPGPATH - 1;
+ *endp = '\0';
+
+ for (sp = recoveryRestoreCommand; *sp; sp++)
+ {
+ if (*sp == '%')
+ {
+ switch (sp[1])
+ {
+ case 'p':
+ /* %p: relative path of target file */
+ sp++;
+ StrNCpy(dp, xlogpath, endp - dp);
+ make_native_path(dp);
+ dp += strlen(dp);
+ break;
+ case 'f':
+ /* %f: filename of desired file */
+ sp++;
+ StrNCpy(dp, xlogfname, endp - dp);
+ dp += strlen(dp);
+ break;
+ case 'r':
+ /* %r: filename of last restartpoint */
+ sp++;
+ StrNCpy(dp, lastRestartPointFname, endp - dp);
+ dp += strlen(dp);
+ break;
+ case '%':
+ /* convert %% to a single % */
+ sp++;
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ default:
+ /* otherwise treat the % as not special */
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ }
+ }
+ else
+ {
+ if (dp < endp)
+ *dp++ = *sp;
+ }
+ }
+ *dp = '\0';
+
+ ereport(DEBUG3,
+ (errmsg_internal("executing restore command \"%s\"",
+ xlogRestoreCmd)));
+
+ /*
+ * Check signals before restore command and reset afterwards.
+ */
+ PreRestoreCommand();
+
+ /*
+ * Copy xlog from archival storage to XLOGDIR
+ */
+ rc = system(xlogRestoreCmd);
+
+ PostRestoreCommand();
+
+ if (rc == 0)
+ {
+ /*
+ * command apparently succeeded, but let's make sure the file is
+ * really there now and has the correct size.
+ */
+ if (stat(xlogpath, &stat_buf) == 0)
+ {
+ if (expectedSize > 0 && stat_buf.st_size != expectedSize)
+ {
+ int elevel;
+
+ /*
+ * If we find a partial file in standby mode, we assume it's
+ * because it's just being copied to the archive, and keep
+ * trying.
+ *
+ * Otherwise treat a wrong-sized file as FATAL to ensure the
+ * DBA would notice it, but is that too strong? We could try
+ * to plow ahead with a local copy of the file ... but the
+ * problem is that there probably isn't one, and we'd
+ * incorrectly conclude we've reached the end of WAL and we're
+ * done recovering ...
+ */
+ if (StandbyMode && stat_buf.st_size < expectedSize)
+ elevel = DEBUG1;
+ else
+ elevel = FATAL;
+ ereport(elevel,
+ (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
+ xlogfname,
+ (unsigned long) stat_buf.st_size,
+ (unsigned long) expectedSize)));
+ return false;
+ }
+ else
+ {
+ ereport(LOG,
+ (errmsg("restored log file \"%s\" from archive",
+ xlogfname)));
+ strcpy(path, xlogpath);
+ return true;
+ }
+ }
+ else
+ {
+ /* stat failed */
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ xlogpath)));
+ }
+ }
+
+ /*
+ * Remember, we rollforward UNTIL the restore fails so failure here is
+ * just part of the process... that makes it difficult to determine
+ * whether the restore failed because there isn't an archive to restore,
+ * or because the administrator has specified the restore program
+ * incorrectly. We have to assume the former.
+ *
+ * However, if the failure was due to any sort of signal, it's best to
+ * punt and abort recovery. (If we "return false" here, upper levels will
+ * assume that recovery is complete and start up the database!) It's
+ * essential to abort on child SIGINT and SIGQUIT, because per spec
+ * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
+ * those it's a good bet we should have gotten it too.
+ *
+ * On SIGTERM, assume we have received a fast shutdown request, and exit
+ * cleanly. It's pure chance whether we receive the SIGTERM first, or the
+ * child process. If we receive it first, the signal handler will call
+ * proc_exit, otherwise we do it here. If we or the child process received
+ * SIGTERM for any other reason than a fast shutdown request, postmaster
+ * will perform an immediate shutdown when it sees us exiting
+ * unexpectedly.
+ *
+ * Per the Single Unix Spec, shells report exit status > 128 when a called
+ * command died on a signal. Also, 126 and 127 are used to report
+ * problems such as an unfindable command; treat those as fatal errors
+ * too.
+ */
+ if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
+ proc_exit(1);
+
+ signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
+
+ ereport(signaled ? FATAL : DEBUG2,
+ (errmsg("could not restore file \"%s\" from archive: return code %d",
+ xlogfname, rc)));
+
+not_available:
+
+ /*
+ * if an archived file is not available, there might still be a version of
+ * this file in XLOGDIR, so return that as the filename to open.
+ *
+ * In many recovery scenarios we expect this to fail also, but if so that
+ * just means we've reached the end of WAL.
+ */
+ snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
+ return false;
+}
+
+/*
+ * Attempt to execute an external shell command during recovery.
+ *
+ * 'command' is the shell command to be executed, 'commandName' is a
+ * human-readable name describing the command emitted in the logs. If
+ * 'failOnSignal' is true and the command is killed by a signal, a FATAL
+ * error is thrown. Otherwise a WARNING is emitted.
+ *
+ * This is currently used for recovery_end_command and archive_cleanup_command.
+ */
+void
+ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
+{
+ char xlogRecoveryCmd[MAXPGPATH];
+ char lastRestartPointFname[MAXPGPATH];
+ char *dp;
+ char *endp;
+ const char *sp;
+ int rc;
+ bool signaled;
+ XLogSegNo restartSegNo;
+ XLogRecPtr restartRedoPtr;
+ TimeLineID restartTli;
+
+ Assert(command && commandName);
+
+ /*
+ * Calculate the archive file cutoff point for use during log shipping
+ * replication. All files earlier than this point can be deleted from the
+ * archive, though there is no requirement to do so.
+ */
+ GetOldestRestartPoint(&restartRedoPtr, &restartTli);
+ XLByteToSeg(restartRedoPtr, restartSegNo);
+ XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
+ LWLockRelease(ControlFileLock);
+
+ /*
+ * construct the command to be executed
+ */
+ dp = xlogRecoveryCmd;
+ endp = xlogRecoveryCmd + MAXPGPATH - 1;
+ *endp = '\0';
+
+ for (sp = command; *sp; sp++)
+ {
+ if (*sp == '%')
+ {
+ switch (sp[1])
+ {
+ case 'r':
+ /* %r: filename of last restartpoint */
+ sp++;
+ StrNCpy(dp, lastRestartPointFname, endp - dp);
+ dp += strlen(dp);
+ break;
+ case '%':
+ /* convert %% to a single % */
+ sp++;
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ default:
+ /* otherwise treat the % as not special */
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ }
+ }
+ else
+ {
+ if (dp < endp)
+ *dp++ = *sp;
+ }
+ }
+ *dp = '\0';
+
+ ereport(DEBUG3,
+ (errmsg_internal("executing %s \"%s\"", commandName, command)));
+
+ /*
+ * execute the constructed command
+ */
+ rc = system(xlogRecoveryCmd);
+ if (rc != 0)
+ {
+ /*
+ * If the failure was due to any sort of signal, it's best to punt and
+ * abort recovery. See also detailed comments on signals in
+ * RestoreArchivedFile().
+ */
+ signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
+
+ ereport((signaled && failOnSignal) ? FATAL : WARNING,
+ /*------
+ translator: First %s represents a recovery.conf parameter name like
+ "recovery_end_command", and the 2nd is the value of that parameter. */
+ (errmsg("%s \"%s\": return code %d", commandName,
+ command, rc)));
+ }
+}
+
+
+/*
+ * XLogArchiveNotify
+ *
+ * Create an archive notification file
+ *
+ * The name of the notification file is the message that will be picked up
+ * by the archiver, e.g. we write 0000000100000001000000C6.ready
+ * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6,
+ * then when complete, rename it to 0000000100000001000000C6.done
+ */
+void
+XLogArchiveNotify(const char *xlog)
+{
+ char archiveStatusPath[MAXPGPATH];
+ FILE *fd;
+
+ /* insert an otherwise empty file called <XLOG>.ready */
+ StatusFilePath(archiveStatusPath, xlog, ".ready");
+ fd = AllocateFile(archiveStatusPath, "w");
+ if (fd == NULL)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not create archive status file \"%s\": %m",
+ archiveStatusPath)));
+ return;
+ }
+ if (FreeFile(fd))
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not write archive status file \"%s\": %m",
+ archiveStatusPath)));
+ return;
+ }
+
+ /* Notify archiver that it's got something to do */
+ if (IsUnderPostmaster)
+ SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
+}
+
+/*
+ * Convenience routine to notify using segment number representation of filename
+ */
+void
+XLogArchiveNotifySeg(XLogSegNo segno)
+{
+ char xlog[MAXFNAMELEN];
+
+ XLogFileName(xlog, ThisTimeLineID, segno);
+ XLogArchiveNotify(xlog);
+}
+
+/*
+ * XLogArchiveCheckDone
+ *
+ * This is called when we are ready to delete or recycle an old XLOG segment
+ * file or backup history file. If it is okay to delete it then return true.
+ * If it is not time to delete it, make sure a .ready file exists, and return
+ * false.
+ *
+ * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists,
+ * then return false; else create <XLOG>.ready and return false.
+ *
+ * The reason we do things this way is so that if the original attempt to
+ * create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
+ */
+bool
+XLogArchiveCheckDone(const char *xlog)
+{
+ char archiveStatusPath[MAXPGPATH];
+ struct stat stat_buf;
+
+ /* Always deletable if archiving is off */
+ if (!XLogArchivingActive())
+ return true;
+
+ /* First check for .done --- this means archiver is done with it */
+ StatusFilePath(archiveStatusPath, xlog, ".done");
+ if (stat(archiveStatusPath, &stat_buf) == 0)
+ return true;
+
+ /* check for .ready --- this means archiver is still busy with it */
+ StatusFilePath(archiveStatusPath, xlog, ".ready");
+ if (stat(archiveStatusPath, &stat_buf) == 0)
+ return false;
+
+ /* Race condition --- maybe archiver just finished, so recheck */
+ StatusFilePath(archiveStatusPath, xlog, ".done");
+ if (stat(archiveStatusPath, &stat_buf) == 0)
+ return true;
+
+ /* Retry creation of the .ready file */
+ XLogArchiveNotify(xlog);
+ return false;
+}
+
+/*
+ * XLogArchiveIsBusy
+ *
+ * Check to see if an XLOG segment file is still unarchived.
+ * This is almost but not quite the inverse of XLogArchiveCheckDone: in
+ * the first place we aren't chartered to recreate the .ready file, and
+ * in the second place we should consider that if the file is already gone
+ * then it's not busy. (This check is needed to handle the race condition
+ * that a checkpoint already deleted the no-longer-needed file.)
+ */
+bool
+XLogArchiveIsBusy(const char *xlog)
+{
+ char archiveStatusPath[MAXPGPATH];
+ struct stat stat_buf;
+
+ /* First check for .done --- this means archiver is done with it */
+ StatusFilePath(archiveStatusPath, xlog, ".done");
+ if (stat(archiveStatusPath, &stat_buf) == 0)
+ return false;
+
+ /* check for .ready --- this means archiver is still busy with it */
+ StatusFilePath(archiveStatusPath, xlog, ".ready");
+ if (stat(archiveStatusPath, &stat_buf) == 0)
+ return true;
+
+ /* Race condition --- maybe archiver just finished, so recheck */
+ StatusFilePath(archiveStatusPath, xlog, ".done");
+ if (stat(archiveStatusPath, &stat_buf) == 0)
+ return false;
+
+ /*
+ * Check to see if the WAL file has been removed by checkpoint, which
+ * implies it has already been archived, and explains why we can't see a
+ * status file for it.
+ */
+ snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog);
+ if (stat(archiveStatusPath, &stat_buf) != 0 &&
+ errno == ENOENT)
+ return false;
+
+ return true;
+}
+
+/*
+ * XLogArchiveCleanup
+ *
+ * Cleanup archive notification file(s) for a particular xlog segment
+ */
+void
+XLogArchiveCleanup(const char *xlog)
+{
+ char archiveStatusPath[MAXPGPATH];
+
+ /* Remove the .done file */
+ StatusFilePath(archiveStatusPath, xlog, ".done");
+ unlink(archiveStatusPath);
+ /* should we complain about failure? */
+
+ /* Remove the .ready file if present --- normally it shouldn't be */
+ StatusFilePath(archiveStatusPath, xlog, ".ready");
+ unlink(archiveStatusPath);
+ /* should we complain about failure? */
+}