diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2012-10-02 13:37:19 +0300 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2012-10-02 13:37:19 +0300 |
commit | d5497b95f3ca2fc50c6eef46d3394ab6e6855956 (patch) | |
tree | ec2c8b40f5a99f399720406987e7c31f187d2c5a /src/backend/access/transam/xlogarchive.c | |
parent | 0899556e92958712cc31953497b82657cc706f94 (diff) | |
download | postgresql-d5497b95f3ca2fc50c6eef46d3394ab6e6855956.tar.gz postgresql-d5497b95f3ca2fc50c6eef46d3394ab6e6855956.zip |
Split off functions related to timeline history files and XLOG archiving.
This is just refactoring, to make the functions accessible outside xlog.c.
A followup patch will make use of that, to allow fetching timeline history
files over streaming replication.
Diffstat (limited to 'src/backend/access/transam/xlogarchive.c')
-rw-r--r-- | src/backend/access/transam/xlogarchive.c | 572 |
1 files changed, 572 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c new file mode 100644 index 00000000000..e72795783fc --- /dev/null +++ b/src/backend/access/transam/xlogarchive.c @@ -0,0 +1,572 @@ +/*------------------------------------------------------------------------- + * + * xlogarchive.c + * Functions for archiving WAL files and restoring from the archive. + * + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/access/transam/xlogarchive.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <signal.h> +#include <unistd.h> + +#include "access/xlog_internal.h" +#include "miscadmin.h" +#include "postmaster/startup.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/pmsignal.h" + +/* + * Attempt to retrieve the specified file from off-line archival storage. + * If successful, fill "path" with its complete path (note that this will be + * a temp file name that doesn't follow the normal naming convention), and + * return TRUE. + * + * If not successful, fill "path" with the name of the normal on-line file + * (which may or may not actually exist, but we'll try to use it), and return + * FALSE. + * + * For fixed-size files, the caller may pass the expected size as an + * additional crosscheck on successful recovery. If the file size is not + * known, set expectedSize = 0. + */ +bool +RestoreArchivedFile(char *path, const char *xlogfname, + const char *recovername, off_t expectedSize) +{ + char xlogpath[MAXPGPATH]; + char xlogRestoreCmd[MAXPGPATH]; + char lastRestartPointFname[MAXPGPATH]; + char *dp; + char *endp; + const char *sp; + int rc; + bool signaled; + struct stat stat_buf; + XLogSegNo restartSegNo; + XLogRecPtr restartRedoPtr; + TimeLineID restartTli; + + /* In standby mode, restore_command might not be supplied */ + if (recoveryRestoreCommand == NULL) + goto not_available; + + /* + * When doing archive recovery, we always prefer an archived log file even + * if a file of the same name exists in XLOGDIR. The reason is that the + * file in XLOGDIR could be an old, un-filled or partly-filled version + * that was copied and restored as part of backing up $PGDATA. + * + * We could try to optimize this slightly by checking the local copy + * lastchange timestamp against the archived copy, but we have no API to + * do this, nor can we guarantee that the lastchange timestamp was + * preserved correctly when we copied to archive. Our aim is robustness, + * so we elect not to do this. + * + * If we cannot obtain the log file from the archive, however, we will try + * to use the XLOGDIR file if it exists. This is so that we can make use + * of log segments that weren't yet transferred to the archive. + * + * Notice that we don't actually overwrite any files when we copy back + * from archive because the restore_command may inadvertently + * restore inappropriate xlogs, or they may be corrupt, so we may wish to + * fallback to the segments remaining in current XLOGDIR later. The + * copy-from-archive filename is always the same, ensuring that we don't + * run out of disk space on long recoveries. + */ + snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername); + + /* + * Make sure there is no existing file named recovername. + */ + if (stat(xlogpath, &stat_buf) != 0) + { + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + xlogpath))); + } + else + { + if (unlink(xlogpath) != 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", + xlogpath))); + } + + /* + * Calculate the archive file cutoff point for use during log shipping + * replication. All files earlier than this point can be deleted from the + * archive, though there is no requirement to do so. + * + * We initialise this with the filename of an InvalidXLogRecPtr, which + * will prevent the deletion of any WAL files from the archive because of + * the alphabetic sorting property of WAL filenames. + * + * Once we have successfully located the redo pointer of the checkpoint + * from which we start recovery we never request a file prior to the redo + * pointer of the last restartpoint. When redo begins we know that we have + * successfully located it, so there is no need for additional status + * flags to signify the point when we can begin deleting WAL files from + * the archive. + */ + GetOldestRestartPoint(&restartRedoPtr, &restartTli); + if (!XLogRecPtrIsInvalid(restartRedoPtr)) + { + XLByteToSeg(restartRedoPtr, restartSegNo); + XLogFileName(lastRestartPointFname, restartTli, restartSegNo); + /* we shouldn't need anything earlier than last restart point */ + Assert(strcmp(lastRestartPointFname, xlogfname) <= 0); + } + else + XLogFileName(lastRestartPointFname, 0, 0L); + + /* + * construct the command to be executed + */ + dp = xlogRestoreCmd; + endp = xlogRestoreCmd + MAXPGPATH - 1; + *endp = '\0'; + + for (sp = recoveryRestoreCommand; *sp; sp++) + { + if (*sp == '%') + { + switch (sp[1]) + { + case 'p': + /* %p: relative path of target file */ + sp++; + StrNCpy(dp, xlogpath, endp - dp); + make_native_path(dp); + dp += strlen(dp); + break; + case 'f': + /* %f: filename of desired file */ + sp++; + StrNCpy(dp, xlogfname, endp - dp); + dp += strlen(dp); + break; + case 'r': + /* %r: filename of last restartpoint */ + sp++; + StrNCpy(dp, lastRestartPointFname, endp - dp); + dp += strlen(dp); + break; + case '%': + /* convert %% to a single % */ + sp++; + if (dp < endp) + *dp++ = *sp; + break; + default: + /* otherwise treat the % as not special */ + if (dp < endp) + *dp++ = *sp; + break; + } + } + else + { + if (dp < endp) + *dp++ = *sp; + } + } + *dp = '\0'; + + ereport(DEBUG3, + (errmsg_internal("executing restore command \"%s\"", + xlogRestoreCmd))); + + /* + * Check signals before restore command and reset afterwards. + */ + PreRestoreCommand(); + + /* + * Copy xlog from archival storage to XLOGDIR + */ + rc = system(xlogRestoreCmd); + + PostRestoreCommand(); + + if (rc == 0) + { + /* + * command apparently succeeded, but let's make sure the file is + * really there now and has the correct size. + */ + if (stat(xlogpath, &stat_buf) == 0) + { + if (expectedSize > 0 && stat_buf.st_size != expectedSize) + { + int elevel; + + /* + * If we find a partial file in standby mode, we assume it's + * because it's just being copied to the archive, and keep + * trying. + * + * Otherwise treat a wrong-sized file as FATAL to ensure the + * DBA would notice it, but is that too strong? We could try + * to plow ahead with a local copy of the file ... but the + * problem is that there probably isn't one, and we'd + * incorrectly conclude we've reached the end of WAL and we're + * done recovering ... + */ + if (StandbyMode && stat_buf.st_size < expectedSize) + elevel = DEBUG1; + else + elevel = FATAL; + ereport(elevel, + (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu", + xlogfname, + (unsigned long) stat_buf.st_size, + (unsigned long) expectedSize))); + return false; + } + else + { + ereport(LOG, + (errmsg("restored log file \"%s\" from archive", + xlogfname))); + strcpy(path, xlogpath); + return true; + } + } + else + { + /* stat failed */ + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + xlogpath))); + } + } + + /* + * Remember, we rollforward UNTIL the restore fails so failure here is + * just part of the process... that makes it difficult to determine + * whether the restore failed because there isn't an archive to restore, + * or because the administrator has specified the restore program + * incorrectly. We have to assume the former. + * + * However, if the failure was due to any sort of signal, it's best to + * punt and abort recovery. (If we "return false" here, upper levels will + * assume that recovery is complete and start up the database!) It's + * essential to abort on child SIGINT and SIGQUIT, because per spec + * system() ignores SIGINT and SIGQUIT while waiting; if we see one of + * those it's a good bet we should have gotten it too. + * + * On SIGTERM, assume we have received a fast shutdown request, and exit + * cleanly. It's pure chance whether we receive the SIGTERM first, or the + * child process. If we receive it first, the signal handler will call + * proc_exit, otherwise we do it here. If we or the child process received + * SIGTERM for any other reason than a fast shutdown request, postmaster + * will perform an immediate shutdown when it sees us exiting + * unexpectedly. + * + * Per the Single Unix Spec, shells report exit status > 128 when a called + * command died on a signal. Also, 126 and 127 are used to report + * problems such as an unfindable command; treat those as fatal errors + * too. + */ + if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM) + proc_exit(1); + + signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; + + ereport(signaled ? FATAL : DEBUG2, + (errmsg("could not restore file \"%s\" from archive: return code %d", + xlogfname, rc))); + +not_available: + + /* + * if an archived file is not available, there might still be a version of + * this file in XLOGDIR, so return that as the filename to open. + * + * In many recovery scenarios we expect this to fail also, but if so that + * just means we've reached the end of WAL. + */ + snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname); + return false; +} + +/* + * Attempt to execute an external shell command during recovery. + * + * 'command' is the shell command to be executed, 'commandName' is a + * human-readable name describing the command emitted in the logs. If + * 'failOnSignal' is true and the command is killed by a signal, a FATAL + * error is thrown. Otherwise a WARNING is emitted. + * + * This is currently used for recovery_end_command and archive_cleanup_command. + */ +void +ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal) +{ + char xlogRecoveryCmd[MAXPGPATH]; + char lastRestartPointFname[MAXPGPATH]; + char *dp; + char *endp; + const char *sp; + int rc; + bool signaled; + XLogSegNo restartSegNo; + XLogRecPtr restartRedoPtr; + TimeLineID restartTli; + + Assert(command && commandName); + + /* + * Calculate the archive file cutoff point for use during log shipping + * replication. All files earlier than this point can be deleted from the + * archive, though there is no requirement to do so. + */ + GetOldestRestartPoint(&restartRedoPtr, &restartTli); + XLByteToSeg(restartRedoPtr, restartSegNo); + XLogFileName(lastRestartPointFname, restartTli, restartSegNo); + LWLockRelease(ControlFileLock); + + /* + * construct the command to be executed + */ + dp = xlogRecoveryCmd; + endp = xlogRecoveryCmd + MAXPGPATH - 1; + *endp = '\0'; + + for (sp = command; *sp; sp++) + { + if (*sp == '%') + { + switch (sp[1]) + { + case 'r': + /* %r: filename of last restartpoint */ + sp++; + StrNCpy(dp, lastRestartPointFname, endp - dp); + dp += strlen(dp); + break; + case '%': + /* convert %% to a single % */ + sp++; + if (dp < endp) + *dp++ = *sp; + break; + default: + /* otherwise treat the % as not special */ + if (dp < endp) + *dp++ = *sp; + break; + } + } + else + { + if (dp < endp) + *dp++ = *sp; + } + } + *dp = '\0'; + + ereport(DEBUG3, + (errmsg_internal("executing %s \"%s\"", commandName, command))); + + /* + * execute the constructed command + */ + rc = system(xlogRecoveryCmd); + if (rc != 0) + { + /* + * If the failure was due to any sort of signal, it's best to punt and + * abort recovery. See also detailed comments on signals in + * RestoreArchivedFile(). + */ + signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; + + ereport((signaled && failOnSignal) ? FATAL : WARNING, + /*------ + translator: First %s represents a recovery.conf parameter name like + "recovery_end_command", and the 2nd is the value of that parameter. */ + (errmsg("%s \"%s\": return code %d", commandName, + command, rc))); + } +} + + +/* + * XLogArchiveNotify + * + * Create an archive notification file + * + * The name of the notification file is the message that will be picked up + * by the archiver, e.g. we write 0000000100000001000000C6.ready + * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6, + * then when complete, rename it to 0000000100000001000000C6.done + */ +void +XLogArchiveNotify(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + FILE *fd; + + /* insert an otherwise empty file called <XLOG>.ready */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + fd = AllocateFile(archiveStatusPath, "w"); + if (fd == NULL) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not create archive status file \"%s\": %m", + archiveStatusPath))); + return; + } + if (FreeFile(fd)) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write archive status file \"%s\": %m", + archiveStatusPath))); + return; + } + + /* Notify archiver that it's got something to do */ + if (IsUnderPostmaster) + SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER); +} + +/* + * Convenience routine to notify using segment number representation of filename + */ +void +XLogArchiveNotifySeg(XLogSegNo segno) +{ + char xlog[MAXFNAMELEN]; + + XLogFileName(xlog, ThisTimeLineID, segno); + XLogArchiveNotify(xlog); +} + +/* + * XLogArchiveCheckDone + * + * This is called when we are ready to delete or recycle an old XLOG segment + * file or backup history file. If it is okay to delete it then return true. + * If it is not time to delete it, make sure a .ready file exists, and return + * false. + * + * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists, + * then return false; else create <XLOG>.ready and return false. + * + * The reason we do things this way is so that if the original attempt to + * create <XLOG>.ready fails, we'll retry during subsequent checkpoints. + */ +bool +XLogArchiveCheckDone(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + struct stat stat_buf; + + /* Always deletable if archiving is off */ + if (!XLogArchivingActive()) + return true; + + /* First check for .done --- this means archiver is done with it */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return true; + + /* check for .ready --- this means archiver is still busy with it */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return false; + + /* Race condition --- maybe archiver just finished, so recheck */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return true; + + /* Retry creation of the .ready file */ + XLogArchiveNotify(xlog); + return false; +} + +/* + * XLogArchiveIsBusy + * + * Check to see if an XLOG segment file is still unarchived. + * This is almost but not quite the inverse of XLogArchiveCheckDone: in + * the first place we aren't chartered to recreate the .ready file, and + * in the second place we should consider that if the file is already gone + * then it's not busy. (This check is needed to handle the race condition + * that a checkpoint already deleted the no-longer-needed file.) + */ +bool +XLogArchiveIsBusy(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + struct stat stat_buf; + + /* First check for .done --- this means archiver is done with it */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return false; + + /* check for .ready --- this means archiver is still busy with it */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return true; + + /* Race condition --- maybe archiver just finished, so recheck */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return false; + + /* + * Check to see if the WAL file has been removed by checkpoint, which + * implies it has already been archived, and explains why we can't see a + * status file for it. + */ + snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog); + if (stat(archiveStatusPath, &stat_buf) != 0 && + errno == ENOENT) + return false; + + return true; +} + +/* + * XLogArchiveCleanup + * + * Cleanup archive notification file(s) for a particular xlog segment + */ +void +XLogArchiveCleanup(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + + /* Remove the .done file */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + unlink(archiveStatusPath); + /* should we complain about failure? */ + + /* Remove the .ready file if present --- normally it shouldn't be */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + unlink(archiveStatusPath); + /* should we complain about failure? */ +} |