aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/rmgrdesc/xlogdesc.c10
-rw-r--r--src/backend/access/transam/xlog.c192
-rw-r--r--src/bin/pg_ctl/pg_ctl.c18
-rw-r--r--src/include/access/xlog_internal.h6
-rw-r--r--src/include/catalog/pg_control.h1
5 files changed, 195 insertions, 32 deletions
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 506b208c9cf..69012985161 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -18,6 +18,7 @@
#include "access/xlog_internal.h"
#include "catalog/pg_control.h"
#include "utils/guc.h"
+#include "utils/timestamp.h"
/*
* GUC support
@@ -119,6 +120,15 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
memcpy(&fpw, rec, sizeof(bool));
appendStringInfo(buf, "full_page_writes: %s", fpw ? "true" : "false");
}
+ else if (info == XLOG_END_OF_RECOVERY)
+ {
+ xl_end_of_recovery xlrec;
+
+ memcpy(&xlrec, rec, sizeof(xl_end_of_recovery));
+ appendStringInfo(buf, "end_of_recovery: tli %u; time %s",
+ xlrec.ThisTimeLineID,
+ timestamptz_to_str(xlrec.end_time));
+ }
else
appendStringInfo(buf, "UNKNOWN");
}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index cf2f6e70cff..bcd379dca73 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -66,6 +66,7 @@
#define RECOVERY_COMMAND_FILE "recovery.conf"
#define RECOVERY_COMMAND_DONE "recovery.done"
#define PROMOTE_SIGNAL_FILE "promote"
+#define FAST_PROMOTE_SIGNAL_FILE "fast_promote"
/* User-settable parameters */
@@ -210,6 +211,9 @@ bool StandbyMode = false;
static char *PrimaryConnInfo = NULL;
static char *TriggerFile = NULL;
+/* whether request for fast promotion has been made yet */
+static bool fast_promote = false;
+
/* if recoveryStopsHere returns true, it saves actual stop xid/time/name here */
static TransactionId recoveryStopXid;
static TimestampTz recoveryStopTime;
@@ -611,6 +615,7 @@ static void CheckRequiredParameterValues(void);
static void XLogReportParameters(void);
static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI);
static void LocalSetXLogInsertAllowed(void);
+static void CreateEndOfRecoveryRecord(void);
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags);
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo);
@@ -642,7 +647,7 @@ static XLogRecord *ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
int emode, bool fetching_ckpt);
static void CheckRecoveryConsistency(void);
static XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader,
- XLogRecPtr RecPtr, int whichChkpt);
+ XLogRecPtr RecPtr, int whichChkpt, bool report);
static bool rescanLatestTimeLine(void);
static void WriteControlFile(void);
static void ReadControlFile(void);
@@ -4848,7 +4853,7 @@ StartupXLOG(void)
* When a backup_label file is present, we want to roll forward from
* the checkpoint it identifies, rather than using pg_control.
*/
- record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0);
+ record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true);
if (record != NULL)
{
memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
@@ -4890,7 +4895,7 @@ StartupXLOG(void)
*/
checkPointLoc = ControlFile->checkPoint;
RedoStartLSN = ControlFile->checkPointCopy.redo;
- record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1);
+ record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, true);
if (record != NULL)
{
ereport(DEBUG1,
@@ -4909,7 +4914,7 @@ StartupXLOG(void)
else
{
checkPointLoc = ControlFile->prevCheckPoint;
- record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2);
+ record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, true);
if (record != NULL)
{
ereport(LOG,
@@ -5393,22 +5398,33 @@ StartupXLOG(void)
}
/*
- * Before replaying this record, check if it is a shutdown
- * checkpoint record that causes the current timeline to
- * change. The checkpoint record is already considered to be
- * part of the new timeline, so we update ThisTimeLineID
- * before replaying it. That's important so that replayEndTLI,
- * which is recorded as the minimum recovery point's TLI if
+ * Before replaying this record, check if this record
+ * causes the current timeline to change. The record is
+ * already considered to be part of the new timeline,
+ * so we update ThisTimeLineID before replaying it.
+ * That's important so that replayEndTLI, which is
+ * recorded as the minimum recovery point's TLI if
* recovery stops after this record, is set correctly.
*/
- if (record->xl_rmid == RM_XLOG_ID &&
- (record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN)
+ if (record->xl_rmid == RM_XLOG_ID)
{
- CheckPoint checkPoint;
- TimeLineID newTLI;
+ TimeLineID newTLI = ThisTimeLineID;
+ uint8 info = record->xl_info & ~XLR_INFO_MASK;
+
+ if (info == XLOG_CHECKPOINT_SHUTDOWN)
+ {
+ CheckPoint checkPoint;
+
+ memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
+ newTLI = checkPoint.ThisTimeLineID;
+ }
+ else if (info == XLOG_END_OF_RECOVERY)
+ {
+ xl_end_of_recovery xlrec;
- memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
- newTLI = checkPoint.ThisTimeLineID;
+ memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
+ newTLI = xlrec.ThisTimeLineID;
+ }
if (newTLI != ThisTimeLineID)
{
@@ -5729,9 +5745,36 @@ StartupXLOG(void)
* allows some extra error checking in xlog_redo.
*/
if (bgwriterLaunched)
- RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
- CHECKPOINT_IMMEDIATE |
- CHECKPOINT_WAIT);
+ {
+ bool checkpoint_wait = true;
+
+ /*
+ * If we've been explicitly promoted with fast option,
+ * end of recovery without a checkpoint if possible.
+ */
+ if (fast_promote)
+ {
+ checkPointLoc = ControlFile->prevCheckPoint;
+ record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, false);
+ if (record != NULL)
+ {
+ checkpoint_wait = false;
+ CreateEndOfRecoveryRecord();
+ }
+ }
+
+ /*
+ * In most cases we will wait for a full checkpoint to complete.
+ *
+ * If not, issue a normal, non-immediate checkpoint but don't wait.
+ */
+ if (checkpoint_wait)
+ RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
+ CHECKPOINT_IMMEDIATE |
+ CHECKPOINT_WAIT);
+ else
+ RequestCheckpoint(0); /* No flags */
+ }
else
CreateCheckPoint(CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IMMEDIATE);
@@ -6060,12 +6103,15 @@ LocalSetXLogInsertAllowed(void)
*/
static XLogRecord *
ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
- int whichChkpt)
+ int whichChkpt, bool report)
{
XLogRecord *record;
if (!XRecOffIsValid(RecPtr))
{
+ if (!report)
+ return NULL;
+
switch (whichChkpt)
{
case 1:
@@ -6088,6 +6134,9 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
if (record == NULL)
{
+ if (!report)
+ return NULL;
+
switch (whichChkpt)
{
case 1:
@@ -6883,6 +6932,44 @@ CreateCheckPoint(int flags)
}
/*
+ * Mark the end of recovery in WAL, but without running a full checkpoint.
+ * We can expect that a restartpoint is likely to be in progress as we
+ * do this, though we are unwilling to wait for it to complete. So be
+ * careful to avoid taking the CheckpointLock anywhere here.
+ *
+ * CreateRestartPoint() allows for the case where recovery may end before
+ * the restartpoint completes so there is no concern of concurrent behaviour.
+ */
+static void
+CreateEndOfRecoveryRecord(void)
+{
+ xl_end_of_recovery xlrec;
+ XLogRecData rdata;
+
+ /* sanity check */
+ if (!RecoveryInProgress())
+ elog(ERROR, "can only be used to end recovery");
+
+ xlrec.end_time = GetCurrentTimestamp(); /* end_time is a TimestampTz, not a time_t */
+ xlrec.ThisTimeLineID = ThisTimeLineID;
+
+ LocalSetXLogInsertAllowed();
+
+ START_CRIT_SECTION();
+
+ rdata.data = (char *) &xlrec; /* record payload is just the fixed-size struct */
+ rdata.len = sizeof(xl_end_of_recovery);
+ rdata.buffer = InvalidBuffer;
+ rdata.next = NULL;
+
+ (void) XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
+
+ END_CRIT_SECTION();
+
+ LocalXLogInsertAllowed = -1; /* return to "check" state */
+}
+
+/*
* Flush all data in shared memory to disk, and fsync
*
* This is the common code shared between regular checkpoints and
@@ -7613,6 +7700,27 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
RecoveryRestartPoint(&checkPoint);
}
+ else if (info == XLOG_END_OF_RECOVERY)
+ {
+ xl_end_of_recovery xlrec;
+
+ memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
+
+ /*
+ * For Hot Standby, we could treat this like a Shutdown Checkpoint,
+ * but this case is rarer and harder to test, so the benefit doesn't
+ * outweigh the potential extra cost of maintenance.
+ */
+
+ /*
+ * We should've already switched to the new TLI before replaying this
+ * record.
+ */
+ if (xlrec.ThisTimeLineID != ThisTimeLineID)
+ ereport(PANIC,
+ (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
+ xlrec.ThisTimeLineID, ThisTimeLineID)));
+ }
else if (info == XLOG_NOOP)
{
/* nothing to do here */
@@ -9405,8 +9513,39 @@ CheckForStandbyTrigger(void)
if (IsPromoteTriggered())
{
- ereport(LOG,
+ /*
+ * In 9.1 and 9.2 the postmaster unlinked the promote file
+ * inside the signal handler. We now leave the file in place
+ * and let the Startup process do the unlink. This allows
+ * Startup to know whether we're doing fast or normal
+ * promotion. Fast promotion takes precedence.
+ */
+ if (stat(FAST_PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
+ {
+ unlink(FAST_PROMOTE_SIGNAL_FILE);
+ unlink(PROMOTE_SIGNAL_FILE);
+ fast_promote = true;
+ }
+ else if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
+ {
+ unlink(PROMOTE_SIGNAL_FILE);
+ fast_promote = false;
+ }
+
+ /*
+ * We only look for fast promote via the pg_ctl promote option.
+ * It would be possible to extend trigger file support for the
+ * fast promotion option but that wouldn't be backwards compatible
+ * anyway and we're looking to focus further work on the promote
+ * option as the right way to signal end of recovery.
+ */
+ if (fast_promote)
+ ereport(LOG,
+ (errmsg("received fast promote request")));
+ else
+ ereport(LOG,
(errmsg("received promote request")));
+
ResetPromoteTriggered();
triggered = true;
return true;
@@ -9435,15 +9574,10 @@ CheckPromoteSignal(void)
{
struct stat stat_buf;
- if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
- {
- /*
- * Since we are in a signal handler, it's not safe to elog. We
- * silently ignore any error from unlink.
- */
- unlink(PROMOTE_SIGNAL_FILE);
+ if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0 ||
+ stat(FAST_PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
return true;
- }
+
return false;
}
diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index e412d71dcff..e086b1244cc 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -1136,6 +1136,15 @@ do_promote(void)
exit(1);
}
+ /*
+ * Use two different kinds of promotion file so we can understand
+ * the difference between smart and fast promotion.
+ */
+ if (shutdown_mode >= FAST_MODE)
+ snprintf(promote_file, MAXPGPATH, "%s/fast_promote", pg_data);
+ else
+ snprintf(promote_file, MAXPGPATH, "%s/promote", pg_data);
+
if ((prmfile = fopen(promote_file, "w")) == NULL)
{
write_stderr(_("%s: could not create promote signal file \"%s\": %s\n"),
@@ -1799,7 +1808,7 @@ do_help(void)
" [-o \"OPTIONS\"]\n"), progname);
printf(_(" %s reload [-D DATADIR] [-s]\n"), progname);
printf(_(" %s status [-D DATADIR]\n"), progname);
- printf(_(" %s promote [-D DATADIR] [-s]\n"), progname);
+ printf(_(" %s promote [-D DATADIR] [-s] [-m PROMOTION-MODE]\n"), progname);
printf(_(" %s kill SIGNALNAME PID\n"), progname);
#if defined(WIN32) || defined(__CYGWIN__)
printf(_(" %s register [-N SERVICENAME] [-U USERNAME] [-P PASSWORD] [-D DATADIR]\n"
@@ -1828,7 +1837,7 @@ do_help(void)
printf(_(" -o OPTIONS command line options to pass to postgres\n"
" (PostgreSQL server executable) or initdb\n"));
printf(_(" -p PATH-TO-POSTGRES normally not necessary\n"));
- printf(_("\nOptions for stop or restart:\n"));
+ printf(_("\nOptions for stop, restart or promote:\n"));
printf(_(" -m, --mode=MODE MODE can be \"smart\", \"fast\", or \"immediate\"\n"));
printf(_("\nShutdown modes are:\n"));
@@ -1836,6 +1845,10 @@ do_help(void)
printf(_(" fast quit directly, with proper shutdown\n"));
printf(_(" immediate quit without complete shutdown; will lead to recovery on restart\n"));
+ printf(_("\nPromotion modes are:\n"));
+ printf(_(" smart promote after performing a checkpoint\n"));
+ printf(_(" fast promote quickly without waiting for checkpoint completion\n"));
+
printf(_("\nAllowed signal names for kill:\n"));
printf(" ABRT HUP INT QUIT TERM USR1 USR2\n");
@@ -2271,7 +2284,6 @@ main(int argc, char **argv)
snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pg_data);
snprintf(backup_file, MAXPGPATH, "%s/backup_label", pg_data);
snprintf(recovery_file, MAXPGPATH, "%s/recovery.conf", pg_data);
- snprintf(promote_file, MAXPGPATH, "%s/promote", pg_data);
}
switch (ctl_command)
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 43e1e60f9bf..ce9957e618f 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -217,6 +217,12 @@ typedef struct xl_restore_point
char rp_name[MAXFNAMELEN];
} xl_restore_point;
+/* End of recovery mark, when we don't do an END_OF_RECOVERY checkpoint */
+typedef struct xl_end_of_recovery
+{
+ TimestampTz end_time; /* when the record was created; shown by xlog_desc */
+ TimeLineID ThisTimeLineID; /* writer's timeline; redo PANICs if it differs from the current one */
+} xl_end_of_recovery;
/*
* XLogRecord is defined in xlog.h, but we avoid #including that to keep
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index e4a9abe7bc5..ec8cea7c86e 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -64,6 +64,7 @@ typedef struct CheckPoint
#define XLOG_PARAMETER_CHANGE 0x60
#define XLOG_RESTORE_POINT 0x70
#define XLOG_FPW_CHANGE 0x80
+#define XLOG_END_OF_RECOVERY 0x90
/*