aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/transam/xlog.c33
-rw-r--r--src/backend/backup/basebackup_incremental.c90
-rw-r--r--src/backend/postmaster/walsummarizer.c142
3 files changed, 157 insertions, 108 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index bb2685304e4..7f136026277 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -500,6 +500,11 @@ typedef struct XLogCtlData
* If we create a new timeline when the system was started up,
* PrevTimeLineID is the old timeline's ID that we forked off from.
* Otherwise it's equal to InsertTimeLineID.
+ *
+ * We set these fields while holding info_lck. Most code that reads these
+ * values knows that recovery is no longer in progress and so can safely
+ * read the value without a lock, but code that could be run either during
+ * or after recovery can take info_lck while reading these values.
*/
TimeLineID InsertTimeLineID;
TimeLineID PrevTimeLineID;
@@ -5317,6 +5322,13 @@ CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog,
char partialfname[MAXFNAMELEN];
char partialpath[MAXPGPATH];
+ /*
+ * If we're summarizing WAL, we can't rename the partial file
+ * until the summarizer finishes with it, else it will fail.
+ */
+ if (summarize_wal)
+ WaitForWalSummarization(EndOfLog);
+
XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
@@ -5947,8 +5959,10 @@ StartupXLOG(void)
}
/* Save the selected TimeLineID in shared memory, too */
+ SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->InsertTimeLineID = newTLI;
XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
+ SpinLockRelease(&XLogCtl->info_lck);
/*
* Actually, if WAL ended in an incomplete record, skip the parts that
@@ -6484,6 +6498,25 @@ GetWALInsertionTimeLine(void)
}
/*
+ * GetWALInsertionTimeLineIfSet -- If the system is not in recovery, returns
+ * the WAL insertion timeline; else, returns 0. Wherever possible, use
+ * GetWALInsertionTimeLine() instead, since it's cheaper. Note that this
+ * function decides recovery has ended as soon as the insert TLI is set, which
+ * happens before we set XLogCtl->SharedRecoveryState to RECOVERY_STATE_DONE.
+ */
+TimeLineID
+GetWALInsertionTimeLineIfSet(void)
+{
+ TimeLineID insertTLI;
+
+ SpinLockAcquire(&XLogCtl->info_lck);
+ insertTLI = XLogCtl->InsertTimeLineID;
+ SpinLockRelease(&XLogCtl->info_lck);
+
+ return insertTLI;
+}
+
+/*
* GetLastImportantRecPtr -- Returns the LSN of the last important record
* inserted. All records not explicitly marked as unimportant are considered
* important.
diff --git a/src/backend/backup/basebackup_incremental.c b/src/backend/backup/basebackup_incremental.c
index cc2e168129a..af361f38a6c 100644
--- a/src/backend/backup/basebackup_incremental.c
+++ b/src/backend/backup/basebackup_incremental.c
@@ -277,12 +277,6 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
TimeLineID earliest_wal_range_tli = 0;
XLogRecPtr earliest_wal_range_start_lsn = InvalidXLogRecPtr;
TimeLineID latest_wal_range_tli = 0;
- XLogRecPtr summarized_lsn;
- XLogRecPtr pending_lsn;
- XLogRecPtr prior_pending_lsn = InvalidXLogRecPtr;
- int deadcycles = 0;
- TimestampTz initial_time,
- current_time;
Assert(ib->buf.data == NULL);
@@ -458,85 +452,13 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
}
/*
- * Wait for WAL summarization to catch up to the backup start LSN (but
- * time out if it doesn't do so quickly enough).
+ * Wait for WAL summarization to catch up to the backup start LSN. This
+ * will throw an error if the WAL summarizer appears to be stuck. If WAL
+ * summarization gets disabled while we're waiting, this will return
+ * immediately, and we'll error out further down if the WAL summaries are
+ * incomplete.
*/
- initial_time = current_time = GetCurrentTimestamp();
- while (1)
- {
- long timeout_in_ms = 10000;
- long elapsed_seconds;
-
- /*
- * Align the wait time to prevent drift. This doesn't really matter,
- * but we'd like the warnings about how long we've been waiting to say
- * 10 seconds, 20 seconds, 30 seconds, 40 seconds ... without ever
- * drifting to something that is not a multiple of ten.
- */
- timeout_in_ms -=
- TimestampDifferenceMilliseconds(initial_time, current_time) %
- timeout_in_ms;
-
- /* Wait for up to 10 seconds. */
- summarized_lsn = WaitForWalSummarization(backup_state->startpoint,
- timeout_in_ms, &pending_lsn);
-
- /* If WAL summarization has progressed sufficiently, stop waiting. */
- if (summarized_lsn >= backup_state->startpoint)
- break;
-
- /*
- * Keep track of the number of cycles during which there has been no
- * progression of pending_lsn. If pending_lsn is not advancing, that
- * means that not only are no new files appearing on disk, but we're
- * not even incorporating new records into the in-memory state.
- */
- if (pending_lsn > prior_pending_lsn)
- {
- prior_pending_lsn = pending_lsn;
- deadcycles = 0;
- }
- else
- ++deadcycles;
-
- /*
- * If we've managed to wait for an entire minute without the WAL
- * summarizer absorbing a single WAL record, error out; probably
- * something is wrong.
- *
- * We could consider also erroring out if the summarizer is taking too
- * long to catch up, but it's not clear what rate of progress would be
- * acceptable and what would be too slow. So instead, we just try to
- * error out in the case where there's no progress at all. That seems
- * likely to catch a reasonable number of the things that can go wrong
- * in practice (e.g. the summarizer process is completely hung, say
- * because somebody hooked up a debugger to it or something) without
- * giving up too quickly when the system is just slow.
- */
- if (deadcycles >= 6)
- ereport(ERROR,
- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("WAL summarization is not progressing"),
- errdetail("Summarization is needed through %X/%X, but is stuck at %X/%X on disk and %X/%X in memory.",
- LSN_FORMAT_ARGS(backup_state->startpoint),
- LSN_FORMAT_ARGS(summarized_lsn),
- LSN_FORMAT_ARGS(pending_lsn))));
-
- /*
- * Otherwise, just let the user know what's happening.
- */
- current_time = GetCurrentTimestamp();
- elapsed_seconds =
- TimestampDifferenceMilliseconds(initial_time, current_time) / 1000;
- ereport(WARNING,
- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("still waiting for WAL summarization through %X/%X after %ld seconds",
- LSN_FORMAT_ARGS(backup_state->startpoint),
- elapsed_seconds),
- errdetail("Summarization has reached %X/%X on disk and %X/%X in memory.",
- LSN_FORMAT_ARGS(summarized_lsn),
- LSN_FORMAT_ARGS(pending_lsn))));
- }
+ WaitForWalSummarization(backup_state->startpoint);
/*
* Retrieve a list of all WAL summaries on any timeline that overlap with
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index 6bbc0405107..b62e2c36de9 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -650,54 +650,132 @@ SetWalSummarizerLatch(void)
}
/*
- * Wait until WAL summarization reaches the given LSN, but not longer than
- * the given timeout.
+ * Wait until WAL summarization reaches the given LSN, but time out with an
+ * error if the summarizer seems to be stuck.
*
- * The return value is the first still-unsummarized LSN. If it's greater than
- * or equal to the passed LSN, then that LSN was reached. If not, we timed out.
- *
- * Either way, *pending_lsn is set to the value taken from WalSummarizerCtl.
+ * Returns immediately if summarize_wal is turned off while we wait. Caller
+ * is expected to handle this case, if necessary.
*/
-XLogRecPtr
-WaitForWalSummarization(XLogRecPtr lsn, long timeout, XLogRecPtr *pending_lsn)
+void
+WaitForWalSummarization(XLogRecPtr lsn)
{
- TimestampTz start_time = GetCurrentTimestamp();
- TimestampTz deadline = TimestampTzPlusMilliseconds(start_time, timeout);
- XLogRecPtr summarized_lsn;
+ TimestampTz initial_time,
+ cycle_time,
+ current_time;
+ XLogRecPtr prior_pending_lsn = InvalidXLogRecPtr;
+ int deadcycles = 0;
- Assert(!XLogRecPtrIsInvalid(lsn));
- Assert(timeout > 0);
+ initial_time = cycle_time = GetCurrentTimestamp();
while (1)
{
- TimestampTz now;
- long remaining_timeout;
+ long timeout_in_ms = 10000;
+ XLogRecPtr summarized_lsn;
+ XLogRecPtr pending_lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* If WAL summarization is disabled while we're waiting, give up. */
+ if (!summarize_wal)
+ return;
/*
* If the LSN summarized on disk has reached the target value, stop.
*/
LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
summarized_lsn = WalSummarizerCtl->summarized_lsn;
- *pending_lsn = WalSummarizerCtl->pending_lsn;
+ pending_lsn = WalSummarizerCtl->pending_lsn;
LWLockRelease(WALSummarizerLock);
+
+ /* If WAL summarization has progressed sufficiently, stop waiting. */
if (summarized_lsn >= lsn)
break;
- /* Timeout reached? If yes, stop. */
- now = GetCurrentTimestamp();
- remaining_timeout = TimestampDifferenceMilliseconds(now, deadline);
- if (remaining_timeout <= 0)
- break;
+ /* Recheck current time. */
+ current_time = GetCurrentTimestamp();
+
+ /* Have we finished the current cycle of waiting? */
+ if (TimestampDifferenceMilliseconds(cycle_time,
+ current_time) >= timeout_in_ms)
+ {
+ long elapsed_seconds;
+
+ /* Begin new wait cycle. */
+ cycle_time = TimestampTzPlusMilliseconds(cycle_time,
+ timeout_in_ms);
+
+ /*
+ * Keep track of the number of cycles during which there has been
+ * no progression of pending_lsn. If pending_lsn is not advancing,
+ * that means that not only are no new files appearing on disk,
+ * but we're not even incorporating new records into the in-memory
+ * state.
+ */
+ if (pending_lsn > prior_pending_lsn)
+ {
+ prior_pending_lsn = pending_lsn;
+ deadcycles = 0;
+ }
+ else
+ ++deadcycles;
+
+ /*
+ * If we've managed to wait for an entire minute without the WAL
+ * summarizer absorbing a single WAL record, error out; probably
+ * something is wrong.
+ *
+ * We could consider also erroring out if the summarizer is taking
+ * too long to catch up, but it's not clear what rate of progress
+ * would be acceptable and what would be too slow. So instead, we
+ * just try to error out in the case where there's no progress at
+ * all. That seems likely to catch a reasonable number of the
+ * things that can go wrong in practice (e.g. the summarizer
+ * process is completely hung, say because somebody hooked up a
+ * debugger to it or something) without giving up too quickly when
+ * the system is just slow.
+ */
+ if (deadcycles >= 6)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("WAL summarization is not progressing"),
+ errdetail("Summarization is needed through %X/%X, but is stuck at %X/%X on disk and %X/%X in memory.",
+ LSN_FORMAT_ARGS(lsn),
+ LSN_FORMAT_ARGS(summarized_lsn),
+ LSN_FORMAT_ARGS(pending_lsn))));
+
+
+ /*
+ * Otherwise, just let the user know what's happening.
+ */
+ elapsed_seconds =
+ TimestampDifferenceMilliseconds(initial_time,
+ current_time) / 1000;
+ ereport(WARNING,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("still waiting for WAL summarization through %X/%X after %ld seconds",
+ LSN_FORMAT_ARGS(lsn),
+ elapsed_seconds),
+ errdetail("Summarization has reached %X/%X on disk and %X/%X in memory.",
+ LSN_FORMAT_ARGS(summarized_lsn),
+ LSN_FORMAT_ARGS(pending_lsn))));
+ }
+
+ /*
+ * Align the wait time to prevent drift. This doesn't really matter,
+ * but we'd like the warnings about how long we've been waiting to say
+ * 10 seconds, 20 seconds, 30 seconds, 40 seconds ... without ever
+ * drifting to something that is not a multiple of ten.
+ */
+ timeout_in_ms -=
+ TimestampDifferenceMilliseconds(cycle_time, current_time);
/* Wait and see. */
ConditionVariableTimedSleep(&WalSummarizerCtl->summary_file_cv,
- remaining_timeout,
+ timeout_in_ms,
WAIT_EVENT_WAL_SUMMARY_READY);
}
ConditionVariableCancelSleep();
-
- return summarized_lsn;
}
/*
@@ -730,6 +808,22 @@ GetLatestLSN(TimeLineID *tli)
TimeLineID flush_tli;
XLogRecPtr replay_lsn;
TimeLineID replay_tli;
+ TimeLineID insert_tli;
+
+ /*
+ * After the insert TLI has been set and before the control file has
+ * been updated to show the DB in production, RecoveryInProgress()
+ * will return true, because it's not yet safe for all backends to
+ * begin writing WAL. However, replay has already ceased, so from our
+ * point of view, recovery is already over. We should summarize up to
+ * where replay stopped and then prepare to resume at the start of the
+ * insert timeline.
+ */
+ if ((insert_tli = GetWALInsertionTimeLineIfSet()) != 0)
+ {
+ *tli = insert_tli;
+ return GetXLogReplayRecPtr(NULL);
+ }
/*
* What we really want to know is how much WAL has been flushed to