diff options
author | Robert Haas <rhaas@postgresql.org> | 2024-07-26 14:50:21 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2024-07-26 14:51:10 -0400 |
commit | 53b327f83ea2c820b26a9b51f49f498221bc4379 (patch) | |
tree | 0001c76ea84a493f4656ccdb0f0aab6acccf5d1b /src/backend/postmaster/walsummarizer.c | |
parent | f2af1f4559ea74a6133ac36df3204c12e8d12ed3 (diff) | |
download | postgresql-53b327f83ea2c820b26a9b51f49f498221bc4379.tar.gz postgresql-53b327f83ea2c820b26a9b51f49f498221bc4379.zip |
Wait for WAL summarization to catch up before creating .partial file.
When a standby is promoted, CleanupAfterArchiveRecovery() may decide
to rename the final WAL file from the old timeline by adding ".partial"
to the name. If WAL summarization is enabled and this file is renamed
before its partial contents are summarized, WAL summarization breaks:
the summarizer gets stuck at that point in the WAL stream and just
errors out.
To fix that, first make the startup process wait for WAL summarization
to catch up before renaming the file. Generally, this should be quick,
and if it's not, the user can shut off summarize_wal and try again.
To make this fix work, also teach the WAL summarizer that after a
promotion has occurred, no more WAL can appear on the previous
timeline: previously, the WAL summarizer wouldn't switch to the new
timeline until we actually started writing WAL there, but that meant
that when the startup process was waiting for the WAL summarizer, it
was waiting for an action that the summarizer wasn't yet prepared to
take.
In the process of fixing these bugs, I realized that the logic to wait
for WAL summarization to catch up was spread out in a way that made
it difficult to reuse properly, so this code refactors things to make
it easier.
Finally, add a test case that would have caught this bug and the
previously-fixed bug that WAL summarization sometimes needs to back up
when the timeline changes.
Discussion: https://postgr.es/m/CA+TgmoZGEsZodXC4f=XZNkAeyuDmWTSkpkjCEOcF19Am0mt_OA@mail.gmail.com
Diffstat (limited to 'src/backend/postmaster/walsummarizer.c')
-rw-r--r-- | src/backend/postmaster/walsummarizer.c | 142 |
1 file changed, 118 insertions, 24 deletions
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c index 6bbc0405107..b62e2c36de9 100644 --- a/src/backend/postmaster/walsummarizer.c +++ b/src/backend/postmaster/walsummarizer.c @@ -650,54 +650,132 @@ SetWalSummarizerLatch(void) } /* - * Wait until WAL summarization reaches the given LSN, but not longer than - * the given timeout. + * Wait until WAL summarization reaches the given LSN, but time out with an + * error if the summarizer seems to be stuck. * - * The return value is the first still-unsummarized LSN. If it's greater than - * or equal to the passed LSN, then that LSN was reached. If not, we timed out. - * - * Either way, *pending_lsn is set to the value taken from WalSummarizerCtl. + * Returns immediately if summarize_wal is turned off while we wait. Caller + * is expected to handle this case, if necessary. */ -XLogRecPtr -WaitForWalSummarization(XLogRecPtr lsn, long timeout, XLogRecPtr *pending_lsn) +void +WaitForWalSummarization(XLogRecPtr lsn) { - TimestampTz start_time = GetCurrentTimestamp(); - TimestampTz deadline = TimestampTzPlusMilliseconds(start_time, timeout); - XLogRecPtr summarized_lsn; + TimestampTz initial_time, + cycle_time, + current_time; + XLogRecPtr prior_pending_lsn = InvalidXLogRecPtr; + int deadcycles = 0; - Assert(!XLogRecPtrIsInvalid(lsn)); - Assert(timeout > 0); + initial_time = cycle_time = GetCurrentTimestamp(); while (1) { - TimestampTz now; - long remaining_timeout; + long timeout_in_ms = 10000; + XLogRecPtr summarized_lsn; + XLogRecPtr pending_lsn; + + CHECK_FOR_INTERRUPTS(); + + /* If WAL summarization is disabled while we're waiting, give up. */ + if (!summarize_wal) + return; /* * If the LSN summarized on disk has reached the target value, stop. 
*/ LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE); summarized_lsn = WalSummarizerCtl->summarized_lsn; - *pending_lsn = WalSummarizerCtl->pending_lsn; + pending_lsn = WalSummarizerCtl->pending_lsn; LWLockRelease(WALSummarizerLock); + + /* If WAL summarization has progressed sufficiently, stop waiting. */ if (summarized_lsn >= lsn) break; - /* Timeout reached? If yes, stop. */ - now = GetCurrentTimestamp(); - remaining_timeout = TimestampDifferenceMilliseconds(now, deadline); - if (remaining_timeout <= 0) - break; + /* Recheck current time. */ + current_time = GetCurrentTimestamp(); + + /* Have we finished the current cycle of waiting? */ + if (TimestampDifferenceMilliseconds(cycle_time, + current_time) >= timeout_in_ms) + { + long elapsed_seconds; + + /* Begin new wait cycle. */ + cycle_time = TimestampTzPlusMilliseconds(cycle_time, + timeout_in_ms); + + /* + * Keep track of the number of cycles during which there has been + * no progression of pending_lsn. If pending_lsn is not advancing, + * that means that not only are no new files appearing on disk, + * but we're not even incorporating new records into the in-memory + * state. + */ + if (pending_lsn > prior_pending_lsn) + { + prior_pending_lsn = pending_lsn; + deadcycles = 0; + } + else + ++deadcycles; + + /* + * If we've managed to wait for an entire minute without the WAL + * summarizer absorbing a single WAL record, error out; probably + * something is wrong. + * + * We could consider also erroring out if the summarizer is taking + * too long to catch up, but it's not clear what rate of progress + * would be acceptable and what would be too slow. So instead, we + * just try to error out in the case where there's no progress at + * all. That seems likely to catch a reasonable number of the + * things that can go wrong in practice (e.g. the summarizer + * process is completely hung, say because somebody hooked up a + * debugger to it or something) without giving up too quickly when + * the system is just slow. 
+ */ + if (deadcycles >= 6) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL summarization is not progressing"), + errdetail("Summarization is needed through %X/%X, but is stuck at %X/%X on disk and %X/%X in memory.", + LSN_FORMAT_ARGS(lsn), + LSN_FORMAT_ARGS(summarized_lsn), + LSN_FORMAT_ARGS(pending_lsn)))); + + + /* + * Otherwise, just let the user know what's happening. + */ + elapsed_seconds = + TimestampDifferenceMilliseconds(initial_time, + current_time) / 1000; + ereport(WARNING, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("still waiting for WAL summarization through %X/%X after %ld seconds", + LSN_FORMAT_ARGS(lsn), + elapsed_seconds), + errdetail("Summarization has reached %X/%X on disk and %X/%X in memory.", + LSN_FORMAT_ARGS(summarized_lsn), + LSN_FORMAT_ARGS(pending_lsn)))); + } + + /* + * Align the wait time to prevent drift. This doesn't really matter, + * but we'd like the warnings about how long we've been waiting to say + * 10 seconds, 20 seconds, 30 seconds, 40 seconds ... without ever + * drifting to something that is not a multiple of ten. + */ + timeout_in_ms -= + TimestampDifferenceMilliseconds(cycle_time, current_time); /* Wait and see. */ ConditionVariableTimedSleep(&WalSummarizerCtl->summary_file_cv, - remaining_timeout, + timeout_in_ms, WAIT_EVENT_WAL_SUMMARY_READY); } ConditionVariableCancelSleep(); - - return summarized_lsn; } /* @@ -730,6 +808,22 @@ GetLatestLSN(TimeLineID *tli) TimeLineID flush_tli; XLogRecPtr replay_lsn; TimeLineID replay_tli; + TimeLineID insert_tli; + + /* + * After the insert TLI has been set and before the control file has + * been updated to show the DB in production, RecoveryInProgress() + * will return true, because it's not yet safe for all backends to + * begin writing WAL. However, replay has already ceased, so from our + * point of view, recovery is already over. 
We should summarize up to + * where replay stopped and then prepare to resume at the start of the + * insert timeline. + */ + if ((insert_tli = GetWALInsertionTimeLineIfSet()) != 0) + { + *tli = insert_tli; + return GetXLogReplayRecPtr(NULL); + } /* * What we really want to know is how much WAL has been flushed to |