diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2018-05-05 01:34:53 +0300 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2018-05-05 01:34:53 +0300 |
commit | 0668719801838aa6a8bda330ff9b3d20097ea844 (patch) | |
tree | 72e42723ac95e2273fed0bf46c620cccb833b01a /src/backend/access/transam/xlogreader.c | |
parent | d2599ecfcc74fea9fad1720a70210a740c716730 (diff) | |
download | postgresql-0668719801838aa6a8bda330ff9b3d20097ea844.tar.gz postgresql-0668719801838aa6a8bda330ff9b3d20097ea844.zip |
Fix scenario where streaming standby gets stuck at a continuation record.
If a continuation record is split so that its first half has already been
removed from the master, and is only present in pg_wal, and there is a
recycled WAL segment in the standby server that looks like it would
contain the second half, recovery would get stuck. The code in
XLogPageRead() incorrectly started streaming at the beginning of the
WAL record, even if we had already read the first page.
Backpatch to 9.4. In principle, older versions have the same problem, but
without replication slots, there was no straightforward mechanism to
prevent the master from recycling old WAL that was still needed by standby.
Without such a mechanism, I think it's reasonable to assume that there's
enough slack in how many old segments are kept around to not run into this,
or you have a WAL archive.
Reported by Jonathon Nelson. Analysis and patch by Kyotaro HORIGUCHI, with
some extra comments by me.
Discussion: https://www.postgresql.org/message-id/CACJqAM3xVz0JY1XFDKPP%2BJoJAjoGx%3DGNuOAshEDWCext7BFvCQ%40mail.gmail.com
Diffstat (limited to 'src/backend/access/transam/xlogreader.c')
-rw-r--r-- | src/backend/access/transam/xlogreader.c | 27 |
1 files changed, 16 insertions, 11 deletions
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 3a86f3497eb..a7953f323b6 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -27,8 +27,6 @@ static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength); -static bool ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr, - XLogPageHeader hdr); static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess); static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record, @@ -533,7 +531,6 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) */ if (targetSegNo != state->readSegNo && targetPageOff != 0) { - XLogPageHeader hdr; XLogRecPtr targetSegmentPtr = pageptr - targetPageOff; readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ, @@ -545,9 +542,8 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) /* we can be sure to have enough WAL available, we scrolled back */ Assert(readLen == XLOG_BLCKSZ); - hdr = (XLogPageHeader) state->readBuf; - - if (!ValidXLogPageHeader(state, targetSegmentPtr, hdr)) + if (!XLogReaderValidatePageHeader(state, targetSegmentPtr, + state->readBuf)) goto err; } @@ -584,7 +580,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) /* * Now that we know we have the full header, validate it. */ - if (!ValidXLogPageHeader(state, pageptr, hdr)) + if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr)) goto err; /* update read state information */ @@ -709,15 +705,19 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr) } /* - * Validate a page header + * Validate a page header. + * + * Check if 'phdr' is valid as the header of the XLog page at position + * 'recptr'. */ -static bool -ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr, - XLogPageHeader hdr) +bool +XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, + char *phdr) { XLogRecPtr recaddr; XLogSegNo segno; int32 offset; + XLogPageHeader hdr = (XLogPageHeader) phdr; Assert((recptr % XLOG_BLCKSZ) == 0); @@ -805,6 +805,11 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr, return false; } + /* + * Check that the address on the page agrees with what we expected. + * This check typically fails when an old WAL segment is recycled, + * and hasn't yet been overwritten with new data yet. + */ if (hdr->xlp_pageaddr != recaddr) { char fname[MAXFNAMELEN]; |