diff options
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- | src/backend/access/transam/xlog.c | 44 |
1 files changed, 42 insertions, 2 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c0923d97f21..c633e111281 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -11694,6 +11694,40 @@ retry: Assert(reqLen <= readLen); *readTLI = curFileTLI; + + /* + * Check the page header immediately, so that we can retry immediately if + * it's not valid. This may seem unnecessary, because XLogReadRecord() + * validates the page header anyway, and would propagate the failure up to + * ReadRecord(), which would retry. However, there's a corner case with + * continuation records, if a record is split across two pages such that + * we would need to read the two pages from different sources. For + * example, imagine a scenario where a streaming replica is started up, + * and replay reaches a record that's split across two WAL segments. The + * first page is only available locally, in pg_wal, because it's already + * been recycled in the master. The second page, however, is not present + * in pg_wal, and we should stream it from the master. There is a recycled + * WAL segment present in pg_wal, with garbage contents, however. We would + * read the first page from the local WAL segment, but when reading the + * second page, we would read the bogus, recycled, WAL segment. If we + * didn't catch that case here, we would never recover, because + * ReadRecord() would retry reading the whole record from the beginning. + * + * Of course, this only catches errors in the page header, which is what + * happens in the case of a recycled WAL segment. Other kinds of errors or + * corruption still have the same problem. But this at least fixes the + * common case, which can happen as part of normal operation. + * + * Validating the page header is cheap enough that doing it twice + * shouldn't be a big deal from a performance point of view. 
+ */ + if (!XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf)) + { + /* reset any error XLogReaderValidatePageHeader() might have set */ + xlogreader->errormsg_buf[0] = '\0'; + goto next_record_is_invalid; + } + return readLen; next_record_is_invalid: @@ -11828,12 +11862,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, } else { - ptr = tliRecPtr; + ptr = RecPtr; + + /* + * Use the record begin position to determine the + * TLI, rather than the position we're reading. + */ tli = tliOfPointInHistory(tliRecPtr, expectedTLEs); if (curFileTLI > 0 && tli < curFileTLI) elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u", - (uint32) (ptr >> 32), (uint32) ptr, + (uint32) (tliRecPtr >> 32), + (uint32) tliRecPtr, tli, curFileTLI); } curFileTLI = tli; |