diff options
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- | src/backend/access/transam/xlog.c | 111 |
1 files changed, 107 insertions, 4 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 7f9edef435c..07c68adf0bc 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -60,6 +60,7 @@ #include "utils/timestamp.h" #include "pg_trace.h" +extern bool bootstrap_data_checksums; /* File path names (all relative to $PGDATA) */ #define RECOVERY_COMMAND_FILE "recovery.conf" @@ -730,6 +731,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) bool updrqst; bool doPageWrites; bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH); + bool isHint = (rmid == RM_XLOG_ID && info == XLOG_HINT); uint8 info_orig = info; static XLogRecord *rechdr; @@ -1000,6 +1002,18 @@ begin:; } /* + * If this is a hint record and we don't need a backup block then + * we have no more work to do and can exit quickly without inserting + * a WAL record at all. In that case return InvalidXLogRecPtr. + */ + if (isHint && !(info & XLR_BKP_BLOCK_MASK)) + { + LWLockRelease(WALInsertLock); + END_CRIT_SECTION(); + return InvalidXLogRecPtr; + } + + /* * If the current page is completely full, the record goes to the next * page, right after the page header. */ @@ -1253,10 +1267,10 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites, * not. We don't need the buffer header lock for PageGetLSN because we * have exclusive lock on the page and/or the relation. */ - *lsn = PageGetLSN(page); + *lsn = BufferGetLSNAtomic(rdata->buffer); if (doPageWrites && - PageGetLSN(page) <= RedoRecPtr) + *lsn <= RedoRecPtr) { /* * The page needs to be backed up, so set up *bkpb @@ -3187,6 +3201,11 @@ RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index, BLCKSZ - (bkpb.hole_offset + bkpb.hole_length)); } + /* + * Any checksum set on this page will be invalid. We don't need + * to reset it here since it will be set before being written. 
+ */ + PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -3767,6 +3786,16 @@ GetSystemIdentifier(void) } /* + * Are checksums enabled for data pages? + */ +bool +DataChecksumsEnabled(void) +{ + Assert(ControlFile != NULL); + return ControlFile->data_checksums; +} + +/* * Returns a fake LSN for unlogged relations. * * Each call generates an LSN that is greater than any previous value @@ -4092,6 +4121,7 @@ BootStrapXLOG(void) ControlFile->max_prepared_xacts = max_prepared_xacts; ControlFile->max_locks_per_xact = max_locks_per_xact; ControlFile->wal_level = wal_level; + ControlFile->data_checksums = bootstrap_data_checksums; /* some additional ControlFile fields are set in WriteControlFile() */ @@ -7602,6 +7632,51 @@ XLogRestorePoint(const char *rpName) } /* + * Write a backup block if needed when we are setting a hint. Note that + * this may be called for a variety of page types, not just heaps. + * + * Deciding the "if needed" part is delicate and requires us to either + * grab WALInsertLock or check the info_lck spinlock. If we check the + * spinlock and it says Yes then we will need to get WALInsertLock as well, + * so the design choice here is to just go straight for the WALInsertLock + * and trust that calls to this function are minimised elsewhere. + * + * Callable while holding just share lock on the buffer content. + * + * Possible that multiple concurrent backends could attempt to write + * WAL records. In that case, more than one backup block may be recorded + * though that isn't important to the outcome and the backup blocks are + * likely to be identical anyway. 
+ */ +#define XLOG_HINT_WATERMARK 13579 +XLogRecPtr +XLogSaveBufferForHint(Buffer buffer) +{ + /* + * Make an XLOG entry reporting the hint + */ + XLogRecData rdata[2]; + int watermark = XLOG_HINT_WATERMARK; + + /* + * Not allowed to have zero-length records, so use a small watermark + */ + rdata[0].data = (char *) (&watermark); + rdata[0].len = sizeof(int); + rdata[0].buffer = InvalidBuffer; + rdata[0].buffer_std = false; + rdata[0].next = &(rdata[1]); + + rdata[1].data = NULL; + rdata[1].len = 0; + rdata[1].buffer = buffer; + rdata[1].buffer_std = true; + rdata[1].next = NULL; + + return XLogInsert(RM_XLOG_ID, XLOG_HINT, rdata); +} + +/* * Check if any of the GUC parameters that are critical for hot standby * have changed, and update the value in pg_control file if necessary. */ @@ -7767,8 +7842,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) { uint8 info = record->xl_info & ~XLR_INFO_MASK; - /* Backup blocks are not used in xlog records */ - Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); + /* Backup blocks are not used in most xlog records */ + Assert(info == XLOG_HINT || !(record->xl_info & XLR_BKP_BLOCK_MASK)); if (info == XLOG_NEXTOID) { @@ -7961,6 +8036,34 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) { /* nothing to do here */ } + else if (info == XLOG_HINT) + { +#ifdef USE_ASSERT_CHECKING + int *watermark = (int *) XLogRecGetData(record); +#endif + + /* Check the watermark is correct for the hint record */ + Assert(*watermark == XLOG_HINT_WATERMARK); + + /* Backup blocks must be present for smgr hint records */ + Assert(record->xl_info & XLR_BKP_BLOCK_MASK); + + /* + * Hint records have no information that needs to be replayed. + * The sole purpose of them is to ensure that a hint bit does + * not cause a checksum invalidation if a hint bit write should + * cause a torn page. So the body of the record is empty but + * there must be one backup block. 
+ * + * Since the only change in the backup block is a hint bit, + * there is no conflict with Hot Standby. + * + * This also means there is no corresponding API call for this, + * so an smgr implementation has no need to implement anything. + * Which means nothing is needed in md.c etc + */ + RestoreBackupBlock(lsn, record, 0, false, false); + } else if (info == XLOG_BACKUP_END) { XLogRecPtr startpoint; |