aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/xlog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- src/backend/access/transam/xlog.c 111
1 files changed, 107 insertions, 4 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7f9edef435c..07c68adf0bc 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -60,6 +60,7 @@
#include "utils/timestamp.h"
#include "pg_trace.h"
+extern bool bootstrap_data_checksums;
/* File path names (all relative to $PGDATA) */
#define RECOVERY_COMMAND_FILE "recovery.conf"
@@ -730,6 +731,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
bool updrqst;
bool doPageWrites;
bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
+ bool isHint = (rmid == RM_XLOG_ID && info == XLOG_HINT);
uint8 info_orig = info;
static XLogRecord *rechdr;
@@ -1000,6 +1002,18 @@ begin:;
}
/*
+ * If this is a hint record and we don't need a backup block then
+ * we have no more work to do and can exit quickly without inserting
+ * a WAL record at all. In that case return InvalidXLogRecPtr.
+ */
+ if (isHint && !(info & XLR_BKP_BLOCK_MASK))
+ {
+ LWLockRelease(WALInsertLock);
+ END_CRIT_SECTION();
+ return InvalidXLogRecPtr;
+ }
+
+ /*
* If the current page is completely full, the record goes to the next
* page, right after the page header.
*/
@@ -1253,10 +1267,10 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites,
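* not. Holding an exclusive lock on the page and/or the relation would make
* the buffer header lock unnecessary for PageGetLSN, but hint-bit records
* (see XLogSaveBufferForHint) are callable with only a share lock on the
* buffer content, so the LSN is read with BufferGetLSNAtomic instead.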
*/
- *lsn = PageGetLSN(page);
+ *lsn = BufferGetLSNAtomic(rdata->buffer);
if (doPageWrites &&
- PageGetLSN(page) <= RedoRecPtr)
+ *lsn <= RedoRecPtr)
{
/*
* The page needs to be backed up, so set up *bkpb
@@ -3187,6 +3201,11 @@ RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
}
+ /*
+ * Any checksum set on this page will be invalid. We don't need
+ * to reset it here since it will be set before being written.
+ */
+
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -3767,6 +3786,16 @@ GetSystemIdentifier(void)
}
/*
+ * Are checksums enabled for data pages?
+ *
+ * Returns the data_checksums flag held in the ControlFile structure.
+ * ControlFile must already be set up (non-NULL) when this is called; that
+ * precondition is only asserted, not handled, so callers must not invoke
+ * this before the control file data is available.
+ */
+bool
+DataChecksumsEnabled(void)
+{
+ Assert(ControlFile != NULL);
+ return ControlFile->data_checksums;
+}
+
+/*
* Returns a fake LSN for unlogged relations.
*
* Each call generates an LSN that is greater than any previous value
@@ -4092,6 +4121,7 @@ BootStrapXLOG(void)
ControlFile->max_prepared_xacts = max_prepared_xacts;
ControlFile->max_locks_per_xact = max_locks_per_xact;
ControlFile->wal_level = wal_level;
+ ControlFile->data_checksums = bootstrap_data_checksums;
/* some additional ControlFile fields are set in WriteControlFile() */
@@ -7602,6 +7632,51 @@ XLogRestorePoint(const char *rpName)
}
/*
+ * Write a backup block if needed when we are setting a hint. Note that
+ * this may be called for a variety of page types, not just heaps.
+ *
+ * Deciding the "if needed" part is delicate and requires us to either
+ * grab WALInsertLock or check the info_lck spinlock. If we check the
+ * spinlock and it says Yes then we will need to get WALInsertLock as well,
+ * so the design choice here is to just go straight for the WALInsertLock
+ * and trust that calls to this function are minimised elsewhere.
+ *
+ * Callable while holding just share lock on the buffer content.
+ *
+ * It is possible that multiple concurrent backends attempt to write
+ * WAL records. In that case, more than one backup block may be recorded,
+ * though that isn't important to the outcome and the backup blocks are
+ * likely to be identical anyway.
+ *
+ * Returns the result of XLogInsert() for the XLOG_HINT record. XLogInsert()
+ * returns InvalidXLogRecPtr, without inserting any WAL record, when the
+ * hint record turns out not to need a backup block.
+ */
+#define XLOG_HINT_WATERMARK 13579
+XLogRecPtr
+XLogSaveBufferForHint(Buffer buffer)
+{
+ /*
+ * Make an XLOG entry reporting the hint
+ *
+ * The record is a two-element chain: rdata[0] carries the watermark
+ * payload and is not associated with any buffer; rdata[1] carries no
+ * data of its own and only references the caller's buffer, which is
+ * what allows a backup block to be attached for it.
+ */
+ XLogRecData rdata[2];
+ int watermark = XLOG_HINT_WATERMARK;
+
+ /*
+ * Not allowed to have zero-length records, so use a small watermark
+ * as the record's only data. xlog_redo() asserts that the value it
+ * reads back from an XLOG_HINT record equals XLOG_HINT_WATERMARK.
+ */
+ rdata[0].data = (char *) (&watermark);
+ rdata[0].len = sizeof(int);
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].buffer_std = false;
+ rdata[0].next = &(rdata[1]);
+
+ rdata[1].data = NULL;
+ rdata[1].len = 0;
+ rdata[1].buffer = buffer;
+ rdata[1].buffer_std = true;
+ rdata[1].next = NULL;
+
+ return XLogInsert(RM_XLOG_ID, XLOG_HINT, rdata);
+}
+
+/*
* Check if any of the GUC parameters that are critical for hot standby
* have changed, and update the value in pg_control file if necessary.
*/
@@ -7767,8 +7842,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
{
uint8 info = record->xl_info & ~XLR_INFO_MASK;
- /* Backup blocks are not used in xlog records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ /* Backup blocks are not used in most xlog records */
+ Assert(info == XLOG_HINT || !(record->xl_info & XLR_BKP_BLOCK_MASK));
if (info == XLOG_NEXTOID)
{
@@ -7961,6 +8036,34 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
{
/* nothing to do here */
}
+ else if (info == XLOG_HINT)
+ {
+#ifdef USE_ASSERT_CHECKING
+ int *watermark = (int *) XLogRecGetData(record);
+#endif
+
+ /* Check the watermark is correct for the hint record */
+ Assert(*watermark == XLOG_HINT_WATERMARK);
+
+ /* Backup blocks must be present for smgr hint records */
+ Assert(record->xl_info & XLR_BKP_BLOCK_MASK);
+
+ /*
+ * Hint records have no information that needs to be replayed.
+ * The sole purpose of them is to ensure that a hint bit does
+ * not cause a checksum invalidation if a hint bit write should
+ * cause a torn page. So the body of the record is empty but
+ * there must be one backup block.
+ *
+ * Since the only change in the backup block is a hint bit,
+ * there is no conflict with Hot Standby.
+ *
+ * This also means there is no corresponding API call for this,
+ * so an smgr implementation has no need to implement anything,
+ * which means nothing is needed in md.c etc.
+ */
+ RestoreBackupBlock(lsn, record, 0, false, false);
+ }
else if (info == XLOG_BACKUP_END)
{
XLogRecPtr startpoint;