about summary refs log tree commit diff
path: root/src/backend/access/nbtree/nbtxlog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/nbtree/nbtxlog.c')
-rw-r--r-- src/backend/access/nbtree/nbtxlog.c 25
1 files changed, 22 insertions, 3 deletions
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index da28e21d5cb..d569ae1cd66 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -385,12 +385,29 @@ static void
btree_xlog_vacuum(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
- xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
Buffer buffer;
Page page;
BTPageOpaque opaque;
+#ifdef UNUSED
+ xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
/*
+ * This section of code is thought to be no longer needed, after analysis
+ * of the calling paths. It is retained to allow the code to be reinstated
+ * if a flaw is revealed in that thinking.
+ *
+ * If we are running non-MVCC scans using this index we need to do some
+ * additional work to ensure correctness, which is known as a "pin scan"
+ * described in more detail in next paragraphs. We used to do the extra
+ * work in all cases, whereas we now avoid that work in most cases. If
+ * lastBlockVacuumed is set to InvalidBlockNumber then we skip the
+ * additional work required for the pin scan.
+ *
+ * Avoiding this extra work is important since it requires us to touch
+ * every page in the index, so is an O(N) operation. Worse, it is an
+ * operation performed in the foreground during redo, so it delays
+ * replication directly.
+ *
* If queries might be active then we need to ensure every leaf page is
* unpinned between the lastBlockVacuumed and the current block, if there
* are any. This prevents replay of the VACUUM from reaching the stage of
@@ -412,7 +429,7 @@ btree_xlog_vacuum(XLogReaderState *record)
* isn't yet consistent; so we need not fear reading still-corrupt blocks
* here during crash recovery.
*/
- if (HotStandbyActiveInReplay())
+ if (HotStandbyActiveInReplay() && BlockNumberIsValid(xlrec->lastBlockVacuumed))
{
RelFileNode thisrnode;
BlockNumber thisblkno;
@@ -433,7 +450,8 @@ btree_xlog_vacuum(XLogReaderState *record)
* XXX we don't actually need to read the block, we just need to
* confirm it is unpinned. If we had a special call into the
* buffer manager we could optimise this so that if the block is
- * not in shared_buffers we confirm it as unpinned.
+ * not in shared_buffers we confirm it as unpinned. Optimizing
+ * this is now moot, since in most cases we avoid the scan.
*/
buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
RBM_NORMAL_NO_LOG);
@@ -444,6 +462,7 @@ btree_xlog_vacuum(XLogReaderState *record)
}
}
}
+#endif
/*
* Like in btvacuumpage(), we need to take a cleanup lock on every leaf