author    Peter Geoghegan <pg@bowt.ie>  2021-04-06 08:49:22 -0700
committer Peter Geoghegan <pg@bowt.ie>  2021-04-06 08:49:22 -0700
commit    8523492d4e349c4714aa2ab0291be175a88cb4fc (patch)
tree      67506fa070807914d0885fac4c73b27922464a74 /src/backend/access/heap/heapam.c
parent    789d81de8a50d9a23cc1a3b8ea5d839246020689 (diff)
Remove tupgone special case from vacuumlazy.c.
Retry the call to heap_page_prune() in rare cases where there is disagreement between the heap_page_prune() call and the call to HeapTupleSatisfiesVacuum() that immediately follows. Disagreement is possible when a concurrently-aborted transaction makes a tuple DEAD during the tiny window between each step. This was the only case where a tuple considered DEAD by VACUUM still had storage following pruning. VACUUM's definition of dead tuples is now uniformly simple and unambiguous: dead tuples from each page are always LP_DEAD line pointers that were encountered just after we performed pruning (and just before we considered freezing remaining items with tuple storage).

Eliminating the tupgone=true special case enables INDEX_CLEANUP=off style skipping of index vacuuming that takes place based on flexible, dynamic criteria. The INDEX_CLEANUP=off case had to know about skipping indexes up-front before now, due to a subtle interaction with the special case (see commit dd695979) -- this was a special case unto itself. Now there are no special cases. And so now it won't matter when or how we decide to skip index vacuuming: it won't affect how pruning behaves, and it won't be affected by any of the implementation details of pruning or freezing.

Also remove XLOG_HEAP2_CLEANUP_INFO records. These are no longer necessary because we now rely entirely on heap pruning taking care of recovery conflicts. There is no longer any need to generate recovery conflicts for DEAD tuples that pruning just missed. This also means that heap vacuuming now uses exactly the same strategy for recovery conflicts as index vacuuming always has: REDO routines never need to process a latestRemovedXid from the WAL record, since earlier REDO of the WAL record from pruning is sufficient in all cases. The generic XLOG_HEAP2_CLEAN record type is now split into two new record types to reflect this new division (these are called XLOG_HEAP2_PRUNE and XLOG_HEAP2_VACUUM).

Also stop acquiring a super-exclusive lock for heap pages when they're vacuumed during VACUUM's second heap pass. A regular exclusive lock is enough. This is correct because heap page vacuuming is now strictly a matter of setting the LP_DEAD line pointers to LP_UNUSED. No other backend can have a pointer to a tuple located in a pinned buffer that can be invalidated by a concurrent heap page vacuum operation.

Heap vacuuming can now be thought of as conceptually similar to index vacuuming and conceptually dissimilar to heap pruning. Heap pruning now has sole responsibility for anything involving the logical contents of the database (e.g., managing transaction status information, recovery conflicts, considering what to do with HOT chains). Index vacuuming and heap vacuuming are now only concerned with recycling garbage items from physical data structures that back the logical database.

Bump XLOG_PAGE_MAGIC due to pruning and heap page vacuum WAL record changes.

Credit for the idea of retrying pruning a page to avoid the tupgone case goes to Andres Freund.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Masahiko Sawada <sawada.mshk@gmail.com>
Discussion: https://postgr.es/m/CAH2-WznneCXTzuFmcwx_EyRQgfsfJAAsu+CsqRFmFXCAar=nJw@mail.gmail.com
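The retry protocol in the first paragraph can be illustrated with a self-contained toy model. Everything below is invented for illustration -- the SlotState enum, prune_page(), and recheck_finds_dead_with_storage() are stand-ins for heap line pointers, heap_page_prune(), and the HeapTupleSatisfiesVacuum() recheck -- so this compiles as plain C but is not PostgreSQL code:

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented states: stand-ins for heap line pointer states. */
    typedef enum
    {
        TUPLE_LIVE,                 /* tuple with storage, still visible */
        TUPLE_DEAD,                 /* tuple with storage, dead to everyone */
        SLOT_LP_DEAD                /* bare LP_DEAD item, no storage */
    } SlotState;

    /* Stand-in for heap_page_prune(): DEAD tuples become LP_DEAD items. */
    static void
    prune_page(SlotState *page, int nslots)
    {
        for (int i = 0; i < nslots; i++)
            if (page[i] == TUPLE_DEAD)
                page[i] = SLOT_LP_DEAD;
    }

    /*
     * Stand-in for the visibility recheck: report whether any tuple that
     * still has storage is DEAD (the disagreement VACUUM retries on).
     */
    static bool
    recheck_finds_dead_with_storage(const SlotState *page, int nslots)
    {
        for (int i = 0; i < nslots; i++)
            if (page[i] == TUPLE_DEAD)
                return true;
        return false;
    }

    int
    main(void)
    {
        SlotState   page[3] = {TUPLE_LIVE, TUPLE_DEAD, TUPLE_LIVE};
        int         retries = 0;

    retry:
        prune_page(page, 3);

        /* Simulate a transaction aborting in the window between steps. */
        if (retries == 0)
            page[2] = TUPLE_DEAD;

        if (recheck_finds_dead_with_storage(page, 3))
        {
            /* No tupgone special case: just prune the page again. */
            retries++;
            goto retry;
        }

        printf("settled after %d retry(ies); every dead item is LP_DEAD\n",
               retries);
        return 0;
    }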
Diffstat (limited to 'src/backend/access/heap/heapam.c')
-rw-r--r--  src/backend/access/heap/heapam.c | 205
1 file changed, 87 insertions(+), 118 deletions(-)
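For reference, the two WAL record types that replace XLOG_HEAP2_CLEAN are declared in src/include/access/heapam_xlog.h. That header's diff is not shown on this page, and the declarations below are a condensed reading of the commit rather than an authoritative copy:

    /*
     * Replaces the pruning half of the old xl_heap_clean record.  The
     * redirected/dead/unused offset arrays travel as block 0 data, so
     * they can be omitted whenever a full-page image of the buffer is
     * logged instead.
     */
    typedef struct xl_heap_prune
    {
        TransactionId latestRemovedXid;
        uint16      nredirected;
        uint16      ndead;
        /* OFFSET NUMBERS are in the block reference 0 */
    } xl_heap_prune;

    /*
     * Covers only VACUUM's second heap pass.  No latestRemovedXid is
     * carried: the earlier XLOG_HEAP2_PRUNE record already generated any
     * recovery conflicts that were needed.
     */
    typedef struct xl_heap_vacuum
    {
        uint16      nunused;
        /* OFFSET NUMBERS are in the block reference 0 */
    } xl_heap_vacuum;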
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 595310ba1b2..9cbc161d7a9 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -7538,7 +7538,7 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
* must have considered the original tuple header as part of
* generating its own latestRemovedXid value.
*
- * Relying on XLOG_HEAP2_CLEAN records like this is the same
+ * Relying on XLOG_HEAP2_PRUNE records like this is the same
* strategy that index vacuuming uses in all cases. Index VACUUM
* WAL records don't even have a latestRemovedXid field of their
* own for this reason.
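For orientation, the recovery-conflict entry point that this comment (and the pruning REDO routine further down) ultimately relies on is declared in src/include/storage/standby.h with roughly this shape in this era of the tree (shown for context; confirm against the header):

    extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
                                                    RelFileNode node);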
@@ -7958,88 +7958,6 @@ bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
}
/*
- * Perform XLogInsert to register a heap cleanup info message. These
- * messages are sent once per VACUUM and are required because
- * of the phasing of removal operations during a lazy VACUUM.
- * See comments for vacuum_log_cleanup_info().
- */
-XLogRecPtr
-log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
-{
- xl_heap_cleanup_info xlrec;
- XLogRecPtr recptr;
-
- xlrec.node = rnode;
- xlrec.latestRemovedXid = latestRemovedXid;
-
- XLogBeginInsert();
- XLogRegisterData((char *) &xlrec, SizeOfHeapCleanupInfo);
-
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO);
-
- return recptr;
-}
-
-/*
- * Perform XLogInsert for a heap-clean operation. Caller must already
- * have modified the buffer and marked it dirty.
- *
- * Note: prior to Postgres 8.3, the entries in the nowunused[] array were
- * zero-based tuple indexes. Now they are one-based like other uses
- * of OffsetNumber.
- *
- * We also include latestRemovedXid, which is the greatest XID present in
- * the removed tuples. That allows recovery processing to cancel or wait
- * for long standby queries that can still see these tuples.
- */
-XLogRecPtr
-log_heap_clean(Relation reln, Buffer buffer,
- OffsetNumber *redirected, int nredirected,
- OffsetNumber *nowdead, int ndead,
- OffsetNumber *nowunused, int nunused,
- TransactionId latestRemovedXid)
-{
- xl_heap_clean xlrec;
- XLogRecPtr recptr;
-
- /* Caller should not call me on a non-WAL-logged relation */
- Assert(RelationNeedsWAL(reln));
-
- xlrec.latestRemovedXid = latestRemovedXid;
- xlrec.nredirected = nredirected;
- xlrec.ndead = ndead;
-
- XLogBeginInsert();
- XLogRegisterData((char *) &xlrec, SizeOfHeapClean);
-
- XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
-
- /*
- * The OffsetNumber arrays are not actually in the buffer, but we pretend
- * that they are. When XLogInsert stores the whole buffer, the offset
- * arrays need not be stored too. Note that even if all three arrays are
- * empty, we want to expose the buffer as a candidate for whole-page
- * storage, since this record type implies a defragmentation operation
- * even if no line pointers changed state.
- */
- if (nredirected > 0)
- XLogRegisterBufData(0, (char *) redirected,
- nredirected * sizeof(OffsetNumber) * 2);
-
- if (ndead > 0)
- XLogRegisterBufData(0, (char *) nowdead,
- ndead * sizeof(OffsetNumber));
-
- if (nunused > 0)
- XLogRegisterBufData(0, (char *) nowunused,
- nunused * sizeof(OffsetNumber));
-
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEAN);
-
- return recptr;
-}
-
-/*
* Perform XLogInsert for a heap-freeze operation. Caller must have already
* modified the buffer and marked it dirty.
*/
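With these functions gone, WAL insertion moves to the call sites: pruning is logged from pruneheap.c and the second heap pass from vacuumlazy.c. A condensed sketch of the vacuum-side logging, following the same XLogRegisterBufData() pattern as the deleted log_heap_clean() -- variable names and surrounding assertions are simplified here; see lazy_vacuum_heap_page() in the commit for the real code:

    xl_heap_vacuum xlrec;
    XLogRecPtr  recptr;

    xlrec.nunused = uncnt;

    XLogBeginInsert();
    XLogRegisterData((char *) &xlrec, SizeOfHeapVacuum);

    /*
     * Register the now-unused offsets as buffer data, so that they are
     * dropped whenever the buffer is logged as a full-page image.
     */
    XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
    XLogRegisterBufData(0, (char *) unused, uncnt * sizeof(OffsetNumber));

    recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VACUUM);

    PageSetLSN(page, recptr);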
@@ -8510,34 +8428,15 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed,
}
/*
- * Handles CLEANUP_INFO
- */
-static void
-heap_xlog_cleanup_info(XLogReaderState *record)
-{
- xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) XLogRecGetData(record);
-
- if (InHotStandby)
- ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
-
- /*
- * Actual operation is a no-op. Record type exists to provide a means for
- * conflict processing to occur before we begin index vacuum actions. See
- * vacuumlazy.c and also comments in btvacuumpage()
- */
-
- /* Backup blocks are not used in cleanup_info records */
- Assert(!XLogRecHasAnyBlockRefs(record));
-}
-
-/*
- * Handles XLOG_HEAP2_CLEAN record type
+ * Handles XLOG_HEAP2_PRUNE record type.
+ *
+ * Acquires a super-exclusive lock.
*/
static void
-heap_xlog_clean(XLogReaderState *record)
+heap_xlog_prune(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
- xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record);
+ xl_heap_prune *xlrec = (xl_heap_prune *) XLogRecGetData(record);
Buffer buffer;
RelFileNode rnode;
BlockNumber blkno;
@@ -8548,12 +8447,8 @@ heap_xlog_clean(XLogReaderState *record)
/*
* We're about to remove tuples. In Hot Standby mode, ensure that there
* are no queries running for which the removed tuples are still visible.
- *
- * Not all HEAP2_CLEAN records remove tuples with xids, so we only want to
- * conflict on the records that cause MVCC failures for user queries. If
- * latestRemovedXid is invalid, skip conflict processing.
*/
- if (InHotStandby && TransactionIdIsValid(xlrec->latestRemovedXid))
+ if (InHotStandby)
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, rnode);
/*
@@ -8606,7 +8501,7 @@ heap_xlog_clean(XLogReaderState *record)
UnlockReleaseBuffer(buffer);
/*
- * After cleaning records from a page, it's useful to update the FSM
+ * After pruning records from a page, it's useful to update the FSM
* about it, as it may cause the page to become a target for insertions
* later even if vacuum decides not to visit it (which is possible if
* it gets marked all-visible).
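The FSM update referred to here happens after the buffer lock is released, through this helper declared in src/include/storage/freespace.h (prototype shown for context; confirm against the header):

    extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
                                            Size spaceAvail);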
@@ -8619,6 +8514,80 @@ heap_xlog_clean(XLogReaderState *record)
}
/*
+ * Handles XLOG_HEAP2_VACUUM record type.
+ *
+ * Acquires an exclusive lock only.
+ */
+static void
+heap_xlog_vacuum(XLogReaderState *record)
+{
+ XLogRecPtr lsn = record->EndRecPtr;
+ xl_heap_vacuum *xlrec = (xl_heap_vacuum *) XLogRecGetData(record);
+ Buffer buffer;
+ BlockNumber blkno;
+ XLogRedoAction action;
+
+ /*
+ * If we have a full-page image, restore it (without using a cleanup lock)
+ * and we're done.
+ */
+ action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, false,
+ &buffer);
+ if (action == BLK_NEEDS_REDO)
+ {
+ Page page = (Page) BufferGetPage(buffer);
+ OffsetNumber *nowunused;
+ Size datalen;
+ OffsetNumber *offnum;
+
+ nowunused = (OffsetNumber *) XLogRecGetBlockData(record, 0, &datalen);
+
+ /* Shouldn't be a record unless there's something to do */
+ Assert(xlrec->nunused > 0);
+
+ /* Update all now-unused line pointers */
+ offnum = nowunused;
+ for (int i = 0; i < xlrec->nunused; i++)
+ {
+ OffsetNumber off = *offnum++;
+ ItemId lp = PageGetItemId(page, off);
+
+ Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
+ ItemIdSetUnused(lp);
+ }
+
+ /*
+ * Update the page's hint bit about whether it has free pointers
+ */
+ PageSetHasFreeLinePointers(page);
+
+ PageSetLSN(page, lsn);
+ MarkBufferDirty(buffer);
+ }
+
+ if (BufferIsValid(buffer))
+ {
+ Size freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
+ RelFileNode rnode;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+
+ UnlockReleaseBuffer(buffer);
+
+ /*
+ * After vacuuming LP_DEAD items from a page, it's useful to update
+ * the FSM about it, as it may cause the page to become a target for
+ * insertions later even if vacuum decides not to visit it (which is
+ * possible if it gets marked all-visible).
+ *
+ * Do this regardless of a full-page image being applied, since the
+ * FSM data is not in the page anyway.
+ */
+ XLogRecordPageWithFreeSpace(rnode, blkno, freespace);
+ }
+}
+
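The reason a plain exclusive lock suffices in heap_xlog_vacuum(), per the commit message, is visible in what ItemIdSetUnused() actually does: it only rewrites line-pointer fields and never moves tuple bytes. Its definition in src/include/storage/itemid.h is essentially the following (quoted from memory; consult the header for the authoritative text):

    #define ItemIdSetUnused(itemId) \
    ( \
        (itemId)->lp_flags = LP_UNUSED, \
        (itemId)->lp_off = 0, \
        (itemId)->lp_len = 0 \
    )

Since no tuple data is rearranged, a concurrent backend holding a mere pin cannot be left pointing into moved storage, which is the commit message's argument for dropping the super-exclusive lock here while heap_xlog_prune() above still takes one.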
+/*
* Replay XLOG_HEAP2_VISIBLE record.
*
* The critical integrity requirement here is that we must never end up with
@@ -9722,15 +9691,15 @@ heap2_redo(XLogReaderState *record)
switch (info & XLOG_HEAP_OPMASK)
{
- case XLOG_HEAP2_CLEAN:
- heap_xlog_clean(record);
+ case XLOG_HEAP2_PRUNE:
+ heap_xlog_prune(record);
+ break;
+ case XLOG_HEAP2_VACUUM:
+ heap_xlog_vacuum(record);
break;
case XLOG_HEAP2_FREEZE_PAGE:
heap_xlog_freeze_page(record);
break;
- case XLOG_HEAP2_CLEANUP_INFO:
- heap_xlog_cleanup_info(record);
- break;
case XLOG_HEAP2_VISIBLE:
heap_xlog_visible(record);
break;