Diffstat (limited to 'src/backend/access/nbtree/nbtree.c')
-rw-r--r--  src/backend/access/nbtree/nbtree.c  |  69
1 file changed, 41 insertions(+), 28 deletions(-)
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 75a6ea81d45..b7f9e2e1608 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -56,8 +56,8 @@ typedef struct
IndexBulkDeleteCallback callback;
void *callback_state;
BTCycleId cycleid;
- BlockNumber lastBlockVacuumed; /* last blkno reached by Vacuum scan */
- BlockNumber lastUsedPage; /* blkno of last non-recyclable page */
+ BlockNumber lastBlockVacuumed; /* highest blkno actually vacuumed */
+ BlockNumber lastBlockLocked; /* highest blkno we've cleanup-locked */
BlockNumber totFreePages; /* true total # of free pages */
MemoryContext pagedelcontext;
} BTVacState;
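
For context on how these fields are used downstream: lastBlockVacuumed is carried in every XLOG_BTREE_VACUUM record, which is what tells the standby which block range to cleanup-lock. A rough sketch of that record's layout, paraphrased from the nbtree WAL declarations of this era (simplified, not the authoritative definition):

    typedef struct xl_btree_vacuum
    {
        RelFileNode node;              /* index relation being vacuumed */
        BlockNumber block;             /* leaf page this record targets */
        BlockNumber lastBlockVacuumed; /* target of the previous record;
                                        * replay cleanup-locks every block
                                        * in between */
        /* TARGET OFFSET NUMBERS FOLLOW AT END OF STRUCT */
    } xl_btree_vacuum;
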
@@ -763,7 +763,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
vstate.callback_state = callback_state;
vstate.cycleid = cycleid;
vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
- vstate.lastUsedPage = BTREE_METAPAGE;
+ vstate.lastBlockLocked = BTREE_METAPAGE;
vstate.totFreePages = 0;
/* Create a temporary memory context to run _bt_pagedel in */
@@ -819,27 +819,30 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
}
/*
- * InHotStandby we need to scan right up to the end of the index for
- * correct locking, so we may need to write a WAL record for the final
- * block in the index if it was not vacuumed. It's possible that VACUUMing
- * has actually removed zeroed pages at the end of the index so we need to
- * take care to issue the record for last actual block and not for the
- * last block that was scanned. Ignore empty indexes.
+ * If the WAL is replayed in hot standby, the replay process needs to get
+ * cleanup locks on all index leaf pages, just as we've been doing here.
+ * However, we won't issue any WAL records about pages that have no items
+ * to be deleted. For pages that lie between the ones we've vacuumed, the
+ * replay code will take locks under the direction of the lastBlockVacuumed
+ * fields in the XLOG_BTREE_VACUUM WAL records. To cover pages after the
+ * last one we vacuum, we need to issue a dummy XLOG_BTREE_VACUUM WAL
+ * record against the last leaf page in the index, if that one wasn't
+ * vacuumed.
*/
if (XLogStandbyInfoActive() &&
- num_pages > 1 && vstate.lastBlockVacuumed < (num_pages - 1))
+ vstate.lastBlockVacuumed < vstate.lastBlockLocked)
{
Buffer buf;
/*
- * We can't use _bt_getbuf() here because it always applies
- * _bt_checkpage(), which will barf on an all-zero page. We want to
- * recycle all-zero pages, not fail. Also, we want to use a
- * nondefault buffer access strategy.
+ * The page should be valid, but we can't use _bt_getbuf() because we
+ * want to use a nondefault buffer access strategy. Since we aren't
+ * going to delete any items, getting a cleanup lock again is probably
+ * overkill, but for consistency do that anyway.
*/
- buf = ReadBufferExtended(rel, MAIN_FORKNUM, num_pages - 1, RBM_NORMAL,
- info->strategy);
+ buf = ReadBufferExtended(rel, MAIN_FORKNUM, vstate.lastBlockLocked,
+ RBM_NORMAL, info->strategy);
LockBufferForCleanup(buf);
+ _bt_checkpage(rel, buf);
_bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
_bt_relbuf(rel, buf);
}
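
To see why this dummy record is sufficient, compare the standby-side replay logic. The following is a condensed sketch of btree_xlog_vacuum from the same-era nbtxlog.c (abridged: full-page-image handling and the actual item deletion are omitted):

    /* In hot standby, cleanup-lock every block between the last
     * vacuumed block and this record's target, so that replay visits
     * every leaf page just as the primary's VACUUM scan did. */
    if (HotStandbyActiveInReplay())
    {
        BlockNumber blkno;

        for (blkno = xlrec->lastBlockVacuumed + 1; blkno < xlrec->block; blkno++)
        {
            Buffer buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM,
                                                   blkno, RBM_NORMAL);

            if (BufferIsValid(buffer))
            {
                LockBufferForCleanup(buffer);
                UnlockReleaseBuffer(buffer);
            }
        }
    }

The dummy record emitted above extends this loop's upper bound to the last leaf page the primary cleanup-locked, even though no items are deleted there.
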
@@ -914,10 +917,6 @@ restart:
}
}
- /* If the page is in use, update lastUsedPage */
- if (!_bt_page_recyclable(page) && vstate->lastUsedPage < blkno)
-     vstate->lastUsedPage = blkno;
-
/* Page is valid, see what to do with it */
if (_bt_page_recyclable(page))
{
@@ -954,6 +953,13 @@ restart:
LockBufferForCleanup(buf);
/*
+ * Remember highest leaf page number we've taken cleanup lock on; see
+ * notes in btvacuumscan
+ */
+ if (blkno > vstate->lastBlockLocked)
+ vstate->lastBlockLocked = blkno;
+
+ /*
* Check whether we need to recurse back to earlier pages. What we
* are concerned about is a page split that happened since we started
* the vacuum scan. If the split moved some tuples to a lower page
@@ -1019,19 +1025,26 @@ restart:
*/
if (ndeletable > 0)
{
- BlockNumber lastBlockVacuumed = BufferGetBlockNumber(buf);
-
+ /*
+ * Notice that the issued XLOG_BTREE_VACUUM WAL record includes an
+ * instruction to the replay code to get cleanup lock on all pages
+ * between the previous lastBlockVacuumed and this page. This
+ * ensures that WAL replay locks all leaf pages at some point.
+ *
+ * Since we can visit leaf pages out-of-order when recursing,
+ * replay might end up locking such pages an extra time, but it
+ * doesn't seem worth the amount of bookkeeping it'd take to avoid
+ * that.
+ */
_bt_delitems_vacuum(rel, buf, deletable, ndeletable,
vstate->lastBlockVacuumed);
/*
- * Keep track of the block number of the lastBlockVacuumed, so we
- * can scan those blocks as well during WAL replay. This then
- * provides concurrency protection and allows btrees to be used
- * while in recovery.
+ * Remember highest leaf page number we've issued an
+ * XLOG_BTREE_VACUUM WAL record for.
*/
- if (lastBlockVacuumed > vstate->lastBlockVacuumed)
- vstate->lastBlockVacuumed = lastBlockVacuumed;
+ if (blkno > vstate->lastBlockVacuumed)
+ vstate->lastBlockVacuumed = blkno;
stats->tuples_removed += ndeletable;
/* must recompute maxoff */
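
For completeness, here is how the lastBlockVacuumed value handed to _bt_delitems_vacuum ends up in the WAL: a condensed sketch of the record-building code in the same-era nbtpage.c (rdata chain setup abridged):

    if (RelationNeedsWAL(rel))
    {
        xl_btree_vacuum xlrec_vacuum;
        XLogRecPtr recptr;

        xlrec_vacuum.node = rel->rd_node;
        xlrec_vacuum.block = BufferGetBlockNumber(buf);
        /* tell replay how far back it must cleanup-lock */
        xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;

        /* ... build the rdata chain carrying the deletable offsets ... */

        recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
        PageSetLSN(BufferGetPage(buf), recptr);
    }
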