aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/commands/vacuumlazy.c43
-rw-r--r--src/backend/storage/freespace/README12
-rw-r--r--src/backend/storage/freespace/freespace.c107
-rw-r--r--src/include/storage/freespace.h2
4 files changed, 141 insertions, 23 deletions
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index f9da24c491f..d2a006671ac 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -85,6 +85,15 @@
#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
/*
+ * When a table has no indexes, vacuum the FSM after every 8GB, approximately
+ * (it won't be exact because we only vacuum FSM after processing a heap page
+ * that has some removable tuples). When there are indexes, this is ignored,
+ * and we vacuum FSM after each index/heap cleaning pass.
+ */
+#define VACUUM_FSM_EVERY_PAGES \
+ ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
+
+/*
* Guesstimation of number of dead tuples per page. This is used to
* provide an upper limit to memory allocated when vacuuming small
* tables.
@@ -285,9 +294,6 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
- /* Vacuum the Free Space Map */
- FreeSpaceMapVacuum(onerel);
-
/*
* Update statistics in pg_class.
*
@@ -465,7 +471,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid;
TransactionId relminmxid = onerel->rd_rel->relminmxid;
BlockNumber empty_pages,
- vacuumed_pages;
+ vacuumed_pages,
+ next_fsm_block_to_vacuum;
double num_tuples, /* total number of nonremovable tuples */
live_tuples, /* live tuples (reltuples estimate) */
tups_vacuumed, /* tuples cleaned up by vacuum */
@@ -501,6 +508,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
relname)));
empty_pages = vacuumed_pages = 0;
+ next_fsm_block_to_vacuum = (BlockNumber) 0;
num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
indstats = (IndexBulkDeleteResult **)
@@ -752,6 +760,13 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
vacrelstats->num_dead_tuples = 0;
vacrelstats->num_index_scans++;
+ /*
+ * Vacuum the Free Space Map to make newly-freed space visible on
+ * upper-level FSM pages. Note we have not yet processed blkno.
+ */
+ FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
+ next_fsm_block_to_vacuum = blkno;
+
/* Report that we are once again scanning the heap */
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_PHASE_SCAN_HEAP);
@@ -1200,6 +1215,19 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
*/
vacrelstats->num_dead_tuples = 0;
vacuumed_pages++;
+
+ /*
+ * Periodically do incremental FSM vacuuming to make newly-freed
+ * space visible on upper FSM pages. Note: although we've cleaned
+ * the current block, we haven't yet updated its FSM entry (that
+ * happens further down), so passing end == blkno is correct.
+ */
+ if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
+ {
+ FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum,
+ blkno);
+ next_fsm_block_to_vacuum = blkno;
+ }
}
freespace = PageGetHeapFreeSpace(page);
@@ -1368,6 +1396,13 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
vacrelstats->num_index_scans++;
}
+ /*
+ * Vacuum the remainder of the Free Space Map. We must do this whether or
+ * not there were indexes.
+ */
+ if (blkno > next_fsm_block_to_vacuum)
+ FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
+
/* report all blocks vacuumed; and that we're cleaning up */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
diff --git a/src/backend/storage/freespace/README b/src/backend/storage/freespace/README
index bbd1b93fac6..e7ff23b76f7 100644
--- a/src/backend/storage/freespace/README
+++ b/src/backend/storage/freespace/README
@@ -180,13 +180,13 @@ have a corrupted page, with a parent somewhere with too small a value.
Secondly, if we detect corrupted pages while we search, traversing down
the tree. That check will notice if a parent node is set to too high a value.
In both cases, the upper nodes on the page are immediately rebuilt, fixing
-the corruption.
+the corruption so far as that page is concerned.
-Vacuum updates all the bottom level pages with correct amount of free space
-on the heap pages, fixing any outdated values there. After the heap and
-index passes are done, FreeSpaceMapVacuum is called, and the FSM tree is
-scanned in depth-first order. This fixes any discrepancies between upper
-and lower level FSM pages.
+VACUUM updates all the bottom-level FSM pages with the correct amount of free
+space on corresponding heap pages, as it proceeds through the heap. This
+goes through fsm_set_avail(), so that the upper nodes on those pages are
+immediately updated. Periodically, VACUUM calls FreeSpaceMapVacuum[Range]
+to propagate the new free-space info into the upper pages of the FSM tree.
TODO
----
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index dd8ae526444..7eb4f3ee930 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -108,7 +108,9 @@ static Size fsm_space_cat_to_avail(uint8 cat);
static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
uint8 newValue, uint8 minValue);
static BlockNumber fsm_search(Relation rel, uint8 min_cat);
-static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
+static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
+ BlockNumber start, BlockNumber end,
+ bool *eof);
static BlockNumber fsm_get_lastblckno(Relation rel, FSMAddress addr);
static void fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat);
@@ -370,21 +372,48 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
*/
if (rel->rd_smgr)
rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;
+
+ /*
+ * Update upper-level FSM pages to account for the truncation. This is
+ * important because the just-truncated pages were likely marked as
+ * all-free, and would be preferentially selected.
+ */
+ FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
}
/*
- * FreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM
+ * FreeSpaceMapVacuum - update upper-level pages in the rel's FSM
+ *
+ * We assume that the bottom-level pages have already been updated with
+ * new free-space information.
*/
void
FreeSpaceMapVacuum(Relation rel)
{
bool dummy;
- /*
- * Traverse the tree in depth-first order. The tree is stored physically
- * in depth-first order, so this should be pretty I/O efficient.
- */
- fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy);
+ /* Recursively scan the tree, starting at the root */
+ (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS,
+ (BlockNumber) 0, InvalidBlockNumber,
+ &dummy);
+}
+
+/*
+ * FreeSpaceMapVacuumRange - update upper-level pages in the rel's FSM
+ *
+ * As above, but assume that only heap pages between start and end-1 inclusive
+ * have new free-space information, so update only the upper-level slots
+ * covering that block range. end == InvalidBlockNumber is equivalent to
+ * "all the rest of the relation".
+ */
+void
+FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
+{
+ bool dummy;
+
+ /* Recursively scan the tree, starting at the root */
+ if (end > start)
+ (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, start, end, &dummy);
}
/******** Internal routines ********/
@@ -783,9 +812,21 @@ fsm_search(Relation rel, uint8 min_cat)
/*
* Recursive guts of FreeSpaceMapVacuum
+ *
+ * Examine the FSM page indicated by addr, as well as its children, updating
+ * upper-level nodes that cover the heap block range from start to end-1.
+ * (It's okay if end is beyond the actual end of the map.)
+ * Return the maximum freespace value on this page.
+ *
+ * If addr is past the end of the FSM, set *eof_p to true and return 0.
+ *
+ * This traverses the tree in depth-first order. The tree is stored
+ * physically in depth-first order, so this should be pretty I/O efficient.
*/
static uint8
-fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
+fsm_vacuum_page(Relation rel, FSMAddress addr,
+ BlockNumber start, BlockNumber end,
+ bool *eof_p)
{
Buffer buf;
Page page;
@@ -804,15 +845,52 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
page = BufferGetPage(buf);
/*
- * Recurse into children, and fix the information stored about them at
- * this level.
+ * If we're above the bottom level, recurse into children, and fix the
+ * information stored about them at this level.
*/
if (addr.level > FSM_BOTTOM_LEVEL)
{
- int slot;
+ FSMAddress fsm_start,
+ fsm_end;
+ uint16 fsm_start_slot,
+ fsm_end_slot;
+ int slot,
+ start_slot,
+ end_slot;
bool eof = false;
- for (slot = 0; slot < SlotsPerFSMPage; slot++)
+ /*
+ * Compute the range of slots we need to update on this page, given
+ * the requested range of heap blocks to consider. The first slot to
+ * update is the one covering the "start" block, and the last slot is
+ * the one covering "end - 1". (Some of this work will be duplicated
+ * in each recursive call, but it's cheap enough to not worry about.)
+ */
+ fsm_start = fsm_get_location(start, &fsm_start_slot);
+ fsm_end = fsm_get_location(end - 1, &fsm_end_slot);
+
+ while (fsm_start.level < addr.level)
+ {
+ fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot);
+ fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot);
+ }
+ Assert(fsm_start.level == addr.level);
+
+ if (fsm_start.logpageno == addr.logpageno)
+ start_slot = fsm_start_slot;
+ else if (fsm_start.logpageno > addr.logpageno)
+ start_slot = SlotsPerFSMPage; /* shouldn't get here... */
+ else
+ start_slot = 0;
+
+ if (fsm_end.logpageno == addr.logpageno)
+ end_slot = fsm_end_slot;
+ else if (fsm_end.logpageno > addr.logpageno)
+ end_slot = SlotsPerFSMPage - 1;
+ else
+ end_slot = -1; /* shouldn't get here... */
+
+ for (slot = start_slot; slot <= end_slot; slot++)
{
int child_avail;
@@ -820,7 +898,9 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
/* After we hit end-of-file, just clear the rest of the slots */
if (!eof)
- child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof);
+ child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
+ start, end,
+ &eof);
else
child_avail = 0;
@@ -835,6 +915,7 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
}
}
+ /* Now get the maximum value on the page, to return to caller */
max_avail = fsm_get_max_avail(BufferGetPage(buf));
/*
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index a517d7ec435..a2032518ac2 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -32,6 +32,8 @@ extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
extern void FreeSpaceMapVacuum(Relation rel);
+extern void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start,
+ BlockNumber end);
extern void UpdateFreeSpaceMap(Relation rel,
BlockNumber startBlkNum,
BlockNumber endBlkNum,