aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/freespace/freespace.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2018-03-29 11:29:54 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2018-03-29 11:29:54 -0400
commit851a26e26637aac60d6e974acbadb31748b12f86 (patch)
treeb5382c5c35d5ba3b5859c68599394ee7b4905914 /src/backend/storage/freespace/freespace.c
parentc0cbe00fee6d0a5e0ec72c6d68a035e674edc4cc (diff)
downloadpostgresql-851a26e26637aac60d6e974acbadb31748b12f86.tar.gz
postgresql-851a26e26637aac60d6e974acbadb31748b12f86.zip
While vacuuming a large table, update upper-level FSM data every so often.
VACUUM updates leaf-level FSM entries immediately after cleaning the corresponding heap blocks. fsmpage.c updates the intra-page search trees on the leaf-level FSM pages when this happens, but it does not touch the upper-level FSM pages, so that the released space might not actually be findable by searchers. Previously, updating the upper-level pages happened only at the conclusion of the VACUUM run, in a single FreeSpaceMapVacuum() call. This is bad because the VACUUM might get canceled before ever reaching that point, so that from the point of view of searchers no space has been freed at all, leading to table bloat. We can improve matters by updating the upper pages immediately after each cycle of index-cleaning and heap-cleaning, processing just the FSM pages corresponding to the range of heap blocks we have now fully cleaned. This adds a small amount of extra work, since the FSM pages leading down to each range boundary will be touched twice, but it's pretty negligible compared to everything else going on in a large VACUUM. If there are no indexes, VACUUM doesn't work in cycles but just cleans each heap page on first visit. In that case we just arbitrarily update upper FSM pages after each 8GB of heap. That maintains the goal of not letting all this work slide until the very end, and it doesn't seem worth expending extra complexity on a case that so seldom occurs in practice. In either case, the FSM is fully up to date before any attempt is made to truncate the relation, so that the most likely scenario for VACUUM cancellation no longer results in out-of-date upper FSM pages. When we do successfully truncate, adjusting the FSM to reflect that is now fully handled within FreeSpaceMapTruncateRel. Claudio Freire, reviewed by Masahiko Sawada and Jing Wang, some additional tweaks by me Discussion: https://postgr.es/m/CAGTBQpYR0uJCNTt3M5GOzBRHo+-GccNO1nCaQ8yEJmZKSW5q1A@mail.gmail.com
Diffstat (limited to 'src/backend/storage/freespace/freespace.c')
-rw-r--r--src/backend/storage/freespace/freespace.c107
1 files changed, 94 insertions, 13 deletions
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index dd8ae526444..7eb4f3ee930 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -108,7 +108,9 @@ static Size fsm_space_cat_to_avail(uint8 cat);
static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
uint8 newValue, uint8 minValue);
static BlockNumber fsm_search(Relation rel, uint8 min_cat);
-static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
+static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
+ BlockNumber start, BlockNumber end,
+ bool *eof);
static BlockNumber fsm_get_lastblckno(Relation rel, FSMAddress addr);
static void fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat);
@@ -370,21 +372,48 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
*/
if (rel->rd_smgr)
rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;
+
+ /*
+ * Update upper-level FSM pages to account for the truncation. This is
+ * important because the just-truncated pages were likely marked as
+ * all-free, and would be preferentially selected.
+ */
+ FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
}
/*
- * FreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM
+ * FreeSpaceMapVacuum - update upper-level pages in the rel's FSM
+ *
+ * We assume that the bottom-level pages have already been updated with
+ * new free-space information.
*/
void
FreeSpaceMapVacuum(Relation rel)
{
bool dummy;
- /*
- * Traverse the tree in depth-first order. The tree is stored physically
- * in depth-first order, so this should be pretty I/O efficient.
- */
- fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy);
+ /* Recursively scan the tree, starting at the root */
+ (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS,
+ (BlockNumber) 0, InvalidBlockNumber,
+ &dummy);
+}
+
+/*
+ * FreeSpaceMapVacuumRange - update upper-level pages in the rel's FSM
+ *
+ * As above, but assume that only heap pages between start and end-1 inclusive
+ * have new free-space information, so update only the upper-level slots
+ * covering that block range. end == InvalidBlockNumber is equivalent to
+ * "all the rest of the relation".
+ */
+void
+FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
+{
+ bool dummy;
+
+ /* Recursively scan the tree, starting at the root */
+ if (end > start)
+ (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, start, end, &dummy);
}
/******** Internal routines ********/
@@ -783,9 +812,21 @@ fsm_search(Relation rel, uint8 min_cat)
/*
* Recursive guts of FreeSpaceMapVacuum
+ *
+ * Examine the FSM page indicated by addr, as well as its children, updating
+ * upper-level nodes that cover the heap block range from start to end-1.
+ * (It's okay if end is beyond the actual end of the map.)
+ * Return the maximum freespace value on this page.
+ *
+ * If addr is past the end of the FSM, set *eof_p to true and return 0.
+ *
+ * This traverses the tree in depth-first order. The tree is stored
+ * physically in depth-first order, so this should be pretty I/O efficient.
*/
static uint8
-fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
+fsm_vacuum_page(Relation rel, FSMAddress addr,
+ BlockNumber start, BlockNumber end,
+ bool *eof_p)
{
Buffer buf;
Page page;
@@ -804,15 +845,52 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
page = BufferGetPage(buf);
/*
- * Recurse into children, and fix the information stored about them at
- * this level.
+ * If we're above the bottom level, recurse into children, and fix the
+ * information stored about them at this level.
*/
if (addr.level > FSM_BOTTOM_LEVEL)
{
- int slot;
+ FSMAddress fsm_start,
+ fsm_end;
+ uint16 fsm_start_slot,
+ fsm_end_slot;
+ int slot,
+ start_slot,
+ end_slot;
bool eof = false;
- for (slot = 0; slot < SlotsPerFSMPage; slot++)
+ /*
+ * Compute the range of slots we need to update on this page, given
+ * the requested range of heap blocks to consider. The first slot to
+ * update is the one covering the "start" block, and the last slot is
+ * the one covering "end - 1". (Some of this work will be duplicated
+ * in each recursive call, but it's cheap enough to not worry about.)
+ */
+ fsm_start = fsm_get_location(start, &fsm_start_slot);
+ fsm_end = fsm_get_location(end - 1, &fsm_end_slot);
+
+ while (fsm_start.level < addr.level)
+ {
+ fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot);
+ fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot);
+ }
+ Assert(fsm_start.level == addr.level);
+
+ if (fsm_start.logpageno == addr.logpageno)
+ start_slot = fsm_start_slot;
+ else if (fsm_start.logpageno > addr.logpageno)
+ start_slot = SlotsPerFSMPage; /* shouldn't get here... */
+ else
+ start_slot = 0;
+
+ if (fsm_end.logpageno == addr.logpageno)
+ end_slot = fsm_end_slot;
+ else if (fsm_end.logpageno > addr.logpageno)
+ end_slot = SlotsPerFSMPage - 1;
+ else
+ end_slot = -1; /* shouldn't get here... */
+
+ for (slot = start_slot; slot <= end_slot; slot++)
{
int child_avail;
@@ -820,7 +898,9 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
/* After we hit end-of-file, just clear the rest of the slots */
if (!eof)
- child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof);
+ child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
+ start, end,
+ &eof);
else
child_avail = 0;
@@ -835,6 +915,7 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
}
}
+ /* Now get the maximum value on the page, to return to caller */
max_avail = fsm_get_max_avail(BufferGetPage(buf));
/*