author     Amit Kapila <akapila@postgresql.org>    2019-01-28 08:14:06 +0530
committer  Amit Kapila <akapila@postgresql.org>    2019-01-28 08:14:06 +0530
commit     ac88d2962a96a9c7e83d5acfc28fe49a72812086 (patch)
tree       c66901928bff8ba6a1998f3304551f13cab68c61 /src/backend/access
parent     d66e3664b8baf41908865ad363c6ba943e6f9c4e (diff)
Avoid creation of the free space map for small heap relations.
Previously, all heaps had FSMs. For very small tables, this meant that the
FSM took up more space than the heap did (an FSM occupies a minimum of
three 8kB pages). That was wasteful, so now we
refrain from creating the FSM for heaps with 4 pages or fewer. If the last
known target block has insufficient space, we still try to insert into some
other page before giving up and extending the relation, since doing
otherwise leads to table bloat. Testing showed that trying every page
penalized performance slightly, so we compromise and try every other page.
This way, we visit at most two pages. Any pages with wasted free space
become visible at the next relation extension, so we still control table bloat.
As a bonus, directly attempting one or two pages can even be faster than
consulting the FSM would have been.
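The every-other-page compromise is easy to picture with a toy model. The
sketch below is a minimal, self-contained illustration of the idea, not the
actual local-map code this patch adds to freespace.c; LocalPageMap,
local_map_init, and local_map_next are illustrative names. Starting from the
last page and stepping backward two at a time, a 4-page heap offers at most
two candidates (pages 3 and 1) before the backend gives up and extends the
relation:

    #include <stdbool.h>
    #include <stdio.h>

    /* Heaps with this many pages or fewer get no FSM (value from this commit). */
    #define HEAP_FSM_CREATION_THRESHOLD 4

    typedef struct LocalPageMap
    {
        int  nblocks;                               /* pages currently in the heap */
        bool avail[HEAP_FSM_CREATION_THRESHOLD];    /* page still worth trying? */
    } LocalPageMap;

    /* Mark every other page as a candidate, newest page first. */
    static void
    local_map_init(LocalPageMap *map, int nblocks)
    {
        map->nblocks = nblocks;
        for (int blk = 0; blk < nblocks; blk++)
            map->avail[blk] = false;
        for (int blk = nblocks - 1; blk >= 0; blk -= 2)
            map->avail[blk] = true;
    }

    /* Return the next candidate page, or -1 to tell the caller to extend. */
    static int
    local_map_next(LocalPageMap *map)
    {
        for (int blk = map->nblocks - 1; blk >= 0; blk--)
        {
            if (map->avail[blk])
            {
                map->avail[blk] = false;    /* never retry a page we have seen */
                return blk;
            }
        }
        return -1;
    }

    int
    main(void)
    {
        LocalPageMap map;

        local_map_init(&map, 4);
        for (int blk = local_map_next(&map); blk >= 0; blk = local_map_next(&map))
            printf("would try page %d\n", blk);     /* prints 3, then 1 */
        return 0;
    }

Pages skipped in one round are not lost: once the relation grows past the
threshold and a real FSM is built, their free space becomes visible again.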
Once the FSM is created for a heap, we don't remove it even if somebody
deletes all the rows from the corresponding relation. We don't think
removing it would be a useful optimization, since it is quite likely that
the relation will grow back to the same size.
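This keep-once-created behavior is enforced inside the free-space-map code
itself rather than in the files changed below; the callers merely pass a
size hint. The following is a hedged sketch of that decision, under the
assumption that fsm_exists_on_disk() stands in for the real on-disk
existence check (the actual logic lives in
src/backend/storage/freespace/freespace.c and is not part of this diffstat):

    #include "postgres.h"
    #include "storage/bufmgr.h"      /* RelationGetNumberOfBlocks() */
    #include "storage/freespace.h"   /* HEAP_FSM_CREATION_THRESHOLD */
    #include "utils/rel.h"

    /* Hypothetical stand-in for the real on-disk FSM existence check. */
    static bool fsm_exists_on_disk(Relation rel);

    /* Sketch: may this write go to the on-disk FSM, or should it be skipped? */
    static bool
    fsm_allow_writes_sketch(Relation rel, BlockNumber heapblk,
                            BlockNumber nblocks_hint)
    {
        BlockNumber nblocks = nblocks_hint;

        /* A heap block past the threshold proves the heap is large enough. */
        if (heapblk >= HEAP_FSM_CREATION_THRESHOLD)
            return true;

        /* No hint from the caller (InvalidBlockNumber): ask the smgr. */
        if (nblocks == InvalidBlockNumber)
            nblocks = RelationGetNumberOfBlocks(rel);

        if (nblocks > HEAP_FSM_CREATION_THRESHOLD)
            return true;

        /*
         * Small heap: write to the FSM only if one already exists on disk.
         * Once created we keep it, since the relation will likely regrow.
         */
        return fsm_exists_on_disk(rel);
    }

The nblocks hint here is the new fourth argument to RecordPageWithFreeSpace()
seen throughout the diff: callers that already know the relation size pass it
to spare a storage-manager call, and callers that don't pass InvalidBlockNumber.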
Author: John Naylor with design inputs and some code contribution by Amit Kapila
Reviewed-by: Amit Kapila
Tested-by: Mithun C Y
Discussion: https://www.postgresql.org/message-id/CAJVSVGWvB13PzpbLEecFuGFc5V2fsO736BsdTakPiPAcdMM5tQ@mail.gmail.com
Diffstat (limited to 'src/backend/access')
-rw-r--r--  src/backend/access/brin/brin.c          |  2
-rw-r--r--  src/backend/access/brin/brin_pageops.c  | 10
-rw-r--r--  src/backend/access/heap/hio.c           | 47
-rw-r--r--  src/backend/access/heap/vacuumlazy.c    | 17
-rw-r--r--  src/backend/access/transam/xact.c       | 14
5 files changed, 59 insertions, 31 deletions
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 467d91e6818..8f008dd0080 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -1150,7 +1150,7 @@ terminate_brin_buildstate(BrinBuildState *state)
         freespace = PageGetFreeSpace(page);
         blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
         ReleaseBuffer(state->bs_currentInsertBuf);
-        RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
+        RecordPageWithFreeSpace(state->bs_irel, blk, freespace, InvalidBlockNumber);
         FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
     }
 
diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c
index 164a4681556..2eb354f948f 100644
--- a/src/backend/access/brin/brin_pageops.c
+++ b/src/backend/access/brin/brin_pageops.c
@@ -310,7 +310,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
 
             if (extended)
             {
-                RecordPageWithFreeSpace(idxrel, newblk, freespace);
+                RecordPageWithFreeSpace(idxrel, newblk, freespace, InvalidBlockNumber);
                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
             }
 
@@ -461,7 +461,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
 
     if (extended)
     {
-        RecordPageWithFreeSpace(idxrel, blk, freespace);
+        RecordPageWithFreeSpace(idxrel, blk, freespace, InvalidBlockNumber);
         FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
     }
 
@@ -654,7 +654,7 @@ brin_page_cleanup(Relation idxrel, Buffer buf)
 
     /* Measure free space and record it */
     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
-                            br_page_get_freespace(page));
+                            br_page_get_freespace(page), InvalidBlockNumber);
 }
 
 /*
@@ -703,7 +703,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
     /* Choose initial target page, re-using existing target if known */
     newblk = RelationGetTargetBlock(irel);
     if (newblk == InvalidBlockNumber)
-        newblk = GetPageWithFreeSpace(irel, itemsz);
+        newblk = GetPageWithFreeSpace(irel, itemsz, true);
 
     /*
      * Loop until we find a page with sufficient free space.  By the time we
@@ -895,7 +895,7 @@ brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
      * pages whose FSM records were forgotten in a crash.
      */
     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
-                            br_page_get_freespace(page));
+                            br_page_get_freespace(page), InvalidBlockNumber);
 }
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index 3da0b49ccc4..4c3e774eee2 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -239,8 +239,14 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
          * Immediately update the bottom level of the FSM.  This has a good
          * chance of making this page visible to other concurrently inserting
          * backends, and we want that to happen without delay.
+         *
+         * Since we know the table will end up with extraBlocks additional
+         * pages, we pass the final number to avoid possible unnecessary
+         * system calls and to make sure the FSM is created when we add the
+         * first new page.
          */
-        RecordPageWithFreeSpace(relation, blockNum, freespace);
+        RecordPageWithFreeSpace(relation, blockNum, freespace,
+                                firstBlock + extraBlocks);
     }
     while (--extraBlocks > 0);
 
@@ -377,20 +383,9 @@ RelationGetBufferForTuple(Relation relation, Size len,
          * We have no cached target page, so ask the FSM for an initial
          * target.
          */
-        targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
-
-        /*
-         * If the FSM knows nothing of the rel, try the last page before we
-         * give up and extend.  This avoids one-tuple-per-page syndrome during
-         * bootstrapping or in a recently-started system.
-         */
-        if (targetBlock == InvalidBlockNumber)
-        {
-            BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
-
-            if (nblocks > 0)
-                targetBlock = nblocks - 1;
-        }
+        targetBlock = GetPageWithFreeSpace(relation,
+                                           len + saveFreeSpace,
+                                           false);
     }
 
 loop:
@@ -484,6 +479,14 @@ loop:
         {
             /* use this page as future insert target, too */
             RelationSetTargetBlock(relation, targetBlock);
+
+            /*
+             * In case we used an in-memory map of available blocks, reset it
+             * for next use.
+             */
+            if (targetBlock < HEAP_FSM_CREATION_THRESHOLD)
+                FSMClearLocalMap();
+
             return buffer;
         }
 
@@ -543,9 +546,12 @@ loop:
 
             /*
              * Check if some other backend has extended a block for us while
-             * we were waiting on the lock.
+             * we were waiting on the lock.  We only check the FSM -- if there
+             * isn't one we don't recheck the number of blocks.
              */
-            targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
+            targetBlock = GetPageWithFreeSpace(relation,
+                                               len + saveFreeSpace,
+                                               true);
 
             /*
              * If some other waiter has already extended the relation, we
@@ -625,5 +631,12 @@ loop:
      */
     RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
 
+    /*
+     * In case we used an in-memory map of available blocks, reset it for next
+     * use.  We do this unconditionally since after relation extension we
+     * can't skip this based on the targetBlock.
+     */
+    FSMClearLocalMap();
+
     return buffer;
 }
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 37aa484ec3a..9cfa65ca47f 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -153,7 +153,7 @@ static BufferAccessStrategy vac_strategy;
 static void lazy_scan_heap(Relation onerel, int options,
                LVRelStats *vacrelstats, Relation *Irel, int nindexes,
                bool aggressive);
-static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
+static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks);
 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
 static void lazy_vacuum_index(Relation indrel,
                IndexBulkDeleteResult **stats,
@@ -758,7 +758,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
             pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
 
             /* Remove tuples from heap */
-            lazy_vacuum_heap(onerel, vacrelstats);
+            lazy_vacuum_heap(onerel, vacrelstats, nblocks);
 
             /*
              * Forget the now-vacuumed tuples, and press on, but be careful
@@ -896,7 +896,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 
             MarkBufferDirty(buf);
             UnlockReleaseBuffer(buf);
-            RecordPageWithFreeSpace(onerel, blkno, freespace);
+            RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
             continue;
         }
 
@@ -935,7 +935,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
             }
 
             UnlockReleaseBuffer(buf);
-            RecordPageWithFreeSpace(onerel, blkno, freespace);
+            RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
             continue;
         }
 
@@ -1332,7 +1332,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
          * taken if there are no indexes.)
          */
         if (vacrelstats->num_dead_tuples == prev_dead_count)
-            RecordPageWithFreeSpace(onerel, blkno, freespace);
+            RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
     }
 
     /* report that everything is scanned and vacuumed */
@@ -1394,7 +1394,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
         /* Remove tuples from heap */
         pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
                                      PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
-        lazy_vacuum_heap(onerel, vacrelstats);
+        lazy_vacuum_heap(onerel, vacrelstats, nblocks);
         vacrelstats->num_index_scans++;
     }
 
@@ -1465,9 +1465,10 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
  * Note: the reason for doing this as a second pass is we cannot remove
  * the tuples until we've removed their index entries, and we want to
  * process index entry removal in batches as large as possible.
+ * Note: nblocks is passed as an optimization for RecordPageWithFreeSpace().
  */
 static void
-lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
+lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks)
 {
     int         tupindex;
     int         npages;
@@ -1504,7 +1505,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 
         freespace = PageGetHeapFreeSpace(page);
 
         UnlockReleaseBuffer(buf);
-        RecordPageWithFreeSpace(onerel, tblk, freespace);
+        RecordPageWithFreeSpace(onerel, tblk, freespace, nblocks);
         npages++;
     }
 
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 0181976964c..92bda878043 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -48,6 +48,7 @@
 #include "replication/walsender.h"
 #include "storage/condition_variable.h"
 #include "storage/fd.h"
+#include "storage/freespace.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
 #include "storage/proc.h"
@@ -2493,6 +2494,12 @@ AbortTransaction(void)
     pgstat_report_wait_end();
     pgstat_progress_end_command();
 
+    /*
+     * In case we aborted during RelationGetBufferForTuple(), clear the local
+     * map of heap pages.
+     */
+    FSMClearLocalMap();
+
     /* Clean up buffer I/O and buffer context locks, too */
     AbortBufferIO();
     UnlockBuffers();
@@ -4714,6 +4721,13 @@ AbortSubTransaction(void)
     pgstat_report_wait_end();
     pgstat_progress_end_command();
 
+    /*
+     * In case we aborted during RelationGetBufferForTuple(), clear the local
+     * map of heap pages.
+     */
+    FSMClearLocalMap();
+
     AbortBufferIO();
     UnlockBuffers();
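Read together, the hio.c hunks above amount to a small protocol around the
two changed entry points. Below is a condensed, illustrative rendering of
that flow; get_page_for_tuple, try_insert_on, and page_free_space are
stand-ins for the surrounding RelationGetBufferForTuple() logic, not
functions in this patch, while GetPageWithFreeSpace(),
RecordAndGetPageWithFreeSpace(), and FSMClearLocalMap() are the real
entry points touched here:

    #include "postgres.h"
    #include "storage/freespace.h"   /* GetPageWithFreeSpace(), FSMClearLocalMap() */
    #include "utils/rel.h"

    /* Illustrative stand-ins for the buffer handling hio.c really performs. */
    static bool try_insert_on(Relation rel, BlockNumber blk, Size needed);
    static Size page_free_space(Relation rel, BlockNumber blk);
    static BlockNumber extend_relation(Relation rel);

    static BlockNumber
    get_page_for_tuple(Relation rel, Size needed)
    {
        /*
         * check_fsm_only = false: if the heap is small and has no FSM, this
         * call may build the backend-local map of candidate pages.
         */
        BlockNumber blk = GetPageWithFreeSpace(rel, needed, false);

        while (blk != InvalidBlockNumber)
        {
            if (try_insert_on(rel, blk, needed))
            {
                FSMClearLocalMap();     /* reset the local map for next use */
                return blk;
            }

            /*
             * Page was full: report its real free space and ask for another
             * candidate.  When there is no FSM, this call also marks blk as
             * tried in the local map, so it is not offered again.
             */
            blk = RecordAndGetPageWithFreeSpace(rel, blk,
                                                page_free_space(rel, blk),
                                                needed);
        }

        /*
         * Every candidate was rejected: extend the relation.  The clear is
         * unconditional here, matching the tail of RelationGetBufferForTuple().
         */
        blk = extend_relation(rel);
        FSMClearLocalMap();
        return blk;
    }

Clearing the map on every successful return or relation extension is what
keeps a stale map from leaking into the next insertion; that is also why
AbortTransaction() and AbortSubTransaction() now call FSMClearLocalMap(),
covering the case where an error is raised partway through the search.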