diff options
Diffstat (limited to 'src/backend/storage/freespace')
-rw-r--r-- | src/backend/storage/freespace/README | 38 | ||||
-rw-r--r-- | src/backend/storage/freespace/freespace.c | 301 | ||||
-rw-r--r-- | src/backend/storage/freespace/indexfsm.c | 6 |
3 files changed, 12 insertions, 333 deletions
diff --git a/src/backend/storage/freespace/README b/src/backend/storage/freespace/README index 0d3cd29772e..e7ff23b76f7 100644 --- a/src/backend/storage/freespace/README +++ b/src/backend/storage/freespace/README @@ -8,41 +8,7 @@ free space to hold a tuple to be stored; or to determine that no such page exists and the relation must be extended by one page. As of PostgreSQL 8.4 each relation has its own, extensible free space map stored in a separate "fork" of its relation. This eliminates the disadvantages of the former -fixed-size FSM. There are two exceptions: - -1. Hash indexes never have a FSM. -2. For very small tables, a 3-page relation fork would be relatively large -and wasteful, so to save space we refrain from creating the FSM if the -heap has HEAP_FSM_CREATION_THRESHOLD pages or fewer. - -To locate free space in the latter case, we simply try pages directly without -knowing ahead of time how much free space they have. To maintain good -performance, we create a local in-memory map of pages to try, and only mark -every other page as available. For example, in a 3-page heap, the local map -would look like: - -ANAN -0123 - -Pages 0 and 2 are marked "available", and page 1 as "not available". -Page 3 is beyond the end of the relation, so is likewise marked "not -available". First we try page 2, and if that doesn't have sufficient free -space we try page 0 before giving up and extending the relation. There may -be some wasted free space on block 1, but if the relation extends to 4 pages: - -NANA -0123 - -We not only have the new page 3 at our disposal, we can now check page 1 -for free space as well. - -Once the FSM is created for a heap we don't remove it even if somebody deletes -all the rows from the corresponding relation. We don't think it is a useful -optimization as it is quite likely that relation will again grow to the same -size. - -FSM data structure ------------------- +fixed-size FSM. It is important to keep the map small so that it can be searched rapidly. Therefore, we don't attempt to record the exact free space on a page. @@ -226,3 +192,5 @@ TODO ---- - fastroot to avoid traversing upper nodes with just 1 child +- use a different system for tables that fit into one FSM page, with a + mechanism to switch to the real thing as it grows. diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index c3ed4242e2d..eee82860575 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -76,14 +76,6 @@ #define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1) #define FSM_BOTTOM_LEVEL 0 -/* Status codes for the local map. */ - -/* Either already tried, or beyond the end of the relation */ -#define FSM_LOCAL_NOT_AVAIL 0x00 - -/* Available to try */ -#define FSM_LOCAL_AVAIL 0x01 - /* * The internal FSM routines work on a logical addressing scheme. Each * level of the tree can be thought of as a separately addressable file. @@ -97,32 +89,6 @@ typedef struct /* Address of the root page. */ static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0}; -/* - * For small relations, we don't create FSM to save space, instead we use - * local in-memory map of pages to try. To locate free space, we simply try - * pages directly without knowing ahead of time how much free space they have. - * - * Note that this map is used to the find the block with required free space - * for any given relation. We clear this map when we have found a block with - * enough free space, when we extend the relation, or on transaction abort. - * See src/backend/storage/freespace/README for further details. - */ -typedef struct -{ - BlockNumber nblocks; - uint8 map[HEAP_FSM_CREATION_THRESHOLD]; -} FSMLocalMap; - -static FSMLocalMap fsm_local_map = -{ - 0, - { - FSM_LOCAL_NOT_AVAIL - } -}; - -#define FSM_LOCAL_MAP_EXISTS (fsm_local_map.nblocks > 0) - /* functions to navigate the tree */ static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot); static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot); @@ -141,14 +107,10 @@ static Size fsm_space_cat_to_avail(uint8 cat); /* workhorse functions for various operations */ static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot, uint8 newValue, uint8 minValue); -static void fsm_local_set(Relation rel, BlockNumber cur_nblocks); static BlockNumber fsm_search(Relation rel, uint8 min_cat); -static BlockNumber fsm_local_search(void); static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, BlockNumber start, BlockNumber end, bool *eof); -static bool fsm_allow_writes(Relation rel, BlockNumber heapblk, - BlockNumber nblocks, BlockNumber *get_nblocks); /******** Public API ********/ @@ -165,46 +127,13 @@ static bool fsm_allow_writes(Relation rel, BlockNumber heapblk, * amount of free space available on that page and then try again (see * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned, * extend the relation. - * - * For very small heap relations that don't have a FSM, we try every other - * page before extending the relation. To keep track of which pages have - * been tried, initialize a local in-memory map of pages. */ BlockNumber -GetPageWithFreeSpace(Relation rel, Size spaceNeeded, bool check_fsm_only) +GetPageWithFreeSpace(Relation rel, Size spaceNeeded) { uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded); - BlockNumber target_block, - nblocks; - /* First try the FSM, if it exists. */ - target_block = fsm_search(rel, min_cat); - - if (target_block == InvalidBlockNumber && - (rel->rd_rel->relkind == RELKIND_RELATION || - rel->rd_rel->relkind == RELKIND_TOASTVALUE) && - !check_fsm_only) - { - nblocks = RelationGetNumberOfBlocks(rel); - - if (nblocks > HEAP_FSM_CREATION_THRESHOLD) - { - /* - * If the FSM knows nothing of the rel, try the last page before - * we give up and extend. This avoids one-tuple-per-page syndrome - * during bootstrapping or in a recently-started system. - */ - target_block = nblocks - 1; - } - else if (nblocks > 0) - { - /* Initialize local map and get first candidate block. */ - fsm_local_set(rel, nblocks); - target_block = fsm_local_search(); - } - } - - return target_block; + return fsm_search(rel, min_cat); } /* @@ -215,47 +144,16 @@ GetPageWithFreeSpace(Relation rel, Size spaceNeeded, bool check_fsm_only) * also some effort to return a page close to the old page; if there's a * page with enough free space on the same FSM page where the old one page * is located, it is preferred. - * - * For very small heap relations that don't have a FSM, we update the local - * map to indicate we have tried a page, and return the next page to try. */ BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded) { - int old_cat; - int search_cat; + int old_cat = fsm_space_avail_to_cat(oldSpaceAvail); + int search_cat = fsm_space_needed_to_cat(spaceNeeded); FSMAddress addr; uint16 slot; int search_slot; - BlockNumber nblocks = InvalidBlockNumber; - - /* First try the local map, if it exists. */ - if (FSM_LOCAL_MAP_EXISTS) - { - Assert((rel->rd_rel->relkind == RELKIND_RELATION || - rel->rd_rel->relkind == RELKIND_TOASTVALUE) && - fsm_local_map.map[oldPage] == FSM_LOCAL_AVAIL); - - fsm_local_map.map[oldPage] = FSM_LOCAL_NOT_AVAIL; - return fsm_local_search(); - } - - if (!fsm_allow_writes(rel, oldPage, InvalidBlockNumber, &nblocks)) - { - /* - * If we have neither a local map nor a FSM, we probably just tried - * the target block in the smgr relation entry and failed, so we'll - * need to create the local map. - */ - fsm_local_set(rel, nblocks); - return fsm_local_search(); - } - - /* Normal FSM logic follows */ - - old_cat = fsm_space_avail_to_cat(oldSpaceAvail); - search_cat = fsm_space_needed_to_cat(spaceNeeded); /* Get the location of the FSM byte representing the heap block */ addr = fsm_get_location(oldPage, &slot); @@ -278,45 +176,21 @@ RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, * Note that if the new spaceAvail value is higher than the old value stored * in the FSM, the space might not become visible to searchers until the next * FreeSpaceMapVacuum call, which updates the upper level pages. - * - * Callers have no need for a local map. */ void -RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, - Size spaceAvail, BlockNumber nblocks) +RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail) { - int new_cat; + int new_cat = fsm_space_avail_to_cat(spaceAvail); FSMAddress addr; uint16 slot; - BlockNumber dummy; - - if (!fsm_allow_writes(rel, heapBlk, nblocks, &dummy)) - /* No FSM to update and no local map either */ - return; /* Get the location of the FSM byte representing the heap block */ addr = fsm_get_location(heapBlk, &slot); - new_cat = fsm_space_avail_to_cat(spaceAvail); fsm_set_and_search(rel, addr, slot, new_cat, 0); } /* - * Clear the local map. We must call this when we have found a block with - * enough free space, when we extend the relation, or on transaction abort. - */ -void -FSMClearLocalMap(void) -{ - if (FSM_LOCAL_MAP_EXISTS) - { - fsm_local_map.nblocks = 0; - memset(&fsm_local_map.map, FSM_LOCAL_NOT_AVAIL, - sizeof(fsm_local_map.map)); - } -} - -/* * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in * WAL replay */ @@ -330,31 +204,6 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, BlockNumber blkno; Buffer buf; Page page; - bool write_to_fsm; - - /* This is meant to mirror the logic in fsm_allow_writes() */ - if (heapBlk >= HEAP_FSM_CREATION_THRESHOLD) - write_to_fsm = true; - else - { - /* Open the relation at smgr level */ - SMgrRelation smgr = smgropen(rnode, InvalidBackendId); - - if (smgrexists(smgr, FSM_FORKNUM)) - write_to_fsm = true; - else - { - BlockNumber heap_nblocks = smgrnblocks(smgr, MAIN_FORKNUM); - - if (heap_nblocks > HEAP_FSM_CREATION_THRESHOLD) - write_to_fsm = true; - else - write_to_fsm = false; - } - } - - if (!write_to_fsm) - return; /* Get the location of the FSM byte representing the heap block */ addr = fsm_get_location(heapBlk, &slot); @@ -1055,141 +904,3 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, return max_avail; } - -/* - * For heaps, we prevent creation of the FSM unless the number of pages - * exceeds HEAP_FSM_CREATION_THRESHOLD. For tables that don't already have - * a FSM, this will save an inode and a few kB of space. - * - * XXX The API is a little awkward -- if the caller passes a valid nblocks - * value, it can avoid invoking a system call. If the caller passes - * InvalidBlockNumber and receives a false return value, it can get an - * up-to-date relation size from get_nblocks. This saves a few cycles in - * the caller, which would otherwise need to get the relation size by itself. - */ -static bool -fsm_allow_writes(Relation rel, BlockNumber heapblk, - BlockNumber nblocks, BlockNumber *get_nblocks) -{ - bool skip_get_nblocks; - - if (heapblk >= HEAP_FSM_CREATION_THRESHOLD) - return true; - - /* Non-heap rels can always create a FSM. */ - if (rel->rd_rel->relkind != RELKIND_RELATION && - rel->rd_rel->relkind != RELKIND_TOASTVALUE) - return true; - - /* - * If the caller knows nblocks, we can avoid a system call later. If it - * doesn't, maybe we have relpages from a previous VACUUM. Since the table - * may have extended since then, we still have to count the pages later if - * we can't return now. - */ - if (nblocks != InvalidBlockNumber) - { - if (nblocks > HEAP_FSM_CREATION_THRESHOLD) - return true; - else - skip_get_nblocks = true; - } - else - { - if (rel->rd_rel->relpages != InvalidBlockNumber && - rel->rd_rel->relpages > HEAP_FSM_CREATION_THRESHOLD) - return true; - else - skip_get_nblocks = false; - } - - RelationOpenSmgr(rel); - if (smgrexists(rel->rd_smgr, FSM_FORKNUM)) - return true; - - if (skip_get_nblocks) - return false; - - /* last resort */ - *get_nblocks = RelationGetNumberOfBlocks(rel); - if (*get_nblocks > HEAP_FSM_CREATION_THRESHOLD) - return true; - else - return false; -} - -/* - * Initialize the local map of blocks to try, for when there is no FSM. - * - * When we initialize the map, the whole heap is potentially available to - * try. Testing revealed that trying every block can cause a small - * performance dip compared to when we use a FSM, so we try every other - * block instead. - */ -static void -fsm_local_set(Relation rel, BlockNumber cur_nblocks) -{ - BlockNumber blkno, - cached_target_block; - - /* The local map must not be set already. */ - Assert(!FSM_LOCAL_MAP_EXISTS); - - /* - * Starting at the current last block in the relation and working - * backwards, mark alternating blocks as available. - */ - blkno = cur_nblocks - 1; - while (true) - { - fsm_local_map.map[blkno] = FSM_LOCAL_AVAIL; - if (blkno >= 2) - blkno -= 2; - else - break; - } - - /* Cache the number of blocks. */ - fsm_local_map.nblocks = cur_nblocks; - - /* Set the status of the cached target block to 'unavailable'. */ - cached_target_block = RelationGetTargetBlock(rel); - if (cached_target_block != InvalidBlockNumber && - cached_target_block < cur_nblocks) - fsm_local_map.map[cached_target_block] = FSM_LOCAL_NOT_AVAIL; -} - -/* - * Search the local map for an available block to try, in descending order. - * As such, there is no heuristic available to decide which order will be - * better to try, but the probability of having space in the last block in the - * map is higher because that is the most recent block added to the heap. - * - * This function is used when there is no FSM. - */ -static BlockNumber -fsm_local_search(void) -{ - BlockNumber target_block; - - /* Local map must be set by now. */ - Assert(FSM_LOCAL_MAP_EXISTS); - - target_block = fsm_local_map.nblocks; - do - { - target_block--; - if (fsm_local_map.map[target_block] == FSM_LOCAL_AVAIL) - return target_block; - } while (target_block > 0); - - /* - * If we didn't find any available block to try in the local map, then - * clear it. This prevents us from using the map again without setting it - * first, which would otherwise lead to the same conclusion again and - * again. - */ - FSMClearLocalMap(); - - return InvalidBlockNumber; -} diff --git a/src/backend/storage/freespace/indexfsm.c b/src/backend/storage/freespace/indexfsm.c index 9d8f43d3739..58cedeaa9f7 100644 --- a/src/backend/storage/freespace/indexfsm.c +++ b/src/backend/storage/freespace/indexfsm.c @@ -37,7 +37,7 @@ BlockNumber GetFreeIndexPage(Relation rel) { - BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2, true); + BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2); if (blkno != InvalidBlockNumber) RecordUsedIndexPage(rel, blkno); @@ -51,7 +51,7 @@ GetFreeIndexPage(Relation rel) void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock) { - RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1, InvalidBlockNumber); + RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1); } @@ -61,7 +61,7 @@ RecordFreeIndexPage(Relation rel, BlockNumber freeBlock) void RecordUsedIndexPage(Relation rel, BlockNumber usedBlock) { - RecordPageWithFreeSpace(rel, usedBlock, 0, InvalidBlockNumber); + RecordPageWithFreeSpace(rel, usedBlock, 0); } /* |