author | Robert Haas <rhaas@postgresql.org> | 2016-04-08 02:04:46 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2016-04-08 02:04:46 -0400 |
commit | 719c84c1be51f3d3fe6049b77ddbaa0c4b58a9a9 (patch) | |
tree | c2f4b1501655e50339e3365f267fcce00fd06bbb /src/backend/access/heap/hio.c | |
parent | 8643b91ecf8f47a1307df4a00d66b2fceada0d6f (diff) | |
download | postgresql-719c84c1be51f3d3fe6049b77ddbaa0c4b58a9a9.tar.gz postgresql-719c84c1be51f3d3fe6049b77ddbaa0c4b58a9a9.zip |
Extend relations multiple blocks at a time to improve scalability.
Contention on the relation extension lock can become quite fierce when
multiple processes are inserting data into the same relation at the same
time at a high rate. Experimentation shows that extending the relation
multiple blocks at a time improves scalability.
Dilip Kumar, reviewed by Petr Jelinek, Amit Kapila, and me.
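The sizing heuristic at the heart of the patch is small enough to show on its own. The sketch below is a standalone illustration, not backend code: the helper name `extra_blocks_for_waiters` is invented for the example, and it simply reproduces the `Min(512, lockWaiters * 20)` calculation that the new `RelationAddExtraBlocks` function in the diff below performs.

```c
/*
 * Standalone illustration of the patch's sizing heuristic:
 * pre-extend by Min(512, lockWaiters * 20) blocks, where lockWaiters is
 * the number of backends queued on the relation extension lock.
 * This is not PostgreSQL backend code; the helper name is hypothetical.
 */
#include <stdio.h>

#define Min(x, y) ((x) < (y) ? (x) : (y))

static int
extra_blocks_for_waiters(int lockWaiters)
{
    if (lockWaiters <= 0)
        return 0;               /* no contention: extend one block at a time as before */
    return Min(512, lockWaiters * 20);
}

int
main(void)
{
    int waiters[] = {0, 1, 4, 10, 26, 100};

    for (int i = 0; i < 6; i++)
        printf("%3d waiters -> %3d extra blocks\n",
               waiters[i], extra_blocks_for_waiters(waiters[i]));
    return 0;
}
```

So one waiter yields 20 extra blocks, ten waiters yield 200, and the 512-block cap takes effect once there are more than 25 waiters.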
Diffstat (limited to 'src/backend/access/heap/hio.c')
-rw-r--r-- | src/backend/access/heap/hio.c | 112 |
1 file changed, 109 insertions, 3 deletions
```diff
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index 8140418c390..6b850920c7c 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -169,6 +169,75 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
 }
 
 /*
+ * Extend a relation by multiple blocks to avoid future contention on the
+ * relation extension lock.  Our goal is to pre-extend the relation by an
+ * amount which ramps up as the degree of contention ramps up, but limiting
+ * the result to some sane overall value.
+ */
+static void
+RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
+{
+    Page        page;
+    BlockNumber blockNum = InvalidBlockNumber,
+                firstBlock = InvalidBlockNumber;
+    int         extraBlocks = 0;
+    int         lockWaiters = 0;
+    Size        freespace = 0;
+    Buffer      buffer;
+
+    /* Use the length of the lock wait queue to judge how much to extend. */
+    lockWaiters = RelationExtensionLockWaiterCount(relation);
+    if (lockWaiters <= 0)
+        return;
+
+    /*
+     * It might seem like multiplying the number of lock waiters by as much
+     * as 20 is too aggressive, but benchmarking revealed that smaller numbers
+     * were insufficient.  512 is just an arbitrary cap to prevent pathological
+     * results.
+     */
+    extraBlocks = Min(512, lockWaiters * 20);
+
+    while (extraBlocks-- >= 0)
+    {
+        /* Ouch - an unnecessary lseek() each time through the loop! */
+        buffer = ReadBufferBI(relation, P_NEW, bistate);
+
+        /* Extend by one page. */
+        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+        page = BufferGetPage(buffer);
+        PageInit(page, BufferGetPageSize(buffer), 0);
+        MarkBufferDirty(buffer);
+        blockNum = BufferGetBlockNumber(buffer);
+        freespace = PageGetHeapFreeSpace(page);
+        UnlockReleaseBuffer(buffer);
+
+        /* Remember first block number thus added. */
+        if (firstBlock == InvalidBlockNumber)
+            firstBlock = blockNum;
+
+        /*
+         * Immediately update the bottom level of the FSM.  This has a good
+         * chance of making this page visible to other concurrently inserting
+         * backends, and we want that to happen without delay.
+         */
+        RecordPageWithFreeSpace(relation, blockNum, freespace);
+    }
+
+    /*
+     * Updating the upper levels of the free space map is too expensive
+     * to do for every block, but it's worth doing once at the end to make
+     * sure that subsequent insertion activity sees all of those nifty free
+     * pages we just inserted.
+     *
+     * Note that we're using the freespace value that was reported for the
+     * last block we added as if it were the freespace value for every block
+     * we added.  That's actually true, because they're all equally empty.
+     */
+    UpdateFreeSpaceMap(relation, firstBlock, blockNum, freespace);
+}
+
+/*
  * RelationGetBufferForTuple
  *
  *  Returns pinned and exclusive-locked buffer of a page in given relation
@@ -233,8 +302,8 @@ RelationGetBufferForTuple(Relation relation, Size len,
     bool        use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
     Buffer      buffer = InvalidBuffer;
     Page        page;
-    Size        pageFreeSpace,
-                saveFreeSpace;
+    Size        pageFreeSpace = 0,
+                saveFreeSpace = 0;
     BlockNumber targetBlock,
                 otherBlock;
     bool        needLock;
@@ -308,6 +377,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
         }
     }
 
+loop:
     while (targetBlock != InvalidBlockNumber)
     {
         /*
@@ -440,10 +510,46 @@ RelationGetBufferForTuple(Relation relation, Size len,
      */
     needLock = !RELATION_IS_LOCAL(relation);
 
+    /*
+     * If we need the lock but are not able to acquire it immediately, we'll
+     * consider extending the relation by multiple blocks at a time to manage
+     * contention on the relation extension lock.  However, this only makes
+     * sense if we're using the FSM; otherwise, there's no point.
+     */
     if (needLock)
-        LockRelationForExtension(relation, ExclusiveLock);
+    {
+        if (!use_fsm)
+            LockRelationForExtension(relation, ExclusiveLock);
+        else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))
+        {
+            /* Couldn't get the lock immediately; wait for it. */
+            LockRelationForExtension(relation, ExclusiveLock);
+
+            /*
+             * Check if some other backend has extended a block for us while
+             * we were waiting on the lock.
+             */
+            targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
+
+            /*
+             * If some other waiter has already extended the relation, we
+             * don't need to do so; just use the existing freespace.
+             */
+            if (targetBlock != InvalidBlockNumber)
+            {
+                UnlockRelationForExtension(relation, ExclusiveLock);
+                goto loop;
+            }
+
+            /* Time to bulk-extend. */
+            RelationAddExtraBlocks(relation, bistate);
+        }
+    }
 
     /*
+     * In addition to whatever extension we performed above, we always add
+     * at least one block to satisfy our own request.
+     *
      * XXX This does an lseek - rather expensive - but at the moment it is the
      * only way to accurately determine how many blocks are in a relation.  Is
      * it worth keeping an accurate file length in shared memory someplace,
```