diff options
Diffstat (limited to 'src/backend/access/heap/visibilitymap.c')
-rw-r--r-- | src/backend/access/heap/visibilitymap.c | 110 |
1 files changed, 59 insertions, 51 deletions
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 58bab7df102..a19352021dc 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -11,10 +11,11 @@ * src/backend/access/heap/visibilitymap.c * * INTERFACE ROUTINES - * visibilitymap_clear - clear a bit in the visibility map - * visibilitymap_pin - pin a map page for setting a bit - * visibilitymap_set - set a bit in a previously pinned page - * visibilitymap_test - test if a bit is set + * visibilitymap_clear - clear a bit in the visibility map + * visibilitymap_pin - pin a map page for setting a bit + * visibilitymap_pin_ok - check whether correct map page is already pinned + * visibilitymap_set - set a bit in a previously pinned page + * visibilitymap_test - test if a bit is set * * NOTES * @@ -64,32 +65,13 @@ * It would be nice to use the visibility map to skip visibility checks in * index scans. * - * Currently, the visibility map is not 100% correct all the time. - * During updates, the bit in the visibility map is cleared after releasing - * the lock on the heap page. During the window between releasing the lock - * and clearing the bit in the visibility map, the bit in the visibility map - * is set, but the new insertion or deletion is not yet visible to other - * backends. - * - * That might actually be OK for the index scans, though. The newly inserted - * tuple wouldn't have an index pointer yet, so all tuples reachable from an - * index would still be visible to all other backends, and deletions wouldn't - * be visible to other backends yet. (But HOT breaks that argument, no?) - * - * There's another hole in the way the PD_ALL_VISIBLE flag is set. When - * vacuum observes that all tuples are visible to all, it sets the flag on - * the heap page, and also sets the bit in the visibility map. If we then - * crash, and only the visibility map page was flushed to disk, we'll have - * a bit set in the visibility map, but the corresponding flag on the heap - * page is not set. If the heap page is then updated, the updater won't - * know to clear the bit in the visibility map. (Isn't that prevented by - * the LSN interlock?) - * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "access/heapam.h" #include "access/visibilitymap.h" +#include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/bufpage.h" #include "storage/lmgr.h" @@ -127,38 +109,37 @@ static void vm_extend(Relation rel, BlockNumber nvmblocks); /* * visibilitymap_clear - clear a bit in visibility map * - * Clear a bit in the visibility map, marking that not all tuples are - * visible to all transactions anymore. + * You must pass a buffer containing the correct map page to this function. + * Call visibilitymap_pin first to pin the right one. This function doesn't do + * any I/O. */ void -visibilitymap_clear(Relation rel, BlockNumber heapBlk) +visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf) { BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk); int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk); int mapBit = HEAPBLK_TO_MAPBIT(heapBlk); uint8 mask = 1 << mapBit; - Buffer mapBuffer; char *map; #ifdef TRACE_VISIBILITYMAP elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk); #endif - mapBuffer = vm_readbuf(rel, mapBlock, false); - if (!BufferIsValid(mapBuffer)) - return; /* nothing to do */ + if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock) + elog(ERROR, "wrong buffer passed to visibilitymap_clear"); - LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE); - map = PageGetContents(BufferGetPage(mapBuffer)); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + map = PageGetContents(BufferGetPage(buf)); if (map[mapByte] & mask) { map[mapByte] &= ~mask; - MarkBufferDirty(mapBuffer); + MarkBufferDirty(buf); } - UnlockReleaseBuffer(mapBuffer); + LockBuffer(buf, BUFFER_LOCK_UNLOCK); } /* @@ -194,19 +175,36 @@ visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf) } /* + * visibilitymap_pin_ok - do we already have the correct page pinned? + * + * On entry, buf should be InvalidBuffer or a valid buffer returned by + * an earlier call to visibilitymap_pin or visibilitymap_test on the same + * relation. The return value indicates whether the buffer covers the + * given heapBlk. + */ +bool +visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf) +{ + BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk); + + return BufferIsValid(buf) && BufferGetBlockNumber(buf) == mapBlock; +} + +/* * visibilitymap_set - set a bit on a previously pinned page * - * recptr is the LSN of the heap page. The LSN of the visibility map page is - * advanced to that, to make sure that the visibility map doesn't get flushed - * to disk before the update to the heap page that made all tuples visible. + * recptr is the LSN of the XLOG record we're replaying, if we're in recovery, + * or InvalidXLogRecPtr in normal running. The page LSN is advanced to the + * one provided; in normal running, we generate a new XLOG record and set the + * page LSN to that value. * - * This is an opportunistic function. It does nothing, unless *buf - * contains the bit for heapBlk. Call visibilitymap_pin first to pin - * the right map page. This function doesn't do any I/O. + * You must pass a buffer containing the correct map page to this function. + * Call visibilitymap_pin first to pin the right one. This function doesn't do + * any I/O. */ void visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr, - Buffer *buf) + Buffer buf) { BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk); uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk); @@ -218,25 +216,35 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr, elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk); #endif + Assert(InRecovery || XLogRecPtrIsInvalid(recptr)); + /* Check that we have the right page pinned */ - if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != mapBlock) - return; + if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock) + elog(ERROR, "wrong buffer passed to visibilitymap_set"); - page = BufferGetPage(*buf); + page = BufferGetPage(buf); map = PageGetContents(page); - LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); if (!(map[mapByte] & (1 << mapBit))) { + START_CRIT_SECTION(); + map[mapByte] |= (1 << mapBit); + MarkBufferDirty(buf); - if (XLByteLT(PageGetLSN(page), recptr)) + if (RelationNeedsWAL(rel)) + { + if (XLogRecPtrIsInvalid(recptr)) + recptr = log_heap_visible(rel->rd_node, heapBlk, buf); PageSetLSN(page, recptr); - PageSetTLI(page, ThisTimeLineID); - MarkBufferDirty(*buf); + PageSetTLI(page, ThisTimeLineID); + } + + END_CRIT_SECTION(); } - LockBuffer(*buf, BUFFER_LOCK_UNLOCK); + LockBuffer(buf, BUFFER_LOCK_UNLOCK); } /* |