author     Robert Haas <rhaas@postgresql.org>  2011-06-21 23:04:40 -0400
committer  Robert Haas <rhaas@postgresql.org>  2011-06-21 23:04:40 -0400
commit     503c7305a1e379f95649eef1a694d0c1dbdc674a
tree       39bb67975f3419f76d6973e86d5517c8e55f9853  /src/backend/access/heap/visibilitymap.c
parent     431ab0e82819b31fcd1e33ecb52c2cd3b4b41da7
Make the visibility map crash-safe.
This involves two main changes from the previous behavior. First, when we set a bit in the visibility map, emit a new WAL record of type XLOG_HEAP2_VISIBLE. Replay sets the page-level PD_ALL_VISIBLE bit and the visibility map bit.

Second, when inserting, updating, or deleting a tuple, we can no longer get away with clearing the visibility map bit after releasing the lock on the corresponding heap page, because an intervening crash might leave the visibility map bit set and the page-level bit clear. Making this work requires a bit of interface refactoring.

In passing, a few minor but related cleanups: change the test in visibilitymap_set and visibilitymap_clear to throw an error if the wrong page (or no page) is pinned, rather than silently doing nothing; this case should never occur. Also, remove duplicate definitions of InvalidXLogRecPtr.

Patch by me, review by Noah Misch.
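To make the second change concrete, here is a minimal sketch of the caller-side ordering the reworked interface enables; rel, buffer, block, and vmbuffer are illustrative names, not code from this patch:

	/*
	 * Illustrative sketch only, not part of this patch.  Under the old
	 * interface the map bit was cleared after the heap-page lock was
	 * released, so a crash in that window could leave the map bit set
	 * while PD_ALL_VISIBLE on the heap page was clear.  The new interface
	 * pins the map page up front (visibilitymap_pin may do I/O), so the
	 * bit can be cleared while the heap-page lock is still held.
	 */
	Buffer		vmbuffer = InvalidBuffer;

	visibilitymap_pin(rel, block, &vmbuffer);	/* before locking; may do I/O */
	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
	/* ... insert, update, or delete tuples on the page ... */
	PageClearAllVisible(BufferGetPage(buffer));
	visibilitymap_clear(rel, block, vmbuffer);	/* no I/O; lock still held */
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(vmbuffer);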
Diffstat (limited to 'src/backend/access/heap/visibilitymap.c')
-rw-r--r--  src/backend/access/heap/visibilitymap.c | 110
1 file changed, 59 insertions(+), 51 deletions(-)
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 58bab7df102..a19352021dc 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -11,10 +11,11 @@
* src/backend/access/heap/visibilitymap.c
*
* INTERFACE ROUTINES
- * visibilitymap_clear - clear a bit in the visibility map
- * visibilitymap_pin - pin a map page for setting a bit
- * visibilitymap_set - set a bit in a previously pinned page
- * visibilitymap_test - test if a bit is set
+ * visibilitymap_clear - clear a bit in the visibility map
+ * visibilitymap_pin - pin a map page for setting a bit
+ * visibilitymap_pin_ok - check whether correct map page is already pinned
+ * visibilitymap_set - set a bit in a previously pinned page
+ * visibilitymap_test - test if a bit is set
*
* NOTES
*
@@ -64,32 +65,13 @@
* It would be nice to use the visibility map to skip visibility checks in
* index scans.
*
- * Currently, the visibility map is not 100% correct all the time.
- * During updates, the bit in the visibility map is cleared after releasing
- * the lock on the heap page. During the window between releasing the lock
- * and clearing the bit in the visibility map, the bit in the visibility map
- * is set, but the new insertion or deletion is not yet visible to other
- * backends.
- *
- * That might actually be OK for the index scans, though. The newly inserted
- * tuple wouldn't have an index pointer yet, so all tuples reachable from an
- * index would still be visible to all other backends, and deletions wouldn't
- * be visible to other backends yet. (But HOT breaks that argument, no?)
- *
- * There's another hole in the way the PD_ALL_VISIBLE flag is set. When
- * vacuum observes that all tuples are visible to all, it sets the flag on
- * the heap page, and also sets the bit in the visibility map. If we then
- * crash, and only the visibility map page was flushed to disk, we'll have
- * a bit set in the visibility map, but the corresponding flag on the heap
- * page is not set. If the heap page is then updated, the updater won't
- * know to clear the bit in the visibility map. (Isn't that prevented by
- * the LSN interlock?)
- *
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include "access/heapam.h"
#include "access/visibilitymap.h"
+#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/lmgr.h"
@@ -127,38 +109,37 @@ static void vm_extend(Relation rel, BlockNumber nvmblocks);
/*
* visibilitymap_clear - clear a bit in visibility map
*
- * Clear a bit in the visibility map, marking that not all tuples are
- * visible to all transactions anymore.
+ * You must pass a buffer containing the correct map page to this function.
+ * Call visibilitymap_pin first to pin the right one. This function doesn't do
+ * any I/O.
*/
void
-visibilitymap_clear(Relation rel, BlockNumber heapBlk)
+visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
int mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
uint8 mask = 1 << mapBit;
- Buffer mapBuffer;
char *map;
#ifdef TRACE_VISIBILITYMAP
elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
#endif
- mapBuffer = vm_readbuf(rel, mapBlock, false);
- if (!BufferIsValid(mapBuffer))
- return; /* nothing to do */
+ if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock)
+ elog(ERROR, "wrong buffer passed to visibilitymap_clear");
- LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);
- map = PageGetContents(BufferGetPage(mapBuffer));
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ map = PageGetContents(BufferGetPage(buf));
if (map[mapByte] & mask)
{
map[mapByte] &= ~mask;
- MarkBufferDirty(mapBuffer);
+ MarkBufferDirty(buf);
}
- UnlockReleaseBuffer(mapBuffer);
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
/*
@@ -194,19 +175,36 @@ visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
}
/*
+ * visibilitymap_pin_ok - do we already have the correct page pinned?
+ *
+ * On entry, buf should be InvalidBuffer or a valid buffer returned by
+ * an earlier call to visibilitymap_pin or visibilitymap_test on the same
+ * relation. The return value indicates whether the buffer covers the
+ * given heapBlk.
+ */
+bool
+visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
+{
+ BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
+
+ return BufferIsValid(buf) && BufferGetBlockNumber(buf) == mapBlock;
+}
+
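A hedged sketch of the revalidation pattern this helper supports (illustrative names again, not patch code): because visibilitymap_pin may do I/O, a caller holding the heap-page lock must drop that lock before repinning, then recheck the page state afterwards:

	if (!visibilitymap_pin_ok(block, vmbuffer))
	{
		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		visibilitymap_pin(rel, block, &vmbuffer);	/* may read or extend the map */
		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
		/* the page may have changed while unlocked; recheck before proceeding */
	}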
+/*
* visibilitymap_set - set a bit on a previously pinned page
*
- * recptr is the LSN of the heap page. The LSN of the visibility map page is
- * advanced to that, to make sure that the visibility map doesn't get flushed
- * to disk before the update to the heap page that made all tuples visible.
+ * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
+ * or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
+ * one provided; in normal running, we generate a new XLOG record and set the
+ * page LSN to that value.
*
- * This is an opportunistic function. It does nothing, unless *buf
- * contains the bit for heapBlk. Call visibilitymap_pin first to pin
- * the right map page. This function doesn't do any I/O.
+ * You must pass a buffer containing the correct map page to this function.
+ * Call visibilitymap_pin first to pin the right one. This function doesn't do
+ * any I/O.
*/
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
- Buffer *buf)
+ Buffer buf)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -218,25 +216,35 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif
+ Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
+
/* Check that we have the right page pinned */
- if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != mapBlock)
- return;
+ if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock)
+ elog(ERROR, "wrong buffer passed to visibilitymap_set");
- page = BufferGetPage(*buf);
+ page = BufferGetPage(buf);
map = PageGetContents(page);
- LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
if (!(map[mapByte] & (1 << mapBit)))
{
+ START_CRIT_SECTION();
+
map[mapByte] |= (1 << mapBit);
+ MarkBufferDirty(buf);
- if (XLByteLT(PageGetLSN(page), recptr))
+ if (RelationNeedsWAL(rel))
+ {
+ if (XLogRecPtrIsInvalid(recptr))
+ recptr = log_heap_visible(rel->rd_node, heapBlk, buf);
PageSetLSN(page, recptr);
- PageSetTLI(page, ThisTimeLineID);
- MarkBufferDirty(*buf);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+
+ END_CRIT_SECTION();
}
- LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
/*