diff options
author | Robert Haas <rhaas@postgresql.org> | 2012-04-26 20:00:21 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2012-04-26 20:00:21 -0400 |
commit | 3424bff90f40532527b9cf4f2ad9eaff750682f7 (patch) | |
tree | 028c10eea2a93f672d9462ebb4dcef7097d316ca /src/backend | |
parent | 92df2203437603d40417fe711c3cb7066ac4fdf5 (diff) | |
download | postgresql-3424bff90f40532527b9cf4f2ad9eaff750682f7.tar.gz postgresql-3424bff90f40532527b9cf4f2ad9eaff750682f7.zip |
Prevent index-only scans from returning wrong answers under Hot Standby.
The alternative of disallowing index-only scans in HS operation was
discussed, but the consensus was that it was better to treat marking
a page all-visible as a recovery conflict for snapshots that could still
fail to see XIDs on that page. We may in the future try to soften this,
so that we simply force index scans to do heap fetches in cases where
this may be an issue, rather than throwing a hard conflict.
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/access/heap/heapam.c | 18 | ||||
-rw-r--r-- | src/backend/access/heap/visibilitymap.c | 9 | ||||
-rw-r--r-- | src/backend/commands/vacuumlazy.c | 11 |
3 files changed, 31 insertions, 7 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 98d1e559d32..3259354d5e0 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -4368,7 +4368,8 @@ log_heap_freeze(Relation reln, Buffer buffer, * and dirtied. */ XLogRecPtr -log_heap_visible(RelFileNode rnode, BlockNumber block, Buffer vm_buffer) +log_heap_visible(RelFileNode rnode, BlockNumber block, Buffer vm_buffer, + TransactionId cutoff_xid) { xl_heap_visible xlrec; XLogRecPtr recptr; @@ -4376,6 +4377,7 @@ log_heap_visible(RelFileNode rnode, BlockNumber block, Buffer vm_buffer) xlrec.node = rnode; xlrec.block = block; + xlrec.cutoff_xid = cutoff_xid; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapVisible; @@ -4708,6 +4710,17 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record) return; page = (Page) BufferGetPage(buffer); + /* + * If there are any Hot Standby transactions running that have an xmin + * horizon old enough that this page isn't all-visible for them, they + * might incorrectly decide that an index-only scan can skip a heap fetch. + * + * NB: It might be better to throw some kind of "soft" conflict here that + * forces any index-only scan that is in flight to perform heap fetches, + * rather than killing the transaction outright. + */ + ResolveRecoveryConflictWithSnapshot(xlrec->cutoff_xid, xlrec->node); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* @@ -4760,7 +4773,8 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record) * harm is done; and the next VACUUM will fix it. */ if (!XLByteLE(lsn, PageGetLSN(BufferGetPage(vmbuffer)))) - visibilitymap_set(reln, xlrec->block, lsn, vmbuffer); + visibilitymap_set(reln, xlrec->block, lsn, vmbuffer, + xlrec->cutoff_xid); ReleaseBuffer(vmbuffer); FreeFakeRelcacheEntry(reln); diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 6505e76daed..5696abe4d2a 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -229,7 +229,9 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf) * recptr is the LSN of the XLOG record we're replaying, if we're in recovery, * or InvalidXLogRecPtr in normal running. The page LSN is advanced to the * one provided; in normal running, we generate a new XLOG record and set the - * page LSN to that value. + * page LSN to that value. cutoff_xid is the largest xmin on the page being + * marked all-visible; it is needed for Hot Standby, and can be + * InvalidTransactionId if the page contains no tuples. * * You must pass a buffer containing the correct map page to this function. * Call visibilitymap_pin first to pin the right one. This function doesn't do @@ -237,7 +239,7 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf) */ void visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr, - Buffer buf) + Buffer buf, TransactionId cutoff_xid) { BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk); uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk); @@ -269,7 +271,8 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr, if (RelationNeedsWAL(rel)) { if (XLogRecPtrIsInvalid(recptr)) - recptr = log_heap_visible(rel->rd_node, heapBlk, buf); + recptr = log_heap_visible(rel->rd_node, heapBlk, buf, + cutoff_xid); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 60171470d36..0e0193d40e1 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -448,6 +448,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, bool all_visible_according_to_vm; bool all_visible; bool has_dead_tuples; + TransactionId visibility_cutoff_xid = InvalidTransactionId; if (blkno == next_not_all_visible_block) { @@ -627,7 +628,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, { PageSetAllVisible(page); MarkBufferDirty(buf); - visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer); + visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer, + InvalidTransactionId); } UnlockReleaseBuffer(buf); @@ -759,6 +761,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, all_visible = false; break; } + + /* Track newest xmin on page. */ + if (TransactionIdFollows(xmin, visibility_cutoff_xid)) + visibility_cutoff_xid = xmin; } break; case HEAPTUPLE_RECENTLY_DEAD: @@ -853,7 +859,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, PageSetAllVisible(page); MarkBufferDirty(buf); } - visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer); + visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer, + visibility_cutoff_xid); } /* |