diff options
author | Kevin Grittner <kgrittn@postgresql.org> | 2016-04-08 14:36:30 -0500 |
---|---|---|
committer | Kevin Grittner <kgrittn@postgresql.org> | 2016-04-08 14:36:30 -0500 |
commit | 848ef42bb8c7909c9d7baa38178d4a209906e7c1 (patch) | |
tree | e15250d8dfd8f46b15e3ecfddfcad09799cc3866 /src/backend/access | |
parent | 8b65cf4c5edabdcae45ceaef7b9ac236879aae50 (diff) | |
download | postgresql-848ef42bb8c7909c9d7baa38178d4a209906e7c1.tar.gz postgresql-848ef42bb8c7909c9d7baa38178d4a209906e7c1.zip |
Add the "snapshot too old" feature
This feature is controlled by a new old_snapshot_threshold GUC. A
value of -1 disables the feature, and that is the default. The
value of 0 is just intended for testing. Any value above that is the
age, in minutes, that a snapshot can reach before pruning and vacuum
are allowed to remove dead tuples which the snapshot would
otherwise protect. The xmin associated with a transaction ID does
still protect dead tuples. A connection which is using an "old"
snapshot does not get an error unless it accesses a page modified
recently enough that it might not be able to produce accurate
results.
This is similar to the Oracle feature, and we use the same SQLSTATE
and error message for compatibility.
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/brin/brin.c | 19 | ||||
-rw-r--r-- | src/backend/access/brin/brin_revmap.c | 11 | ||||
-rw-r--r-- | src/backend/access/gin/ginbtree.c | 9 | ||||
-rw-r--r-- | src/backend/access/gin/gindatapage.c | 7 | ||||
-rw-r--r-- | src/backend/access/gin/ginget.c | 22 | ||||
-rw-r--r-- | src/backend/access/gin/gininsert.c | 2 | ||||
-rw-r--r-- | src/backend/access/gist/gistget.c | 2 | ||||
-rw-r--r-- | src/backend/access/hash/hash.c | 3 | ||||
-rw-r--r-- | src/backend/access/hash/hashsearch.c | 10 | ||||
-rw-r--r-- | src/backend/access/heap/heapam.c | 31 | ||||
-rw-r--r-- | src/backend/access/heap/pruneheap.c | 11 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtinsert.c | 7 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtpage.c | 2 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtsearch.c | 51 | ||||
-rw-r--r-- | src/backend/access/spgist/spgscan.c | 2 |
15 files changed, 116 insertions, 73 deletions
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 6f6f1b1b415..e64c94d3569 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -135,7 +135,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, MemoryContext tupcxt = NULL; MemoryContext oldcxt = NULL; - revmap = brinRevmapInitialize(idxRel, &pagesPerRange); + revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL); for (;;) { @@ -152,7 +152,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, /* normalize the block number to be the first block in the range */ heapBlk = (heapBlk / pagesPerRange) * pagesPerRange; brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, - BUFFER_LOCK_SHARE); + BUFFER_LOCK_SHARE, NULL); /* if range is unsummarized, there's nothing to do */ if (!brtup) @@ -285,7 +285,8 @@ brinbeginscan(Relation r, int nkeys, int norderbys) scan = RelationGetIndexScan(r, nkeys, norderbys); opaque = (BrinOpaque *) palloc(sizeof(BrinOpaque)); - opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange); + opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange, + scan->xs_snapshot); opaque->bo_bdesc = brin_build_desc(r); scan->opaque = opaque; @@ -368,7 +369,8 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) MemoryContextResetAndDeleteChildren(perRangeCxt); tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf, - &off, &size, BUFFER_LOCK_SHARE); + &off, &size, BUFFER_LOCK_SHARE, + scan->xs_snapshot); if (tup) { tup = brin_copy_tuple(tup, size); @@ -647,7 +649,7 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) /* * Initialize our state, including the deformed tuple state. 
*/ - revmap = brinRevmapInitialize(index, &pagesPerRange); + revmap = brinRevmapInitialize(index, &pagesPerRange, NULL); state = initialize_brin_buildstate(index, revmap, pagesPerRange); /* @@ -1045,7 +1047,8 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, * the same.) */ phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf, - &offset, &phsz, BUFFER_LOCK_SHARE); + &offset, &phsz, BUFFER_LOCK_SHARE, + NULL); /* the placeholder tuple must exist */ if (phtup == NULL) elog(ERROR, "missing placeholder tuple"); @@ -1080,7 +1083,7 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized, BlockNumber pagesPerRange; Buffer buf; - revmap = brinRevmapInitialize(index, &pagesPerRange); + revmap = brinRevmapInitialize(index, &pagesPerRange, NULL); /* * Scan the revmap to find unsummarized items. @@ -1095,7 +1098,7 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized, CHECK_FOR_INTERRUPTS(); tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, - BUFFER_LOCK_SHARE); + BUFFER_LOCK_SHARE, NULL); if (tup == NULL) { /* no revmap entry for this heap range. Summarize it. */ diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index ce21cbabb7a..5ed867cf30b 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -68,7 +68,8 @@ static void revmap_physical_extend(BrinRevmap *revmap); * brinRevmapTerminate when caller is done with it. 
*/ BrinRevmap * -brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange) +brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange, + Snapshot snapshot) { BrinRevmap *revmap; Buffer meta; @@ -77,7 +78,7 @@ brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange) meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO); LockBuffer(meta, BUFFER_LOCK_SHARE); - page = BufferGetPage(meta, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(meta, snapshot, idxrel, BGP_TEST_FOR_OLD_SNAPSHOT); metadata = (BrinMetaPageData *) PageGetContents(page); revmap = palloc(sizeof(BrinRevmap)); @@ -187,7 +188,8 @@ brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, */ BrinTuple * brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, - Buffer *buf, OffsetNumber *off, Size *size, int mode) + Buffer *buf, OffsetNumber *off, Size *size, int mode, + Snapshot snapshot) { Relation idxRel = revmap->rm_irel; BlockNumber mapBlk; @@ -264,7 +266,8 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, *buf = ReadBuffer(idxRel, blk); } LockBuffer(*buf, mode); - page = BufferGetPage(*buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(*buf, snapshot, idxRel, + BGP_TEST_FOR_OLD_SNAPSHOT); /* If we land on a revmap page, start over */ if (BRIN_IS_REGULAR_PAGE(page)) diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 13258cca0ea..e593b2bbe99 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -71,7 +71,7 @@ ginTraverseLock(Buffer buffer, bool searchMode) * is share-locked, and stack->parent is NULL. 
*/ GinBtreeStack * -ginFindLeafPage(GinBtree btree, bool searchMode) +ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot) { GinBtreeStack *stack; @@ -89,7 +89,8 @@ ginFindLeafPage(GinBtree btree, bool searchMode) stack->off = InvalidOffsetNumber; - page = BufferGetPage(stack->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(stack->buffer, snapshot, btree->index, + BGP_TEST_FOR_OLD_SNAPSHOT); access = ginTraverseLock(stack->buffer, searchMode); @@ -115,8 +116,8 @@ ginFindLeafPage(GinBtree btree, bool searchMode) stack->buffer = ginStepRight(stack->buffer, btree->index, access); stack->blkno = rightlink; - page = BufferGetPage(stack->buffer, NULL, NULL, - BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(stack->buffer, snapshot, btree->index, + BGP_TEST_FOR_OLD_SNAPSHOT); if (!searchMode && GinPageIsIncompleteSplit(page)) ginFinishSplit(btree, stack, false, NULL); diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index 9c501a1af5f..ed3d9174f65 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -1820,7 +1820,7 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno, { /* search for the leaf page where the first item should go to */ btree.itemptr = insertdata.items[insertdata.curitem]; - stack = ginFindLeafPage(&btree, false); + stack = ginFindLeafPage(&btree, false, NULL); ginInsertValue(&btree, stack, &insertdata, buildStats); } @@ -1830,7 +1830,8 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno, * Starts a new scan on a posting tree. 
*/ GinBtreeStack * -ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno) +ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, + Snapshot snapshot) { GinBtreeStack *stack; @@ -1838,7 +1839,7 @@ ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno) btree->fullScan = TRUE; - stack = ginFindLeafPage(btree, TRUE); + stack = ginFindLeafPage(btree, TRUE, snapshot); return stack; } diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 33683278e10..b79ba1e62af 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -73,7 +73,7 @@ scanPostingTree(Relation index, GinScanEntry scanEntry, Page page; /* Descend to the leftmost leaf page */ - stack = ginScanBeginPostingTree(&btree, index, rootPostingTree); + stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot); buffer = stack->buffer; IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */ @@ -146,7 +146,8 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, if (moveRightIfItNeeded(btree, stack) == false) return true; - page = BufferGetPage(stack->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(stack->buffer, snapshot, btree->index, + BGP_TEST_FOR_OLD_SNAPSHOT); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); /* @@ -320,7 +321,7 @@ restartScanEntry: ginPrepareEntryScan(&btreeEntry, entry->attnum, entry->queryKey, entry->queryCategory, ginstate); - stackEntry = ginFindLeafPage(&btreeEntry, true); + stackEntry = ginFindLeafPage(&btreeEntry, true, snapshot); page = BufferGetPage(stackEntry->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); needUnlock = TRUE; @@ -385,7 +386,7 @@ restartScanEntry: needUnlock = FALSE; stack = ginScanBeginPostingTree(&entry->btree, ginstate->index, - rootPostingTree); + rootPostingTree, snapshot); entry->buffer = stack->buffer; /* @@ -627,7 +628,7 @@ entryLoadMoreItems(GinState 
*ginstate, GinScanEntry entry, entry->btree.itemptr.ip_posid++; } entry->btree.fullScan = false; - stack = ginFindLeafPage(&entry->btree, true); + stack = ginFindLeafPage(&entry->btree, true, snapshot); /* we don't need the stack, just the buffer. */ entry->buffer = stack->buffer; @@ -1335,8 +1336,8 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos) ItemPointerSetInvalid(&pos->item); for (;;) { - page = BufferGetPage(pos->pendingBuffer, NULL, - NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(pos->pendingBuffer, scan->xs_snapshot, + scan->indexRelation, BGP_TEST_FOR_OLD_SNAPSHOT); maxoff = PageGetMaxOffsetNumber(page); if (pos->firstOffset > maxoff) @@ -1516,8 +1517,8 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos) memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset)); - page = BufferGetPage(pos->pendingBuffer, NULL, - NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(pos->pendingBuffer, scan->xs_snapshot, + scan->indexRelation, BGP_TEST_FOR_OLD_SNAPSHOT); for (i = 0; i < so->nkeys; i++) { @@ -1710,7 +1711,8 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) *ntids = 0; LockBuffer(metabuffer, GIN_SHARE); - page = BufferGetPage(metabuffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(metabuffer, scan->xs_snapshot, scan->indexRelation, + BGP_TEST_FOR_OLD_SNAPSHOT); blkno = GinPageGetMeta(page)->head; /* diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 126501149d2..d4bfed06bcc 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -192,7 +192,7 @@ ginEntryInsert(GinState *ginstate, ginPrepareEntryScan(&btree, attnum, key, category, ginstate); - stack = ginFindLeafPage(&btree, false); + stack = ginFindLeafPage(&btree, false, NULL); page = BufferGetPage(stack->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); if (btree.findItem(&btree, stack)) diff --git 
a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 13a039947ba..24af868466d 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -336,7 +336,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, buffer = ReadBuffer(scan->indexRelation, pageItem->blkno); LockBuffer(buffer, GIST_SHARE); gistcheckpage(scan->indexRelation, buffer); - page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buffer, scan->xs_snapshot, r, BGP_TEST_FOR_OLD_SNAPSHOT); opaque = GistPageGetOpaque(page); /* diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index a5032e1251d..03cd0b006c3 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -278,7 +278,8 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir) buf = so->hashso_curbuf; Assert(BufferIsValid(buf)); - page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buf, scan->xs_snapshot, rel, + BGP_TEST_FOR_OLD_SNAPSHOT); maxoffnum = PageGetMaxOffsetNumber(page); for (offnum = ItemPointerGetOffsetNumber(current); offnum <= maxoffnum; diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index dd1f464e53a..4c14362c6fe 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -188,8 +188,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) /* Read the metapage */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); - page = BufferGetPage(metabuf, NULL, NULL, - BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(metabuf, scan->xs_snapshot, rel, + BGP_TEST_FOR_OLD_SNAPSHOT); metap = HashPageGetMeta(page); /* @@ -242,8 +242,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) /* Fetch the primary bucket page for the bucket */ buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE); - page = BufferGetPage(buf, NULL, NULL, - BGP_NO_SNAPSHOT_TEST); + 
page = BufferGetPage(buf, scan->xs_snapshot, rel, + BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (HashPageOpaque) PageGetSpecialPointer(page); Assert(opaque->hasho_bucket == bucket); @@ -350,6 +350,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) _hash_readnext(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) { + TestForOldSnapshot(scan->xs_snapshot, rel, page); maxoff = PageGetMaxOffsetNumber(page); offnum = _hash_binsearch(page, so->hashso_sk_hash); } @@ -391,6 +392,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) _hash_readprev(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) { + TestForOldSnapshot(scan->xs_snapshot, rel, page); maxoff = PageGetMaxOffsetNumber(page); offnum = _hash_binsearch_last(page, so->hashso_sk_hash); } diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 66b23540fe2..29fd31a819d 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -394,7 +394,8 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) */ LockBuffer(buffer, BUFFER_LOCK_SHARE); - dp = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + dp = BufferGetPage(buffer, snapshot, scan->rs_rd, + BGP_TEST_FOR_OLD_SNAPSHOT); lines = PageGetMaxOffsetNumber(dp); ntup = 0; @@ -537,7 +538,7 @@ heapgettup(HeapScanDesc scan, LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + dp = BufferGetPage(scan->rs_cbuf, snapshot, scan->rs_rd, BGP_TEST_FOR_OLD_SNAPSHOT); lines = PageGetMaxOffsetNumber(dp); /* page and lineoff now reference the physically next tid */ @@ -582,7 +583,8 @@ heapgettup(HeapScanDesc scan, LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + dp = BufferGetPage(scan->rs_cbuf, snapshot, scan->rs_rd, + BGP_TEST_FOR_OLD_SNAPSHOT); lines = PageGetMaxOffsetNumber(dp); if (!scan->rs_inited) @@ -616,7 +618,8 @@ heapgettup(HeapScanDesc scan, 
heapgetpage(scan, page); /* Since the tuple was previously fetched, needn't lock page here */ - dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + dp = BufferGetPage(scan->rs_cbuf, snapshot, scan->rs_rd, + BGP_TEST_FOR_OLD_SNAPSHOT); lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); lpp = PageGetItemId(dp, lineoff); Assert(ItemIdIsNormal(lpp)); @@ -745,7 +748,8 @@ heapgettup(HeapScanDesc scan, LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + dp = BufferGetPage(scan->rs_cbuf, snapshot, scan->rs_rd, + BGP_TEST_FOR_OLD_SNAPSHOT); lines = PageGetMaxOffsetNumber((Page) dp); linesleft = lines; if (backward) @@ -832,7 +836,8 @@ heapgettup_pagemode(HeapScanDesc scan, lineindex = scan->rs_cindex + 1; } - dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + dp = BufferGetPage(scan->rs_cbuf, scan->rs_snapshot, scan->rs_rd, + BGP_TEST_FOR_OLD_SNAPSHOT); lines = scan->rs_ntuples; /* page and lineindex now reference the next visible tid */ @@ -875,7 +880,8 @@ heapgettup_pagemode(HeapScanDesc scan, page = scan->rs_cblock; /* current page */ } - dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + dp = BufferGetPage(scan->rs_cbuf, scan->rs_snapshot, scan->rs_rd, + BGP_TEST_FOR_OLD_SNAPSHOT); lines = scan->rs_ntuples; if (!scan->rs_inited) @@ -908,7 +914,8 @@ heapgettup_pagemode(HeapScanDesc scan, heapgetpage(scan, page); /* Since the tuple was previously fetched, needn't lock page here */ - dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + dp = BufferGetPage(scan->rs_cbuf, scan->rs_snapshot, scan->rs_rd, + BGP_TEST_FOR_OLD_SNAPSHOT); lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); lpp = PageGetItemId(dp, lineoff); Assert(ItemIdIsNormal(lpp)); @@ -1027,7 +1034,8 @@ heapgettup_pagemode(HeapScanDesc scan, heapgetpage(scan, page); - dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + dp = 
BufferGetPage(scan->rs_cbuf, scan->rs_snapshot, scan->rs_rd, + BGP_TEST_FOR_OLD_SNAPSHOT); lines = scan->rs_ntuples; linesleft = lines; if (backward) @@ -1871,7 +1879,7 @@ heap_fetch(Relation relation, * Need share lock on buffer to examine tuple commit status. */ LockBuffer(buffer, BUFFER_LOCK_SHARE); - page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buffer, snapshot, relation, BGP_TEST_FOR_OLD_SNAPSHOT); /* * We'd better check for out-of-range offnum in case of VACUUM since the @@ -2200,7 +2208,8 @@ heap_get_latest_tid(Relation relation, */ buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid)); LockBuffer(buffer, BUFFER_LOCK_SHARE); - page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buffer, snapshot, relation, + BGP_TEST_FOR_OLD_SNAPSHOT); /* * Check for bogus item number. This is not treated as an error diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 19201b0bca5..ce073ccdc23 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -92,12 +92,21 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * need to use the horizon that includes slots, otherwise the data-only * horizon can be used. Note that the toast relation of user defined * relations are *not* considered catalog relations. + * + * It is OK to apply the old snapshot limit before acquiring the cleanup + * lock because the worst that can happen is that we are not quite as + * aggressive about the cleanup (by however many transaction IDs are + * consumed between this point and acquiring the lock). This allows us to + * save significant overhead in the case where the page is found not to be + * prunable. 
*/ if (IsCatalogRelation(relation) || RelationIsAccessibleInLogicalDecoding(relation)) OldestXmin = RecentGlobalXmin; else - OldestXmin = RecentGlobalDataXmin; + OldestXmin = + TransactionIdLimitedForOldSnapshots(RecentGlobalDataXmin, + relation); Assert(TransactionIdIsValid(OldestXmin)); diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index bf7a8175517..3796656e177 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -119,7 +119,7 @@ _bt_doinsert(Relation rel, IndexTuple itup, top: /* find the first page containing this key */ - stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE); + stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE, NULL); offset = InvalidOffsetNumber; @@ -135,7 +135,7 @@ top: * precise description. */ buf = _bt_moveright(rel, buf, natts, itup_scankey, false, - true, stack, BT_WRITE); + true, stack, BT_WRITE, NULL); /* * If we're not allowing duplicates, make sure the key isn't already in @@ -1682,7 +1682,8 @@ _bt_insert_parent(Relation rel, elog(DEBUG2, "concurrent ROOT page split"); lpageop = (BTPageOpaque) PageGetSpecialPointer(page); /* Find the leftmost page at the next level up */ - pbuf = _bt_get_endpoint(rel, lpageop->btpo.level + 1, false); + pbuf = _bt_get_endpoint(rel, lpageop->btpo.level + 1, false, + NULL); /* Set up a phony stack entry pointing there */ stack = &fakestack; stack->bts_blkno = BufferGetBlockNumber(pbuf); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 36b18047615..9ba61d5fe13 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -1255,7 +1255,7 @@ _bt_pagedel(Relation rel, Buffer buf) itup_scankey = _bt_mkscankey(rel, targetkey); /* find the leftmost leaf page containing this key */ stack = _bt_search(rel, rel->rd_rel->relnatts, itup_scankey, - false, &lbuf, BT_READ); + false, &lbuf, BT_READ, NULL); /* don't need a 
pin on the page */ _bt_relbuf(rel, lbuf); diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 83f790f7917..470bab0c521 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -79,6 +79,10 @@ _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp) * address of the leaf-page buffer, which is read-locked and pinned. * No locks are held on the parent pages, however! * + * If the snapshot parameter is not NULL, "old snapshot" checking will take + * place during the descent through the tree. This is not needed when + * positioning for an insert or delete, so NULL is used for those cases. + * * NOTE that the returned buffer is read-locked regardless of the access * parameter. However, access = BT_WRITE will allow an empty root page * to be created and returned. When access = BT_READ, an empty index @@ -87,7 +91,7 @@ _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp) */ BTStack _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, - Buffer *bufP, int access) + Buffer *bufP, int access, Snapshot snapshot) { BTStack stack_in = NULL; @@ -126,7 +130,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, */ *bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey, (access == BT_WRITE), stack_in, - BT_READ); + BT_READ, snapshot); /* if this is a leaf page, we're done */ page = BufferGetPage(*bufP, NULL, NULL, BGP_NO_SNAPSHOT_TEST); @@ -199,6 +203,10 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, * On entry, we have the buffer pinned and a lock of the type specified by * 'access'. If we move right, we release the buffer and lock and acquire * the same on the right sibling. Return value is the buffer we stop at. + * + * If the snapshot parameter is not NULL, "old snapshot" checking will take + * place during the descent through the tree. 
This is not needed when + * positioning for an insert or delete, so NULL is used for those cases. */ Buffer _bt_moveright(Relation rel, @@ -208,7 +216,8 @@ _bt_moveright(Relation rel, bool nextkey, bool forupdate, BTStack stack, - int access) + int access, + Snapshot snapshot) { Page page; BTPageOpaque opaque; @@ -233,7 +242,7 @@ _bt_moveright(Relation rel, for (;;) { - page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buf, snapshot, rel, BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (P_RIGHTMOST(opaque)) @@ -972,7 +981,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * Use the manufactured insertion scan key to descend the tree and * position ourselves on the target leaf page. */ - stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ); + stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ, + scan->xs_snapshot); /* don't need to keep the stack around... */ _bt_freestack(stack); @@ -1337,8 +1347,8 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir) /* step right one page */ so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ); /* check for deleted page */ - page = BufferGetPage(so->currPos.buf, NULL, NULL, - BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(so->currPos.buf, scan->xs_snapshot, rel, + BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (!P_IGNORE(opaque)) { @@ -1412,8 +1422,8 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir) * it's not half-dead and contains matching tuples. Else loop back * and do it all again. 
*/ - page = BufferGetPage(so->currPos.buf, NULL, NULL, - BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(so->currPos.buf, scan->xs_snapshot, rel, + BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (!P_IGNORE(opaque)) { @@ -1476,7 +1486,7 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot) /* check for interrupts while we're not holding any buffer lock */ CHECK_FOR_INTERRUPTS(); buf = _bt_getbuf(rel, blkno, BT_READ); - page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buf, snapshot, rel, BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* @@ -1502,14 +1512,14 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot) break; blkno = opaque->btpo_next; buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); - page = BufferGetPage(buf, NULL, NULL, - BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buf, snapshot, rel, + BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (BTPageOpaque) PageGetSpecialPointer(page); } /* Return to the original page to see what's up */ buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ); - page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buf, snapshot, rel, BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (P_ISDELETED(opaque)) { @@ -1526,8 +1536,8 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot) RelationGetRelationName(rel)); blkno = opaque->btpo_next; buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); - page = BufferGetPage(buf, NULL, NULL, - BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buf, snapshot, rel, + BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (!P_ISDELETED(opaque)) break; @@ -1564,7 +1574,8 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot) * The returned buffer is pinned and read-locked. 
*/ Buffer -_bt_get_endpoint(Relation rel, uint32 level, bool rightmost) +_bt_get_endpoint(Relation rel, uint32 level, bool rightmost, + Snapshot snapshot) { Buffer buf; Page page; @@ -1586,7 +1597,7 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) if (!BufferIsValid(buf)) return InvalidBuffer; - page = BufferGetPage(buf, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buf, snapshot, rel, BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (BTPageOpaque) PageGetSpecialPointer(page); for (;;) @@ -1605,8 +1616,8 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) elog(ERROR, "fell off the end of index \"%s\"", RelationGetRelationName(rel)); buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); - page = BufferGetPage(buf, NULL, NULL, - BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buf, snapshot, rel, + BGP_TEST_FOR_OLD_SNAPSHOT); opaque = (BTPageOpaque) PageGetSpecialPointer(page); } @@ -1659,7 +1670,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) * version of _bt_search(). We don't maintain a stack since we know we * won't need it. */ - buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir)); + buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir), scan->xs_snapshot); if (!BufferIsValid(buf)) { diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index fafdca31f39..7acd71a2911 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -341,7 +341,7 @@ redirect: } /* else new pointer points to the same page, no work needed */ - page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); + page = BufferGetPage(buffer, snapshot, index, BGP_TEST_FOR_OLD_SNAPSHOT); isnull = SpGistPageStoresNulls(page) ? true : false; |