Diffstat (limited to 'src/backend/access')
 src/backend/access/index/genam.c   |   4
 src/backend/access/index/indexam.c | 236
 src/backend/access/nbtree/nbtree.c |  92
 3 files changed, 246 insertions, 86 deletions
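
The indexam.c changes below split the old index_getnext() into two steps: index_getnext_tid() advances the index scan and returns the next matching TID, and index_fetch_heap() resolves that TID to a visible heap tuple by walking its HOT chain; index_getnext() itself is reimplemented as a loop over the two. The following sketch shows how a caller could drive the split API directly. It is a minimal illustration modelled on the new index_getnext() body, assuming an MVCC snapshot and an already-opened heap/index pair; the helper name count_visible_matches is hypothetical.

#include "postgres.h"

#include "access/genam.h"
#include "access/relscan.h"
#include "utils/rel.h"
#include "utils/snapshot.h"

/*
 * Hypothetical helper: count heap tuples visible to the given snapshot that
 * match a single scan key, using the two-step API added by this patch.
 */
static long
count_visible_matches(Relation heapRel, Relation indexRel,
                      Snapshot snapshot, ScanKey key)
{
    IndexScanDesc scan;
    long        nvisible = 0;

    scan = index_beginscan(heapRel, indexRel, snapshot, 1, 0);
    index_rescan(scan, key, 1, NULL, 0);

    for (;;)
    {
        ItemPointer tid;
        HeapTuple   tup;

        /* Step 1: ask the index AM for the next matching TID. */
        tid = index_getnext_tid(scan, ForwardScanDirection);
        if (tid == NULL)
            break;              /* no more index entries */

        /*
         * Step 2: fetch the visible heap tuple, if any, for that TID.
         * index_fetch_heap() follows the HOT chain and returns NULL when
         * no chain member is visible to our snapshot.
         */
        tup = index_fetch_heap(scan);
        if (tup != NULL)
            nvisible++;

        /* A real caller must also honor scan->xs_recheck. */
    }

    index_endscan(scan);
    return nvisible;
}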
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 98832adb8a3..236e48912bb 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -93,6 +93,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
 	else
 		scan->orderByData = NULL;
 
+	scan->xs_want_itup = false; /* may be set later */
+
 	/*
 	 * During recovery we ignore killed tuples and don't bother to kill them
 	 * either. We do this because the xmin on the primary node could easily be
@@ -109,6 +111,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
 
 	scan->opaque = NULL;
 
+	scan->xs_itup = NULL;
+
 	ItemPointerSetInvalid(&scan->xs_ctup.t_self);
 	scan->xs_ctup.t_data = NULL;
 	scan->xs_cbuf = InvalidBuffer;
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 230af9bfa3a..3e0797a5c2e 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -20,7 +20,9 @@
  *		index_insert	- insert an index tuple into a relation
  *		index_markpos	- mark a scan position
  *		index_restrpos	- restore a scan position
- *		index_getnext	- get the next tuple from a scan
+ *		index_getnext_tid	- get the next TID from a scan
+ *		index_fetch_heap	- get the scan's next heap tuple
+ *		index_getnext	- get the next heap tuple from a scan
  *		index_getbitmap	- get all tuples from a scan
  *		index_bulk_delete	- bulk deletion of index tuples
  *		index_vacuum_cleanup	- post-deletion cleanup of an index
@@ -423,12 +425,65 @@ index_restrpos(IndexScanDesc scan)
 }
 
 /* ----------------
- *		index_getnext - get the next heap tuple from a scan
+ *		index_getnext_tid - get the next TID from a scan
  *
- * The result is the next heap tuple satisfying the scan keys and the
- * snapshot, or NULL if no more matching tuples exist. On success,
- * the buffer containing the heap tuple is pinned (the pin will be dropped
- * at the next index_getnext or index_endscan).
+ * The result is the next TID satisfying the scan keys,
+ * or NULL if no more matching tuples exist.
+ * ----------------
+ */
+ItemPointer
+index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
+{
+	FmgrInfo   *procedure;
+	bool		found;
+
+	SCAN_CHECKS;
+	GET_SCAN_PROCEDURE(amgettuple);
+
+	Assert(TransactionIdIsValid(RecentGlobalXmin));
+
+	/*
+	 * The AM's gettuple proc finds the next index entry matching the scan
+	 * keys, and puts the TID in xs_ctup.t_self. It should also set
+	 * scan->xs_recheck, though we pay no attention to that here.
+	 */
+	found = DatumGetBool(FunctionCall2(procedure,
+									   PointerGetDatum(scan),
+									   Int32GetDatum(direction)));
+
+	/* Reset kill flag immediately for safety */
+	scan->kill_prior_tuple = false;
+
+	/* If we're out of index entries, we're done */
+	if (!found)
+	{
+		/* ... but first, release any held pin on a heap page */
+		if (BufferIsValid(scan->xs_cbuf))
+		{
+			ReleaseBuffer(scan->xs_cbuf);
+			scan->xs_cbuf = InvalidBuffer;
+		}
+		return NULL;
+	}
+
+	pgstat_count_index_tuples(scan->indexRelation, 1);
+
+	/* Return the TID of the tuple we found. */
+	return &scan->xs_ctup.t_self;
+}
+
+/* ----------------
+ *		index_fetch_heap - get the scan's next heap tuple
+ *
+ * The result is a visible heap tuple associated with the index TID most
+ * recently fetched by index_getnext_tid, or NULL if no more matching tuples
+ * exist. (There can be more than one matching tuple because of HOT chains,
+ * although when using an MVCC snapshot it should be impossible for more than
+ * one such tuple to exist.)
+ *
+ * On success, the buffer containing the heap tup is pinned (the pin will be
+ * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
+ * call).
  *
  * Note: caller must check scan->xs_recheck, and perform rechecking of the
  * scan keys if required. We do not do that here because we don't have
@@ -436,22 +491,90 @@ index_restrpos(IndexScanDesc scan)
  * ----------------
  */
 HeapTuple
-index_getnext(IndexScanDesc scan, ScanDirection direction)
+index_fetch_heap(IndexScanDesc scan)
 {
-	HeapTuple	heapTuple = &scan->xs_ctup;
-	ItemPointer tid = &heapTuple->t_self;
-	FmgrInfo   *procedure;
+	ItemPointer tid = &scan->xs_ctup.t_self;
 	bool		all_dead = false;
+	bool		got_heap_tuple;
 
-	SCAN_CHECKS;
-	GET_SCAN_PROCEDURE(amgettuple);
+	/* We can skip the buffer-switching logic if we're in mid-HOT chain. */
+	if (!scan->xs_continue_hot)
+	{
+		/* Switch to correct buffer if we don't have it already */
+		Buffer		prev_buf = scan->xs_cbuf;
 
-	Assert(TransactionIdIsValid(RecentGlobalXmin));
+		scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
+											 scan->heapRelation,
+											 ItemPointerGetBlockNumber(tid));
 
-	for (;;)
+		/*
+		 * Prune page, but only if we weren't already on this page
+		 */
+		if (prev_buf != scan->xs_cbuf)
+			heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
+								RecentGlobalXmin);
+	}
+
+	/* Obtain share-lock on the buffer so we can examine visibility */
+	LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
+	got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
+											scan->xs_cbuf,
+											scan->xs_snapshot,
+											&scan->xs_ctup,
+											&all_dead,
+											!scan->xs_continue_hot);
+	LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+
+	if (got_heap_tuple)
 	{
-		bool		got_heap_tuple;
+		/*
+		 * Only in a non-MVCC snapshot can more than one member of the
+		 * HOT chain be visible.
+		 */
+		scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
+		pgstat_count_heap_fetch(scan->indexRelation);
+		return &scan->xs_ctup;
+	}
+
+	/* We've reached the end of the HOT chain. */
+	scan->xs_continue_hot = false;
+
+	/*
+	 * If we scanned a whole HOT chain and found only dead tuples, tell index
+	 * AM to kill its entry for that TID (this will take effect in the next
+	 * amgettuple call, in index_getnext_tid). We do not do this when in
+	 * recovery because it may violate MVCC to do so. See comments in
+	 * RelationGetIndexScan().
+	 */
+	if (!scan->xactStartedInRecovery)
+		scan->kill_prior_tuple = all_dead;
 
+	return NULL;
+}
+
+/* ----------------
+ *		index_getnext - get the next heap tuple from a scan
+ *
+ * The result is the next heap tuple satisfying the scan keys and the
+ * snapshot, or NULL if no more matching tuples exist.
+ *
+ * On success, the buffer containing the heap tup is pinned (the pin will be
+ * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
+ * call).
+ *
+ * Note: caller must check scan->xs_recheck, and perform rechecking of the
+ * scan keys if required. We do not do that here because we don't have
+ * enough information to do it efficiently in the general case.
+ * ----------------
+ */
+HeapTuple
+index_getnext(IndexScanDesc scan, ScanDirection direction)
+{
+	HeapTuple	heapTuple;
+	ItemPointer tid;
+
+	for (;;)
+	{
 		if (scan->xs_continue_hot)
 		{
 			/*
@@ -459,86 +582,27 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
 			 * earlier member. Must still hold pin on current heap page.
 			 */
 			Assert(BufferIsValid(scan->xs_cbuf));
-			Assert(ItemPointerGetBlockNumber(tid) ==
+			Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) ==
 				   BufferGetBlockNumber(scan->xs_cbuf));
 		}
 		else
 		{
-			bool		found;
-			Buffer		prev_buf;
-
-			/*
-			 * If we scanned a whole HOT chain and found only dead tuples,
-			 * tell index AM to kill its entry for that TID. We do not do this
-			 * when in recovery because it may violate MVCC to do so. see
-			 * comments in RelationGetIndexScan().
-			 */
-			if (!scan->xactStartedInRecovery)
-				scan->kill_prior_tuple = all_dead;
-
-			/*
-			 * The AM's gettuple proc finds the next index entry matching the
-			 * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid). It
-			 * should also set scan->xs_recheck, though we pay no attention to
-			 * that here.
-			 */
-			found = DatumGetBool(FunctionCall2(procedure,
-											   PointerGetDatum(scan),
-											   Int32GetDatum(direction)));
-
-			/* Reset kill flag immediately for safety */
-			scan->kill_prior_tuple = false;
+			/* Time to fetch the next TID from the index */
+			tid = index_getnext_tid(scan, direction);
 
-			/* If we're out of index entries, break out of outer loop */
-			if (!found)
+			/* If we're out of index entries, we're done */
+			if (tid == NULL)
 				break;
-
-			pgstat_count_index_tuples(scan->indexRelation, 1);
-
-			/* Switch to correct buffer if we don't have it already */
-			prev_buf = scan->xs_cbuf;
-			scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
-												 scan->heapRelation,
-												 ItemPointerGetBlockNumber(tid));
-
-			/*
-			 * Prune page, but only if we weren't already on this page
-			 */
-			if (prev_buf != scan->xs_cbuf)
-				heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
-									RecentGlobalXmin);
 		}
 
-		/* Obtain share-lock on the buffer so we can examine visibility */
-		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
-		got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
-												scan->xs_cbuf,
-												scan->xs_snapshot,
-												&scan->xs_ctup,
-												&all_dead,
-												!scan->xs_continue_hot);
-		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
-
-		if (got_heap_tuple)
-		{
-			/*
-			 * Only in a non-MVCC snapshot can more than one member of the
-			 * HOT chain be visible.
-			 */
-			scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
-			pgstat_count_heap_fetch(scan->indexRelation);
+		/*
+		 * Fetch the next (or only) visible heap tuple for this index entry.
+		 * If we don't find anything, loop around and grab the next TID from
+		 * the index.
+		 */
+		heapTuple = index_fetch_heap(scan);
+		if (heapTuple != NULL)
 			return heapTuple;
-		}
-
-		/* Loop around to ask index AM for another TID */
-		scan->xs_continue_hot = false;
-	}
-
-	/* Release any held pin on a heap page */
-	if (BufferIsValid(scan->xs_cbuf))
-	{
-		ReleaseBuffer(scan->xs_cbuf);
-		scan->xs_cbuf = InvalidBuffer;
 	}
 
 	return NULL;				/* failure exit */
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 996611516fe..11b57659ee8 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -73,6 +73,7 @@ static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 			 BTCycleId cycleid);
 static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
 			 BlockNumber orig_blkno);
+static IndexTuple bt_getindextuple(IndexScanDesc scan);
 
 
 /*
@@ -310,10 +311,95 @@ btgettuple(PG_FUNCTION_ARGS)
 	else
 		res = _bt_first(scan, dir);
 
+	/* Return the whole index tuple if requested */
+	if (scan->xs_want_itup)
+	{
+		/* First, free the last one ... */
+		if (scan->xs_itup != NULL)
+		{
+			pfree(scan->xs_itup);
+			scan->xs_itup = NULL;
+		}
+
+		if (res)
+			scan->xs_itup = bt_getindextuple(scan);
+	}
+
 	PG_RETURN_BOOL(res);
 }
 
 /*
+ * bt_getindextuple - fetch index tuple at current position.
+ *
+ * This can fail to find the tuple if new tuples have been inserted on the
+ * index page since we stepped onto the page. NULL is returned in that case.
+ * (We could try a bit harder by searching for the TID; but if insertions
+ * are happening, it's reasonably likely that an index-only scan will fail
+ * anyway because of visibility. So probably not worth the trouble.)
+ *
+ * The tuple returned is a palloc'd copy, so that we don't need to keep a
+ * lock on the index page.
+ *
+ * The caller must have pin on so->currPos.buf.
+ */
+static IndexTuple
+bt_getindextuple(IndexScanDesc scan)
+{
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	Page		page;
+	BTPageOpaque opaque;
+	OffsetNumber minoff;
+	OffsetNumber maxoff;
+	int			itemIndex;
+	OffsetNumber offnum;
+	IndexTuple	ituple,
+				result;
+
+	Assert(BufferIsValid(so->currPos.buf));
+
+	LockBuffer(so->currPos.buf, BT_READ);
+
+	/* Locate the tuple, being paranoid about possibility the page changed */
+	page = BufferGetPage(so->currPos.buf);
+	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+	minoff = P_FIRSTDATAKEY(opaque);
+	maxoff = PageGetMaxOffsetNumber(page);
+
+	itemIndex = so->currPos.itemIndex;
+	/* pure paranoia */
+	Assert(itemIndex >= so->currPos.firstItem &&
+		   itemIndex <= so->currPos.lastItem);
+
+	offnum = so->currPos.items[itemIndex].indexOffset;
+	if (offnum < minoff || offnum > maxoff)
+	{
+		/* should never happen, since we have pin on page, but be careful */
+		LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
+		return NULL;
+	}
+
+	ituple = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+
+	if (ItemPointerEquals(&ituple->t_tid, &scan->xs_ctup.t_self))
+	{
+		/* yup, it's the desired tuple, so make a copy */
+		Size		itupsz = IndexTupleSize(ituple);
+
+		result = palloc(itupsz);
+		memcpy(result, ituple, itupsz);
+	}
+	else
+	{
+		/* oops, it got moved */
+		result = NULL;
+	}
+
+	LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
+
+	return result;
+}
+
+/*
  * btgetbitmap() -- gets all matching tuples, and adds them to a bitmap
  */
 Datum
@@ -464,6 +550,12 @@ btendscan(PG_FUNCTION_ARGS)
 		pfree(so->keyData);
 	pfree(so);
 
+	if (scan->xs_itup != NULL)
+	{
+		pfree(scan->xs_itup);
+		scan->xs_itup = NULL;
+	}
+
 	PG_RETURN_VOID();
 }
 
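
The nbtree.c side wires the new xs_want_itup/xs_itup fields into btgettuple(): when the caller has set xs_want_itup, each successful fetch stores a palloc'd copy of the current index tuple in scan->xs_itup (or NULL if bt_getindextuple() could not re-find it). Below is a hedged sketch of how a caller might consume that copy; it assumes an MVCC snapshot and a btree index whose first column is interesting, the helper name fetch_first_index_column is hypothetical, and the returned index tuple by itself says nothing about visibility, which still has to be checked against the heap.

#include "postgres.h"

#include "access/genam.h"
#include "access/itup.h"
#include "access/relscan.h"
#include "utils/rel.h"
#include "utils/snapshot.h"

/*
 * Hypothetical illustration: walk the whole index and look at the first
 * indexed column of every index tuple the AM hands back via scan->xs_itup.
 */
static void
fetch_first_index_column(Relation heapRel, Relation indexRel, Snapshot snapshot)
{
    IndexScanDesc scan;
    TupleDesc   itupdesc = RelationGetDescr(indexRel);

    scan = index_beginscan(heapRel, indexRel, snapshot, 0, 0);
    scan->xs_want_itup = true;  /* ask the AM for whole index tuples */
    index_rescan(scan, NULL, 0, NULL, 0);

    while (index_getnext_tid(scan, ForwardScanDirection) != NULL)
    {
        Datum       value;
        bool        isnull;

        /* The AM may fail to re-find the tuple; see bt_getindextuple(). */
        if (scan->xs_itup == NULL)
            continue;

        value = index_getattr(scan->xs_itup, 1, itupdesc, &isnull);

        /* ... use value/isnull; visibility must still be checked ... */
        (void) value;
        (void) isnull;
    }

    index_endscan(scan);
}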