aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/nbtree/nbtutils.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/nbtree/nbtutils.c')
-rw-r--r--src/backend/access/nbtree/nbtutils.c93
1 files changed, 41 insertions, 52 deletions
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 1a15dfcb7d3..29f0dca1b08 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -63,7 +63,7 @@ static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
bool *continuescan, int *ikey);
static bool _bt_check_rowcompare(ScanKey skey,
IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
- ScanDirection dir, bool forcenonrequired, bool *continuescan);
+ ScanDirection dir, bool *continuescan);
static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
int tupnatts, TupleDesc tupdesc);
static int _bt_keep_natts(Relation rel, IndexTuple lastleft,
@@ -2902,8 +2902,10 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
/* row-comparison keys need special processing */
if (key->sk_flags & SK_ROW_HEADER)
{
+ Assert(!forcenonrequired); /* forbidden by _bt_set_startikey */
+
if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir,
- forcenonrequired, continuescan))
+ continuescan))
continue;
return false;
}
@@ -3060,8 +3062,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
*/
static bool
_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
- TupleDesc tupdesc, ScanDirection dir,
- bool forcenonrequired, bool *continuescan)
+ TupleDesc tupdesc, ScanDirection dir, bool *continuescan)
{
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
int32 cmpresult = 0;
@@ -3101,11 +3102,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
if (isNull)
{
- if (forcenonrequired)
- {
- /* treating scan's keys as non-required */
- }
- else if (subkey->sk_flags & SK_BT_NULLS_FIRST)
+ if (subkey->sk_flags & SK_BT_NULLS_FIRST)
{
/*
* Since NULLs are sorted before non-NULLs, we know we have
@@ -3159,12 +3156,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
*/
Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
subkey--;
- if (forcenonrequired)
- {
- /* treating scan's keys as non-required */
- }
- else if ((subkey->sk_flags & SK_BT_REQFWD) &&
- ScanDirectionIsForward(dir))
+ if ((subkey->sk_flags & SK_BT_REQFWD) &&
+ ScanDirectionIsForward(dir))
*continuescan = false;
else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
ScanDirectionIsBackward(dir))
@@ -3216,7 +3209,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
break;
}
- if (!result && !forcenonrequired)
+ if (!result)
{
/*
* Tuple fails this qual. If it's a required qual for the current
@@ -3342,75 +3335,71 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
*
* Note that if we hold a pin on the target page continuously from initially
* reading the items until applying this function, VACUUM cannot have deleted
- * any items from the page, and so there is no need to search left from the
- * recorded offset. (This observation also guarantees that the item is still
- * the right one to delete, which might otherwise be questionable since heap
- * TIDs can get recycled.) This holds true even if the page has been modified
- * by inserts and page splits, so there is no need to consult the LSN.
- *
- * If the pin was released after reading the page, then we re-read it. If it
- * has been modified since we read it (as determined by the LSN), we dare not
- * flag any entries because it is possible that the old entry was vacuumed
- * away and the TID was re-used by a completely different heap tuple.
+ * any items on the page, so the page's TIDs can't have been recycled by now.
+ * There's no risk that we'll confuse a new index tuple that happens to use a
+ * recycled TID with a now-removed tuple with the same TID (that used to be on
+ * this same page). We can't rely on that during scans that drop pins eagerly
+ * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on
+ * the page LSN having not changed since back when _bt_readpage saw the page.
*/
void
_bt_killitems(IndexScanDesc scan)
{
+ Relation rel = scan->indexRelation;
BTScanOpaque so = (BTScanOpaque) scan->opaque;
Page page;
BTPageOpaque opaque;
OffsetNumber minoff;
OffsetNumber maxoff;
- int i;
int numKilled = so->numKilled;
bool killedsomething = false;
- bool droppedpin PG_USED_FOR_ASSERTS_ONLY;
+ Assert(numKilled > 0);
Assert(BTScanPosIsValid(so->currPos));
+ Assert(scan->heapRelation != NULL); /* can't be a bitmap index scan */
- /*
- * Always reset the scan state, so we don't look for same items on other
- * pages.
- */
+ /* Always invalidate so->killedItems[] before leaving so->currPos */
so->numKilled = 0;
- if (BTScanPosIsPinned(so->currPos))
+ if (!so->dropPin)
{
/*
* We have held the pin on this page since we read the index tuples,
* so all we need to do is lock it. The pin will have prevented
- * re-use of any TID on the page, so there is no need to check the
- * LSN.
+ * concurrent VACUUMs from recycling any of the TIDs on the page.
*/
- droppedpin = false;
- _bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ);
-
- page = BufferGetPage(so->currPos.buf);
+ Assert(BTScanPosIsPinned(so->currPos));
+ _bt_lockbuf(rel, so->currPos.buf, BT_READ);
}
else
{
Buffer buf;
+ XLogRecPtr latestlsn;
- droppedpin = true;
- /* Attempt to re-read the buffer, getting pin and lock. */
- buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ);
+ Assert(!BTScanPosIsPinned(so->currPos));
+ Assert(RelationNeedsWAL(rel));
+ buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ);
- page = BufferGetPage(buf);
- if (BufferGetLSNAtomic(buf) == so->currPos.lsn)
- so->currPos.buf = buf;
- else
+ latestlsn = BufferGetLSNAtomic(buf);
+ Assert(!XLogRecPtrIsInvalid(so->currPos.lsn));
+ Assert(so->currPos.lsn <= latestlsn);
+ if (so->currPos.lsn != latestlsn)
{
- /* Modified while not pinned means hinting is not safe. */
- _bt_relbuf(scan->indexRelation, buf);
+ /* Modified, give up on hinting */
+ _bt_relbuf(rel, buf);
return;
}
+
+ /* Unmodified, hinting is safe */
+ so->currPos.buf = buf;
}
+ page = BufferGetPage(so->currPos.buf);
opaque = BTPageGetOpaque(page);
minoff = P_FIRSTDATAKEY(opaque);
maxoff = PageGetMaxOffsetNumber(page);
- for (i = 0; i < numKilled; i++)
+ for (int i = 0; i < numKilled; i++)
{
int itemIndex = so->killedItems[i];
BTScanPosItem *kitem = &so->currPos.items[itemIndex];
@@ -3442,7 +3431,7 @@ _bt_killitems(IndexScanDesc scan)
* correctness.
*
* Note that the page may have been modified in almost any way
- * since we first read it (in the !droppedpin case), so it's
+ * since we first read it (in the !so->dropPin case), so it's
* possible that this posting list tuple wasn't a posting list
* tuple when we first encountered its heap TIDs.
*/
@@ -3458,7 +3447,7 @@ _bt_killitems(IndexScanDesc scan)
* though only in the common case where the page can't
* have been concurrently modified
*/
- Assert(kitem->indexOffset == offnum || !droppedpin);
+ Assert(kitem->indexOffset == offnum || !so->dropPin);
/*
* Read-ahead to later kitems here.
@@ -3525,7 +3514,7 @@ _bt_killitems(IndexScanDesc scan)
MarkBufferDirtyHint(so->currPos.buf, true);
}
- _bt_unlockbuf(scan->indexRelation, so->currPos.buf);
+ _bt_unlockbuf(rel, so->currPos.buf);
}