diff options
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/common/reloptions.c | 17 | ||||
-rw-r--r-- | src/backend/access/gist/gistutil.c | 14 | ||||
-rw-r--r-- | src/backend/access/gist/gistvalidate.c | 6 | ||||
-rw-r--r-- | src/backend/access/heap/heapam.c | 6 | ||||
-rw-r--r-- | src/backend/access/heap/heapam_handler.c | 2 | ||||
-rw-r--r-- | src/backend/access/heap/vacuumlazy.c | 3 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtree.c | 30 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtsearch.c | 70 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtutils.c | 93 | ||||
-rw-r--r-- | src/backend/access/transam/xact.c | 28 |
10 files changed, 165 insertions, 104 deletions
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 46c1dce222d..50747c16396 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -1243,8 +1243,9 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, } else { - text *t; + const char *name; const char *value; + text *t; Size len; /* @@ -1291,11 +1292,19 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, * have just "name", assume "name=true" is meant. Note: the * namespace is not output. */ + name = def->defname; if (def->arg != NULL) value = defGetString(def); else value = "true"; + /* Insist that name not contain "=", else "a=b=c" is ambiguous */ + if (strchr(name, '=') != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid option name \"%s\": must not contain \"=\"", + name))); + /* * This is not a great place for this test, but there's no other * convenient place to filter the option out. As WITH (oids = @@ -1303,7 +1312,7 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, * amount of ugly. */ if (acceptOidsOff && def->defnamespace == NULL && - strcmp(def->defname, "oids") == 0) + strcmp(name, "oids") == 0) { if (defGetBoolean(def)) ereport(ERROR, @@ -1313,11 +1322,11 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, continue; } - len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value); + len = VARHDRSZ + strlen(name) + 1 + strlen(value); /* +1 leaves room for sprintf's trailing null */ t = (text *) palloc(len + 1); SET_VARSIZE(t, len); - sprintf(VARDATA(t), "%s=%s", def->defname, value); + sprintf(VARDATA(t), "%s=%s", name, value); astate = accumArrayResult(astate, PointerGetDatum(t), false, TEXTOID, diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index a6b701943d3..c0aa7d0222f 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -1058,11 +1058,11 @@ gistGetFakeLSN(Relation rel) } /* - * This is a stratnum support function for GiST opclasses that use the - * RT*StrategyNumber constants. + * This is a stratnum translation support function for GiST opclasses that use + * the RT*StrategyNumber constants. */ Datum -gist_stratnum_common(PG_FUNCTION_ARGS) +gist_translate_cmptype_common(PG_FUNCTION_ARGS) { CompareType cmptype = PG_GETARG_INT32(0); @@ -1090,9 +1090,9 @@ gist_stratnum_common(PG_FUNCTION_ARGS) /* * Returns the opclass's private stratnum used for the given compare type. * - * Calls the opclass's GIST_STRATNUM_PROC support function, if any, - * and returns the result. - * Returns InvalidStrategy if the function is not defined. + * Calls the opclass's GIST_TRANSLATE_CMPTYPE_PROC support function, if any, + * and returns the result. Returns InvalidStrategy if the function is not + * defined. */ StrategyNumber gisttranslatecmptype(CompareType cmptype, Oid opfamily) @@ -1101,7 +1101,7 @@ gisttranslatecmptype(CompareType cmptype, Oid opfamily) Datum result; /* Check whether the function is provided. */ - funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_STRATNUM_PROC); + funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_TRANSLATE_CMPTYPE_PROC); if (!OidIsValid(funcid)) return InvalidStrategy; diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c index 2a49e6d20f0..2ed6f74fce9 100644 --- a/src/backend/access/gist/gistvalidate.c +++ b/src/backend/access/gist/gistvalidate.c @@ -138,7 +138,7 @@ gistvalidate(Oid opclassoid) ok = check_amproc_signature(procform->amproc, VOIDOID, true, 1, 1, INTERNALOID); break; - case GIST_STRATNUM_PROC: + case GIST_TRANSLATE_CMPTYPE_PROC: ok = check_amproc_signature(procform->amproc, INT2OID, true, 1, 1, INT4OID) && procform->amproclefttype == ANYOID && @@ -265,7 +265,7 @@ gistvalidate(Oid opclassoid) if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC || i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC || i == GIST_OPTIONS_PROC || i == GIST_SORTSUPPORT_PROC || - i == GIST_STRATNUM_PROC) + i == GIST_TRANSLATE_CMPTYPE_PROC) continue; /* optional methods */ ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -336,7 +336,7 @@ gistadjustmembers(Oid opfamilyoid, case GIST_FETCH_PROC: case GIST_OPTIONS_PROC: case GIST_SORTSUPPORT_PROC: - case GIST_STRATNUM_PROC: + case GIST_TRANSLATE_CMPTYPE_PROC: /* Optional, so force it to be a soft family dependency */ op->ref_is_hard = false; op->ref_is_family = true; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 2be7f817c78..0dcd6ee817e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -4982,7 +4982,7 @@ l3: case LockWaitError: if (!ConditionalMultiXactIdWait((MultiXactId) xwait, status, infomask, relation, - NULL, log_lock_failure)) + NULL, log_lock_failures)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", @@ -5020,7 +5020,7 @@ l3: } break; case LockWaitError: - if (!ConditionalXactLockTableWait(xwait, log_lock_failure)) + if (!ConditionalXactLockTableWait(xwait, log_lock_failures)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", @@ -5285,7 +5285,7 @@ heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, break; case LockWaitError: - if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failure)) + if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failures)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index ac082fefa77..cb4bc35c93e 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -464,7 +464,7 @@ tuple_lock_retry: return TM_WouldBlock; break; case LockWaitError: - if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failure)) + if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failures)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 708674d8fcf..09416450af9 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -757,7 +757,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->vm_new_visible_pages = 0; vacrel->vm_new_visible_frozen_pages = 0; vacrel->vm_new_frozen_pages = 0; - vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel); /* * Get cutoffs that determine which deleted tuples are considered DEAD, @@ -776,7 +775,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * to increase the number of dead tuples it can prune away.) */ vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs); + vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel); vacrel->vistest = GlobalVisTestFor(rel); + /* Initialize state used to track oldest extant XID/MXID */ vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin; vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact; diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 765659887af..03a1d7b027a 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -228,6 +228,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) BTScanOpaque so = (BTScanOpaque) scan->opaque; bool res; + Assert(scan->heapRelation != NULL); + /* btree indexes are never lossy */ scan->xs_recheck = false; @@ -289,6 +291,8 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) int64 ntids = 0; ItemPointer heapTid; + Assert(scan->heapRelation == NULL); + /* Each loop iteration performs another primitive index scan */ do { @@ -393,6 +397,32 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, BTScanPosInvalidate(so->currPos); } + /* + * We prefer to eagerly drop leaf page pins before btgettuple returns. + * This avoids making VACUUM wait to acquire a cleanup lock on the page. + * + * We cannot safely drop leaf page pins during index-only scans due to a + * race condition involving VACUUM setting pages all-visible in the VM. + * It's also unsafe for plain index scans that use a non-MVCC snapshot. + * + * When we drop pins eagerly, the mechanism that marks so->killedItems[] + * index tuples LP_DEAD has to deal with concurrent TID recycling races. + * The scheme used to detect unsafe TID recycling won't work when scanning + * unlogged relations (since it involves saving an affected page's LSN). + * Opt out of eager pin dropping during unlogged relation scans for now + * (this is preferable to opting out of kill_prior_tuple LP_DEAD setting). + * + * Also opt out of dropping leaf page pins eagerly during bitmap scans. + * Pins cannot be held for more than an instant during bitmap scans either + * way, so we might as well avoid wasting cycles on acquiring page LSNs. + * + * See nbtree/README section on making concurrent TID recycling safe. + */ + so->dropPin = (!scan->xs_want_itup && + IsMVCCSnapshot(scan->xs_snapshot) && + RelationNeedsWAL(scan->indexRelation) && + scan->heapRelation != NULL); + so->markItemIndex = -1; so->needPrimScan = false; so->scanBehind = false; diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index fe9a3886913..070f14c8b91 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -25,7 +25,7 @@ #include "utils/rel.h" -static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp); +static inline void _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so); static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key, Buffer buf, bool forupdate, BTStack stack, int access); @@ -57,24 +57,29 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); /* * _bt_drop_lock_and_maybe_pin() * - * Unlock the buffer; and if it is safe to release the pin, do that, too. - * This will prevent vacuum from stalling in a blocked state trying to read a - * page when a cursor is sitting on it. - * - * See nbtree/README section on making concurrent TID recycling safe. + * Unlock so->currPos.buf. If scan is so->dropPin, drop the pin, too. + * Dropping the pin prevents VACUUM from blocking on acquiring a cleanup lock. */ -static void -_bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp) +static inline void +_bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so) { - _bt_unlockbuf(scan->indexRelation, sp->buf); - - if (IsMVCCSnapshot(scan->xs_snapshot) && - RelationNeedsWAL(scan->indexRelation) && - !scan->xs_want_itup) + if (!so->dropPin) { - ReleaseBuffer(sp->buf); - sp->buf = InvalidBuffer; + /* Just drop the lock (not the pin) */ + _bt_unlockbuf(rel, so->currPos.buf); + return; } + + /* + * Drop both the lock and the pin. + * + * Have to set so->currPos.lsn so that _bt_killitems has a way to detect + * when concurrent heap TID recycling by VACUUM might have taken place. + */ + Assert(RelationNeedsWAL(rel)); + so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf); + _bt_relbuf(rel, so->currPos.buf); + so->currPos.buf = InvalidBuffer; } /* @@ -866,8 +871,8 @@ _bt_compare(Relation rel, * if backwards scan, the last item) in the tree that satisfies the * qualifications in the scan key. On success exit, data about the * matching tuple(s) on the page has been loaded into so->currPos. We'll - * drop all locks and hold onto a pin on page's buffer, except when - * _bt_drop_lock_and_maybe_pin dropped the pin to avoid blocking VACUUM. + * drop all locks and hold onto a pin on page's buffer, except during + * so->dropPin scans, when we drop both the lock and the pin. * _bt_returnitem sets the next item to return to scan on success exit. * * If there are no matching items in the index, we return false, with no @@ -1610,7 +1615,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf); so->currPos.prevPage = opaque->btpo_prev; so->currPos.nextPage = opaque->btpo_next; + /* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */ + so->currPos.dir = dir; + so->currPos.nextTupleOffset = 0; + /* either moreRight or moreLeft should be set now (may be unset later) */ + Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight : + so->currPos.moreLeft); Assert(!P_IGNORE(opaque)); Assert(BTScanPosIsPinned(so->currPos)); Assert(!so->needPrimScan); @@ -1626,14 +1637,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, so->currPos.currPage); } - /* initialize remaining currPos fields related to current page */ - so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf); - so->currPos.dir = dir; - so->currPos.nextTupleOffset = 0; - /* either moreLeft or moreRight should be set now (may be unset later) */ - Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight : - so->currPos.moreLeft); - PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot); /* initialize local variables */ @@ -2107,10 +2110,9 @@ _bt_returnitem(IndexScanDesc scan, BTScanOpaque so) * * Wrapper on _bt_readnextpage that performs final steps for the current page. * - * On entry, if so->currPos.buf is valid the buffer is pinned but not locked. - * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped - * the pin eagerly earlier on. The scan must have so->currPos.currPage set to - * a valid block, in any case. + * On entry, so->currPos must be valid. Its buffer will be pinned, though + * never locked. (Actually, when so->dropPin there won't even be a pin held, + * though so->currPos.currPage must still be set to a valid block number.) */ static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir) @@ -2251,12 +2253,14 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir) */ if (_bt_readpage(scan, dir, offnum, true)) { + Relation rel = scan->indexRelation; + /* * _bt_readpage succeeded. Drop the lock (and maybe the pin) on * so->currPos.buf in preparation for btgettuple returning tuples. */ Assert(BTScanPosIsPinned(so->currPos)); - _bt_drop_lock_and_maybe_pin(scan, &so->currPos); + _bt_drop_lock_and_maybe_pin(rel, so); return true; } @@ -2294,8 +2298,8 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir) * * On success exit, so->currPos is updated to contain data from the next * interesting page, and we return true. We hold a pin on the buffer on - * success exit, except when _bt_drop_lock_and_maybe_pin decided it was safe - * to eagerly drop the pin (to avoid blocking VACUUM). + * success exit (except during so->dropPin index scans, when we drop the pin + * eagerly to avoid blocking VACUUM). * * If there are no more matching records in the given direction, we drop all * locks and pins, invalidate so->currPos, and return false. @@ -2413,7 +2417,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, */ Assert(so->currPos.currPage == blkno); Assert(BTScanPosIsPinned(so->currPos)); - _bt_drop_lock_and_maybe_pin(scan, &so->currPos); + _bt_drop_lock_and_maybe_pin(rel, so); return true; } diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 1a15dfcb7d3..29f0dca1b08 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -63,7 +63,7 @@ static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir, bool *continuescan, int *ikey); static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, TupleDesc tupdesc, - ScanDirection dir, bool forcenonrequired, bool *continuescan); + ScanDirection dir, bool *continuescan); static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate, int tupnatts, TupleDesc tupdesc); static int _bt_keep_natts(Relation rel, IndexTuple lastleft, @@ -2902,8 +2902,10 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, /* row-comparison keys need special processing */ if (key->sk_flags & SK_ROW_HEADER) { + Assert(!forcenonrequired); /* forbidden by _bt_set_startikey */ + if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir, - forcenonrequired, continuescan)) + continuescan)) continue; return false; } @@ -3060,8 +3062,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, */ static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, - TupleDesc tupdesc, ScanDirection dir, - bool forcenonrequired, bool *continuescan) + TupleDesc tupdesc, ScanDirection dir, bool *continuescan) { ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument); int32 cmpresult = 0; @@ -3101,11 +3102,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, if (isNull) { - if (forcenonrequired) - { - /* treating scan's keys as non-required */ - } - else if (subkey->sk_flags & SK_BT_NULLS_FIRST) + if (subkey->sk_flags & SK_BT_NULLS_FIRST) { /* * Since NULLs are sorted before non-NULLs, we know we have @@ -3159,12 +3156,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, */ Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument)); subkey--; - if (forcenonrequired) - { - /* treating scan's keys as non-required */ - } - else if ((subkey->sk_flags & SK_BT_REQFWD) && - ScanDirectionIsForward(dir)) + if ((subkey->sk_flags & SK_BT_REQFWD) && + ScanDirectionIsForward(dir)) *continuescan = false; else if ((subkey->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)) @@ -3216,7 +3209,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, break; } - if (!result && !forcenonrequired) + if (!result) { /* * Tuple fails this qual. If it's a required qual for the current @@ -3342,75 +3335,71 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate, * * Note that if we hold a pin on the target page continuously from initially * reading the items until applying this function, VACUUM cannot have deleted - * any items from the page, and so there is no need to search left from the - * recorded offset. (This observation also guarantees that the item is still - * the right one to delete, which might otherwise be questionable since heap - * TIDs can get recycled.) This holds true even if the page has been modified - * by inserts and page splits, so there is no need to consult the LSN. - * - * If the pin was released after reading the page, then we re-read it. If it - * has been modified since we read it (as determined by the LSN), we dare not - * flag any entries because it is possible that the old entry was vacuumed - * away and the TID was re-used by a completely different heap tuple. + * any items on the page, so the page's TIDs can't have been recycled by now. + * There's no risk that we'll confuse a new index tuple that happens to use a + * recycled TID with a now-removed tuple with the same TID (that used to be on + * this same page). We can't rely on that during scans that drop pins eagerly + * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on + * the page LSN having not changed since back when _bt_readpage saw the page. */ void _bt_killitems(IndexScanDesc scan) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; Page page; BTPageOpaque opaque; OffsetNumber minoff; OffsetNumber maxoff; - int i; int numKilled = so->numKilled; bool killedsomething = false; - bool droppedpin PG_USED_FOR_ASSERTS_ONLY; + Assert(numKilled > 0); Assert(BTScanPosIsValid(so->currPos)); + Assert(scan->heapRelation != NULL); /* can't be a bitmap index scan */ - /* - * Always reset the scan state, so we don't look for same items on other - * pages. - */ + /* Always invalidate so->killedItems[] before leaving so->currPos */ so->numKilled = 0; - if (BTScanPosIsPinned(so->currPos)) + if (!so->dropPin) { /* * We have held the pin on this page since we read the index tuples, * so all we need to do is lock it. The pin will have prevented - * re-use of any TID on the page, so there is no need to check the - * LSN. + * concurrent VACUUMs from recycling any of the TIDs on the page. */ - droppedpin = false; - _bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ); - - page = BufferGetPage(so->currPos.buf); + Assert(BTScanPosIsPinned(so->currPos)); + _bt_lockbuf(rel, so->currPos.buf, BT_READ); } else { Buffer buf; + XLogRecPtr latestlsn; - droppedpin = true; - /* Attempt to re-read the buffer, getting pin and lock. */ - buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ); + Assert(!BTScanPosIsPinned(so->currPos)); + Assert(RelationNeedsWAL(rel)); + buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ); - page = BufferGetPage(buf); - if (BufferGetLSNAtomic(buf) == so->currPos.lsn) - so->currPos.buf = buf; - else + latestlsn = BufferGetLSNAtomic(buf); + Assert(!XLogRecPtrIsInvalid(so->currPos.lsn)); + Assert(so->currPos.lsn <= latestlsn); + if (so->currPos.lsn != latestlsn) { - /* Modified while not pinned means hinting is not safe. */ - _bt_relbuf(scan->indexRelation, buf); + /* Modified, give up on hinting */ + _bt_relbuf(rel, buf); return; } + + /* Unmodified, hinting is safe */ + so->currPos.buf = buf; } + page = BufferGetPage(so->currPos.buf); opaque = BTPageGetOpaque(page); minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); - for (i = 0; i < numKilled; i++) + for (int i = 0; i < numKilled; i++) { int itemIndex = so->killedItems[i]; BTScanPosItem *kitem = &so->currPos.items[itemIndex]; @@ -3442,7 +3431,7 @@ _bt_killitems(IndexScanDesc scan) * correctness. * * Note that the page may have been modified in almost any way - * since we first read it (in the !droppedpin case), so it's + * since we first read it (in the !so->dropPin case), so it's * possible that this posting list tuple wasn't a posting list * tuple when we first encountered its heap TIDs. */ @@ -3458,7 +3447,7 @@ _bt_killitems(IndexScanDesc scan) * though only in the common case where the page can't * have been concurrently modified */ - Assert(kitem->indexOffset == offnum || !droppedpin); + Assert(kitem->indexOffset == offnum || !so->dropPin); /* * Read-ahead to later kitems here. @@ -3525,7 +3514,7 @@ _bt_killitems(IndexScanDesc scan) MarkBufferDirtyHint(so->currPos.buf, true); } - _bt_unlockbuf(scan->indexRelation, so->currPos.buf); + _bt_unlockbuf(rel, so->currPos.buf); } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index b885513f765..2e67e998adb 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -1045,6 +1045,34 @@ TransactionStartedDuringRecovery(void) } /* + * GetTopReadOnlyTransactionNestLevel + * + * Note: this will return zero when not inside any transaction or when neither + * a top-level transaction nor subtransactions are read-only, one when the + * top-level transaction is read-only, two when one level of subtransaction is + * read-only, etc. + * + * Note: subtransactions of the topmost read-only transaction are also + * read-only, because they inherit read-only mode from the transaction, and + * thus can't change to read-write mode. See check_transaction_read_only(). + */ +int +GetTopReadOnlyTransactionNestLevel(void) +{ + TransactionState s = CurrentTransactionState; + + if (!XactReadOnly) + return 0; + while (s->nestingLevel > 1) + { + if (!s->prevXactReadOnly) + return s->nestingLevel; + s = s->parent; + } + return s->nestingLevel; +} + +/* * EnterParallelMode */ void |