 doc/src/sgml/config.sgml             |  11
 src/backend/access/heap/heapam.c     | 483
 src/backend/access/heap/vacuumlazy.c | 175
 src/include/access/heapam.h          |  91
 4 files changed, 463 insertions(+), 297 deletions(-)
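
The patch below moves VACUUM from tuple-at-a-time freezing, driven directly by FreezeLimit/MultiXactCutoff, to page-level freezing: heap_prepare_freeze_tuple now prepares freeze plans against the OldestXmin/OldestMxact cutoffs, and only forces freezing of a page when some XID/MXID on it is older than FreezeLimit/MultiXactCutoff. As orientation before the full diff, here is a condensed sketch of the resulting per-page protocol in lazy_scan_prune. Every identifier in it appears in the hunks that follow, but pruning, error handling, and most bookkeeping are omitted, so read it as an illustration rather than a verbatim excerpt:

    HeapPageFreeze pagefrz;
    HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
    int         tuples_frozen = 0;
    bool        totally_frozen;

    /* Initialize (or reset) page-level freeze state */
    pagefrz.freeze_required = false;
    pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
    pagefrz.FreezePageRelminMxid = vacrel->NewRelminMxid;
    pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
    pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;

    /* ... for each LP_NORMAL item on the page ... */
    if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
                                  &frozen[tuples_frozen], &totally_frozen))
        frozen[tuples_frozen++].offset = offnum;

    /*
     * Freeze the page when some tuple makes it mandatory, when freezing
     * costs nothing (no freeze plans to execute), or when pruning already
     * generated an FPI and freezing lets the page be set all-frozen in
     * the visibility map
     */
    if (pagefrz.freeze_required || tuples_frozen == 0 ||
        (prunestate->all_visible && prunestate->all_frozen &&
         fpi_before != pgWalUsage.wal_fpi))
    {
        /* Adopt the "freeze" trackers, then execute any freeze plans */
        vacrel->NewRelfrozenXid = pagefrz.FreezePageRelfrozenXid;
        vacrel->NewRelminMxid = pagefrz.FreezePageRelminMxid;
        if (tuples_frozen > 0)
            heap_freeze_execute_prepared(vacrel->rel, buf,
                                         snapshotConflictHorizon,
                                         frozen, tuples_frozen);
    }
    else
    {
        /* Adopt the "no freeze" trackers; page cannot become all-frozen */
        vacrel->NewRelfrozenXid = pagefrz.NoFreezePageRelfrozenXid;
        vacrel->NewRelminMxid = pagefrz.NoFreezePageRelminMxid;
        prunestate->all_frozen = false;
    }

The choice of snapshotConflictHorizon is shown in the vacuumlazy.c hunk itself: visibility_cutoff_xid when the page will be all-frozen afterwards, otherwise OldestXmin stepped back by one to avoid false conflicts.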
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 3071c8eace4..05b3862d09f 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -9194,9 +9194,9 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </term> <listitem> <para> - Specifies the cutoff age (in transactions) that <command>VACUUM</command> - should use to decide whether to freeze row versions - while scanning a table. + Specifies the cutoff age (in transactions) that + <command>VACUUM</command> should use to decide whether to + trigger freezing of pages that have an older XID. The default is 50 million transactions. Although users can set this value anywhere from zero to one billion, <command>VACUUM</command> will silently limit the effective value to half @@ -9274,9 +9274,8 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; <listitem> <para> Specifies the cutoff age (in multixacts) that <command>VACUUM</command> - should use to decide whether to replace multixact IDs with a newer - transaction ID or multixact ID while scanning a table. The default - is 5 million multixacts. + should use to decide whether to trigger freezing of pages with + an older multixact ID. The default is 5 million multixacts. Although users can set this value anywhere from zero to one billion, <command>VACUUM</command> will silently limit the effective value to half the value of <xref linkend="guc-autovacuum-multixact-freeze-max-age"/>, diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 71bd071d2b0..34d83dc7024 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6098,9 +6098,7 @@ heap_inplace_update(Relation relation, HeapTuple tuple) * MultiXactId. * * "flags" is an output value; it's used to tell caller what to do on return. - * - * "mxid_oldest_xid_out" is an output value; it's used to track the oldest - * extant Xid within any Multixact that will remain after freezing executes. + * "pagefrz" is an input/output value, used to manage page level freezing. * * Possible values that we can set in "flags": * FRM_NOOP @@ -6115,17 +6113,37 @@ heap_inplace_update(Relation relation, HeapTuple tuple) * The return value is a new MultiXactId to set as new Xmax. * (caller must obtain proper infomask bits using GetMultiXactIdHintBits) * - * "mxid_oldest_xid_out" is only set when "flags" contains either FRM_NOOP or - * FRM_RETURN_IS_MULTI, since we only leave behind a MultiXactId for these. - * - * NB: Creates a _new_ MultiXactId when FRM_RETURN_IS_MULTI is set in "flags". + * Caller delegates control of page freezing to us. In practice we always + * force freezing of caller's page unless FRM_NOOP processing is indicated. + * We help caller ensure that XIDs < FreezeLimit and MXIDs < MultiXactCutoff + * can never be left behind. We freely choose when and how to process each + * Multi, without ever violating the cutoff postconditions for freezing. + * + * It's useful to remove Multis on a proactive timeline (relative to freezing + * XIDs) to keep MultiXact member SLRU buffer misses to a minimum. It can also + * be cheaper in the short run, for us, since we too can avoid SLRU buffer + * misses through eager processing. + * + * NB: Creates a _new_ MultiXactId when FRM_RETURN_IS_MULTI is set, though only + * when FreezeLimit and/or MultiXactCutoff cutoffs leave us with no choice. + * This can usually be put off, which is usually enough to avoid it altogether. 
+ * Allocating new multis during VACUUM should be avoided on general principle; + * only VACUUM can advance relminmxid, so allocating new Multis here comes with + * its own special risks. + * + * NB: Caller must maintain "no freeze" NewRelfrozenXid/NewRelminMxid trackers + * using heap_tuple_should_freeze when we haven't forced page-level freezing. + * + * NB: Caller should avoid needlessly calling heap_tuple_should_freeze when we + * have already forced page-level freezing, since that might incur the same + * SLRU buffer misses that we specifically intended to avoid by freezing. */ static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, - TransactionId *mxid_oldest_xid_out) + HeapPageFreeze *pagefrz) { - TransactionId newxmax = InvalidTransactionId; + TransactionId newxmax; MultiXactMember *members; int nmembers; bool need_replace; @@ -6134,7 +6152,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, bool has_lockers; TransactionId update_xid; bool update_committed; - TransactionId temp_xid_out; + TransactionId FreezePageRelfrozenXid; *flags = 0; @@ -6144,8 +6162,8 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (!MultiXactIdIsValid(multi) || HEAP_LOCKED_UPGRADED(t_infomask)) { - /* Ensure infomask bits are appropriately set/reset */ *flags |= FRM_INVALIDATE_XMAX; + pagefrz->freeze_required = true; return InvalidTransactionId; } else if (MultiXactIdPrecedes(multi, cutoffs->relminmxid)) @@ -6153,8 +6171,10 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("found multixact %u from before relminmxid %u", multi, cutoffs->relminmxid))); - else if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff)) + else if (MultiXactIdPrecedes(multi, cutoffs->OldestMxact)) { + TransactionId update_xact; + /* * This old multi cannot possibly have members still running, but * verify just in case. If it was a locker only, it can be removed @@ -6165,52 +6185,46 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, HEAP_XMAX_IS_LOCKED_ONLY(t_infomask))) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("multixact %u from before cutoff %u found to be still running", - multi, cutoffs->MultiXactCutoff))); + errmsg_internal("multixact %u from before multi freeze cutoff %u found to be still running", + multi, cutoffs->OldestMxact))); if (HEAP_XMAX_IS_LOCKED_ONLY(t_infomask)) { *flags |= FRM_INVALIDATE_XMAX; - newxmax = InvalidTransactionId; + pagefrz->freeze_required = true; + return InvalidTransactionId; } - else - { - /* replace multi with single XID for its updater */ - newxmax = MultiXactIdGetUpdateXid(multi, t_infomask); - - /* wasn't only a lock, xid needs to be valid */ - Assert(TransactionIdIsValid(newxmax)); - - if (TransactionIdPrecedes(newxmax, cutoffs->relfrozenxid)) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("found update xid %u from before relfrozenxid %u", - newxmax, cutoffs->relfrozenxid))); + /* replace multi with single XID for its updater? 
*/ + update_xact = MultiXactIdGetUpdateXid(multi, t_infomask); + if (TransactionIdPrecedes(update_xact, cutoffs->relfrozenxid)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("multixact %u contains update XID %u from before relfrozenxid %u", + multi, update_xact, + cutoffs->relfrozenxid))); + else if (TransactionIdPrecedes(update_xact, cutoffs->OldestXmin)) + { /* - * If the new xmax xid is older than OldestXmin, it has to have - * aborted, otherwise the tuple would have been pruned away + * Updater XID has to have aborted (otherwise the tuple would have + * been pruned away instead, since updater XID is < OldestXmin). + * Just remove xmax. */ - if (TransactionIdPrecedes(newxmax, cutoffs->OldestXmin)) - { - if (TransactionIdDidCommit(newxmax)) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("cannot freeze committed update xid %u", newxmax))); - *flags |= FRM_INVALIDATE_XMAX; - newxmax = InvalidTransactionId; - } - else - { - *flags |= FRM_RETURN_IS_XID; - } + if (TransactionIdDidCommit(update_xact)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("multixact %u contains committed update XID %u from before removable cutoff %u", + multi, update_xact, + cutoffs->OldestXmin))); + *flags |= FRM_INVALIDATE_XMAX; + pagefrz->freeze_required = true; + return InvalidTransactionId; } - /* - * Don't push back mxid_oldest_xid_out using FRM_RETURN_IS_XID Xid, or - * when no Xids will remain - */ - return newxmax; + /* Have to keep updater XID as new xmax */ + *flags |= FRM_RETURN_IS_XID; + pagefrz->freeze_required = true; + return update_xact; } /* @@ -6225,11 +6239,30 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, { /* Nothing worth keeping */ *flags |= FRM_INVALIDATE_XMAX; + pagefrz->freeze_required = true; return InvalidTransactionId; } + /* + * The FRM_NOOP case is the only case where we might need to ratchet back + * FreezePageRelfrozenXid or FreezePageRelminMxid. It is also the only + * case where our caller might ratchet back its NoFreezePageRelfrozenXid + * or NoFreezePageRelminMxid "no freeze" trackers to deal with a multi. + * FRM_NOOP handling should result in the NewRelfrozenXid/NewRelminMxid + * trackers managed by VACUUM being ratcheting back by xmax to the degree + * required to make it safe to leave xmax undisturbed, independent of + * whether or not page freezing is triggered somewhere else. + * + * Our policy is to force freezing in every case other than FRM_NOOP, + * which obviates the need to maintain either set of trackers, anywhere. + * Every other case will reliably execute a freeze plan for xmax that + * either replaces xmax with an XID/MXID >= OldestXmin/OldestMxact, or + * sets xmax to an InvalidTransactionId XID, rendering xmax fully frozen. + * (VACUUM's NewRelfrozenXid/NewRelminMxid trackers are initialized with + * OldestXmin/OldestMxact, so later values never need to be tracked here.) 
+ */ need_replace = false; - temp_xid_out = *mxid_oldest_xid_out; /* init for FRM_NOOP */ + FreezePageRelfrozenXid = pagefrz->FreezePageRelfrozenXid; for (int i = 0; i < nmembers; i++) { TransactionId xid = members[i].xid; @@ -6238,26 +6271,29 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit)) { + /* Can't violate the FreezeLimit postcondition */ need_replace = true; break; } - if (TransactionIdPrecedes(members[i].xid, temp_xid_out)) - temp_xid_out = members[i].xid; + if (TransactionIdPrecedes(xid, FreezePageRelfrozenXid)) + FreezePageRelfrozenXid = xid; } - /* - * In the simplest case, there is no member older than FreezeLimit; we can - * keep the existing MultiXactId as-is, avoiding a more expensive second - * pass over the multi - */ + /* Can't violate the MultiXactCutoff postcondition, either */ + if (!need_replace) + need_replace = MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff); + if (!need_replace) { /* - * When mxid_oldest_xid_out gets pushed back here it's likely that the - * update Xid was the oldest member, but we don't rely on that + * vacuumlazy.c might ratchet back NewRelminMxid, NewRelfrozenXid, or + * both together to make it safe to retain this particular multi after + * freezing its page */ *flags |= FRM_NOOP; - *mxid_oldest_xid_out = temp_xid_out; + pagefrz->FreezePageRelfrozenXid = FreezePageRelfrozenXid; + if (MultiXactIdPrecedes(multi, pagefrz->FreezePageRelminMxid)) + pagefrz->FreezePageRelminMxid = multi; pfree(members); return multi; } @@ -6266,13 +6302,15 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * Do a more thorough second pass over the multi to figure out which * member XIDs actually need to be kept. Checking the precise status of * individual members might even show that we don't need to keep anything. + * That is quite possible even though the Multi must be >= OldestMxact, + * since our second pass only keeps member XIDs when it's truly necessary; + * even member XIDs >= OldestXmin often won't be kept by second pass. */ nnewmembers = 0; newmembers = palloc(sizeof(MultiXactMember) * nmembers); has_lockers = false; update_xid = InvalidTransactionId; update_committed = false; - temp_xid_out = *mxid_oldest_xid_out; /* init for FRM_RETURN_IS_MULTI */ /* * Determine whether to keep each member xid, or to ignore it instead @@ -6293,14 +6331,14 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (TransactionIdIsCurrentTransactionId(xid) || TransactionIdIsInProgress(xid)) { + if (TransactionIdPrecedes(xid, cutoffs->OldestXmin)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("multixact %u contains running locker XID %u from before removable cutoff %u", + multi, xid, + cutoffs->OldestXmin))); newmembers[nnewmembers++] = members[i]; has_lockers = true; - - /* - * Cannot possibly be older than VACUUM's OldestXmin, so we - * don't need a NewRelfrozenXid step here - */ - Assert(TransactionIdPrecedesOrEquals(cutoffs->OldestXmin, xid)); } continue; @@ -6310,15 +6348,13 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * Updater XID (not locker XID). Should we keep it? * * Since the tuple wasn't totally removed when vacuum pruned, the - * update Xid cannot possibly be older than OldestXmin cutoff. The - * presence of such a tuple would cause corruption, so be paranoid and - * check. + * update Xid cannot possibly be older than OldestXmin cutoff unless + * the updater XID aborted. 
If the updater transaction is known + * aborted or crashed then it's okay to ignore it, otherwise not. + * + * In any case the Multi should never contain two updaters, whatever + * their individual commit status. Check for that first, in passing. */ - if (TransactionIdPrecedes(xid, cutoffs->OldestXmin)) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("found update xid %u from before removable cutoff %u", - xid, cutoffs->OldestXmin))); if (TransactionIdIsValid(update_xid)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), @@ -6328,9 +6364,6 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, update_xid, xid))); /* - * If the transaction is known aborted or crashed then it's okay to - * ignore it, otherwise not. - * * As with all tuple visibility routines, it's critical to test * TransactionIdIsInProgress before TransactionIdDidCommit, because of * race conditions explained in detail in heapam_visibility.c. @@ -6358,13 +6391,15 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, } /* - * We determined that this is an Xid corresponding to an update that - * must be retained -- add it to new members list for later. Also - * consider pushing back mxid_oldest_xid_out. + * We determined that updater must be kept -- add it to pending new + * members list */ + if (TransactionIdPrecedes(xid, cutoffs->OldestXmin)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("multixact %u contains committed update XID %u from before removable cutoff %u", + multi, xid, cutoffs->OldestXmin))); newmembers[nnewmembers++] = members[i]; - if (TransactionIdPrecedes(xid, temp_xid_out)) - temp_xid_out = xid; } pfree(members); @@ -6375,10 +6410,9 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, */ if (nnewmembers == 0) { - /* nothing worth keeping!? Tell caller to remove the whole thing */ + /* Nothing worth keeping */ *flags |= FRM_INVALIDATE_XMAX; newxmax = InvalidTransactionId; - /* Don't push back mxid_oldest_xid_out -- no Xids will remain */ } else if (TransactionIdIsValid(update_xid) && !has_lockers) { @@ -6394,22 +6428,20 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (update_committed) *flags |= FRM_MARK_COMMITTED; newxmax = update_xid; - /* Don't push back mxid_oldest_xid_out using FRM_RETURN_IS_XID Xid */ } else { /* * Create a new multixact with the surviving members of the previous - * one, to set as new Xmax in the tuple. The oldest surviving member - * might push back mxid_oldest_xid_out. + * one, to set as new Xmax in the tuple */ newxmax = MultiXactIdCreateFromMembers(nnewmembers, newmembers); *flags |= FRM_RETURN_IS_MULTI; - *mxid_oldest_xid_out = temp_xid_out; } pfree(newmembers); + pagefrz->freeze_required = true; return newxmax; } @@ -6417,9 +6449,9 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * heap_prepare_freeze_tuple * * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac) - * are older than the FreezeLimit and/or MultiXactCutoff freeze cutoffs. If so, - * setup enough state (in the *frz output argument) to later execute and - * WAL-log what caller needs to do for the tuple, and return true. Return + * are older than the OldestXmin and/or OldestMxact freeze cutoffs. If so, + * setup enough state (in the *frz output argument) to enable caller to + * process this tuple as part of freezing its page, and return true. Return * false if nothing can be changed about the tuple right now. 
* * Also sets *totally_frozen to true if the tuple will be totally frozen once @@ -6427,22 +6459,30 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * frozen by an earlier VACUUM). This indicates that there are no remaining * XIDs or MultiXactIds that will need to be processed by a future VACUUM. * - * VACUUM caller must assemble HeapTupleFreeze entries for every tuple that we - * returned true for when called. A later heap_freeze_execute_prepared call - * will execute freezing for caller's page as a whole. + * VACUUM caller must assemble HeapTupleFreeze freeze plan entries for every + * tuple that we returned true for, and call heap_freeze_execute_prepared to + * execute freezing. Caller must initialize pagefrz fields for page as a + * whole before first call here for each heap page. + * + * VACUUM caller decides on whether or not to freeze the page as a whole. + * We'll often prepare freeze plans for a page that caller just discards. + * However, VACUUM doesn't always get to make a choice; it must freeze when + * pagefrz.freeze_required is set, to ensure that any XIDs < FreezeLimit (and + * MXIDs < MultiXactCutoff) can never be left behind. We help to make sure + * that VACUUM always follows that rule. + * + * We sometimes force freezing of xmax MultiXactId values long before it is + * strictly necessary to do so just to ensure the FreezeLimit postcondition. + * It's worth processing MultiXactIds proactively when it is cheap to do so, + * and it's convenient to make that happen by piggy-backing it on the "force + * freezing" mechanism. Conversely, we sometimes delay freezing MultiXactIds + * because it is expensive right now (though only when it's still possible to + * do so without violating the FreezeLimit/MultiXactCutoff postcondition). * * It is assumed that the caller has checked the tuple with * HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD * (else we should be removing the tuple, not freezing it). * - * The *relfrozenxid_out and *relminmxid_out arguments are the current target - * relfrozenxid and relminmxid for VACUUM caller's heap rel. Any and all - * unfrozen XIDs or MXIDs that remain in caller's rel after VACUUM finishes - * _must_ have values >= the final relfrozenxid/relminmxid values in pg_class. - * This includes XIDs that remain as MultiXact members from any tuple's xmax. - * Each call here pushes back *relfrozenxid_out and/or *relminmxid_out as - * needed to avoid unsafe final values in rel's authoritative pg_class tuple. - * * NB: This function has side effects: it might allocate a new MultiXactId. * It will be set as tuple's new xmax when our *frz output is processed within * heap_execute_freeze_tuple later on. If the tuple is in a shared buffer @@ -6451,9 +6491,8 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, - HeapTupleFreeze *frz, bool *totally_frozen, - TransactionId *relfrozenxid_out, - MultiXactId *relminmxid_out) + HeapPageFreeze *pagefrz, + HeapTupleFreeze *frz, bool *totally_frozen) { bool xmin_already_frozen = false, xmax_already_frozen = false; @@ -6470,7 +6509,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, /* * Process xmin, while keeping track of whether it's already frozen, or - * will become frozen when our freeze plan is executed by caller (could be + * will become frozen iff our freeze plan is executed by caller (could be * neither). 
*/ xid = HeapTupleHeaderGetXmin(tuple); @@ -6484,21 +6523,14 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, errmsg_internal("found xmin %u from before relfrozenxid %u", xid, cutoffs->relfrozenxid))); - freeze_xmin = TransactionIdPrecedes(xid, cutoffs->FreezeLimit); - if (freeze_xmin) - { - if (!TransactionIdDidCommit(xid)) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("uncommitted xmin %u from before xid cutoff %u needs to be frozen", - xid, cutoffs->FreezeLimit))); - } - else - { - /* xmin to remain unfrozen. Could push back relfrozenxid_out. */ - if (TransactionIdPrecedes(xid, *relfrozenxid_out)) - *relfrozenxid_out = xid; - } + freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin); + if (freeze_xmin && !TransactionIdDidCommit(xid)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("uncommitted xmin %u from before xid cutoff %u needs to be frozen", + xid, cutoffs->OldestXmin))); + + /* Will set freeze_xmin flags in freeze plan below */ } /* @@ -6515,41 +6547,59 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * For Xvac, we always freeze proactively. This allows totally_frozen * tracking to ignore xvac. */ - replace_xvac = true; + replace_xvac = pagefrz->freeze_required = true; + + /* Will set replace_xvac flags in freeze plan below */ } - /* - * Process xmax. To thoroughly examine the current Xmax value we need to - * resolve a MultiXactId to its member Xids, in case some of them are - * below the given FreezeLimit. In that case, those values might need - * freezing, too. Also, if a multi needs freezing, we cannot simply take - * it out --- if there's a live updater Xid, it needs to be kept. - * - * Make sure to keep heap_tuple_would_freeze in sync with this. - */ + /* Now process xmax */ xid = HeapTupleHeaderGetRawXmax(tuple); - if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { /* Raw xmax is a MultiXactId */ TransactionId newxmax; uint16 flags; - TransactionId mxid_oldest_xid_out = *relfrozenxid_out; + /* + * We will either remove xmax completely (in the "freeze_xmax" path), + * process xmax by replacing it (in the "replace_xmax" path), or + * perform no-op xmax processing. The only constraint is that the + * FreezeLimit/MultiXactCutoff postcondition must never be violated. + */ newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs, - &flags, &mxid_oldest_xid_out); + &flags, pagefrz); - if (flags & FRM_RETURN_IS_XID) + if (flags & FRM_NOOP) + { + /* + * xmax is a MultiXactId, and nothing about it changes for now. + * This is the only case where 'freeze_required' won't have been + * set for us by FreezeMultiXactId, as well as the only case where + * neither freeze_xmax nor replace_xmax are set (given a multi). + * + * This is a no-op, but the call to FreezeMultiXactId might have + * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers + * for us (the "freeze page" variants, specifically). That'll + * make it safe for our caller to freeze the page later on, while + * leaving this particular xmax undisturbed. + * + * FreezeMultiXactId is _not_ responsible for the "no freeze" + * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our + * job. A call to heap_tuple_should_freeze for this same tuple + * will take place below if 'freeze_required' isn't set already. + * (This repeats work from FreezeMultiXactId, but allows "no + * freeze" tracker maintenance to happen in only one place.) 
+ */ + Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff)); + Assert(MultiXactIdIsValid(newxmax) && xid == newxmax); + } + else if (flags & FRM_RETURN_IS_XID) { /* * xmax will become an updater Xid (original MultiXact's updater * member Xid will be carried forward as a simple Xid in Xmax). - * Might have to ratchet back relfrozenxid_out here, though never - * relminmxid_out. */ Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin)); - if (TransactionIdPrecedes(newxmax, *relfrozenxid_out)) - *relfrozenxid_out = newxmax; /* * NB -- some of these transformations are only valid because we @@ -6572,13 +6622,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, /* * xmax is an old MultiXactId that we have to replace with a new * MultiXactId, to carry forward two or more original member XIDs. - * Might have to ratchet back relfrozenxid_out here, though never - * relminmxid_out. */ Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact)); - Assert(TransactionIdPrecedesOrEquals(mxid_oldest_xid_out, - *relfrozenxid_out)); - *relfrozenxid_out = mxid_oldest_xid_out; /* * We can't use GetMultiXactIdHintBits directly on the new multi @@ -6594,20 +6639,6 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, frz->xmax = newxmax; replace_xmax = true; } - else if (flags & FRM_NOOP) - { - /* - * xmax is a MultiXactId, and nothing about it changes for now. - * Might have to ratchet back relminmxid_out, relfrozenxid_out, or - * both together. - */ - Assert(MultiXactIdIsValid(newxmax) && xid == newxmax); - Assert(TransactionIdPrecedesOrEquals(mxid_oldest_xid_out, - *relfrozenxid_out)); - if (MultiXactIdPrecedes(xid, *relminmxid_out)) - *relminmxid_out = xid; - *relfrozenxid_out = mxid_oldest_xid_out; - } else { /* @@ -6617,9 +6648,12 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, Assert(flags & FRM_INVALIDATE_XMAX); Assert(!TransactionIdIsValid(newxmax)); - /* Will set t_infomask/t_infomask2 flags in freeze plan below */ + /* Will set freeze_xmax flags in freeze plan below */ freeze_xmax = true; } + + /* MultiXactId processing forces freezing (barring FRM_NOOP case) */ + Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax)); } else if (TransactionIdIsNormal(xid)) { @@ -6630,28 +6664,21 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, errmsg_internal("found xmax %u from before relfrozenxid %u", xid, cutoffs->relfrozenxid))); - if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit)) - { - /* - * If we freeze xmax, make absolutely sure that it's not an XID - * that is important. (Note, a lock-only xmax can be removed - * independent of committedness, since a committed lock holder has - * released the lock). - */ - if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) && - TransactionIdDidCommit(xid)) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("cannot freeze committed xmax %u", - xid))); + if (TransactionIdPrecedes(xid, cutoffs->OldestXmin)) freeze_xmax = true; - /* No need for relfrozenxid_out handling, since we'll freeze xmax */ - } - else - { - if (TransactionIdPrecedes(xid, *relfrozenxid_out)) - *relfrozenxid_out = xid; - } + + /* + * If we freeze xmax, make absolutely sure that it's not an XID that + * is important. (Note, a lock-only xmax can be removed independent + * of committedness, since a committed lock holder has released the + * lock). 
+ */ + if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) && + TransactionIdDidCommit(xid)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("cannot freeze committed xmax %u", + xid))); } else if (!TransactionIdIsValid(xid)) { @@ -6678,6 +6705,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * failed; whereas a non-dead MOVED_IN tuple must mean the xvac * transaction succeeded. */ + Assert(pagefrz->freeze_required); if (tuple->t_infomask & HEAP_MOVED_OFF) frz->frzflags |= XLH_INVALID_XVAC; else @@ -6686,8 +6714,9 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, if (replace_xmax) { Assert(!xmax_already_frozen && !freeze_xmax); + Assert(pagefrz->freeze_required); - /* Already set t_infomask/t_infomask2 flags in freeze plan */ + /* Already set replace_xmax flags in freeze plan earlier */ } if (freeze_xmax) { @@ -6708,13 +6737,23 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, /* * Determine if this tuple is already totally frozen, or will become - * totally frozen + * totally frozen (provided caller executes freeze plans for the page) */ *totally_frozen = ((freeze_xmin || xmin_already_frozen) && (freeze_xmax || xmax_already_frozen)); - /* A "totally_frozen" tuple must not leave anything behind in xmax */ - Assert(!*totally_frozen || !replace_xmax); + if (!pagefrz->freeze_required && !(xmin_already_frozen && + xmax_already_frozen)) + { + /* + * So far no previous tuple from the page made freezing mandatory. + * Does this tuple force caller to freeze the entire page? + */ + pagefrz->freeze_required = + heap_tuple_should_freeze(tuple, cutoffs, + &pagefrz->NoFreezePageRelfrozenXid, + &pagefrz->NoFreezePageRelminMxid); + } /* Tell caller if this tuple has a usable freeze plan set in *frz */ return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax; @@ -6760,13 +6799,12 @@ heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz) */ void heap_freeze_execute_prepared(Relation rel, Buffer buffer, - TransactionId FreezeLimit, + TransactionId snapshotConflictHorizon, HeapTupleFreeze *tuples, int ntuples) { Page page = BufferGetPage(buffer); Assert(ntuples > 0); - Assert(TransactionIdIsNormal(FreezeLimit)); START_CRIT_SECTION(); @@ -6789,19 +6827,10 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer, int nplans; xl_heap_freeze_page xlrec; XLogRecPtr recptr; - TransactionId snapshotConflictHorizon; /* Prepare deduplicated representation for use in WAL record */ nplans = heap_xlog_freeze_plan(tuples, ntuples, plans, offsets); - /* - * FreezeLimit is (approximately) the first XID not frozen by VACUUM. - * Back up caller's FreezeLimit to avoid false conflicts when - * FreezeLimit is precisely equal to VACUUM's OldestXmin cutoff. 
- */ - snapshotConflictHorizon = FreezeLimit; - TransactionIdRetreat(snapshotConflictHorizon); - xlrec.snapshotConflictHorizon = snapshotConflictHorizon; xlrec.nplans = nplans; @@ -6842,8 +6871,7 @@ heap_freeze_tuple(HeapTupleHeader tuple, bool do_freeze; bool totally_frozen; struct VacuumCutoffs cutoffs; - TransactionId NewRelfrozenXid = FreezeLimit; - MultiXactId NewRelminMxid = MultiXactCutoff; + HeapPageFreeze pagefrz; cutoffs.relfrozenxid = relfrozenxid; cutoffs.relminmxid = relminmxid; @@ -6852,9 +6880,14 @@ heap_freeze_tuple(HeapTupleHeader tuple, cutoffs.FreezeLimit = FreezeLimit; cutoffs.MultiXactCutoff = MultiXactCutoff; + pagefrz.freeze_required = true; + pagefrz.FreezePageRelfrozenXid = FreezeLimit; + pagefrz.FreezePageRelminMxid = MultiXactCutoff; + pagefrz.NoFreezePageRelfrozenXid = FreezeLimit; + pagefrz.NoFreezePageRelminMxid = MultiXactCutoff; + do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs, - &frz, &totally_frozen, - &NewRelfrozenXid, &NewRelminMxid); + &pagefrz, &frz, &totally_frozen); /* * Note that because this is not a WAL-logged operation, we don't need to @@ -7277,22 +7310,24 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) } /* - * heap_tuple_would_freeze + * heap_tuple_should_freeze * * Return value indicates if heap_prepare_freeze_tuple sibling function would - * freeze any of the XID/MXID fields from the tuple, given the same cutoffs. - * We must also deal with dead tuples here, since (xmin, xmax, xvac) fields - * could be processed by pruning away the whole tuple instead of freezing. - * - * The *relfrozenxid_out and *relminmxid_out input/output arguments work just - * like the heap_prepare_freeze_tuple arguments that they're based on. We - * never freeze here, which makes tracking the oldest extant XID/MXID simple. + * (or should) force freezing of the heap page that contains caller's tuple. + * Tuple header XIDs/MXIDs < FreezeLimit/MultiXactCutoff trigger freezing. + * This includes (xmin, xmax, xvac) fields, as well as MultiXact member XIDs. + * + * The *NoFreezePageRelfrozenXid and *NoFreezePageRelminMxid input/output + * arguments help VACUUM track the oldest extant XID/MXID remaining in rel. + * Our working assumption is that caller won't decide to freeze this tuple. + * It's up to caller to only ratchet back its own top-level trackers after the + * point that it fully commits to not freezing the tuple/page in question. 
*/ bool -heap_tuple_would_freeze(HeapTupleHeader tuple, - const struct VacuumCutoffs *cutoffs, - TransactionId *relfrozenxid_out, - MultiXactId *relminmxid_out) +heap_tuple_should_freeze(HeapTupleHeader tuple, + const struct VacuumCutoffs *cutoffs, + TransactionId *NoFreezePageRelfrozenXid, + MultiXactId *NoFreezePageRelminMxid) { TransactionId xid; MultiXactId multi; @@ -7303,8 +7338,8 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, if (TransactionIdIsNormal(xid)) { Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid)); - if (TransactionIdPrecedes(xid, *relfrozenxid_out)) - *relfrozenxid_out = xid; + if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid)) + *NoFreezePageRelfrozenXid = xid; if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit)) freeze = true; } @@ -7321,8 +7356,8 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, { Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid)); /* xmax is a non-permanent XID */ - if (TransactionIdPrecedes(xid, *relfrozenxid_out)) - *relfrozenxid_out = xid; + if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid)) + *NoFreezePageRelfrozenXid = xid; if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit)) freeze = true; } @@ -7333,8 +7368,8 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask)) { /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */ - if (MultiXactIdPrecedes(multi, *relminmxid_out)) - *relminmxid_out = multi; + if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid)) + *NoFreezePageRelminMxid = multi; /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */ freeze = true; } @@ -7345,8 +7380,8 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, int nmembers; Assert(MultiXactIdPrecedesOrEquals(cutoffs->relminmxid, multi)); - if (MultiXactIdPrecedes(multi, *relminmxid_out)) - *relminmxid_out = multi; + if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid)) + *NoFreezePageRelminMxid = multi; if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff)) freeze = true; @@ -7358,8 +7393,8 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, { xid = members[i].xid; Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid)); - if (TransactionIdPrecedes(xid, *relfrozenxid_out)) - *relfrozenxid_out = xid; + if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid)) + *NoFreezePageRelfrozenXid = xid; if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit)) freeze = true; } @@ -7373,9 +7408,9 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, if (TransactionIdIsNormal(xid)) { Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid)); - if (TransactionIdPrecedes(xid, *relfrozenxid_out)) - *relfrozenxid_out = xid; - /* heap_prepare_freeze_tuple always freezes xvac */ + if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid)) + *NoFreezePageRelfrozenXid = xid; + /* heap_prepare_freeze_tuple forces xvac freezing */ freeze = true; } } diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 98ccb98825b..9923994b50e 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1525,8 +1525,8 @@ lazy_scan_prune(LVRelState *vacrel, live_tuples, recently_dead_tuples; int nnewlpdead; - TransactionId NewRelfrozenXid; - MultiXactId NewRelminMxid; + HeapPageFreeze pagefrz; + int64 fpi_before = pgWalUsage.wal_fpi; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; HeapTupleFreeze frozen[MaxHeapTuplesPerPage]; @@ -1542,8 +1542,11 @@ lazy_scan_prune(LVRelState *vacrel, retry: /* Initialize (or 
reset) page-level state */ - NewRelfrozenXid = vacrel->NewRelfrozenXid; - NewRelminMxid = vacrel->NewRelminMxid; + pagefrz.freeze_required = false; + pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid; + pagefrz.FreezePageRelminMxid = vacrel->NewRelminMxid; + pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid; + pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid; tuples_deleted = 0; tuples_frozen = 0; lpdead_items = 0; @@ -1596,27 +1599,23 @@ retry: continue; } - /* - * LP_DEAD items are processed outside of the loop. - * - * Note that we deliberately don't set hastup=true in the case of an - * LP_DEAD item here, which is not how count_nondeletable_pages() does - * it -- it only considers pages empty/truncatable when they have no - * items at all (except LP_UNUSED items). - * - * Our assumption is that any LP_DEAD items we encounter here will - * become LP_UNUSED inside lazy_vacuum_heap_page() before we actually - * call count_nondeletable_pages(). In any case our opinion of - * whether or not a page 'hastup' (which is how our caller sets its - * vacrel->nonempty_pages value) is inherently race-prone. It must be - * treated as advisory/unreliable, so we might as well be slightly - * optimistic. - */ if (ItemIdIsDead(itemid)) { + /* + * Deliberately don't set hastup for LP_DEAD items. We make the + * soft assumption that any LP_DEAD items encountered here will + * become LP_UNUSED later on, before count_nondeletable_pages is + * reached. If we don't make this assumption then rel truncation + * will only happen every other VACUUM, at most. Besides, VACUUM + * must treat hastup/nonempty_pages as provisional no matter how + * LP_DEAD items are handled (handled here, or handled later on). + * + * Also deliberately delay unsetting all_visible until just before + * we return to lazy_scan_heap caller, as explained in full below. + * (This is another case where it's useful to anticipate that any + * LP_DEAD items will become LP_UNUSED during the ongoing VACUUM.) + */ deadoffsets[lpdead_items++] = offnum; - prunestate->all_visible = false; - prunestate->has_lpdead_items = true; continue; } @@ -1743,56 +1742,105 @@ retry: prunestate->hastup = true; /* page makes rel truncation unsafe */ /* Tuple with storage -- consider need to freeze */ - if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, - &frozen[tuples_frozen], &totally_frozen, - &NewRelfrozenXid, &NewRelminMxid)) + if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz, + &frozen[tuples_frozen], &totally_frozen)) { /* Save prepared freeze plan for later */ frozen[tuples_frozen++].offset = offnum; } /* - * If tuple is not frozen (and not about to become frozen) then caller - * had better not go on to set this page's VM bit + * If any tuple isn't either totally frozen already or eligible to + * become totally frozen (according to its freeze plan), then the page + * definitely cannot be set all-frozen in the visibility map later on */ if (!totally_frozen) prunestate->all_frozen = false; } - vacrel->offnum = InvalidOffsetNumber; - /* * We have now divided every item on the page into either an LP_DEAD item * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple * that remains and needs to be considered for freezing now (LP_UNUSED and * LP_REDIRECT items also remain, but are of no further interest to us). 
*/ - vacrel->NewRelfrozenXid = NewRelfrozenXid; - vacrel->NewRelminMxid = NewRelminMxid; + vacrel->offnum = InvalidOffsetNumber; /* - * Consider the need to freeze any items with tuple storage from the page - * first (arbitrary) + * Freeze the page when heap_prepare_freeze_tuple indicates that at least + * one XID/MXID from before FreezeLimit/MultiXactCutoff is present. Also + * freeze when pruning generated an FPI, if doing so means that we set the + * page all-frozen afterwards (might not happen until final heap pass). */ - if (tuples_frozen > 0) + if (pagefrz.freeze_required || tuples_frozen == 0 || + (prunestate->all_visible && prunestate->all_frozen && + fpi_before != pgWalUsage.wal_fpi)) { - Assert(prunestate->hastup); + /* + * We're freezing the page. Our final NewRelfrozenXid doesn't need to + * be affected by the XIDs that are just about to be frozen anyway. + */ + vacrel->NewRelfrozenXid = pagefrz.FreezePageRelfrozenXid; + vacrel->NewRelminMxid = pagefrz.FreezePageRelminMxid; + + if (tuples_frozen == 0) + { + /* + * We're freezing all eligible tuples on the page, but have no + * freeze plans to execute. This is structured as a case where + * the page is nominally frozen so that we set pages all-frozen + * whenever no freeze plans need to be executed to make it safe. + * If this was handled via "no freeze" processing instead then + * VACUUM would senselessly waste certain opportunities to set + * pages all-frozen (not just all-visible) at no added cost. + * + * We never increment the frozen_pages instrumentation counter + * here, since it only counts pages with newly frozen tuples + * (don't confuse that with pages newly set all-frozen in VM). + */ + } + else + { + TransactionId snapshotConflictHorizon; + + Assert(prunestate->hastup); - vacrel->frozen_pages++; + vacrel->frozen_pages++; - /* Execute all freeze plans for page as a single atomic action */ - heap_freeze_execute_prepared(vacrel->rel, buf, - vacrel->cutoffs.FreezeLimit, - frozen, tuples_frozen); + /* + * We can use visibility_cutoff_xid as our cutoff for conflicts + * when the whole page is eligible to become all-frozen in the VM + * once we're done with it. Otherwise we generate a conservative + * cutoff by stepping back from OldestXmin. + */ + if (prunestate->all_visible && prunestate->all_frozen) + snapshotConflictHorizon = prunestate->visibility_cutoff_xid; + else + { + /* Avoids false conflicts when hot_standby_feedback in use */ + snapshotConflictHorizon = vacrel->cutoffs.OldestXmin; + TransactionIdRetreat(snapshotConflictHorizon); + } + + /* Execute all freeze plans for page as a single atomic action */ + heap_freeze_execute_prepared(vacrel->rel, buf, + snapshotConflictHorizon, + frozen, tuples_frozen); + } + } + else + { + /* + * Page requires "no freeze" processing. It might be set all-visible + * in the visibility map, but it can never be set all-frozen. + */ + vacrel->NewRelfrozenXid = pagefrz.NoFreezePageRelfrozenXid; + vacrel->NewRelminMxid = pagefrz.NoFreezePageRelminMxid; + prunestate->all_frozen = false; + tuples_frozen = 0; /* avoid miscounts in instrumentation */ } /* - * The second pass over the heap can also set visibility map bits, using - * the same approach. This is important when the table frequently has a - * few old LP_DEAD items on each page by the time we get to it (typically - * because past opportunistic pruning operations freed some non-HOT - * tuples). 
- * * VACUUM will call heap_page_is_all_visible() during the second pass over * the heap to determine all_visible and all_frozen for the page -- this * is a specialized version of the logic from this function. Now that @@ -1801,7 +1849,7 @@ retry: */ #ifdef USE_ASSERT_CHECKING /* Note that all_frozen value does not matter when !all_visible */ - if (prunestate->all_visible) + if (prunestate->all_visible && lpdead_items == 0) { TransactionId cutoff; bool all_frozen; @@ -1809,9 +1857,6 @@ retry: if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen)) Assert(false); - Assert(lpdead_items == 0); - Assert(prunestate->all_frozen == all_frozen); - /* * It's possible that we froze tuples and made the page's XID cutoff * (for recovery conflict purposes) FrozenTransactionId. This is okay @@ -1831,10 +1876,8 @@ retry: VacDeadItems *dead_items = vacrel->dead_items; ItemPointerData tmp; - Assert(!prunestate->all_visible); - Assert(prunestate->has_lpdead_items); - vacrel->lpdead_item_pages++; + prunestate->has_lpdead_items = true; ItemPointerSetBlockNumber(&tmp, blkno); @@ -1847,6 +1890,19 @@ retry: Assert(dead_items->num_items <= dead_items->max_items); pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES, dead_items->num_items); + + /* + * It was convenient to ignore LP_DEAD items in all_visible earlier on + * to make the choice of whether or not to freeze the page unaffected + * by the short-term presence of LP_DEAD items. These LP_DEAD items + * were effectively assumed to be LP_UNUSED items in the making. It + * doesn't matter which heap pass (initial pass or final pass) ends up + * setting the page all-frozen, as long as the ongoing VACUUM does it. + * + * Now that freezing has been finalized, unset all_visible. It needs + * to reflect the present state of things, as expected by our caller. + */ + prunestate->all_visible = false; } /* Finally, add page-local counts to whole-VACUUM counts */ @@ -1891,8 +1947,8 @@ lazy_scan_noprune(LVRelState *vacrel, recently_dead_tuples, missed_dead_tuples; HeapTupleHeader tupleheader; - TransactionId NewRelfrozenXid = vacrel->NewRelfrozenXid; - MultiXactId NewRelminMxid = vacrel->NewRelminMxid; + TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid; + MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; Assert(BufferGetBlockNumber(buf) == blkno); @@ -1937,8 +1993,9 @@ lazy_scan_noprune(LVRelState *vacrel, *hastup = true; /* page prevents rel truncation */ tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); - if (heap_tuple_would_freeze(tupleheader, &vacrel->cutoffs, - &NewRelfrozenXid, &NewRelminMxid)) + if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs, + &NoFreezePageRelfrozenXid, + &NoFreezePageRelminMxid)) { /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */ if (vacrel->aggressive) @@ -2019,8 +2076,8 @@ lazy_scan_noprune(LVRelState *vacrel, * this particular page until the next VACUUM. Remember its details now. * (lazy_scan_prune expects a clean slate, so we have to do this last.) 
*/ - vacrel->NewRelfrozenXid = NewRelfrozenXid; - vacrel->NewRelminMxid = NewRelminMxid; + vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid; + vacrel->NewRelminMxid = NoFreezePageRelminMxid; /* Save any LP_DEAD items found on the page in dead_items array */ if (vacrel->nindexes == 0) diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 53eb011766b..09a1993f4d7 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -113,6 +113,82 @@ typedef struct HeapTupleFreeze OffsetNumber offset; } HeapTupleFreeze; +/* + * State used by VACUUM to track the details of freezing all eligible tuples + * on a given heap page. + * + * VACUUM prepares freeze plans for each page via heap_prepare_freeze_tuple + * calls (every tuple with storage gets its own call). This page-level freeze + * state is updated across each call, which ultimately determines whether or + * not freezing the page is required. + * + * Aside from the basic question of whether or not freezing will go ahead, the + * state also tracks the oldest extant XID/MXID in the table as a whole, for + * the purposes of advancing relfrozenxid/relminmxid values in pg_class later + * on. Each heap_prepare_freeze_tuple call pushes NewRelfrozenXid and/or + * NewRelminMxid back as required to avoid unsafe final pg_class values. Any + * and all unfrozen XIDs or MXIDs that remain after VACUUM finishes _must_ + * have values >= the final relfrozenxid/relminmxid values in pg_class. This + * includes XIDs that remain as MultiXact members from any tuple's xmax. + * + * When 'freeze_required' flag isn't set after all tuples are examined, the + * final choice on freezing is made by vacuumlazy.c. It can decide to trigger + * freezing based on whatever criteria it deems appropriate. However, it is + * recommended that vacuumlazy.c avoid early freezing when freezing does not + * enable setting the target page all-frozen in the visibility map afterwards. + */ +typedef struct HeapPageFreeze +{ + /* Is heap_prepare_freeze_tuple caller required to freeze page? */ + bool freeze_required; + + /* + * "Freeze" NewRelfrozenXid/NewRelminMxid trackers. + * + * Trackers used when heap_freeze_execute_prepared freezes the page, and + * when page is "nominally frozen", which happens with pages where every + * call to heap_prepare_freeze_tuple produced no usable freeze plan. + * + * "Nominal freezing" enables vacuumlazy.c's approach of setting a page + * all-frozen in the visibility map when every tuple's 'totally_frozen' + * result is true. That always works in the same way, independent of the + * need to freeze tuples, and without complicating the general rule around + * 'totally_frozen' results (which is that 'totally_frozen' results are + * only to be trusted with a page that goes on to be frozen by caller). + * + * When we freeze a page, we generally freeze all XIDs < OldestXmin, only + * leaving behind XIDs that are ineligible for freezing, if any. And so + * you might wonder why these trackers are necessary at all; why should + * _any_ page that VACUUM freezes _ever_ be left with XIDs/MXIDs that + * ratchet back the top-level NewRelfrozenXid/NewRelminMxid trackers? + * + * It is useful to use a definition of "freeze the page" that does not + * overspecify how MultiXacts are affected. heap_prepare_freeze_tuple + * generally prefers to remove Multis eagerly, but lazy processing is used + * in cases where laziness allows VACUUM to avoid allocating a new Multi. + * The "freeze the page" trackers enable this flexibility. 
+     */
+    TransactionId FreezePageRelfrozenXid;
+    MultiXactId FreezePageRelminMxid;
+
+    /*
+     * "No freeze" NewRelfrozenXid/NewRelminMxid trackers.
+     *
+     * These trackers are maintained in the same way as the trackers used when
+     * VACUUM scans a page that isn't cleanup locked. Both code paths are
+     * based on the same general idea (do less work for this page during the
+     * ongoing VACUUM, at the cost of having to accept older final values).
+     *
+     * When vacuumlazy.c caller decides to do "no freeze" processing, it must
+     * not go on to set the page all-frozen (setting the page all-visible
+     * could still be okay). heap_prepare_freeze_tuple's 'totally_frozen'
+     * results can only be used on a page that also gets frozen as instructed.
+     */
+    TransactionId NoFreezePageRelfrozenXid;
+    MultiXactId NoFreezePageRelminMxid;
+
+} HeapPageFreeze;
+
 /* ----------------
  *        function prototypes for heap access method
  *
@@ -180,19 +256,18 @@ extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
 extern void heap_inplace_update(Relation relation, HeapTuple tuple);
 extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
                                       const struct VacuumCutoffs *cutoffs,
-                                      HeapTupleFreeze *frz, bool *totally_frozen,
-                                      TransactionId *relfrozenxid_out,
-                                      MultiXactId *relminmxid_out);
+                                      HeapPageFreeze *pagefrz,
+                                      HeapTupleFreeze *frz, bool *totally_frozen);
 extern void heap_freeze_execute_prepared(Relation rel, Buffer buffer,
-                                         TransactionId FreezeLimit,
+                                         TransactionId snapshotConflictHorizon,
                                          HeapTupleFreeze *tuples, int ntuples);
 extern bool heap_freeze_tuple(HeapTupleHeader tuple,
                               TransactionId relfrozenxid, TransactionId relminmxid,
                               TransactionId FreezeLimit, TransactionId MultiXactCutoff);
-extern bool heap_tuple_would_freeze(HeapTupleHeader tuple,
-                                    const struct VacuumCutoffs *cutoffs,
-                                    TransactionId *relfrozenxid_out,
-                                    MultiXactId *relminmxid_out);
+extern bool heap_tuple_should_freeze(HeapTupleHeader tuple,
+                                     const struct VacuumCutoffs *cutoffs,
+                                     TransactionId *NoFreezePageRelfrozenXid,
+                                     MultiXactId *NoFreezePageRelminMxid);
 extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
 extern void simple_heap_insert(Relation relation, HeapTuple tup);
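
To make the two tracker sets concrete, a small worked example; the XID values are invented for illustration and appear nowhere in the patch. Suppose OldestXmin = 1000 and FreezeLimit = 500, all four page trackers start out at 1000, and a page holds a single tuple whose xmin is 600 and whose xmax is a MultiXactId with an oldest member XID of 650, where no member is older than FreezeLimit and the multi itself is not older than MultiXactCutoff. That is the FRM_NOOP case: freeze_required stays unset, and vacuumlazy.c gets to choose.

    - If the page is frozen: xmin 600 is frozen away (600 < OldestXmin),
      but FRM_NOOP leaves the multi in place, and its oldest member XID
      ratchets FreezePageRelfrozenXid back to 650.

    - If the page is not frozen: the unfrozen xmin remains as well, so
      NoFreezePageRelfrozenXid ends up at 600 (the older of 600 and 650).

Either way nothing older than FreezeLimit is left behind, which is the postcondition both tracker sets exist to protect. The freeze/no-freeze choice only affects how far relfrozenxid can advance in pg_class afterwards: 650 versus 600 here.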