Diffstat (limited to 'src')
-rw-r--r--  src/backend/access/heap/heapam.c        682
-rw-r--r--  src/backend/access/rmgrdesc/heapdesc.c   16
-rw-r--r--  src/backend/access/transam/multixact.c   34
-rw-r--r--  src/backend/commands/vacuumlazy.c        31
-rw-r--r--  src/include/access/heapam_xlog.h         45
-rw-r--r--  src/include/access/multixact.h            3
-rw-r--r--  src/include/access/xlog_internal.h        2
7 files changed, 582 insertions(+), 231 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 91cfae1603d..db683b12179 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -5409,14 +5409,282 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
CacheInvalidateHeapTuple(relation, tuple, NULL);
}
+#define FRM_NOOP 0x0001
+#define FRM_INVALIDATE_XMAX 0x0002
+#define FRM_RETURN_IS_XID 0x0004
+#define FRM_RETURN_IS_MULTI 0x0008
+#define FRM_MARK_COMMITTED 0x0010
/*
- * heap_freeze_tuple
+ * FreezeMultiXactId
+ * Determine what to do during freezing when a tuple is marked by a
+ * MultiXactId.
+ *
+ * NB -- this might have the side-effect of creating a new MultiXactId!
+ *
+ * "flags" is an output value; it's used to tell caller what to do on return.
+ * Possible flags are:
+ * FRM_NOOP
+ * don't do anything -- keep existing Xmax
+ * FRM_INVALIDATE_XMAX
+ * mark Xmax as InvalidTransactionId and set XMAX_INVALID flag.
+ * FRM_RETURN_IS_XID
+ * The return value is a single update Xid to set as the tuple's Xmax.
+ * FRM_MARK_COMMITTED
+ * Xmax can be marked as HEAP_XMAX_COMMITTED
+ * FRM_RETURN_IS_MULTI
+ * The return value is a new MultiXactId to set as new Xmax.
+ * (caller must obtain proper infomask bits using GetMultiXactIdHintBits)
+ */
+static TransactionId
+FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
+ TransactionId cutoff_xid, MultiXactId cutoff_multi,
+ uint16 *flags)
+{
+ TransactionId xid = InvalidTransactionId;
+ int i;
+ MultiXactMember *members;
+ int nmembers;
+ bool need_replace;
+ int nnewmembers;
+ MultiXactMember *newmembers;
+ bool has_lockers;
+ TransactionId update_xid;
+ bool update_committed;
+
+ *flags = 0;
+
+ /* We should only be called when Xmax is a MultiXactId */
+ Assert(t_infomask & HEAP_XMAX_IS_MULTI);
+
+ if (!MultiXactIdIsValid(multi))
+ {
+ /* Ensure infomask bits are appropriately set/reset */
+ *flags |= FRM_INVALIDATE_XMAX;
+ return InvalidTransactionId;
+ }
+ else if (MultiXactIdPrecedes(multi, cutoff_multi))
+ {
+ /*
+ * This old multi cannot possibly have members still running. If it
+ * was a locker only, it can be removed without any further
+ * consideration; but if it contained an update, we might need to
+ * preserve it.
+ */
+ Assert(!MultiXactIdIsRunning(multi));
+ if (HEAP_XMAX_IS_LOCKED_ONLY(t_infomask))
+ {
+ *flags |= FRM_INVALIDATE_XMAX;
+ xid = InvalidTransactionId; /* not strictly necessary */
+ }
+ else
+ {
+ /* replace multi by update xid */
+ xid = MultiXactIdGetUpdateXid(multi, t_infomask);
+
+ /* wasn't only a lock, xid needs to be valid */
+ Assert(TransactionIdIsValid(xid));
+
+ /*
+ * If the xid is older than the cutoff, it has to have aborted,
+ * otherwise the tuple would have gotten pruned away.
+ */
+ if (TransactionIdPrecedes(xid, cutoff_xid))
+ {
+ Assert(!TransactionIdDidCommit(xid));
+ *flags |= FRM_INVALIDATE_XMAX;
+ xid = InvalidTransactionId; /* not strictly necessary */
+ }
+ else
+ {
+ *flags |= FRM_RETURN_IS_XID;
+ }
+ }
+
+ return xid;
+ }
+
+ /*
+ * This multixact may or may not have members still running, but we
+ * know it's valid and newer than the cutoff point for multis.
+ * However, some member(s) of it may be below the cutoff for Xids, so we
+ * need to walk the whole members array to figure out what to do, if
+ * anything.
+ */
+
+ nmembers = GetMultiXactIdMembers(multi, &members, false);
+ if (nmembers <= 0)
+ {
+ /* Nothing worth keeping */
+ *flags |= FRM_INVALIDATE_XMAX;
+ return InvalidTransactionId;
+ }
+
+ /* is there anything older than the cutoff? */
+ need_replace = false;
+ for (i = 0; i < nmembers; i++)
+ {
+ if (TransactionIdPrecedes(members[i].xid, cutoff_xid))
+ {
+ need_replace = true;
+ break;
+ }
+ }
+
+ /*
+ * In the simplest case, there is no member older than the cutoff; we can
+ * keep the existing MultiXactId as is.
+ */
+ if (!need_replace)
+ {
+ *flags |= FRM_NOOP;
+ pfree(members);
+ return InvalidTransactionId;
+ }
+
+ /*
+ * If the multi needs to be updated, figure out which members we need
+ * to keep.
+ */
+ nnewmembers = 0;
+ newmembers = palloc(sizeof(MultiXactMember) * nmembers);
+ has_lockers = false;
+ update_xid = InvalidTransactionId;
+ update_committed = false;
+
+ for (i = 0; i < nmembers; i++)
+ {
+ /*
+ * Determine whether to keep this member or ignore it.
+ */
+ if (ISUPDATE_from_mxstatus(members[i].status))
+ {
+ TransactionId xid = members[i].xid;
+
+ /*
+ * It's an update; should we keep it? If the transaction is known
+ * aborted then it's okay to ignore it, otherwise not. However,
+ * if the Xid is older than the cutoff_xid, we must remove it.
+ * Note that such an old updater cannot possibly be committed,
+ * because HeapTupleSatisfiesVacuum would have returned
+ * HEAPTUPLE_DEAD and we would not be trying to freeze the tuple.
+ *
+ * Note the TransactionIdDidAbort() test is just an optimization
+ * and not strictly necessary for correctness.
+ *
+ * As with all tuple visibility routines, it's critical to test
+ * TransactionIdIsInProgress before the transam.c routines,
+ * because of race conditions explained in detail in tqual.c.
+ */
+ if (TransactionIdIsCurrentTransactionId(xid) ||
+ TransactionIdIsInProgress(xid))
+ {
+ Assert(!TransactionIdIsValid(update_xid));
+ update_xid = xid;
+ }
+ else if (!TransactionIdDidAbort(xid))
+ {
+ /*
+ * Test whether to tell caller to set HEAP_XMAX_COMMITTED
+ * while we have the Xid still in cache. Note this can only
+ * be done if the transaction is known not running.
+ */
+ if (TransactionIdDidCommit(xid))
+ update_committed = true;
+ Assert(!TransactionIdIsValid(update_xid));
+ update_xid = xid;
+ }
+
+ /*
+ * If we determined that it's an Xid corresponding to an update
+ * that must be retained, additionally add it to the list of
+ * members of the new Multis, in case we end up using that. (We
+ * might still decide to use only an update Xid and not a multi,
+ * but it's easier to maintain the list as we walk the old members
+ * list.)
+ *
+ * It is possible to end up with a very old updater Xid that
+ * crashed and thus did not mark itself as aborted in pg_clog.
+ * That would manifest as a pre-cutoff Xid. Make sure to ignore
+ * it.
+ */
+ if (TransactionIdIsValid(update_xid))
+ {
+ if (!TransactionIdPrecedes(update_xid, cutoff_xid))
+ {
+ newmembers[nnewmembers++] = members[i];
+ }
+ else
+ {
+ /* cannot have committed: would be HEAPTUPLE_DEAD */
+ Assert(!TransactionIdDidCommit(update_xid));
+ update_xid = InvalidTransactionId;
+ update_committed = false;
+ }
+ }
+ }
+ else
+ {
+ /* We only keep lockers if they are still running */
+ if (TransactionIdIsCurrentTransactionId(members[i].xid) ||
+ TransactionIdIsInProgress(members[i].xid))
+ {
+ /* running locker cannot possibly be older than the cutoff */
+ Assert(!TransactionIdPrecedes(members[i].xid, cutoff_xid));
+ newmembers[nnewmembers++] = members[i];
+ has_lockers = true;
+ }
+ }
+ }
+
+ pfree(members);
+
+ if (nnewmembers == 0)
+ {
+ /* nothing worth keeping!? Tell caller to remove the whole thing */
+ *flags |= FRM_INVALIDATE_XMAX;
+ xid = InvalidTransactionId;
+ }
+ else if (TransactionIdIsValid(update_xid) && !has_lockers)
+ {
+ /*
+ * If there's a single member and it's an update, pass it back alone
+ * without creating a new Multi. (XXX we could do this when there's a
+ * single remaining locker, too, but that would complicate the API too
+ * much; moreover, the case with the single updater is more
+ * interesting, because those are longer-lived.)
+ */
+ Assert(nnewmembers == 1);
+ *flags |= FRM_RETURN_IS_XID;
+ if (update_committed)
+ *flags |= FRM_MARK_COMMITTED;
+ xid = update_xid;
+ }
+ else
+ {
+ /*
+ * Create a new multixact with the surviving members of the previous
+ * one, to set as new Xmax in the tuple.
+ */
+ xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers);
+ *flags |= FRM_RETURN_IS_MULTI;
+ }
+
+ pfree(newmembers);
+
+ return xid;
+}
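
A minimal caller-side sketch of the flag protocol above (the wrapper name
is invented for illustration; the FRM_* macros and heap routines are the
ones defined in this patch -- heap_prepare_freeze_tuple below is the real
in-tree consumer):

    static void
    sketch_apply_freeze_flags(HeapTupleHeader tuple, TransactionId newxmax,
                              uint16 flags)
    {
        if (flags & FRM_INVALIDATE_XMAX)
        {
            /* nothing worth keeping: clear Xmax and mark it invalid */
            HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
            tuple->t_infomask &= ~HEAP_XMAX_BITS;
            tuple->t_infomask |= HEAP_XMAX_INVALID;
        }
        else if (flags & FRM_RETURN_IS_XID)
        {
            /* replace the multi with the bare update Xid */
            tuple->t_infomask &= ~HEAP_XMAX_BITS;
            if (flags & FRM_MARK_COMMITTED)
                tuple->t_infomask |= HEAP_XMAX_COMMITTED;
            HeapTupleHeaderSetXmax(tuple, newxmax);
        }
        else if (flags & FRM_RETURN_IS_MULTI)
        {
            uint16      newbits;
            uint16      newbits2;

            /* install the replacement multi with fresh hint bits */
            tuple->t_infomask &= ~HEAP_XMAX_BITS;
            tuple->t_infomask2 &= ~HEAP_KEYS_UPDATED;
            GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
            tuple->t_infomask |= newbits;
            tuple->t_infomask2 |= newbits2;
            HeapTupleHeaderSetXmax(tuple, newxmax);
        }
        /* FRM_NOOP: keep the existing Xmax untouched */
    }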
+
+/*
+ * heap_prepare_freeze_tuple
*
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
- * are older than the specified cutoff XID. If so, replace them with
- * FrozenTransactionId or InvalidTransactionId as appropriate, and return
- * TRUE. Return FALSE if nothing was changed.
+ * are older than the specified cutoff XID and cutoff MultiXactId. If so,
+ * set up enough state (in the *frz output argument) to later execute and
+ * WAL-log what we would need to do, and return TRUE. Return FALSE if nothing
+ * is to be changed.
+ *
+ * Caller is responsible for setting the offset field, if appropriate.
*
* It is assumed that the caller has checked the tuple with
* HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
@@ -5425,54 +5693,44 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
* NB: cutoff_xid *must* be <= the current global xmin, to ensure that any
* XID older than it could neither be running nor seen as running by any
* open transaction. This ensures that the replacement will not change
- * anyone's idea of the tuple state. Also, since we assume the tuple is
- * not HEAPTUPLE_DEAD, the fact that an XID is not still running allows us
- * to assume that it is either committed good or aborted, as appropriate;
- * so we need no external state checks to decide what to do. (This is good
- * because this function is applied during WAL recovery, when we don't have
- * access to any such state, and can't depend on the hint bits to be set.)
- * There is an exception we make which is to assume GetMultiXactIdMembers can
- * be called during recovery.
- *
+ * anyone's idea of the tuple state.
+ *
* Similarly, cutoff_multi must be less than or equal to the smallest
* MultiXactId used by any transaction currently open.
*
* If the tuple is in a shared buffer, caller must hold an exclusive lock on
* that buffer.
*
- * Note: it might seem we could make the changes without exclusive lock, since
- * TransactionId read/write is assumed atomic anyway. However there is a race
- * condition: someone who just fetched an old XID that we overwrite here could
- * conceivably not finish checking the XID against pg_clog before we finish
- * the VACUUM and perhaps truncate off the part of pg_clog he needs. Getting
- * exclusive lock ensures no other backend is in process of checking the
- * tuple status. Also, getting exclusive lock makes it safe to adjust the
- * infomask bits.
- *
- * NB: Cannot rely on hint bits here, they might not be set after a crash or
- * on a standby.
+ * NB: It is not enough to set hint bits to indicate something is
+ * committed/invalid -- they might not be set on a standby, or after crash
+ * recovery. We really need to remove old xids.
*/
bool
-heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
- MultiXactId cutoff_multi)
+heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ MultiXactId cutoff_multi,
+ xl_heap_freeze_tuple *frz)
{
bool changed = false;
bool freeze_xmax = false;
TransactionId xid;
+ frz->frzflags = 0;
+ frz->t_infomask2 = tuple->t_infomask2;
+ frz->t_infomask = tuple->t_infomask;
+ frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
+
/* Process xmin */
xid = HeapTupleHeaderGetXmin(tuple);
if (TransactionIdIsNormal(xid) &&
TransactionIdPrecedes(xid, cutoff_xid))
{
- HeapTupleHeaderSetXmin(tuple, FrozenTransactionId);
+ frz->frzflags |= XLH_FREEZE_XMIN;
/*
* Might as well fix the hint bits too; usually XMIN_COMMITTED will
* already be set here, but there's a small chance not.
*/
- Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
- tuple->t_infomask |= HEAP_XMIN_COMMITTED;
+ frz->t_infomask |= HEAP_XMIN_COMMITTED;
changed = true;
}
@@ -5489,91 +5747,53 @@ heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- if (!MultiXactIdIsValid(xid))
+ TransactionId newxmax;
+ uint16 flags;
+
+ newxmax = FreezeMultiXactId(xid, tuple->t_infomask,
+ cutoff_xid, cutoff_multi, &flags);
+
+ if (flags & FRM_INVALIDATE_XMAX)
+ freeze_xmax = true;
+ else if (flags & FRM_RETURN_IS_XID)
{
- /* no xmax set, ignore */
- ;
+ /*
+ * NB -- some of these transformations are only valid because
+ * we know the returned Xid is a tuple updater (i.e., not merely a
+ * locker). Also note that the only reason we don't explicitly
+ * worry about HEAP_KEYS_UPDATED is because it lives in t_infomask2
+ * rather than t_infomask.
+ */
+ frz->t_infomask &= ~HEAP_XMAX_BITS;
+ frz->xmax = newxmax;
+ if (flags & FRM_MARK_COMMITTED)
+ frz->t_infomask |= HEAP_XMAX_COMMITTED;
+ changed = true;
}
- else if (MultiXactIdPrecedes(xid, cutoff_multi))
+ else if (flags & FRM_RETURN_IS_MULTI)
{
+ uint16 newbits;
+ uint16 newbits2;
+
/*
- * This old multi cannot possibly be running. If it was a locker
- * only, it can be removed without much further thought; but if it
- * contained an update, we need to preserve it.
+ * We can't use GetMultiXactIdHintBits directly on the new multi
+ * here; that routine initializes the masks to all zeroes, which
+ * would lose other bits we need. Doing it this way ensures all
+ * unrelated bits remain untouched.
*/
- if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
- freeze_xmax = true;
- else
- {
- TransactionId update_xid;
+ frz->t_infomask &= ~HEAP_XMAX_BITS;
+ frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
+ frz->t_infomask |= newbits;
+ frz->t_infomask2 |= newbits2;
- update_xid = HeapTupleGetUpdateXid(tuple);
+ frz->xmax = newxmax;
- /*
- * The multixact has an update hidden within. Get rid of it.
- *
- * If the update_xid is below the cutoff_xid, it necessarily
- * must be an aborted transaction. In a primary server, such
- * an Xmax would have gotten marked invalid by
- * HeapTupleSatisfiesVacuum, but in a replica that is not
- * called before we are, so deal with it in the same way.
- *
- * If not below the cutoff_xid, then the tuple would have been
- * pruned by vacuum, if the update committed long enough ago,
- * and we wouldn't be freezing it; so it's either recently
- * committed, or in-progress. Deal with this by setting the
- * Xmax to the update Xid directly and remove the IS_MULTI
- * bit. (We know there cannot be running lockers in this
- * multi, because it's below the cutoff_multi value.)
- */
-
- if (TransactionIdPrecedes(update_xid, cutoff_xid))
- {
- Assert(InRecovery || TransactionIdDidAbort(update_xid));
- freeze_xmax = true;
- }
- else
- {
- Assert(InRecovery || !TransactionIdIsInProgress(update_xid));
- tuple->t_infomask &= ~HEAP_XMAX_BITS;
- HeapTupleHeaderSetXmax(tuple, update_xid);
- changed = true;
- }
- }
- }
- else if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
- {
- /* newer than the cutoff, so don't touch it */
- ;
+ changed = true;
}
else
{
- TransactionId update_xid;
-
- /*
- * This is a multixact which is not marked LOCK_ONLY, but which
- * is newer than the cutoff_multi. If the update_xid is below the
- * cutoff_xid point, then we can just freeze the Xmax in the
- * tuple, removing it altogether. This seems simple, but there
- * are several underlying assumptions:
- *
- * 1. A tuple marked by an multixact containing a very old
- * committed update Xid would have been pruned away by vacuum; we
- * wouldn't be freezing this tuple at all.
- *
- * 2. There cannot possibly be any live locking members remaining
- * in the multixact. This is because if they were alive, the
- * update's Xid would had been considered, via the lockers'
- * snapshot's Xmin, as part the cutoff_xid.
- *
- * 3. We don't create new MultiXacts via MultiXactIdExpand() that
- * include a very old aborted update Xid: in that function we only
- * include update Xids corresponding to transactions that are
- * committed or in-progress.
- */
- update_xid = HeapTupleGetUpdateXid(tuple);
- if (TransactionIdPrecedes(update_xid, cutoff_xid))
- freeze_xmax = true;
+ Assert(flags & FRM_NOOP);
}
}
else if (TransactionIdIsNormal(xid) &&
@@ -5584,17 +5804,17 @@ heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
if (freeze_xmax)
{
- HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
+ frz->xmax = InvalidTransactionId;
/*
* The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
* LOCKED. Normalize to INVALID just to be sure no one gets confused.
* Also get rid of the HEAP_KEYS_UPDATED bit.
*/
- tuple->t_infomask &= ~HEAP_XMAX_BITS;
- tuple->t_infomask |= HEAP_XMAX_INVALID;
- HeapTupleHeaderClearHotUpdated(tuple);
- tuple->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ frz->t_infomask &= ~HEAP_XMAX_BITS;
+ frz->t_infomask |= HEAP_XMAX_INVALID;
+ frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
+ frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
changed = true;
}
@@ -5614,16 +5834,16 @@ heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
* xvac transaction succeeded.
*/
if (tuple->t_infomask & HEAP_MOVED_OFF)
- HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
+ frz->frzflags |= XLH_INVALID_XVAC;
else
- HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
+ frz->frzflags |= XLH_FREEZE_XVAC;
/*
* Might as well fix the hint bits too; usually XMIN_COMMITTED
* will already be set here, but there's a small chance not.
*/
Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
- tuple->t_infomask |= HEAP_XMIN_COMMITTED;
+ frz->t_infomask |= HEAP_XMIN_COMMITTED;
changed = true;
}
}
@@ -5632,6 +5852,70 @@ heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
}
/*
+ * heap_execute_freeze_tuple
+ * Execute the prepared freezing of a tuple.
+ *
+ * Caller is responsible for ensuring that no other backend can access the
+ * storage underlying this tuple, either by holding an exclusive lock on the
+ * buffer containing it (which is what lazy VACUUM does), or by having it
+ * in private storage (which is what CLUSTER and friends do).
+ *
+ * Note: it might seem we could make the changes without exclusive lock, since
+ * TransactionId read/write is assumed atomic anyway. However there is a race
+ * condition: someone who just fetched an old XID that we overwrite here could
+ * conceivably not finish checking the XID against pg_clog before we finish
+ * the VACUUM and perhaps truncate off the part of pg_clog he needs. Getting
+ * exclusive lock ensures no other backend is in process of checking the
+ * tuple status. Also, getting exclusive lock makes it safe to adjust the
+ * infomask bits.
+ *
+ * NB: All code in here must be safe to execute during crash recovery!
+ */
+void
+heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
+{
+ if (frz->frzflags & XLH_FREEZE_XMIN)
+ HeapTupleHeaderSetXmin(tuple, FrozenTransactionId);
+
+ HeapTupleHeaderSetXmax(tuple, frz->xmax);
+
+ if (frz->frzflags & XLH_FREEZE_XVAC)
+ HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
+
+ if (frz->frzflags & XLH_INVALID_XVAC)
+ HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
+
+ tuple->t_infomask = frz->t_infomask;
+ tuple->t_infomask2 = frz->t_infomask2;
+}
+
+/*
+ * heap_freeze_tuple
+ * Freeze tuple in place, without WAL logging.
+ *
+ * Useful for callers like CLUSTER that perform their own WAL logging.
+ */
+bool
+heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ MultiXactId cutoff_multi)
+{
+ xl_heap_freeze_tuple frz;
+ bool do_freeze;
+
+ do_freeze = heap_prepare_freeze_tuple(tuple, cutoff_xid, cutoff_multi,
+ &frz);
+
+ /*
+ * Note that because this is not a WAL-logged operation, we don't need to
+ * fill in the offset in the freeze record.
+ */
+
+ if (do_freeze)
+ heap_execute_freeze_tuple(tuple, &frz);
+ return do_freeze;
+}
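
For instance, a CLUSTER-style caller that has copied the tuple into
backend-private storage might use it as follows (a sketch; the variable
names are assumed, not taken from the patch):

    /* tuple copy lives in private memory: no buffer lock or WAL needed here */
    if (heap_freeze_tuple(newtup->t_data, FreezeXid, MultiXactCutoff))
    {
        /*
         * The copy was changed in place; the destination page as a whole
         * is WAL-logged separately, e.g. via log_newpage.
         */
    }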
+
+/*
* For a given MultiXactId, return the hint bits that should be set in the
* tuple's infomask.
*
@@ -5934,16 +6218,26 @@ heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
}
else if (MultiXactIdPrecedes(multi, cutoff_multi))
return true;
- else if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
- {
- /* only-locker multis don't need internal examination */
- ;
- }
else
{
- if (TransactionIdPrecedes(HeapTupleGetUpdateXid(tuple),
- cutoff_xid))
- return true;
+ MultiXactMember *members;
+ int nmembers;
+ int i;
+
+ /* need to check whether any member of the mxact is too old */
+
+ nmembers = GetMultiXactIdMembers(multi, &members, false);
+
+ for (i = 0; i < nmembers; i++)
+ {
+ if (TransactionIdPrecedes(members[i].xid, cutoff_xid))
+ {
+ pfree(members);
+ return true;
+ }
+ }
+ if (nmembers > 0)
+ pfree(members);
}
}
else
@@ -6193,45 +6487,44 @@ log_heap_clean(Relation reln, Buffer buffer,
}
/*
- * Perform XLogInsert for a heap-freeze operation. Caller must already
- * have modified the buffer and marked it dirty.
+ * Perform XLogInsert for a heap-freeze operation. Caller must have already
+ * modified the buffer and marked it dirty.
*/
XLogRecPtr
-log_heap_freeze(Relation reln, Buffer buffer,
- TransactionId cutoff_xid, MultiXactId cutoff_multi,
- OffsetNumber *offsets, int offcnt)
+log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
+ xl_heap_freeze_tuple *tuples, int ntuples)
{
- xl_heap_freeze xlrec;
+ xl_heap_freeze_page xlrec;
XLogRecPtr recptr;
XLogRecData rdata[2];
/* Caller should not call me on a non-WAL-logged relation */
Assert(RelationNeedsWAL(reln));
/* nor when there are no tuples to freeze */
- Assert(offcnt > 0);
+ Assert(ntuples > 0);
xlrec.node = reln->rd_node;
xlrec.block = BufferGetBlockNumber(buffer);
xlrec.cutoff_xid = cutoff_xid;
- xlrec.cutoff_multi = cutoff_multi;
+ xlrec.ntuples = ntuples;
rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapFreeze;
+ rdata[0].len = SizeOfHeapFreezePage;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
/*
- * The tuple-offsets array is not actually in the buffer, but pretend that
- * it is. When XLogInsert stores the whole buffer, the offsets array need
+ * The freeze plan array is not actually in the buffer, but pretend that
+ * it is. When XLogInsert stores the whole buffer, the freeze plans need
* not be stored too.
*/
- rdata[1].data = (char *) offsets;
- rdata[1].len = offcnt * sizeof(OffsetNumber);
+ rdata[1].data = (char *) tuples;
+ rdata[1].len = ntuples * sizeof(xl_heap_freeze_tuple);
rdata[1].buffer = buffer;
rdata[1].buffer_std = true;
rdata[1].next = NULL;
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE, rdata);
return recptr;
}
@@ -6848,64 +7141,6 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
}
-static void
-heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
-{
- xl_heap_freeze *xlrec = (xl_heap_freeze *) XLogRecGetData(record);
- TransactionId cutoff_xid = xlrec->cutoff_xid;
- MultiXactId cutoff_multi = xlrec->cutoff_multi;
- Buffer buffer;
- Page page;
-
- /*
- * In Hot Standby mode, ensure that there's no queries running which still
- * consider the frozen xids as running.
- */
- if (InHotStandby)
- ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node);
-
- /* If we have a full-page image, restore it and we're done */
- if (record->xl_info & XLR_BKP_BLOCK(0))
- {
- (void) RestoreBackupBlock(lsn, record, 0, false, false);
- return;
- }
-
- buffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
- if (!BufferIsValid(buffer))
- return;
- page = (Page) BufferGetPage(buffer);
-
- if (lsn <= PageGetLSN(page))
- {
- UnlockReleaseBuffer(buffer);
- return;
- }
-
- if (record->xl_len > SizeOfHeapFreeze)
- {
- OffsetNumber *offsets;
- OffsetNumber *offsets_end;
-
- offsets = (OffsetNumber *) ((char *) xlrec + SizeOfHeapFreeze);
- offsets_end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
-
- while (offsets < offsets_end)
- {
- /* offsets[] entries are one-based */
- ItemId lp = PageGetItemId(page, *offsets);
- HeapTupleHeader tuple = (HeapTupleHeader) PageGetItem(page, lp);
-
- (void) heap_freeze_tuple(tuple, cutoff_xid, cutoff_multi);
- offsets++;
- }
- }
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
/*
* Replay XLOG_HEAP2_VISIBLE record.
*
@@ -7020,6 +7255,63 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
}
}
+/*
+ * Replay XLOG_HEAP2_FREEZE_PAGE records
+ */
+static void
+heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
+{
+ xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) XLogRecGetData(record);
+ TransactionId cutoff_xid = xlrec->cutoff_xid;
+ Buffer buffer;
+ Page page;
+ int ntup;
+
+ /*
+ * In Hot Standby mode, ensure that there are no queries running which still
+ * consider the frozen xids as running.
+ */
+ if (InHotStandby)
+ ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node);
+
+ /* If we have a full-page image, restore it and we're done */
+ if (record->xl_info & XLR_BKP_BLOCK(0))
+ {
+ (void) RestoreBackupBlock(lsn, record, 0, false, false);
+ return;
+ }
+
+ buffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
+ if (!BufferIsValid(buffer))
+ return;
+
+ page = (Page) BufferGetPage(buffer);
+
+ if (lsn <= PageGetLSN(page))
+ {
+ UnlockReleaseBuffer(buffer);
+ return;
+ }
+
+ /* now execute freeze plan for each frozen tuple */
+ for (ntup = 0; ntup < xlrec->ntuples; ntup++)
+ {
+ xl_heap_freeze_tuple *xlrec_tp;
+ ItemId lp;
+ HeapTupleHeader tuple;
+
+ xlrec_tp = &xlrec->tuples[ntup];
+ lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */
+ tuple = (HeapTupleHeader) PageGetItem(page, lp);
+
+ heap_execute_freeze_tuple(tuple, xlrec_tp);
+ }
+
+ PageSetLSN(page, lsn);
+ MarkBufferDirty(buffer);
+ UnlockReleaseBuffer(buffer);
+}
+
static void
heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
{
@@ -7883,12 +8175,12 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record)
switch (info & XLOG_HEAP_OPMASK)
{
- case XLOG_HEAP2_FREEZE:
- heap_xlog_freeze(lsn, record);
- break;
case XLOG_HEAP2_CLEAN:
heap_xlog_clean(lsn, record);
break;
+ case XLOG_HEAP2_FREEZE_PAGE:
+ heap_xlog_freeze_page(lsn, record);
+ break;
case XLOG_HEAP2_CLEANUP_INFO:
heap_xlog_cleanup_info(lsn, record);
break;
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index 39c53d0022d..4a86b8527de 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -131,23 +131,23 @@ heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
uint8 info = xl_info & ~XLR_INFO_MASK;
info &= XLOG_HEAP_OPMASK;
- if (info == XLOG_HEAP2_FREEZE)
+ if (info == XLOG_HEAP2_CLEAN)
{
- xl_heap_freeze *xlrec = (xl_heap_freeze *) rec;
+ xl_heap_clean *xlrec = (xl_heap_clean *) rec;
- appendStringInfo(buf, "freeze: rel %u/%u/%u; blk %u; cutoff xid %u multi %u",
+ appendStringInfo(buf, "clean: rel %u/%u/%u; blk %u remxid %u",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block,
- xlrec->cutoff_xid, xlrec->cutoff_multi);
+ xlrec->latestRemovedXid);
}
- else if (info == XLOG_HEAP2_CLEAN)
+ else if (info == XLOG_HEAP2_FREEZE_PAGE)
{
- xl_heap_clean *xlrec = (xl_heap_clean *) rec;
+ xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec;
- appendStringInfo(buf, "clean: rel %u/%u/%u; blk %u remxid %u",
+ appendStringInfo(buf, "freeze_page: rel %u/%u/%u; blk %u; cutoff xid %u ntuples %u",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block,
- xlrec->latestRemovedXid);
+ xlrec->cutoff_xid, xlrec->ntuples);
}
else if (info == XLOG_HEAP2_CLEANUP_INFO)
{
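
With this, a freeze record is rendered along these lines (field values
invented for illustration):

    freeze_page: rel 1663/16384/16385; blk 3; cutoff xid 1234 ntuples 7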
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 05e1dcb49c5..55a8ca7ac49 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -289,7 +289,6 @@ static MemoryContext MXactContext = NULL;
/* internal MultiXactId management */
static void MultiXactIdSetOldestVisible(void);
-static MultiXactId CreateMultiXactId(int nmembers, MultiXactMember *members);
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
int nmembers, MultiXactMember *members);
static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
@@ -336,6 +335,9 @@ MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
Assert(!TransactionIdEquals(xid1, xid2) || (status1 != status2));
+ /* MultiXactIdSetOldestMember() must have been called already. */
+ Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]));
+
/*
* Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
* are still running. In typical usage, xid2 will be our own XID and the
@@ -347,7 +349,7 @@ MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
members[1].xid = xid2;
members[1].status = status2;
- newMulti = CreateMultiXactId(2, members);
+ newMulti = MultiXactIdCreateFromMembers(2, members);
debug_elog3(DEBUG2, "Create: %s",
mxid_to_string(newMulti, 2, members));
@@ -387,6 +389,9 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
AssertArg(MultiXactIdIsValid(multi));
AssertArg(TransactionIdIsValid(xid));
+ /* MultiXactIdSetOldestMember() must have been called already. */
+ Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]));
+
debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
multi, xid, mxstatus_to_string(status));
@@ -410,7 +415,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
*/
member.xid = xid;
member.status = status;
- newMulti = CreateMultiXactId(1, &member);
+ newMulti = MultiXactIdCreateFromMembers(1, &member);
debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
multi, newMulti);
@@ -462,7 +467,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
newMembers[j].xid = xid;
newMembers[j++].status = status;
- newMulti = CreateMultiXactId(j, newMembers);
+ newMulti = MultiXactIdCreateFromMembers(j, newMembers);
pfree(members);
pfree(newMembers);
@@ -667,16 +672,16 @@ ReadNextMultiXactId(void)
}
/*
- * CreateMultiXactId
- * Make a new MultiXactId
+ * MultiXactIdCreateFromMembers
+ * Make a new MultiXactId from the specified set of members
*
* Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
* given TransactionIds as members. Returns the newly created MultiXactId.
*
* NB: the passed members[] array will be sorted in-place.
*/
-static MultiXactId
-CreateMultiXactId(int nmembers, MultiXactMember *members)
+MultiXactId
+MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
{
MultiXactId multi;
MultiXactOffset offset;
@@ -707,6 +712,13 @@ CreateMultiXactId(int nmembers, MultiXactMember *members)
* Assign the MXID and offsets range to use, and make sure there is space
* in the OFFSETs and MEMBERs files. NB: this routine does
* START_CRIT_SECTION().
+ *
+ * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
+ * that we've called MultiXactIdSetOldestMember here. This is because
+ * this routine is used in some places to create new MultiXactIds of which
+ * the current backend is not a member, notably during freezing of multis
+ * in vacuum. During vacuum, in particular, it would be unacceptable to
+ * keep OldestMulti set, in case vacuum runs for a long time.
*/
multi = GetNewMultiXactId(nmembers, &offset);
@@ -763,7 +775,8 @@ CreateMultiXactId(int nmembers, MultiXactMember *members)
* RecordNewMultiXact
* Write info about a new multixact into the offsets and members files
*
- * This is broken out of CreateMultiXactId so that xlog replay can use it.
+ * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
+ * use it.
*/
static void
RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
@@ -867,9 +880,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
- /* MultiXactIdSetOldestMember() must have been called already */
- Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]));
-
/* safety check, we should never get this far in a HS slave */
if (RecoveryInProgress())
elog(ERROR, "cannot assign MultiXactIds during recovery");
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 28e98e8b481..8dd3de5e8e2 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -424,6 +424,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
Buffer vmbuffer = InvalidBuffer;
BlockNumber next_not_all_visible_block;
bool skipping_all_visible_blocks;
+ xl_heap_freeze_tuple *frozen;
pg_rusage_init(&ru0);
@@ -446,6 +447,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
vacrelstats->latestRemovedXid = InvalidTransactionId;
lazy_space_alloc(vacrelstats, nblocks);
+ frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
/*
* We want to skip pages that don't require vacuuming according to the
@@ -500,7 +502,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
bool tupgone,
hastup;
int prev_dead_count;
- OffsetNumber frozen[MaxOffsetNumber];
int nfrozen;
Size freespace;
bool all_visible_according_to_vm;
@@ -890,9 +891,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* Each non-removable tuple must be checked to see if it needs
* freezing. Note we already have exclusive buffer lock.
*/
- if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
- MultiXactCutoff))
- frozen[nfrozen++] = offnum;
+ if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit,
+ MultiXactCutoff, &frozen[nfrozen]))
+ frozen[nfrozen++].offset = offnum;
}
} /* scan along page */
@@ -903,15 +904,33 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
*/
if (nfrozen > 0)
{
+ START_CRIT_SECTION();
+
MarkBufferDirty(buf);
+
+ /* execute collected freezes */
+ for (i = 0; i < nfrozen; i++)
+ {
+ ItemId itemid;
+ HeapTupleHeader htup;
+
+ itemid = PageGetItemId(page, frozen[i].offset);
+ htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+ heap_execute_freeze_tuple(htup, &frozen[i]);
+ }
+
+ /* Now WAL-log freezing if necessary */
if (RelationNeedsWAL(onerel))
{
XLogRecPtr recptr;
recptr = log_heap_freeze(onerel, buf, FreezeLimit,
- MultiXactCutoff, frozen, nfrozen);
+ frozen, nfrozen);
PageSetLSN(page, recptr);
}
+
+ END_CRIT_SECTION();
}
/*
@@ -1012,6 +1031,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
RecordPageWithFreeSpace(onerel, blkno, freespace);
}
+ pfree(frozen);
+
/* save stats for use later */
vacrelstats->scanned_tuples = num_tuples;
vacrelstats->tuples_deleted = tups_vacuumed;
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 438e79db48e..4062b422a7d 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -48,9 +48,9 @@
* the ones above associated with RM_HEAP_ID. XLOG_HEAP_OPMASK applies to
* these, too.
*/
-#define XLOG_HEAP2_FREEZE 0x00
+/* 0x00 is free, was XLOG_HEAP2_FREEZE */
#define XLOG_HEAP2_CLEAN 0x10
-/* 0x20 is free, was XLOG_HEAP2_CLEAN_MOVE */
+#define XLOG_HEAP2_FREEZE_PAGE 0x20
#define XLOG_HEAP2_CLEANUP_INFO 0x30
#define XLOG_HEAP2_VISIBLE 0x40
#define XLOG_HEAP2_MULTI_INSERT 0x50
@@ -270,17 +270,36 @@ typedef struct xl_heap_inplace
#define SizeOfHeapInplace (offsetof(xl_heap_inplace, target) + SizeOfHeapTid)
-/* This is what we need to know about tuple freezing during vacuum */
-typedef struct xl_heap_freeze
+/*
+ * This struct represents a 'freeze plan', which is what we need to know about
+ * a single tuple being frozen during vacuum.
+ */
+#define XLH_FREEZE_XMIN 0x01
+#define XLH_FREEZE_XVAC 0x02
+#define XLH_INVALID_XVAC 0x04
+
+typedef struct xl_heap_freeze_tuple
+{
+ TransactionId xmax;
+ OffsetNumber offset;
+ uint16 t_infomask2;
+ uint16 t_infomask;
+ uint8 frzflags;
+} xl_heap_freeze_tuple;
+
+/*
+ * This is what we need to know about a block being frozen during vacuum
+ */
+typedef struct xl_heap_freeze_page
{
RelFileNode node;
BlockNumber block;
TransactionId cutoff_xid;
- MultiXactId cutoff_multi;
- /* TUPLE OFFSET NUMBERS FOLLOW AT THE END */
-} xl_heap_freeze;
+ uint16 ntuples;
+ xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER];
+} xl_heap_freeze_page;
-#define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_multi) + sizeof(MultiXactId))
+#define SizeOfHeapFreezePage offsetof(xl_heap_freeze_page, tuples)
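
Reading the layout concretely: the data length of a freeze_page record
carrying n plans is the fixed header plus the plan array, matching the
two-element rdata chain built in log_heap_freeze. A hypothetical helper,
for illustration only:

    static inline Size
    freeze_record_len(int ntuples)
    {
        return SizeOfHeapFreezePage + ntuples * sizeof(xl_heap_freeze_tuple);
    }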
/* This is what we need to know about setting a visibility map bit */
typedef struct xl_heap_visible
@@ -331,8 +350,14 @@ extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
OffsetNumber *nowunused, int nunused,
TransactionId latestRemovedXid);
extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
- TransactionId cutoff_xid, MultiXactId cutoff_multi,
- OffsetNumber *offsets, int offcnt);
+ TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples,
+ int ntuples);
+extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
+ TransactionId cutoff_xid,
+ MultiXactId cutoff_multi,
+ xl_heap_freeze_tuple *frz);
+extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
+ xl_heap_freeze_tuple *xlrec_tp);
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
Buffer vm_buffer, TransactionId cutoff_xid);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
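
Taken together, the intended calling sequence for the new entry points is
prepare, then execute, then log, all under one exclusive buffer lock. A
self-contained sketch mirroring the vacuumlazy.c hunk above (the function
itself is invented and assumes the usual postgres.h/access headers):

    static void
    freeze_page_sketch(Relation rel, Buffer buf,
                       TransactionId cutoff_xid, MultiXactId cutoff_multi)
    {
        Page        page = BufferGetPage(buf);
        xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage];
        int         nfrozen = 0;
        int         i;
        OffsetNumber offnum,
                    maxoff = PageGetMaxOffsetNumber(page);

        /* phase 1: decide, per tuple, what freezing requires */
        for (offnum = FirstOffsetNumber; offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid = PageGetItemId(page, offnum);
            HeapTupleHeader htup;

            if (!ItemIdIsNormal(itemid))
                continue;
            htup = (HeapTupleHeader) PageGetItem(page, itemid);
            if (heap_prepare_freeze_tuple(htup, cutoff_xid, cutoff_multi,
                                          &frozen[nfrozen]))
                frozen[nfrozen++].offset = offnum;
        }

        if (nfrozen == 0)
            return;

        /* phase 2: apply and WAL-log the collected plans atomically */
        START_CRIT_SECTION();
        MarkBufferDirty(buf);
        for (i = 0; i < nfrozen; i++)
        {
            ItemId      itemid = PageGetItemId(page, frozen[i].offset);
            HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);

            heap_execute_freeze_tuple(htup, &frozen[i]);
        }
        if (RelationNeedsWAL(rel))
            PageSetLSN(page, log_heap_freeze(rel, buf, cutoff_xid,
                                             frozen, nfrozen));
        END_CRIT_SECTION();
    }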
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 6085ea3ec16..0e3b273b9e2 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -81,6 +81,9 @@ extern MultiXactId MultiXactIdCreate(TransactionId xid1,
MultiXactStatus status2);
extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid,
MultiXactStatus status);
+extern MultiXactId MultiXactIdCreateFromMembers(int nmembers,
+ MultiXactMember *members);
+
extern MultiXactId ReadNextMultiXactId(void);
extern bool MultiXactIdIsRunning(MultiXactId multi);
extern void MultiXactIdSetOldestMember(void);
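
FreezeMultiXactId relies on this newly exported constructor to build the
replacement multi. A minimal sketch of a direct call (member values are
assumed for illustration):

    MultiXactMember members[2];
    MultiXactId newmulti;

    members[0].xid = updater_xid;   /* a still-running updater, say */
    members[0].status = MultiXactStatusUpdate;
    members[1].xid = locker_xid;    /* a surviving locker */
    members[1].status = MultiXactStatusForShare;

    /* sorts members[] in place; WAL-logs and caches the new multi */
    newmulti = MultiXactIdCreateFromMembers(2, members);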
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index c78a2fbfae8..d0022b37516 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -55,7 +55,7 @@ typedef struct BkpBlock
/*
* Each page of XLOG file has a header like this:
*/
-#define XLOG_PAGE_MAGIC 0xD079 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD07A /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{