Diffstat (limited to 'src/backend/access/heap/heapam.c')
 src/backend/access/heap/heapam.c | 2187 ++++++++++++++++++++++++++++--------
 1 file changed, 1777 insertions(+), 410 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index b19d1cf6c57..57d47e86014 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -84,12 +84,105 @@ static HeapScanDesc heap_beginscan_internal(Relation relation,
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
- ItemPointerData from, Buffer newbuf, HeapTuple newtup,
- bool all_visible_cleared, bool new_all_visible_cleared);
-static bool HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs,
- HeapTuple oldtup, HeapTuple newtup);
+ Buffer newbuf, HeapTuple oldtup,
+ HeapTuple newtup, bool all_visible_cleared,
+ bool new_all_visible_cleared);
+static void HeapSatisfiesHOTandKeyUpdate(Relation relation,
+ Bitmapset *hot_attrs, Bitmapset *key_attrs,
+ bool *satisfies_hot, bool *satisfies_key,
+ HeapTuple oldtup, HeapTuple newtup);
+static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask,
+ uint16 old_infomask2, TransactionId add_to_xmax,
+ LockTupleMode mode, bool is_update,
+ TransactionId *result_xmax, uint16 *result_infomask,
+ uint16 *result_infomask2);
+static HTSU_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple,
+ ItemPointer ctid, TransactionId xid,
+ LockTupleMode mode);
+static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
+ uint16 *new_infomask2);
+static TransactionId MultiXactIdGetUpdateXid(TransactionId xmax,
+ uint16 t_infomask);
+static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status,
+ int *remaining, uint16 infomask);
+static bool ConditionalMultiXactIdWait(MultiXactId multi,
+ MultiXactStatus status, int *remaining,
+ uint16 infomask);
+/*
+ * Each tuple lock mode has a corresponding heavyweight lock, and one or two
+ * corresponding MultiXactStatuses (one to merely lock tuples, another one to
+ * update them). This table (and the macros below) helps us determine the
+ * heavyweight lock mode and MultiXactStatus values to use for any particular
+ * tuple lock strength.
+ */
+static const struct
+{
+ LOCKMODE hwlock;
+ MultiXactStatus lockstatus;
+ MultiXactStatus updstatus;
+}
+tupleLockExtraInfo[MaxLockTupleMode + 1] =
+{
+ { /* LockTupleKeyShare */
+ AccessShareLock,
+ MultiXactStatusForKeyShare,
+ -1 /* KeyShare does not allow updating tuples */
+ },
+ { /* LockTupleShare */
+ RowShareLock,
+ MultiXactStatusForShare,
+ -1 /* Share does not allow updating tuples */
+ },
+ { /* LockTupleNoKeyExclusive */
+ ExclusiveLock,
+ MultiXactStatusForNoKeyUpdate,
+ MultiXactStatusNoKeyUpdate
+ },
+ { /* LockTupleExclusive */
+ AccessExclusiveLock,
+ MultiXactStatusForUpdate,
+ MultiXactStatusUpdate
+ }
+};
+/* Get the LOCKMODE for a given MultiXactStatus */
+#define LOCKMODE_from_mxstatus(status) \
+ (tupleLockExtraInfo[TUPLOCK_from_mxstatus((status))].hwlock)
+
+/*
+ * Acquire heavyweight locks on tuples, using a LockTupleMode strength value.
+ * This is more readable than having every caller translate it to lock.h's
+ * LOCKMODE.
+ */
+#define LockTupleTuplock(rel, tup, mode) \
+ LockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock)
+#define UnlockTupleTuplock(rel, tup, mode) \
+ UnlockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock)
+#define ConditionalLockTupleTuplock(rel, tup, mode) \
+ ConditionalLockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock)
+
+/*
+ * This table maps each MultiXactStatus value to the corresponding tuple
+ * lock strength value.
+ */
+static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
+{
+ LockTupleKeyShare, /* ForKeyShare */
+ LockTupleShare, /* ForShare */
+ LockTupleNoKeyExclusive, /* ForNoKeyUpdate */
+ LockTupleExclusive, /* ForUpdate */
+ LockTupleNoKeyExclusive, /* NoKeyUpdate */
+ LockTupleExclusive /* Update */
+};
+
+/* Get the LockTupleMode for a given MultiXactStatus */
+#define TUPLOCK_from_mxstatus(status) \
+ (MultiXactStatusLock[(status)])
+/* Get the is_update bit for a given MultiXactStatus */
+#define ISUPDATE_from_mxstatus(status) \
+ ((status) > MultiXactStatusForUpdate)
+
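
The tables and macros above are easiest to read through a concrete walk-through. A minimal sketch (illustration only, not part of the patch), assuming the definitions above:

    /* Illustration: how the two tables and their macros compose. */
    static void
    check_lock_mapping_examples(void)
    {
        /* FOR SHARE locks map to MultiXactStatusForShare ... */
        Assert(tupleLockExtraInfo[LockTupleShare].lockstatus ==
               MultiXactStatusForShare);
        /* ... whose heavyweight lock mode is RowShareLock ... */
        Assert(LOCKMODE_from_mxstatus(MultiXactStatusForShare) == RowShareLock);
        /* ... and back to the tuple lock strength it came from. */
        Assert(TUPLOCK_from_mxstatus(MultiXactStatusForShare) == LockTupleShare);
        /* Only the two non-"For" statuses count as updates. */
        Assert(!ISUPDATE_from_mxstatus(MultiXactStatusForShare));
        Assert(ISUPDATE_from_mxstatus(MultiXactStatusNoKeyUpdate));
    }
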
/* ----------------------------------------------------------------
* heap support routines
* ----------------------------------------------------------------
@@ -1664,7 +1757,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
ItemPointerGetBlockNumber(tid));
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
at_chain_start = false;
- prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
+ prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
}
else
break; /* end of chain */
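
A note on the recurring GetXmax → GetUpdateXid substitutions throughout this patch: xmax may now be a MultiXactId carrying several lockers plus at most one updater, so chain-following code must extract the updating member rather than use the raw value. A hedged sketch of the assumed semantics (the real logic lives behind the HeapTupleHeaderGetUpdateXid macro and MultiXactIdGetUpdateXid, declared among the prototypes above):

    /* Sketch (assumption, illustration only): */
    static TransactionId
    get_update_xid_sketch(HeapTupleHeader tup)
    {
        /* plain Xid in xmax: the raw value is the updater */
        if (!(tup->t_infomask & HEAP_XMAX_IS_MULTI))
            return HeapTupleHeaderGetRawXmax(tup);
        /* lockers only: there is no updater to return */
        if (HEAP_XMAX_IS_LOCKED_ONLY(tup->t_infomask))
            return InvalidTransactionId;
        /* a multi with an update: dig out the updating member */
        return MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(tup),
                                       tup->t_infomask);
    }
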
@@ -1787,7 +1880,7 @@ heap_get_latest_tid(Relation relation,
* tuple. Check for XMIN match.
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
+ !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
{
UnlockReleaseBuffer(buffer);
break;
@@ -1805,7 +1898,8 @@ heap_get_latest_tid(Relation relation,
/*
* If there's a valid t_ctid link, follow it, else we're done.
*/
- if ((tp.t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) ||
+ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
{
UnlockReleaseBuffer(buffer);
@@ -1813,7 +1907,7 @@ heap_get_latest_tid(Relation relation,
}
ctid = tp.t_data->t_ctid;
- priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
+ priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
UnlockReleaseBuffer(buffer);
} /* end of loop */
}
@@ -1826,17 +1920,25 @@ heap_get_latest_tid(Relation relation,
* If the transaction aborted, we guarantee the XMAX_INVALID hint bit will
* be set on exit. If the transaction committed, we set the XMAX_COMMITTED
* hint bit if possible --- but beware that that may not yet be possible,
- * if the transaction committed asynchronously. Hence callers should look
- * only at XMAX_INVALID.
+ * if the transaction committed asynchronously.
+ *
+ * Note that if the transaction was a locker only, we set HEAP_XMAX_INVALID
+ * even if it commits.
+ *
+ * Hence callers should look only at XMAX_INVALID.
+ *
+ * Note this is not allowed for tuples whose xmax is a multixact.
*/
static void
UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
{
- Assert(TransactionIdEquals(HeapTupleHeaderGetXmax(tuple), xid));
+ Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple), xid));
+ Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));
if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID)))
{
- if (TransactionIdDidCommit(xid))
+ if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) &&
+ TransactionIdDidCommit(xid))
HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
xid);
else
@@ -2374,6 +2476,26 @@ simple_heap_insert(Relation relation, HeapTuple tup)
}
/*
+ * Given infomask/infomask2, compute the bits that must be saved in the
+ * "infobits" field of xl_heap_delete, xl_heap_update, xl_heap_lock,
+ * xl_heap_lock_updated WAL records.
+ *
+ * See fix_infomask_from_infobits.
+ */
+static uint8
+compute_infobits(uint16 infomask, uint16 infomask2)
+{
+ return
+ ((infomask & HEAP_XMAX_IS_MULTI) != 0 ? XLHL_XMAX_IS_MULTI : 0) |
+ ((infomask & HEAP_XMAX_LOCK_ONLY) != 0 ? XLHL_XMAX_LOCK_ONLY : 0) |
+ ((infomask & HEAP_XMAX_EXCL_LOCK) != 0 ? XLHL_XMAX_EXCL_LOCK : 0) |
+ /* note we ignore HEAP_XMAX_SHR_LOCK here */
+ ((infomask & HEAP_XMAX_KEYSHR_LOCK) != 0 ? XLHL_XMAX_KEYSHR_LOCK : 0) |
+ ((infomask2 & HEAP_KEYS_UPDATED) != 0 ?
+ XLHL_KEYS_UPDATED : 0);
+}
+
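
compute_infobits has a WAL-replay counterpart, fix_infomask_from_infobits, referenced in the comment above. A minimal sketch of what that inverse presumably looks like, assuming the XLHL_* flags mirror the packing here:

    /* Sketch: unpack XLHL_* bits back into infomask/infomask2 at replay. */
    static void
    fix_infomask_from_infobits_sketch(uint8 infobits,
                                      uint16 *infomask, uint16 *infomask2)
    {
        *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
                       HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
        *infomask2 &= ~HEAP_KEYS_UPDATED;

        if (infobits & XLHL_XMAX_IS_MULTI)
            *infomask |= HEAP_XMAX_IS_MULTI;
        if (infobits & XLHL_XMAX_LOCK_ONLY)
            *infomask |= HEAP_XMAX_LOCK_ONLY;
        if (infobits & XLHL_XMAX_EXCL_LOCK)
            *infomask |= HEAP_XMAX_EXCL_LOCK;
        /* HEAP_XMAX_SHR_LOCK was not saved, per the note above */
        if (infobits & XLHL_XMAX_KEYSHR_LOCK)
            *infomask |= HEAP_XMAX_KEYSHR_LOCK;
        if (infobits & XLHL_KEYS_UPDATED)
            *infomask2 |= HEAP_KEYS_UPDATED;
    }
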
+/*
* heap_delete - delete a tuple
*
* NB: do not call this directly unless you are prepared to deal with
@@ -2393,7 +2515,8 @@ simple_heap_insert(Relation relation, HeapTuple tup)
* (the last only possible if wait == false).
*
* In the failure cases, the routine fills *hufd with the tuple's t_ctid,
- * t_xmax, and t_cmax (the last only for HeapTupleSelfUpdated, since we
+ * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax
+ * (the last only for HeapTupleSelfUpdated, since we
* cannot obtain cmax from a combocid generated by another transaction).
* See comments for struct HeapUpdateFailureData for additional info.
*/
@@ -2410,6 +2533,9 @@ heap_delete(Relation relation, ItemPointer tid,
BlockNumber block;
Buffer buffer;
Buffer vmbuffer = InvalidBuffer;
+ TransactionId new_xmax;
+ uint16 new_infomask,
+ new_infomask2;
bool have_tuple_lock = false;
bool iscombo;
bool all_visible_cleared = false;
@@ -2465,7 +2591,7 @@ l1:
uint16 infomask;
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetXmax(tp.t_data);
+ xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
infomask = tp.t_data->t_infomask;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -2481,20 +2607,20 @@ l1:
*/
if (!have_tuple_lock)
{
- LockTuple(relation, &(tp.t_self), ExclusiveLock);
+ LockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
have_tuple_lock = true;
}
/*
* Sleep until concurrent transaction ends. Note that we don't care
- * if the locker has an exclusive or shared lock, because we need
- * exclusive.
+ * which lock mode the locker has, because we need the strongest one.
*/
if (infomask & HEAP_XMAX_IS_MULTI)
{
/* wait for multixact */
- MultiXactIdWait((MultiXactId) xwait);
+ MultiXactIdWait((MultiXactId) xwait, MultiXactStatusUpdate,
+ NULL, infomask);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
@@ -2503,7 +2629,7 @@ l1:
* change, and start over if so.
*/
if (!(tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(tp.t_data),
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
xwait))
goto l1;
@@ -2529,7 +2655,7 @@ l1:
* Check for xmax change, and start over if so.
*/
if ((tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(tp.t_data),
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
xwait))
goto l1;
@@ -2541,8 +2667,9 @@ l1:
* We may overwrite if previous xmax aborted, or if it committed but
* only locked the tuple without updating it.
*/
- if (tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
- HEAP_IS_LOCKED))
+ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) ||
+ HeapTupleHeaderIsOnlyLocked(tp.t_data))
result = HeapTupleMayBeUpdated;
else
result = HeapTupleUpdated;
@@ -2562,14 +2689,14 @@ l1:
result == HeapTupleBeingUpdated);
Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
hufd->ctid = tp.t_data->t_ctid;
- hufd->xmax = HeapTupleHeaderGetXmax(tp.t_data);
+ hufd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
if (result == HeapTupleSelfUpdated)
hufd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
else
hufd->cmax = 0; /* for lack of an InvalidCommandId value */
UnlockReleaseBuffer(buffer);
if (have_tuple_lock)
- UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
+ UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
if (vmbuffer != InvalidBuffer)
ReleaseBuffer(vmbuffer);
return result;
@@ -2603,14 +2730,29 @@ l1:
vmbuffer);
}
+ /*
+ * If this is the first possibly-multixact-able operation in the
+ * current transaction, set my per-backend OldestMemberMXactId setting.
+ * We can be certain that the transaction will never become a member of
+ * any older MultiXactIds than that. (We have to do this even if we
+ * end up just using our own TransactionId below, since some other
+ * backend could incorporate our XID into a MultiXact immediately
+ * afterwards.)
+ */
+ MultiXactIdSetOldestMember();
+
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data),
+ tp.t_data->t_infomask, tp.t_data->t_infomask2,
+ xid, LockTupleExclusive, true,
+ &new_xmax, &new_infomask, &new_infomask2);
+
/* store transaction information of xact deleting the tuple */
- tp.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ tp.t_data->t_infomask |= new_infomask;
+ tp.t_data->t_infomask2 |= new_infomask2;
HeapTupleHeaderClearHotUpdated(tp.t_data);
- HeapTupleHeaderSetXmax(tp.t_data, xid);
+ HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
/* Make sure there is no forward chain link in t_ctid */
tp.t_data->t_ctid = tp.t_self;
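
The HEAP_XMAX_BITS / HEAP_KEYS_UPDATED clears above replace the old five-flag list. As an assumption for readability (the actual definitions belong to htup.h in this patch), HEAP_XMAX_BITS is taken to be the union of all xmax-related flags:

    /* Assumed composition (sketch; see htup.h in this patch): */
    #define HEAP_LOCK_MASK  (HEAP_XMAX_SHR_LOCK | HEAP_XMAX_EXCL_LOCK | \
                             HEAP_XMAX_KEYSHR_LOCK)
    #define HEAP_XMAX_BITS  (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID | \
                             HEAP_XMAX_IS_MULTI | HEAP_LOCK_MASK | \
                             HEAP_XMAX_LOCK_ONLY)
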
@@ -2625,8 +2767,11 @@ l1:
XLogRecData rdata[2];
xlrec.all_visible_cleared = all_visible_cleared;
+ xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
+ tp.t_data->t_infomask2);
xlrec.target.node = relation->rd_node;
xlrec.target.tid = tp.t_self;
+ xlrec.xmax = new_xmax;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapDelete;
rdata[0].buffer = InvalidBuffer;
@@ -2679,7 +2824,7 @@ l1:
* Release the lmgr tuple lock, if we had it.
*/
if (have_tuple_lock)
- UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
+ UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
pgstat_count_heap_delete(relation);
@@ -2739,6 +2884,7 @@ simple_heap_delete(Relation relation, ItemPointer tid)
* crosscheck - if not InvalidSnapshot, also check old tuple against this
* wait - true if should wait for any conflicting update to commit/abort
* hufd - output parameter, filled in failure cases (see below)
+ * lockmode - output parameter, filled with lock mode acquired on tuple
*
* Normal, successful return value is HeapTupleMayBeUpdated, which
* actually means we *did* update it. Failure return codes are
@@ -2752,23 +2898,26 @@ simple_heap_delete(Relation relation, ItemPointer tid)
* data are not reflected into *newtup.
*
* In the failure cases, the routine fills *hufd with the tuple's t_ctid,
- * t_xmax, and t_cmax (the last only for HeapTupleSelfUpdated, since we
+ * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax
+ * (the last only for HeapTupleSelfUpdated, since we
* cannot obtain cmax from a combocid generated by another transaction).
* See comments for struct HeapUpdateFailureData for additional info.
*/
HTSU_Result
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd)
+ HeapUpdateFailureData *hufd, LockTupleMode *lockmode)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
Bitmapset *hot_attrs;
+ Bitmapset *key_attrs;
ItemId lp;
HeapTupleData oldtup;
HeapTuple heaptup;
Page page;
BlockNumber block;
+ MultiXactStatus mxact_status;
Buffer buffer,
newbuf,
vmbuffer = InvalidBuffer,
@@ -2779,9 +2928,20 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
pagefree;
bool have_tuple_lock = false;
bool iscombo;
+ bool satisfies_hot;
+ bool satisfies_key;
bool use_hot_update = false;
+ bool key_intact;
bool all_visible_cleared = false;
bool all_visible_cleared_new = false;
+ bool checked_lockers;
+ bool locker_remains;
+ TransactionId xmax_new_tuple,
+ xmax_old_tuple;
+ uint16 infomask_old_tuple,
+ infomask2_old_tuple,
+ infomask_new_tuple,
+ infomask2_new_tuple;
Assert(ItemPointerIsValid(otid));
@@ -2797,7 +2957,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
* Note that we get a copy here, so we need not worry about relcache flush
* happening midway through.
*/
- hot_attrs = RelationGetIndexAttrBitmap(relation);
+ hot_attrs = RelationGetIndexAttrBitmap(relation, false);
+ key_attrs = RelationGetIndexAttrBitmap(relation, true);
block = ItemPointerGetBlockNumber(otid);
buffer = ReadBuffer(relation, block);
@@ -2822,6 +2983,44 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
oldtup.t_self = *otid;
/*
+ * If we're not updating any "key" column, we can grab a weaker lock type.
+ * This allows for more concurrency when we are running simultaneously with
+ * foreign key checks.
+ *
+ * Note that if a column gets detoasted while executing the update, but the
+ * value ends up being the same, this test will fail and we will use the
+ * stronger lock. This is acceptable; the important case to optimize is
+ * updates that don't manipulate key columns, not those that
+ * serendipitously arrive at the same key values.
+ */
+ HeapSatisfiesHOTandKeyUpdate(relation, hot_attrs, key_attrs,
+ &satisfies_hot, &satisfies_key,
+ &oldtup, newtup);
+ if (satisfies_key)
+ {
+ *lockmode = LockTupleNoKeyExclusive;
+ mxact_status = MultiXactStatusNoKeyUpdate;
+ key_intact = true;
+
+ /*
+ * If this is the first possibly-multixact-able operation in the
+ * current transaction, set my per-backend OldestMemberMXactId setting.
+ * We can be certain that the transaction will never become a member of
+ * any older MultiXactIds than that. (We have to do this even if we
+ * end up just using our own TransactionId below, since some other
+ * backend could incorporate our XID into a MultiXact immediately
+ * afterwards.)
+ */
+ MultiXactIdSetOldestMember();
+ }
+ else
+ {
+ *lockmode = LockTupleExclusive;
+ mxact_status = MultiXactStatusUpdate;
+ key_intact = false;
+ }
+
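
Read as a decision table, the branch above gives the following (illustration only; "key" columns are those usable by foreign keys):

    /*
     * Illustration of the lock-strength choice:
     *
     *   update touches no key column  -> LockTupleNoKeyExclusive /
     *                                    MultiXactStatusNoKeyUpdate
     *   update changes a key column   -> LockTupleExclusive /
     *                                    MultiXactStatusUpdate
     *
     * Only the second case conflicts with FOR KEY SHARE lockers, so
     * RI checks can proceed concurrently with non-key updates.
     */
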
+ /*
* Note: beyond this point, use oldtup not otid to refer to old tuple.
* otid may very well point at newtup->t_self, which we will overwrite
* with the new tuple's location, so there's great risk of confusion if we
@@ -2829,8 +3028,13 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
*/
l2:
+ checked_lockers = false;
+ locker_remains = false;
result = HeapTupleSatisfiesUpdate(oldtup.t_data, cid, buffer);
+ /* see below about the "no wait" case */
+ Assert(result != HeapTupleBeingUpdated || wait);
+
if (result == HeapTupleInvisible)
{
UnlockReleaseBuffer(buffer);
@@ -2838,11 +3042,26 @@ l2:
}
else if (result == HeapTupleBeingUpdated && wait)
{
- TransactionId xwait;
+ TransactionId xwait;
uint16 infomask;
+ bool can_continue = false;
+
+ checked_lockers = true;
+
+ /*
+ * XXX note that we don't consider the "no wait" case here. This
+ * isn't a problem currently because no caller uses that case, but it
+ * should be fixed if such a caller is introduced. It wasn't a problem
+ * previously because this code would always wait, but now that some
+ * tuple locks do not conflict with one of the lock modes we use, it is
+ * possible that this case is interesting to handle specially.
+ *
+ * This may cause failures with third-party code that calls heap_update
+ * directly.
+ */
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetXmax(oldtup.t_data);
+ xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
infomask = oldtup.t_data->t_infomask;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -2858,20 +3077,29 @@ l2:
*/
if (!have_tuple_lock)
{
- LockTuple(relation, &(oldtup.t_self), ExclusiveLock);
+ LockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
have_tuple_lock = true;
}
/*
- * Sleep until concurrent transaction ends. Note that we don't care
- * if the locker has an exclusive or shared lock, because we need
- * exclusive.
+ * Now we have to do something about the existing locker. If it's a
+ * multi, sleep on it; we might be awakened before it is completely
+ * gone (or even not sleep at all in some cases); we need to preserve
+ * it as locker, unless it is gone completely.
+ *
+ * If it's not a multi, we need to check for sleeping conditions before
+ * actually going to sleep. If the update doesn't conflict with the
+ * locks, we just continue without sleeping (but making sure it is
+ * preserved).
*/
-
if (infomask & HEAP_XMAX_IS_MULTI)
{
+ TransactionId update_xact;
+ int remain;
+
/* wait for multixact */
- MultiXactIdWait((MultiXactId) xwait);
+ MultiXactIdWait((MultiXactId) xwait, mxact_status, &remain,
+ infomask);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
@@ -2880,49 +3108,87 @@ l2:
* change, and start over if so.
*/
if (!(oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
xwait))
goto l2;
/*
- * You might think the multixact is necessarily done here, but not
- * so: it could have surviving members, namely our own xact or
- * other subxacts of this backend. It is legal for us to update
- * the tuple in either case, however (the latter case is
- * essentially a situation of upgrading our former shared lock to
- * exclusive). We don't bother changing the on-disk hint bits
- * since we are about to overwrite the xmax altogether.
+ * Note that the multixact may not be done by now. It could have
+ * surviving members: our own xact or other subxacts of this
+ * backend, and also any other concurrent transaction that locked
+ * the tuple with KeyShare if we only got TupleLockUpdate. If this
+ * is the case, we have to be careful to mark the updated tuple
+ * with the surviving members in Xmax.
+ *
+ * Note that there could have been another update in the MultiXact.
+ * In that case, we need to check whether it committed or aborted.
+ * If it aborted we are safe to update it again; otherwise there is
+ * an update conflict, and we have to return HeapTupleUpdated
+ * below.
+ *
+ * In the LockTupleExclusive case, we still need to preserve the
+ * surviving members: those would include the tuple locks we had
+ * before this one, which are important to keep in case this
+ * subxact aborts.
*/
+ update_xact = InvalidTransactionId;
+ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
+ update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
+
+ /* there was no UPDATE in the MultiXact; or it aborted. */
+ if (!TransactionIdIsValid(update_xact) ||
+ TransactionIdDidAbort(update_xact))
+ can_continue = true;
+
+ locker_remains = remain != 0;
}
else
{
- /* wait for regular transaction to end */
- XactLockTableWait(xwait);
- LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-
/*
- * xwait is done, but if xwait had just locked the tuple then some
- * other xact could update this tuple before we get to this point.
- * Check for xmax change, and start over if so.
+ * If it's just a key-share locker, and we're not changing the
+ * key columns, we don't need to wait for it to end; but we
+ * need to preserve it as locker.
*/
- if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
- xwait))
- goto l2;
+ if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) && key_intact)
+ {
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
- /* Otherwise check if it committed or aborted */
- UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
+ /*
+ * recheck the locker; if someone else changed the tuple while we
+ * weren't looking, start over.
+ */
+ if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ xwait))
+ goto l2;
+
+ can_continue = true;
+ locker_remains = true;
+ }
+ else
+ {
+ /* wait for regular transaction to end */
+ XactLockTableWait(xwait);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /*
+ * xwait is done, but if xwait had just locked the tuple then some
+ * other xact could update this tuple before we get to this point.
+ * Check for xmax change, and start over if so.
+ */
+ if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ xwait))
+ goto l2;
+
+ /* Otherwise check if it committed or aborted */
+ UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
+ if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
+ can_continue = true;
+ }
}
- /*
- * We may overwrite if previous xmax aborted, or if it committed but
- * only locked the tuple without updating it.
- */
- if (oldtup.t_data->t_infomask & (HEAP_XMAX_INVALID |
- HEAP_IS_LOCKED))
- result = HeapTupleMayBeUpdated;
- else
- result = HeapTupleUpdated;
+ result = can_continue ? HeapTupleMayBeUpdated : HeapTupleUpdated;
}
if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
@@ -2939,17 +3205,18 @@ l2:
result == HeapTupleBeingUpdated);
Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
hufd->ctid = oldtup.t_data->t_ctid;
- hufd->xmax = HeapTupleHeaderGetXmax(oldtup.t_data);
+ hufd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
if (result == HeapTupleSelfUpdated)
hufd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
else
hufd->cmax = 0; /* for lack of an InvalidCommandId value */
UnlockReleaseBuffer(buffer);
if (have_tuple_lock)
- UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
+ UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
if (vmbuffer != InvalidBuffer)
ReleaseBuffer(vmbuffer);
bms_free(hot_attrs);
+ bms_free(key_attrs);
return result;
}
@@ -2958,7 +3225,7 @@ l2:
* visible while we were busy locking the buffer, or during some
* subsequent window during which we had it unlocked, we'll have to unlock
* and re-lock, to avoid holding the buffer lock across an I/O. That's a
- * bit unfortunate, esepecially since we'll now have to recheck whether
+ * bit unfortunate, especially since we'll now have to recheck whether
* the tuple has been locked or updated under us, but hopefully it won't
* happen very often.
*/
@@ -2991,12 +3258,54 @@ l2:
Assert(!(newtup->t_data->t_infomask & HEAP_HASOID));
}
+ /*
+ * If the tuple we're updating is locked, we need to preserve the locking
+ * info in the old tuple's Xmax. Prepare a new Xmax value for this.
+ */
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ oldtup.t_data->t_infomask,
+ oldtup.t_data->t_infomask2,
+ xid, *lockmode, true,
+ &xmax_old_tuple, &infomask_old_tuple,
+ &infomask2_old_tuple);
+
+ /* And also prepare an Xmax value for the new copy of the tuple */
+ if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ (checked_lockers && !locker_remains))
+ xmax_new_tuple = InvalidTransactionId;
+ else
+ xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+
+ if (!TransactionIdIsValid(xmax_new_tuple))
+ {
+ infomask_new_tuple = HEAP_XMAX_INVALID;
+ infomask2_new_tuple = 0;
+ }
+ else
+ {
+ if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
+ &infomask2_new_tuple);
+ }
+ else
+ {
+ infomask_new_tuple = HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY;
+ infomask2_new_tuple = 0;
+ }
+ }
+
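
The new tuple's initial Xmax, computed just above, can be summarized as follows (illustration of the preceding branches):

    /*
     * Sketch of the outcomes above:
     *
     *   old xmax invalid, or all lockers checked and known gone
     *       -> xmax_new_tuple = InvalidTransactionId, HEAP_XMAX_INVALID
     *   surviving MultiXact locker(s)
     *       -> carry the multi over; hint bits via GetMultiXactIdHintBits()
     *   single surviving plain-Xid locker
     *       -> carry the xid over as a key-share-only lock:
     *          HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY
     */
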
+ /*
+ * Prepare the new tuple with the appropriate initial values of Xmin and
+ * Xmax, as well as initial infomask bits as computed above.
+ */
newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
- newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
HeapTupleHeaderSetXmin(newtup->t_data, xid);
HeapTupleHeaderSetCmin(newtup->t_data, cid);
- HeapTupleHeaderSetXmax(newtup->t_data, 0); /* for cleanliness */
+ newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
+ newtup->t_data->t_infomask2 |= infomask2_new_tuple;
+ HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
newtup->t_tableOid = RelationGetRelid(relation);
/*
@@ -3035,14 +3344,14 @@ l2:
if (need_toast || newtupsize > pagefree)
{
/* Clear obsolete visibility flags ... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
HeapTupleClearHotUpdated(&oldtup);
/* ... and store info about transaction updating this tuple */
- HeapTupleHeaderSetXmax(oldtup.t_data, xid);
+ Assert(TransactionIdIsValid(xmax_old_tuple));
+ HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
+ oldtup.t_data->t_infomask |= infomask_old_tuple;
+ oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
/* temporarily make it look not-updated */
oldtup.t_data->t_ctid = oldtup.t_self;
@@ -3145,7 +3454,7 @@ l2:
* to do a HOT update. Check if any of the index columns have been
* changed. If not, then HOT update is possible.
*/
- if (HeapSatisfiesHOTUpdate(relation, hot_attrs, &oldtup, heaptup))
+ if (satisfies_hot)
use_hot_update = true;
}
else
@@ -3193,13 +3502,13 @@ l2:
if (!already_marked)
{
/* Clear obsolete visibility flags ... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
/* ... and store info about transaction updating this tuple */
- HeapTupleHeaderSetXmax(oldtup.t_data, xid);
+ Assert(TransactionIdIsValid(xmax_old_tuple));
+ HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
+ oldtup.t_data->t_infomask |= infomask_old_tuple;
+ oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
}
@@ -3229,8 +3538,8 @@ l2:
/* XLOG stuff */
if (RelationNeedsWAL(relation))
{
- XLogRecPtr recptr = log_heap_update(relation, buffer, oldtup.t_self,
- newbuf, heaptup,
+ XLogRecPtr recptr = log_heap_update(relation, buffer,
+ newbuf, &oldtup, heaptup,
all_visible_cleared,
all_visible_cleared_new);
@@ -3272,7 +3581,7 @@ l2:
* Release the lmgr tuple lock, if we had it.
*/
if (have_tuple_lock)
- UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
+ UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
pgstat_count_heap_update(relation, use_hot_update);
@@ -3287,13 +3596,14 @@ l2:
}
bms_free(hot_attrs);
+ bms_free(key_attrs);
return HeapTupleMayBeUpdated;
}
/*
* Check if the specified attribute's value is same in both given tuples.
- * Subroutine for HeapSatisfiesHOTUpdate.
+ * Subroutine for HeapSatisfiesHOTandKeyUpdate.
*/
static bool
heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
@@ -3327,7 +3637,7 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
/*
* Extract the corresponding values. XXX this is pretty inefficient if
- * there are many indexed columns. Should HeapSatisfiesHOTUpdate do a
+ * there are many indexed columns. Should HeapSatisfiesHOTandKeyUpdate do a
* single heap_deform_tuple call on each tuple, instead? But that doesn't
* work for system columns ...
*/
@@ -3370,35 +3680,101 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
}
/*
- * Check if the old and new tuples represent a HOT-safe update. To be able
- * to do a HOT update, we must not have changed any columns used in index
- * definitions.
+ * Check which columns are being updated.
*
- * The set of attributes to be checked is passed in (we dare not try to
- * compute it while holding exclusive buffer lock...) NOTE that hot_attrs
- * is destructively modified! That is OK since this is invoked at most once
- * by heap_update().
+ * This simultaneously checks conditions for HOT updates and for FOR KEY
+ * SHARE updates. Since much of the time they will be checking very similar
+ * sets of columns, and doing the same tests on them, it makes sense to
+ * optimize and do them together.
*
- * Returns true if safe to do HOT update.
+ * We receive two bitmapsets comprising the two sets of columns we're
+ * interested in. Note these are destructively modified; that is OK since
+ * this is invoked at most once in heap_update.
+ *
+ * hot_result is set to TRUE if it's okay to do a HOT update (i.e. it does not
+ * modify indexed columns); key_result is set to TRUE if the update does not
+ * modify columns used in the key.
*/
-static bool
-HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs,
- HeapTuple oldtup, HeapTuple newtup)
+static void
+HeapSatisfiesHOTandKeyUpdate(Relation relation,
+ Bitmapset *hot_attrs, Bitmapset *key_attrs,
+ bool *satisfies_hot, bool *satisfies_key,
+ HeapTuple oldtup, HeapTuple newtup)
{
- int attrnum;
+ int next_hot_attnum;
+ int next_key_attnum;
+ bool hot_result = true;
+ bool key_result = true;
+ bool key_done = false;
+ bool hot_done = false;
+
+ next_hot_attnum = bms_first_member(hot_attrs);
+ if (next_hot_attnum == -1)
+ hot_done = true;
+ else
+ /* Adjust for system attributes */
+ next_hot_attnum += FirstLowInvalidHeapAttributeNumber;
- while ((attrnum = bms_first_member(hot_attrs)) >= 0)
- {
+ next_key_attnum = bms_first_member(key_attrs);
+ if (next_key_attnum == -1)
+ key_done = true;
+ else
/* Adjust for system attributes */
- attrnum += FirstLowInvalidHeapAttributeNumber;
+ next_key_attnum += FirstLowInvalidHeapAttributeNumber;
- /* If the attribute value has changed, we can't do HOT update */
- if (!heap_tuple_attr_equals(RelationGetDescr(relation), attrnum,
- oldtup, newtup))
- return false;
+ for (;;)
+ {
+ int check_now;
+ bool changed;
+
+ /* both bitmapsets are now empty */
+ if (key_done && hot_done)
+ break;
+
+ /* XXX there's probably an easier way ... */
+ if (hot_done)
+ check_now = next_key_attnum;
+ else if (key_done)
+ check_now = next_hot_attnum;
+ else
+ check_now = Min(next_hot_attnum, next_key_attnum);
+
+ changed = !heap_tuple_attr_equals(RelationGetDescr(relation),
+ check_now, oldtup, newtup);
+ if (changed)
+ {
+ if (check_now == next_hot_attnum)
+ hot_result = false;
+ if (check_now == next_key_attnum)
+ key_result = false;
+ }
+
+ /* if both are false now, we can stop checking */
+ if (!hot_result && !key_result)
+ break;
+
+ if (check_now == next_hot_attnum)
+ {
+ next_hot_attnum = bms_first_member(hot_attrs);
+ if (next_hot_attnum == -1)
+ hot_done = true;
+ else
+ /* Adjust for system attributes */
+ next_hot_attnum += FirstLowInvalidHeapAttributeNumber;
+ }
+ if (check_now == next_key_attnum)
+ {
+ next_key_attnum = bms_first_member(key_attrs);
+ if (next_key_attnum == -1)
+ key_done = true;
+ else
+ /* Adjust for system attributes */
+ next_key_attnum += FirstLowInvalidHeapAttributeNumber;
+ }
}
- return true;
+ *satisfies_hot = hot_result;
+ *satisfies_key = key_result;
}
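
The loop above is a merge walk over two ascending attribute streams (bms_first_member returns members in increasing order and destroys the set as it goes). A self-contained toy over sorted arrays, illustration only, shows the same shape; note the else if, without which the key-exhausted branch would clobber the hot-exhausted one:

    #include <stdbool.h>

    /*
     * Toy version of the merge walk, over plain sorted arrays instead of
     * Bitmapsets (hypothetical helper, illustration only).  "changed"
     * stands in for heap_tuple_attr_equals.
     */
    static void
    merge_check(const int *hot, int nhot, const int *key, int nkey,
                bool (*changed) (int attnum),
                bool *hot_ok, bool *key_ok)
    {
        int         ih = 0,
                    ik = 0;

        *hot_ok = *key_ok = true;
        while ((ih < nhot || ik < nkey) && (*hot_ok || *key_ok))
        {
            int         check_now;

            /* pick the smallest unconsumed attribute of either stream */
            if (ih >= nhot)
                check_now = key[ik];
            else if (ik >= nkey)
                check_now = hot[ih];
            else
                check_now = (hot[ih] < key[ik]) ? hot[ih] : key[ik];

            if (changed(check_now))
            {
                if (ih < nhot && check_now == hot[ih])
                    *hot_ok = false;
                if (ik < nkey && check_now == key[ik])
                    *key_ok = false;
            }

            /* advance every stream whose head we just consumed */
            if (ih < nhot && check_now == hot[ih])
                ih++;
            if (ik < nkey && check_now == key[ik])
                ik++;
        }
    }
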
/*
@@ -3414,11 +3790,12 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
{
HTSU_Result result;
HeapUpdateFailureData hufd;
+ LockTupleMode lockmode;
result = heap_update(relation, otid, tup,
GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */,
- &hufd);
+ &hufd, &lockmode);
switch (result)
{
case HeapTupleSelfUpdated:
@@ -3440,6 +3817,28 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
}
}
+
+/*
+ * Return the MultiXactStatus corresponding to the given tuple lock mode.
+ */
+static MultiXactStatus
+get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
+{
+ MultiXactStatus retval;
+
+ if (is_update)
+ retval = tupleLockExtraInfo[mode].updstatus;
+ else
+ retval = tupleLockExtraInfo[mode].lockstatus;
+
+ if (retval == -1)
+ elog(ERROR, "invalid lock tuple mode %d/%s", mode,
+ is_update ? "true" : "false");
+
+ return retval;
+}
+
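
For illustration (assuming the tables defined earlier in this patch):

    /*
     *   get_mxact_status_for_lock(LockTupleKeyShare, false)
     *       == MultiXactStatusForKeyShare
     *   get_mxact_status_for_lock(LockTupleExclusive, true)
     *       == MultiXactStatusUpdate
     *   get_mxact_status_for_lock(LockTupleShare, true)
     *       -> elog(ERROR): Share has no update status (updstatus == -1)
     */
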
+
/*
* heap_lock_tuple - lock a tuple in shared or exclusive mode
*
@@ -3452,6 +3851,8 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
* tuple's cmax if lock is successful)
* mode: indicates if shared or exclusive tuple lock is desired
* nowait: if true, ereport rather than blocking if lock not available
+ * follow_updates: if true, follow the update chain to also lock descendant
+ * tuples.
*
* Output parameters:
* *tuple: all fields filled in
@@ -3464,61 +3865,30 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
* HeapTupleUpdated: lock failed because tuple updated by other xact
*
* In the failure cases, the routine fills *hufd with the tuple's t_ctid,
- * t_xmax, and t_cmax (the last only for HeapTupleSelfUpdated, since we
+ * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax
+ * (the last only for HeapTupleSelfUpdated, since we
* cannot obtain cmax from a combocid generated by another transaction).
* See comments for struct HeapUpdateFailureData for additional info.
*
- *
- * NOTES: because the shared-memory lock table is of finite size, but users
- * could reasonably want to lock large numbers of tuples, we do not rely on
- * the standard lock manager to store tuple-level locks over the long term.
- * Instead, a tuple is marked as locked by setting the current transaction's
- * XID as its XMAX, and setting additional infomask bits to distinguish this
- * usage from the more normal case of having deleted the tuple. When
- * multiple transactions concurrently share-lock a tuple, the first locker's
- * XID is replaced in XMAX with a MultiTransactionId representing the set of
- * XIDs currently holding share-locks.
- *
- * When it is necessary to wait for a tuple-level lock to be released, the
- * basic delay is provided by XactLockTableWait or MultiXactIdWait on the
- * contents of the tuple's XMAX. However, that mechanism will release all
- * waiters concurrently, so there would be a race condition as to which
- * waiter gets the tuple, potentially leading to indefinite starvation of
- * some waiters. The possibility of share-locking makes the problem much
- * worse --- a steady stream of share-lockers can easily block an exclusive
- * locker forever. To provide more reliable semantics about who gets a
- * tuple-level lock first, we use the standard lock manager. The protocol
- * for waiting for a tuple-level lock is really
- * LockTuple()
- * XactLockTableWait()
- * mark tuple as locked by me
- * UnlockTuple()
- * When there are multiple waiters, arbitration of who is to get the lock next
- * is provided by LockTuple(). However, at most one tuple-level lock will
- * be held or awaited per backend at any time, so we don't risk overflow
- * of the lock table. Note that incoming share-lockers are required to
- * do LockTuple as well, if there is any conflict, to ensure that they don't
- * starve out waiting exclusive-lockers. However, if there is not any active
- * conflict for a tuple, we don't incur any extra overhead.
+ * See README.tuplock for a thorough explanation of this mechanism.
*/
HTSU_Result
heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, bool nowait,
+ bool follow_updates,
Buffer *buffer, HeapUpdateFailureData *hufd)
{
HTSU_Result result;
ItemPointer tid = &(tuple->t_self);
ItemId lp;
Page page;
- TransactionId xid;
- TransactionId xmax;
- uint16 old_infomask;
- uint16 new_infomask;
- LOCKMODE tuple_lock_type;
+ TransactionId xid,
+ xmax;
+ uint16 old_infomask,
+ new_infomask,
+ new_infomask2;
bool have_tuple_lock = false;
- tuple_lock_type = (mode == LockTupleShared) ? ShareLock : ExclusiveLock;
-
*buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3542,30 +3912,58 @@ l3:
{
TransactionId xwait;
uint16 infomask;
+ uint16 infomask2;
+ bool require_sleep;
+ ItemPointerData t_ctid;
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetXmax(tuple->t_data);
+ xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
infomask = tuple->t_data->t_infomask;
+ infomask2 = tuple->t_data->t_infomask2;
+ ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
/*
- * If we wish to acquire share lock, and the tuple is already
- * share-locked by a multixact that includes any subtransaction of the
- * current top transaction, then we effectively hold the desired lock
- * already. We *must* succeed without trying to take the tuple lock,
- * else we will deadlock against anyone waiting to acquire exclusive
- * lock. We don't need to make any state changes in this case.
+ * If any subtransaction of the current top transaction already holds a
+ * lock as strong or stronger than what we're requesting, we
+ * effectively hold the desired lock already. We *must* succeed
+ * without trying to take the tuple lock, else we will deadlock against
+ * anyone wanting to acquire a stronger lock.
*/
- if (mode == LockTupleShared &&
- (infomask & HEAP_XMAX_IS_MULTI) &&
- MultiXactIdIsCurrent((MultiXactId) xwait))
+ if (infomask & HEAP_XMAX_IS_MULTI)
{
- Assert(infomask & HEAP_XMAX_SHARED_LOCK);
- /* Probably can't hold tuple lock here, but may as well check */
- if (have_tuple_lock)
- UnlockTuple(relation, tid, tuple_lock_type);
- return HeapTupleMayBeUpdated;
+ int i;
+ int nmembers;
+ MultiXactMember *members;
+
+ /*
+ * We don't need to allow old multixacts here; if that had been the
+ * case, HeapTupleSatisfiesUpdate would have returned MayBeUpdated
+ * and we wouldn't be here.
+ */
+ nmembers = GetMultiXactIdMembers(xwait, &members, false);
+
+ for (i = 0; i < nmembers; i++)
+ {
+ if (TransactionIdIsCurrentTransactionId(members[i].xid))
+ {
+ LockTupleMode membermode;
+
+ membermode = TUPLOCK_from_mxstatus(members[i].status);
+
+ if (membermode >= mode)
+ {
+ if (have_tuple_lock)
+ UnlockTupleTuplock(relation, tid, mode);
+
+ pfree(members);
+ return HeapTupleMayBeUpdated;
+ }
+ }
+ }
+
+ pfree(members);
}
/*
@@ -3581,126 +3979,355 @@ l3:
{
if (nowait)
{
- if (!ConditionalLockTuple(relation, tid, tuple_lock_type))
+ if (!ConditionalLockTupleTuplock(relation, tid, mode))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
- errmsg("could not obtain lock on row in relation \"%s\"",
- RelationGetRelationName(relation))));
+ errmsg("could not obtain lock on row in relation \"%s\"",
+ RelationGetRelationName(relation))));
}
else
- LockTuple(relation, tid, tuple_lock_type);
+ LockTupleTuplock(relation, tid, mode);
have_tuple_lock = true;
}
- if (mode == LockTupleShared && (infomask & HEAP_XMAX_SHARED_LOCK))
+ /*
+ * Initially assume that we will have to wait for the locking
+ * transaction(s) to finish. We check various cases below in which
+ * this can be turned off.
+ */
+ require_sleep = true;
+ if (mode == LockTupleKeyShare)
{
/*
- * Acquiring sharelock when there's at least one sharelocker
- * already. We need not wait for him/them to complete.
- */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
-
- /*
- * Make sure it's still a shared lock, else start over. (It's OK
- * if the ownership of the shared lock has changed, though.)
+ * If we're requesting KeyShare, and there's no update present, we
+ * don't need to wait. Even if there is an update, we can still
+ * continue if the key hasn't been modified.
+ *
+ * However, if there are updates, we need to walk the update chain
+ * to mark future versions of the row as locked, too. That way, if
+ * somebody deletes that future version, we're protected against
+ * the key going away. This locking of future versions could block
+ * momentarily, if a concurrent transaction is deleting a key; or
+ * it could return a value to the effect that the transaction
+ * deleting the key has already committed. So we do this before
+ * re-locking the buffer; otherwise this would be prone to
+ * deadlocks.
+ *
+ * Note that the TID we're locking was grabbed before we unlocked
+ * the buffer. For it to change while we're not looking, the other
+ * properties we're testing for below after re-locking the buffer
+ * would also change, in which case we would restart this loop
+ * above.
*/
- if (!(tuple->t_data->t_infomask & HEAP_XMAX_SHARED_LOCK))
- goto l3;
- }
- else if (infomask & HEAP_XMAX_IS_MULTI)
- {
- /* wait for multixact to end */
- if (nowait)
+ if (!(infomask2 & HEAP_KEYS_UPDATED))
{
- if (!ConditionalMultiXactIdWait((MultiXactId) xwait))
- ereport(ERROR,
- (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
- errmsg("could not obtain lock on row in relation \"%s\"",
- RelationGetRelationName(relation))));
- }
- else
- MultiXactIdWait((MultiXactId) xwait);
+ bool updated;
+
+ updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
+
+ /*
+ * If there are updates, follow the update chain; bail out
+ * if that cannot be done.
+ */
+ if (follow_updates && updated)
+ {
+ HTSU_Result res;
+
+ res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ GetCurrentTransactionId(),
+ mode);
+ if (res != HeapTupleMayBeUpdated)
+ {
+ result = res;
+ /* recovery code expects to have buffer lock held */
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ goto failed;
+ }
+ }
+
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /*
+ * Make sure it's still an appropriate lock, else start over.
+ * Also, if it wasn't updated before we released the lock, but
+ * is updated now, we start over too; the reason is that we now
+ * need to follow the update chain to lock the new versions.
+ */
+ if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
+ ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
+ !updated))
+ goto l3;
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ /* Things look okay, so we can skip sleeping */
+ require_sleep = false;
+ /*
+ * Note we allow Xmax to change here; other updaters/lockers
+ * could have modified it before we grabbed the buffer lock.
+ * However, this is not a problem, because with the recheck we
+ * just did we ensure that they still don't conflict with the
+ * lock we want.
+ */
+ }
+ }
+ else if (mode == LockTupleShare)
+ {
/*
- * If xwait had just locked the tuple then some other xact could
- * update this tuple before we get to this point. Check for xmax
- * change, and start over if so.
+ * If we're requesting Share, we can similarly avoid sleeping if
+ * there's no update and no exclusive lock present.
*/
- if (!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data),
- xwait))
- goto l3;
+ if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
+ !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
+ {
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ /*
+ * Make sure it's still an appropriate lock, else start over.
+ * See above about allowing xmax to change.
+ */
+ if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
+ HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
+ goto l3;
+ require_sleep = false;
+ }
+ }
+ else if (mode == LockTupleNoKeyExclusive)
+ {
/*
- * You might think the multixact is necessarily done here, but not
- * so: it could have surviving members, namely our own xact or
- * other subxacts of this backend. It is legal for us to lock the
- * tuple in either case, however. We don't bother changing the
- * on-disk hint bits since we are about to overwrite the xmax
- * altogether.
+ * If we're requesting NoKeyExclusive, we might also be able to
+ * avoid sleeping; just ensure that there's no other lock type than
+ * KeyShare. Note that this is a bit more involved than just
+ * checking hint bits -- we need to expand the multixact to figure
+ * out lock modes for each one (unless there was only one such
+ * locker).
*/
+ if (infomask & HEAP_XMAX_IS_MULTI)
+ {
+ int nmembers;
+ MultiXactMember *members;
+
+ /*
+ * We don't need to allow old multixacts here; if that had been
+ * the case, HeapTupleSatisfiesUpdate would have returned
+ * MayBeUpdated and we wouldn't be here.
+ */
+ nmembers = GetMultiXactIdMembers(xwait, &members, false);
+
+ if (nmembers <= 0)
+ {
+ /*
+ * No need to keep the previous xmax here. This is unlikely
+ * to happen.
+ */
+ require_sleep = false;
+ }
+ else
+ {
+ int i;
+ bool allowed = true;
+
+ for (i = 0; i < nmembers; i++)
+ {
+ if (members[i].status != MultiXactStatusForKeyShare)
+ {
+ allowed = false;
+ break;
+ }
+ }
+ if (allowed)
+ {
+ /*
+ * if the xmax changed under us in the meantime, start
+ * over.
+ */
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ if (!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ xwait))
+ {
+ pfree(members);
+ goto l3;
+ }
+ /* otherwise, we're good */
+ require_sleep = false;
+ }
+
+ pfree(members);
+ }
+ }
+ else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
+ {
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* if the xmax changed in the meantime, start over */
+ if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ xwait))
+ goto l3;
+ /* otherwise, we're good */
+ require_sleep = false;
+ }
}
- else
+
+ /*
+ * By here, we either have already acquired the buffer exclusive lock,
+ * or we must wait for the locking transaction or multixact; so below
+ * we ensure that we grab buffer lock after the sleep.
+ */
+
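
The no-sleep fast paths above can be summarized as a small matrix (illustration, derived from the branches just shown):

    /*
     *   requested mode           sleep skipped when existing xmax is ...
     *   ---------------------    -------------------------------------------
     *   LockTupleKeyShare        anything, provided no key column was
     *                            updated (update chain walked first if
     *                            there was an update)
     *   LockTupleShare           lockers only, none of them exclusive
     *   LockTupleNoKeyExclusive  key-share lockers only
     *   LockTupleExclusive       never: it conflicts with every other mode
     */
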
+ if (require_sleep)
{
- /* wait for regular transaction to end */
- if (nowait)
+ if (infomask & HEAP_XMAX_IS_MULTI)
{
- if (!ConditionalXactLockTableWait(xwait))
- ereport(ERROR,
- (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
- errmsg("could not obtain lock on row in relation \"%s\"",
- RelationGetRelationName(relation))));
+ MultiXactStatus status = get_mxact_status_for_lock(mode, false);
+
+ /* We only ever lock tuples, never update them */
+ if (status >= MultiXactStatusNoKeyUpdate)
+ elog(ERROR, "invalid lock mode in heap_lock_tuple");
+
+ /* wait for multixact to end */
+ if (nowait)
+ {
+ if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
+ status, NULL, infomask))
+ ereport(ERROR,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("could not obtain lock on row in relation \"%s\"",
+ RelationGetRelationName(relation))));
+ }
+ else
+ MultiXactIdWait((MultiXactId) xwait, status, NULL, infomask);
+
+ /* if there are updates, follow the update chain */
+ if (follow_updates &&
+ !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
+ {
+ HTSU_Result res;
+
+ res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ GetCurrentTransactionId(),
+ mode);
+ if (res != HeapTupleMayBeUpdated)
+ {
+ result = res;
+ /* recovery code expects to have buffer lock held */
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ goto failed;
+ }
+ }
+
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /*
+ * If xwait had just locked the tuple then some other xact
+ * could update this tuple before we get to this point. Check
+ * for xmax change, and start over if so.
+ */
+ if (!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ xwait))
+ goto l3;
+
+ /*
+ * Of course, the multixact might not be done here: if we're
+ * requesting a light lock mode, other transactions with light
+ * locks could still be alive, as well as locks owned by our
+ * own xact or other subxacts of this backend. We need to
+ * preserve the surviving MultiXact members. Note that it
+ * isn't absolutely necessary in the latter case, but doing so
+ * is simpler.
+ */
}
else
- XactLockTableWait(xwait);
+ {
+ /* wait for regular transaction to end */
+ if (nowait)
+ {
+ if (!ConditionalXactLockTableWait(xwait))
+ ereport(ERROR,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("could not obtain lock on row in relation \"%s\"",
+ RelationGetRelationName(relation))));
+ }
+ else
+ XactLockTableWait(xwait);
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ /* if there are updates, follow the update chain */
+ if (follow_updates &&
+ !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
+ {
+ HTSU_Result res;
+
+ res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ GetCurrentTransactionId(),
+ mode);
+ if (res != HeapTupleMayBeUpdated)
+ {
+ result = res;
+ /* recovery code expects to have buffer lock held */
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ goto failed;
+ }
+ }
- /*
- * xwait is done, but if xwait had just locked the tuple then some
- * other xact could update this tuple before we get to this point.
- * Check for xmax change, and start over if so.
- */
- if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data),
- xwait))
- goto l3;
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
- /* Otherwise check if it committed or aborted */
- UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
+ /*
+ * xwait is done, but if xwait had just locked the tuple then
+ * some other xact could update this tuple before we get to
+ * this point. Check for xmax change, and start over if so.
+ */
+ if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ xwait))
+ goto l3;
+
+ /*
+ * Otherwise check if it committed or aborted. Note we cannot
+ * be here if the tuple was only locked by somebody who didn't
+ * conflict with us; that should have been handled above. So
+ * that transaction must necessarily be gone by now.
+ */
+ UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
+ }
}
+ /* By here, we're certain that we hold buffer exclusive lock again */
+
/*
* We may lock if previous xmax aborted, or if it committed but only
- * locked the tuple without updating it. The case where we didn't
- * wait because we are joining an existing shared lock is correctly
- * handled, too.
+ * locked the tuple without updating it; or if we didn't have to wait
+ * at all for whatever reason.
*/
- if (tuple->t_data->t_infomask & (HEAP_XMAX_INVALID |
- HEAP_IS_LOCKED))
+ if (!require_sleep ||
+ (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
+ HeapTupleHeaderIsOnlyLocked(tuple->t_data))
result = HeapTupleMayBeUpdated;
else
result = HeapTupleUpdated;
}
+failed:
if (result != HeapTupleMayBeUpdated)
{
Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
hufd->ctid = tuple->t_data->t_ctid;
- hufd->xmax = HeapTupleHeaderGetXmax(tuple->t_data);
+ hufd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
if (result == HeapTupleSelfUpdated)
hufd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
else
hufd->cmax = 0; /* for lack of an InvalidCommandId value */
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
if (have_tuple_lock)
- UnlockTuple(relation, tid, tuple_lock_type);
+ UnlockTupleTuplock(relation, tid, mode);
return result;
}
+ xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
+ old_infomask = tuple->t_data->t_infomask;
+
/*
* We might already hold the desired lock (or stronger), possibly under a
* different subtransaction of the current top transaction. If so, there
@@ -3709,113 +4336,48 @@ l3:
* for cases where it is a plain TransactionId.
*
* Note in particular that this covers the case where we already hold
- * exclusive lock on the tuple and the caller only wants shared lock. It
- * would certainly not do to give up the exclusive lock.
+ * exclusive lock on the tuple and the caller only wants key share or share
+ * lock. It would certainly not do to give up the exclusive lock.
*/
- xmax = HeapTupleHeaderGetXmax(tuple->t_data);
- old_infomask = tuple->t_data->t_infomask;
-
if (!(old_infomask & (HEAP_XMAX_INVALID |
HEAP_XMAX_COMMITTED |
HEAP_XMAX_IS_MULTI)) &&
- (mode == LockTupleShared ?
- (old_infomask & HEAP_IS_LOCKED) :
- (old_infomask & HEAP_XMAX_EXCL_LOCK)) &&
+ (mode == LockTupleKeyShare ?
+ (HEAP_XMAX_IS_KEYSHR_LOCKED(old_infomask) ||
+ HEAP_XMAX_IS_SHR_LOCKED(old_infomask) ||
+ HEAP_XMAX_IS_EXCL_LOCKED(old_infomask)) :
+ mode == LockTupleShare ?
+ (HEAP_XMAX_IS_SHR_LOCKED(old_infomask) ||
+ HEAP_XMAX_IS_EXCL_LOCKED(old_infomask)) :
+ (HEAP_XMAX_IS_EXCL_LOCKED(old_infomask))) &&
TransactionIdIsCurrentTransactionId(xmax))
{
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
/* Probably can't hold tuple lock here, but may as well check */
if (have_tuple_lock)
- UnlockTuple(relation, tid, tuple_lock_type);
+ UnlockTupleTuplock(relation, tid, mode);
return HeapTupleMayBeUpdated;
}
/*
+ * If this is the first possibly-multixact-able operation in the
+ * current transaction, set my per-backend OldestMemberMXactId setting.
+ * We can be certain that the transaction will never become a member of
+ * any older MultiXactIds than that. (We have to do this even if we
+ * end up just using our own TransactionId below, since some other
+ * backend could incorporate our XID into a MultiXact immediately
+ * afterwards.)
+ */
+ MultiXactIdSetOldestMember();
+
+ /*
* Compute the new xmax and infomask to store into the tuple. Note we do
* not modify the tuple just yet, because that would leave it in the wrong
* state if multixact.c elogs.
*/
- xid = GetCurrentTransactionId();
-
- new_infomask = old_infomask & ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
-
- if (mode == LockTupleShared)
- {
- /*
- * If this is the first acquisition of a shared lock in the current
- * transaction, set my per-backend OldestMemberMXactId setting. We can
- * be certain that the transaction will never become a member of any
- * older MultiXactIds than that. (We have to do this even if we end
- * up just using our own TransactionId below, since some other backend
- * could incorporate our XID into a MultiXact immediately afterwards.)
- */
- MultiXactIdSetOldestMember();
-
- new_infomask |= HEAP_XMAX_SHARED_LOCK;
-
- /*
- * Check to see if we need a MultiXactId because there are multiple
- * lockers.
- *
- * HeapTupleSatisfiesUpdate will have set the HEAP_XMAX_INVALID bit if
- * the xmax was a MultiXactId but it was not running anymore. There is
- * a race condition, which is that the MultiXactId may have finished
- * since then, but that uncommon case is handled within
- * MultiXactIdExpand.
- *
- * There is a similar race condition possible when the old xmax was a
- * regular TransactionId. We test TransactionIdIsInProgress again
- * just to narrow the window, but it's still possible to end up
- * creating an unnecessary MultiXactId. Fortunately this is harmless.
- */
- if (!(old_infomask & (HEAP_XMAX_INVALID | HEAP_XMAX_COMMITTED)))
- {
- if (old_infomask & HEAP_XMAX_IS_MULTI)
- {
- /*
- * If the XMAX is already a MultiXactId, then we need to
- * expand it to include our own TransactionId.
- */
- xid = MultiXactIdExpand((MultiXactId) xmax, xid);
- new_infomask |= HEAP_XMAX_IS_MULTI;
- }
- else if (TransactionIdIsInProgress(xmax))
- {
- /*
- * If the XMAX is a valid TransactionId, then we need to
- * create a new MultiXactId that includes both the old locker
- * and our own TransactionId.
- */
- xid = MultiXactIdCreate(xmax, xid);
- new_infomask |= HEAP_XMAX_IS_MULTI;
- }
- else
- {
- /*
- * Can get here iff HeapTupleSatisfiesUpdate saw the old xmax
- * as running, but it finished before
- * TransactionIdIsInProgress() got to run. Treat it like
- * there's no locker in the tuple.
- */
- }
- }
- else
- {
- /*
- * There was no previous locker, so just insert our own
- * TransactionId.
- */
- }
- }
- else
- {
- /* We want an exclusive lock on the tuple */
- new_infomask |= HEAP_XMAX_EXCL_LOCK;
- }
+ compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
+ GetCurrentTransactionId(), mode, false,
+ &xid, &new_infomask, &new_infomask2);
START_CRIT_SECTION();
@@ -3823,13 +4385,29 @@ l3:
* Store transaction information of xact locking the tuple.
*
* Note: Cmax is meaningless in this context, so don't set it; this avoids
- * possibly generating a useless combo CID.
+ * possibly generating a useless combo CID. Moreover, if we're locking a
+ * previously updated tuple, it's important to preserve the Cmax.
+ *
+ * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
+ * we would break the HOT chain.
*/
- tuple->t_data->t_infomask = new_infomask;
- HeapTupleHeaderClearHotUpdated(tuple->t_data);
+ tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
+ tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ tuple->t_data->t_infomask |= new_infomask;
+ tuple->t_data->t_infomask2 |= new_infomask2;
+ if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
+ HeapTupleHeaderClearHotUpdated(tuple->t_data);
HeapTupleHeaderSetXmax(tuple->t_data, xid);
- /* Make sure there is no forward chain link in t_ctid */
- tuple->t_data->t_ctid = *tid;
+
+ /*
+ * Make sure there is no forward chain link in t_ctid. Note that in the
+ * cases where the tuple has been updated, we must not overwrite t_ctid,
+ * because it was set by the updater. Moreover, if the tuple has been
+ * updated, we need to follow the update chain to lock the new versions
+ * of the tuple as well.
+ */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
+ tuple->t_data->t_ctid = *tid;
MarkBufferDirty(*buffer);
@@ -3854,8 +4432,8 @@ l3:
xlrec.target.node = relation->rd_node;
xlrec.target.tid = tuple->t_self;
xlrec.locking_xid = xid;
- xlrec.xid_is_mxact = ((new_infomask & HEAP_XMAX_IS_MULTI) != 0);
- xlrec.shared_lock = (mode == LockTupleShared);
+ xlrec.infobits_set = compute_infobits(new_infomask,
+ tuple->t_data->t_infomask2);
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapLock;
rdata[0].buffer = InvalidBuffer;
@@ -3887,8 +4465,469 @@ l3:
* release the lmgr tuple lock, if we had it.
*/
if (have_tuple_lock)
- UnlockTuple(relation, tid, tuple_lock_type);
+ UnlockTupleTuplock(relation, tid, mode);
+
+ return HeapTupleMayBeUpdated;
+}
+
+
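For orientation, a minimal hypothetical caller of the reworked heap_lock_tuple; the exact signature, notably the follow_updates flag and the HeapUpdateFailureData out-parameter, is assumed from elsewhere in this patch rather than shown in this hunk:

	/* Sketch only: acquire the new weakest row mark, FOR KEY SHARE. */
	HeapTupleData tup;
	Buffer		buf;
	HeapUpdateFailureData hufd;		/* assumed failure-info struct */
	HTSU_Result res;

	ItemPointerCopy(tid, &tup.t_self);
	res = heap_lock_tuple(relation, &tup,
						  GetCurrentCommandId(true),
						  LockTupleKeyShare,
						  false,		/* nowait */
						  true,			/* follow_updates */
						  &buf, &hufd);
	if (res == HeapTupleMayBeUpdated)
		ReleaseBuffer(buf);				/* tuple is now key-share locked */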
+/*
+ * Given an original set of Xmax and infomask, and a transaction (identified by
+ * add_to_xmax) acquiring a new lock of some mode, compute the new Xmax and
+ * corresponding infomasks to use on the tuple.
+ *
+ * Note that this might have side effects such as creating a new MultiXactId.
+ *
+ * Most callers will have called HeapTupleSatisfiesUpdate before this function;
+ * that will have set the HEAP_XMAX_INVALID bit if the xmax was a MultiXactId
+ * but it was not running anymore. There is a race condition, which is that the
+ * MultiXactId may have finished since then, but that uncommon case is handled
+ * either here, or within MultiXactIdExpand.
+ *
+ * There is a similar race condition possible when the old xmax was a regular
+ * TransactionId. We test TransactionIdIsInProgress again just to narrow the
+ * window, but it's still possible to end up creating an unnecessary
+ * MultiXactId. Fortunately this is harmless.
+ */
+static void
+compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask,
+ uint16 old_infomask2, TransactionId add_to_xmax,
+ LockTupleMode mode, bool is_update,
+ TransactionId *result_xmax, uint16 *result_infomask,
+ uint16 *result_infomask2)
+{
+ TransactionId new_xmax;
+ uint16 new_infomask,
+ new_infomask2;
+
+l5:
+ new_infomask = 0;
+ new_infomask2 = 0;
+ if (old_infomask & HEAP_XMAX_INVALID)
+ {
+ /*
+ * No previous locker; we just insert our own TransactionId.
+ */
+ if (is_update)
+ {
+ new_xmax = add_to_xmax;
+ if (mode == LockTupleExclusive)
+ new_infomask2 |= HEAP_KEYS_UPDATED;
+ }
+ else
+ {
+ new_infomask |= HEAP_XMAX_LOCK_ONLY;
+ switch (mode)
+ {
+ case LockTupleKeyShare:
+ new_xmax = add_to_xmax;
+ new_infomask |= HEAP_XMAX_KEYSHR_LOCK;
+ break;
+ case LockTupleShare:
+ new_xmax = add_to_xmax;
+ new_infomask |= HEAP_XMAX_SHR_LOCK;
+ break;
+ case LockTupleNoKeyExclusive:
+ new_xmax = add_to_xmax;
+ new_infomask |= HEAP_XMAX_EXCL_LOCK;
+ break;
+ case LockTupleExclusive:
+ new_xmax = add_to_xmax;
+ new_infomask |= HEAP_XMAX_EXCL_LOCK;
+ new_infomask2 |= HEAP_KEYS_UPDATED;
+ break;
+ default:
+ new_xmax = InvalidTransactionId; /* silence compiler */
+ elog(ERROR, "invalid lock mode");
+ }
+ }
+ }
+ else if (old_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ MultiXactStatus new_status;
+
+ /*
+ * Currently we don't allow XMAX_COMMITTED to be set for multis,
+ * so cross-check.
+ */
+ Assert(!(old_infomask & HEAP_XMAX_COMMITTED));
+
+ /*
+ * A multixact together with LOCK_ONLY set but neither lock bit set
+ * (i.e. a pg_upgraded share-locked tuple) cannot possibly be running
+ * anymore. This check is critical for databases upgraded by
+ * pg_upgrade; both MultiXactIdIsRunning and MultiXactIdExpand assume
+ * that such multis are never passed.
+ */
+ if (!(old_infomask & HEAP_LOCK_MASK) &&
+ HEAP_XMAX_IS_LOCKED_ONLY(old_infomask))
+ {
+ old_infomask &= ~HEAP_XMAX_IS_MULTI;
+ old_infomask |= HEAP_XMAX_INVALID;
+ goto l5;
+ }
+
+ /*
+ * If the XMAX is already a MultiXactId, then we need to expand it to
+ * include add_to_xmax; but if all the members were lockers and are all
+ * gone, we can do away with the IS_MULTI bit and just set add_to_xmax
+ * as the only locker/updater. If all lockers are gone and we have an
+ * updater that aborted, we can also do without a multi.
+ *
+ * The cost of doing GetMultiXactIdMembers would be paid by
+ * MultiXactIdExpand if we didn't do this, so this check does not incur
+ * extra work anyhow.
+ */
+ if (!MultiXactIdIsRunning(xmax))
+ {
+ if (HEAP_XMAX_IS_LOCKED_ONLY(old_infomask) ||
+ TransactionIdDidAbort(MultiXactIdGetUpdateXid(xmax,
+ old_infomask)))
+ {
+ /*
+ * Reset these bits and restart; otherwise fall through to
+ * create a new multi below.
+ */
+ old_infomask &= ~HEAP_XMAX_IS_MULTI;
+ old_infomask |= HEAP_XMAX_INVALID;
+ goto l5;
+ }
+ }
+
+ new_status = get_mxact_status_for_lock(mode, is_update);
+
+ new_xmax = MultiXactIdExpand((MultiXactId) xmax, add_to_xmax,
+ new_status);
+ GetMultiXactIdHintBits(new_xmax, &new_infomask, &new_infomask2);
+ }
+ else if (old_infomask & HEAP_XMAX_COMMITTED)
+ {
+ /*
+ * It's a committed update, so we need to preserve it as the updater
+ * of the tuple.
+ */
+ MultiXactStatus status;
+ MultiXactStatus new_status;
+
+ if (old_infomask2 & HEAP_KEYS_UPDATED)
+ status = MultiXactStatusUpdate;
+ else
+ status = MultiXactStatusNoKeyUpdate;
+
+ new_status = get_mxact_status_for_lock(mode, is_update);
+ /*
+ * Since it's not running, it's obviously impossible for the old
+ * updater to be identical to the current one, so we need not check
+ * for that case as we do in the block above.
+ */
+ new_xmax = MultiXactIdCreate(xmax, status, add_to_xmax, new_status);
+ GetMultiXactIdHintBits(new_xmax, &new_infomask, &new_infomask2);
+ }
+ else if (TransactionIdIsInProgress(xmax))
+ {
+ /*
+ * If the XMAX is a valid, in-progress TransactionId, then we need to
+ * create a new MultiXactId that includes both the old locker or
+ * updater and our own TransactionId.
+ */
+ MultiXactStatus status;
+ MultiXactStatus new_status;
+
+ if (HEAP_XMAX_IS_LOCKED_ONLY(old_infomask))
+ {
+ if (HEAP_XMAX_IS_KEYSHR_LOCKED(old_infomask))
+ status = MultiXactStatusForKeyShare;
+ else if (HEAP_XMAX_IS_SHR_LOCKED(old_infomask))
+ status = MultiXactStatusForShare;
+ else if (HEAP_XMAX_IS_EXCL_LOCKED(old_infomask))
+ {
+ if (old_infomask2 & HEAP_KEYS_UPDATED)
+ status = MultiXactStatusForUpdate;
+ else
+ status = MultiXactStatusForNoKeyUpdate;
+ }
+ else
+ {
+ /*
+ * LOCK_ONLY can be present alone only when a page has been
+ * upgraded by pg_upgrade. But in that case,
+ * TransactionIdIsInProgress() should have returned false, so we
+ * assume the tuple is no longer locked.
+ */
+ elog(WARNING, "LOCK_ONLY found for Xid in progress %u", xmax);
+ old_infomask |= HEAP_XMAX_INVALID;
+ old_infomask &= ~HEAP_XMAX_LOCK_ONLY;
+ goto l5;
+ }
+ }
+ else
+ {
+ /* it's an update, but which kind? */
+ if (old_infomask2 & HEAP_KEYS_UPDATED)
+ status = MultiXactStatusUpdate;
+ else
+ status = MultiXactStatusNoKeyUpdate;
+ }
+
+ new_status = get_mxact_status_for_lock(mode, is_update);
+
+ /*
+ * If the existing locker is the same transaction that we are adding,
+ * and its lock mode is identical to or weaker than the new one, we
+ * can act as though there is no existing lock, so set XMAX_INVALID
+ * and restart.
+ */
+ if (xmax == add_to_xmax)
+ {
+ LockTupleMode old_mode = TUPLOCK_from_mxstatus(status);
+ bool old_isupd = ISUPDATE_from_mxstatus(status);
+
+ /*
+ * We can do this if the new LockTupleMode is at least as strong
+ * as the old one; also, if the old Xmax was an update, the new
+ * operation must be an update too, whereas if it was a mere
+ * lock, either kind is acceptable.
+ */
+ if ((mode >= old_mode) && (is_update || !old_isupd))
+ {
+ /*
+ * Note that the old infomask might contain other dirty bits.
+ * However, since the new infomask is built up from zero and we
+ * only set what's minimally necessary, and since the case that
+ * checks HEAP_XMAX_INVALID is the very first one above, there is
+ * no need for extra cleanup of the infomask here.
+ */
+ old_infomask |= HEAP_XMAX_INVALID;
+ goto l5;
+ }
+ }
+ new_xmax = MultiXactIdCreate(xmax, status, add_to_xmax, new_status);
+ GetMultiXactIdHintBits(new_xmax, &new_infomask, &new_infomask2);
+ }
+ else if (!HEAP_XMAX_IS_LOCKED_ONLY(old_infomask) &&
+ TransactionIdDidCommit(xmax))
+ {
+ /*
+ * It's a committed update, so we need to preserve it as the updater
+ * of the tuple.
+ */
+ MultiXactStatus status;
+ MultiXactStatus new_status;
+
+ if (old_infomask2 & HEAP_KEYS_UPDATED)
+ status = MultiXactStatusUpdate;
+ else
+ status = MultiXactStatusNoKeyUpdate;
+
+ new_status = get_mxact_status_for_lock(mode, is_update);
+ /*
+ * Since it's not running, it's obviously impossible for the old
+ * updater to be identical to the current one, so we need not check
+ * for that case as we do in the block above.
+ */
+ new_xmax = MultiXactIdCreate(xmax, status, add_to_xmax, new_status);
+ GetMultiXactIdHintBits(new_xmax, &new_infomask, &new_infomask2);
+ }
+ else
+ {
+ /*
+ * Can get here iff the locking/updating transaction was running when
+ * the infomask was extracted from the tuple, but finished before
+ * TransactionIdIsInProgress got to run. Deal with it as if there was
+ * no locker at all in the first place.
+ */
+ old_infomask |= HEAP_XMAX_INVALID;
+ goto l5;
+ }
+
+ *result_infomask = new_infomask;
+ *result_infomask2 = new_infomask2;
+ *result_xmax = new_xmax;
+}
+
+
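To make the mode-to-infomask mapping of the HEAP_XMAX_INVALID branch above concrete, here is a small standalone sketch. The bit values are placeholders mirroring what htup_details.h defines around this patch; treat them as assumptions, not as part of the diff:

	#include <stdint.h>
	#include <stdio.h>

	/* Placeholder values; the authoritative definitions live in htup_details.h. */
	#define HEAP_XMAX_KEYSHR_LOCK	0x0010
	#define HEAP_XMAX_EXCL_LOCK		0x0040
	#define HEAP_XMAX_LOCK_ONLY		0x0080
	#define HEAP_XMAX_SHR_LOCK		(HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_KEYSHR_LOCK)

	int
	main(void)
	{
		/* Same order as LockTupleMode in the patch. */
		const char *modes[] = {"KeyShare", "Share", "NoKeyExclusive", "Exclusive"};
		uint16_t	masks[] = {
			HEAP_XMAX_LOCK_ONLY | HEAP_XMAX_KEYSHR_LOCK,
			HEAP_XMAX_LOCK_ONLY | HEAP_XMAX_SHR_LOCK,
			HEAP_XMAX_LOCK_ONLY | HEAP_XMAX_EXCL_LOCK,
			/* Exclusive additionally sets HEAP_KEYS_UPDATED in t_infomask2 */
			HEAP_XMAX_LOCK_ONLY | HEAP_XMAX_EXCL_LOCK,
		};

		for (int i = 0; i < 4; i++)
			printf("LockTuple%-14s -> t_infomask 0x%04x\n",
				   modes[i], (unsigned) masks[i]);
		return 0;
	}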
+/*
+ * Recursive part of heap_lock_updated_tuple
+ *
+ * Fetch the tuple pointed to by tid in rel, and mark it as locked by the given
+ * xid with the given mode; if this tuple has in turn been updated, follow
+ * the chain to lock the new version as well.
+ */
+static HTSU_Result
+heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid,
+ LockTupleMode mode)
+{
+ ItemPointerData tupid;
+ HeapTupleData mytup;
+ Buffer buf;
+ uint16 new_infomask,
+ new_infomask2,
+ old_infomask;
+ TransactionId xmax,
+ new_xmax;
+
+ ItemPointerCopy(tid, &tupid);
+ for (;;)
+ {
+ new_infomask = 0;
+ new_xmax = InvalidTransactionId;
+ ItemPointerCopy(&tupid, &(mytup.t_self));
+
+ if (!heap_fetch(rel, SnapshotAny, &mytup, &buf, false, NULL))
+ elog(ERROR, "unable to fetch updated version of tuple");
+
+l4:
+ CHECK_FOR_INTERRUPTS();
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+ old_infomask = mytup.t_data->t_infomask;
+ xmax = HeapTupleHeaderGetRawXmax(mytup.t_data);
+
+ /*
+ * If this tuple is updated and the key has been modified (or deleted),
+ * what we do depends on the status of the updating transaction: if
+ * it's live, we sleep until it finishes; if it has committed, we have
+ * to fail (i.e. return HeapTupleUpdated); if it aborted, we ignore it.
+ * For updates that didn't touch the key, we can just plough ahead.
+ */
+ if (!(old_infomask & HEAP_XMAX_INVALID) &&
+ (mytup.t_data->t_infomask2 & HEAP_KEYS_UPDATED))
+ {
+ TransactionId update_xid;
+
+ /*
+ * Note: we *must* check TransactionIdIsInProgress before
+ * TransactionIdDidAbort/Commit; see comment at top of tqual.c for
+ * an explanation.
+ */
+ update_xid = HeapTupleHeaderGetUpdateXid(mytup.t_data);
+ if (TransactionIdIsCurrentTransactionId(update_xid))
+ {
+ UnlockReleaseBuffer(buf);
+ return HeapTupleSelfUpdated;
+ }
+ else if (TransactionIdIsInProgress(update_xid))
+ {
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ /* No LockTupleTuplock here -- see heap_lock_updated_tuple */
+ XactLockTableWait(update_xid);
+ goto l4;
+ }
+ else if (TransactionIdDidAbort(update_xid))
+ ; /* okay to proceed */
+ else if (TransactionIdDidCommit(update_xid))
+ {
+ UnlockReleaseBuffer(buf);
+ return HeapTupleUpdated;
+ }
+ }
+
+ /* compute the new Xmax and infomask values for the tuple ... */
+ compute_new_xmax_infomask(xmax, old_infomask, mytup.t_data->t_infomask2,
+ xid, mode, false,
+ &new_xmax, &new_infomask, &new_infomask2);
+
+ START_CRIT_SECTION();
+
+ /* ... and set them */
+ HeapTupleHeaderSetXmax(mytup.t_data, new_xmax);
+ mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS;
+ mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ mytup.t_data->t_infomask |= new_infomask;
+ mytup.t_data->t_infomask2 |= new_infomask2;
+
+ MarkBufferDirty(buf);
+
+ /* XLOG stuff */
+ if (RelationNeedsWAL(rel))
+ {
+ xl_heap_lock_updated xlrec;
+ XLogRecPtr recptr;
+ XLogRecData rdata[2];
+ Page page = BufferGetPage(buf);
+
+ xlrec.target.node = rel->rd_node;
+ xlrec.target.tid = mytup.t_self;
+ xlrec.xmax = new_xmax;
+ xlrec.infobits_set = compute_infobits(new_infomask, new_infomask2);
+
+ rdata[0].data = (char *) &xlrec;
+ rdata[0].len = SizeOfHeapLockUpdated;
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].next = &(rdata[1]);
+
+ rdata[1].data = NULL;
+ rdata[1].len = 0;
+ rdata[1].buffer = buf;
+ rdata[1].buffer_std = true;
+ rdata[1].next = NULL;
+
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED, rdata);
+
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+
+ END_CRIT_SECTION();
+
+ /* if we find the end of the update chain, we're done */
+ if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
+ ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||
+ HeapTupleHeaderIsOnlyLocked(mytup.t_data))
+ {
+ UnlockReleaseBuffer(buf);
+ return HeapTupleMayBeUpdated;
+ }
+
+ /* tail recursion */
+ ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid);
+ UnlockReleaseBuffer(buf);
+ }
+}
+
+/*
+ * heap_lock_updated_tuple
+ * Follow update chain when locking an updated tuple, acquiring locks (row
+ * marks) on the updated versions.
+ *
+ * The initial tuple is assumed to be already locked.
+ *
+ * This function doesn't check visibility; it just unconditionally marks the
+ * tuple(s) as locked. If any tuple in the updated chain is being deleted
+ * concurrently (or updated with the key being modified), sleep until the
+ * transaction doing it is finished.
+ *
+ * Note that we don't acquire heavyweight tuple locks on the tuples we walk
+ * when we have to wait for other transactions to release them, as opposed to
+ * what heap_lock_tuple does. The reason is that having more than one
+ * transaction walking the chain is probably uncommon enough that risk of
+ * starvation is not likely: one of the preconditions for being here is that
+ * the snapshot in use predates the update that created this tuple (because we
+ * started at an earlier version of the tuple), but at the same time such a
+ * transaction cannot be using repeatable read or serializable isolation
+ * levels, because that would lead to a serializability failure.
+ */
+static HTSU_Result
+heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid,
+ TransactionId xid, LockTupleMode mode)
+{
+ if (!ItemPointerEquals(&tuple->t_self, ctid))
+ {
+ /*
+ * If this is the first possibly-multixact-able operation in the
+ * current transaction, set my per-backend OldestMemberMXactId setting.
+ * We can be certain that the transaction will never become a member of
+ * any older MultiXactIds than that. (We have to do this even if we
+ * end up just using our own TransactionId below, since some other
+ * backend could incorporate our XID into a MultiXact immediately
+ * afterwards.)
+ */
+ MultiXactIdSetOldestMember();
+
+ return heap_lock_updated_tuple_rec(rel, ctid, xid, mode);
+ }
+
+ /* nothing to lock */
return HeapTupleMayBeUpdated;
}
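A sketch of the expected call site inside heap_lock_tuple (that code is outside this hunk, so the local names follow_updates, infomask and ctid are assumptions):

	/* Sketch, not part of the patch: after finding that the tuple we are
	 * locking was updated by another transaction, follow the update chain
	 * so later versions get row marks too. */
	if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
	{
		HTSU_Result res;

		res = heap_lock_updated_tuple(relation, tuple, &ctid,
									  GetCurrentTransactionId(), mode);
		if (res != HeapTupleMayBeUpdated)
			return res;			/* e.g. HeapTupleUpdated on conflict */
	}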
@@ -4010,6 +5049,9 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
* because this function is applied during WAL recovery, when we don't have
* access to any such state, and can't depend on the hint bits to be set.)
*
+ * Similarly, cutoff_multi must be less than or equal to the smallest
+ * MultiXactId used by any transaction currently open.
+ *
* If the tuple is in a shared buffer, caller must hold an exclusive lock on
* that buffer.
*
@@ -4023,7 +5065,8 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
* infomask bits.
*/
bool
-heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid)
+heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ MultiXactId cutoff_multi)
{
bool changed = false;
TransactionId xid;
@@ -4043,43 +5086,29 @@ heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid)
changed = true;
}
- if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+ /*
+ * Note that this code handles IS_MULTI Xmax values, too, but only to mark
+ * the tuple frozen if the updating Xid in the mxact is below the freeze
+ * cutoff; it doesn't remove dead members of a very old multixact.
+ */
+ xid = HeapTupleHeaderGetRawXmax(tuple);
+ if (TransactionIdIsNormal(xid) &&
+ (((!(tuple->t_infomask & HEAP_XMAX_IS_MULTI) &&
+ TransactionIdPrecedes(xid, cutoff_xid))) ||
+ MultiXactIdPrecedes(xid, cutoff_multi)))
{
- xid = HeapTupleHeaderGetXmax(tuple);
- if (TransactionIdIsNormal(xid) &&
- TransactionIdPrecedes(xid, cutoff_xid))
- {
- HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
+ HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
- /*
- * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED
- * + LOCKED. Normalize to INVALID just to be sure no one gets
- * confused.
- */
- tuple->t_infomask &= ~HEAP_XMAX_COMMITTED;
- tuple->t_infomask |= HEAP_XMAX_INVALID;
- HeapTupleHeaderClearHotUpdated(tuple);
- changed = true;
- }
- }
- else
- {
- /*----------
- * XXX perhaps someday we should zero out very old MultiXactIds here?
- *
- * The only way a stale MultiXactId could pose a problem is if a
- * tuple, having once been multiply-share-locked, is not touched by
- * any vacuum or attempted lock or deletion for just over 4G MultiXact
- * creations, and then in the probably-narrow window where its xmax
- * is again a live MultiXactId, someone tries to lock or delete it.
- * Even then, another share-lock attempt would work fine. An
- * exclusive-lock or delete attempt would face unexpected delay, or
- * in the very worst case get a deadlock error. This seems an
- * extremely low-probability scenario with minimal downside even if
- * it does happen, so for now we don't do the extra bookkeeping that
- * would be needed to clean out MultiXactIds.
- *----------
+ /*
+ * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED
+ * + LOCKED. Normalize to INVALID just to be sure no one gets
+ * confused. Also get rid of the HEAP_KEYS_UPDATED bit.
*/
+ tuple->t_infomask &= ~HEAP_XMAX_BITS;
+ tuple->t_infomask |= HEAP_XMAX_INVALID;
+ HeapTupleHeaderClearHotUpdated(tuple);
+ tuple->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ changed = true;
}
/*
@@ -4116,17 +5145,268 @@ heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid)
}
/*
+ * For a given MultiXactId, return the hint bits that should be set in the
+ * tuple's infomask.
+ *
+ * Normally this should be called for a multixact that was just created, and
+ * so is still in our local cache, making the GetMultiXactIdMembers call fast.
+ */
+static void
+GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
+ uint16 *new_infomask2)
+{
+ int nmembers;
+ MultiXactMember *members;
+ int i;
+ uint16 bits = HEAP_XMAX_IS_MULTI;
+ uint16 bits2 = 0;
+ bool has_update = false;
+
+ /*
+ * We only use this for multis we just created, so they cannot be
+ * values from before a pg_upgrade.
+ */
+ nmembers = GetMultiXactIdMembers(multi, &members, false);
+
+ for (i = 0; i < nmembers; i++)
+ {
+ switch (members[i].status)
+ {
+ case MultiXactStatusForKeyShare:
+ bits |= HEAP_XMAX_KEYSHR_LOCK;
+ break;
+ case MultiXactStatusForShare:
+ bits |= HEAP_XMAX_SHR_LOCK;
+ break;
+ case MultiXactStatusForNoKeyUpdate:
+ bits |= HEAP_XMAX_EXCL_LOCK;
+ break;
+ case MultiXactStatusForUpdate:
+ bits |= HEAP_XMAX_EXCL_LOCK;
+ bits2 |= HEAP_KEYS_UPDATED;
+ break;
+ case MultiXactStatusNoKeyUpdate:
+ bits |= HEAP_XMAX_EXCL_LOCK;
+ has_update = true;
+ break;
+ case MultiXactStatusUpdate:
+ bits |= HEAP_XMAX_EXCL_LOCK;
+ bits2 |= HEAP_KEYS_UPDATED;
+ has_update = true;
+ break;
+ }
+ }
+ if (!has_update)
+ bits |= HEAP_XMAX_LOCK_ONLY;
+
+ if (nmembers > 0)
+ pfree(members);
+
+ *new_infomask = bits;
+ *new_infomask2 = bits2;
+}
+
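For example, assuming a multixact whose members are two ForKeyShare lockers, the computed hint bits would be t_infomask = HEAP_XMAX_IS_MULTI | HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY (there is no updater, hence LOCK_ONLY), with nothing set in t_infomask2.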
+/*
+ * MultiXactIdGetUpdateXid
+ *
+ * Given a multixact Xmax and corresponding infomask, which does not have the
+ * HEAP_XMAX_LOCK_ONLY bit set, obtain and return the Xid of the updating
+ * transaction.
+ */
+static TransactionId
+MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask)
+{
+ TransactionId update_xact = InvalidTransactionId;
+ MultiXactMember *members;
+ int nmembers;
+
+ Assert(!(t_infomask & HEAP_XMAX_LOCK_ONLY));
+ Assert(t_infomask & HEAP_XMAX_IS_MULTI);
+
+ /*
+ * Since we know the LOCK_ONLY bit is not set, this cannot be a
+ * multi from pre-pg_upgrade.
+ */
+ nmembers = GetMultiXactIdMembers(xmax, &members, false);
+
+ if (nmembers > 0)
+ {
+ int i;
+
+ for (i = 0; i < nmembers; i++)
+ {
+ /* Ignore lockers */
+ if (members[i].status == MultiXactStatusForKeyShare ||
+ members[i].status == MultiXactStatusForShare ||
+ members[i].status == MultiXactStatusForNoKeyUpdate ||
+ members[i].status == MultiXactStatusForUpdate)
+ continue;
+
+ /* ignore aborted transactions */
+ if (TransactionIdDidAbort(members[i].xid))
+ continue;
+ /* there should be at most one non-aborted updater */
+ Assert(update_xact == InvalidTransactionId);
+ Assert(members[i].status == MultiXactStatusNoKeyUpdate ||
+ members[i].status == MultiXactStatusUpdate);
+ update_xact = members[i].xid;
+#ifndef USE_ASSERT_CHECKING
+ /*
+ * In a non-assert build, stop at the first updater found; in an
+ * assert-enabled build, walk the whole array to ensure there's no
+ * other updater.
+ */
+ break;
+#endif
+ }
+
+ pfree(members);
+ }
+
+ return update_xact;
+}
+
+/*
+ * HeapTupleGetUpdateXid
+ * As above, but use a HeapTupleHeader
+ *
+ * See also HeapTupleHeaderGetUpdateXid, which can be used without previously
+ * checking the hint bits.
+ */
+TransactionId
+HeapTupleGetUpdateXid(HeapTupleHeader tuple)
+{
+ return MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(tuple),
+ tuple->t_infomask);
+}
+
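The comment above points at HeapTupleHeaderGetUpdateXid, whose definition lives in the companion header change rather than in this file. Presumably it checks the hint bits first and only falls back to the multixact member lookup when that is unavoidable; a sketch, assuming the macro's shape:

	/* Sketch of the assumed companion macro (htup_details.h change, not in
	 * this hunk): do the expensive member lookup only for a valid,
	 * non-lock-only multixact Xmax; otherwise the raw Xmax is the updater. */
	#define HeapTupleHeaderGetUpdateXid(tup) \
	( \
		(!((tup)->t_infomask & HEAP_XMAX_INVALID) && \
		 ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \
		 !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \
			HeapTupleGetUpdateXid(tup) \
		: \
			HeapTupleHeaderGetRawXmax(tup) \
	)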
+/*
+ * Do_MultiXactIdWait
+ * Actual implementation for the two functions below.
+ *
+ * We do this by sleeping on each member using XactLockTableWait. Any
+ * members that belong to the current backend are *not* waited for, however;
+ * this would not merely be useless but would lead to Assert failure inside
+ * XactLockTableWait. By the time this returns, it is certain that all
+ * transactions *of other backends* that were members of the MultiXactId
+ * that conflict with the requested status are dead (and no new ones can have
+ * been added, since it is not legal to add members to an existing
+ * MultiXactId).
+ *
+ * But by the time we finish sleeping, someone else may have changed the Xmax
+ * of the containing tuple, so the caller needs to iterate on us somehow.
+ *
+ * Note that in case we return false, the number of remaining members is
+ * not to be trusted.
+ */
+static bool
+Do_MultiXactIdWait(MultiXactId multi, MultiXactStatus status,
+ int *remaining, uint16 infomask, bool nowait)
+{
+ bool allow_old;
+ bool result = true;
+ MultiXactMember *members;
+ int nmembers;
+ int remain = 0;
+
+ allow_old = !(infomask & HEAP_LOCK_MASK) && HEAP_XMAX_IS_LOCKED_ONLY(infomask);
+ nmembers = GetMultiXactIdMembers(multi, &members, allow_old);
+
+ if (nmembers >= 0)
+ {
+ int i;
+
+ for (i = 0; i < nmembers; i++)
+ {
+ TransactionId memxid = members[i].xid;
+ MultiXactStatus memstatus = members[i].status;
+
+ if (TransactionIdIsCurrentTransactionId(memxid))
+ {
+ remain++;
+ continue;
+ }
+
+ if (!DoLockModesConflict(LOCKMODE_from_mxstatus(memstatus),
+ LOCKMODE_from_mxstatus(status)))
+ {
+ if (remaining && TransactionIdIsInProgress(memxid))
+ remain++;
+ continue;
+ }
+
+ /*
+ * This member conflicts with our multi, so we have to sleep (or
+ * return failure, if asked to avoid waiting).
+ */
+ if (nowait)
+ {
+ result = ConditionalXactLockTableWait(memxid);
+ if (!result)
+ break;
+ }
+ else
+ XactLockTableWait(memxid);
+ }
+
+ pfree(members);
+ }
+
+ if (remaining)
+ *remaining = remain;
+
+ return result;
+}
+
+/*
+ * MultiXactIdWait
+ * Sleep on a MultiXactId.
+ *
+ * By the time we finish sleeping, someone else may have changed the Xmax
+ * of the containing tuple, so the caller needs to iterate on us somehow.
+ *
+ * We return (in *remaining, if not NULL) the number of members that are still
+ * running, including any (non-aborted) subtransactions of our own transaction.
+ */
+static void
+MultiXactIdWait(MultiXactId multi, MultiXactStatus status,
+ int *remaining, uint16 infomask)
+{
+ Do_MultiXactIdWait(multi, status, remaining, infomask, false);
+}
+
+/*
+ * ConditionalMultiXactIdWait
+ * As above, but only lock if we can get the lock without blocking.
+ *
+ * By the time we finish sleeping, someone else may have changed the Xmax
+ * of the containing tuple, so the caller needs to iterate on us somehow.
+ *
+ * Returns true if the multixact is now all gone, false if some member
+ * transactions might still be running.
+ *
+ * We return (in *remaining, if not NULL) the number of members that are still
+ * running, including any (non-aborted) subtransactions of our own transaction.
+ */
+static bool
+ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
+ int *remaining, uint16 infomask)
+{
+ return Do_MultiXactIdWait(multi, status, remaining, infomask, true);
+}
+
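A sketch of how a caller might choose between the two wrappers (the surrounding variable names, such as xwait and nowait, are illustrative rather than taken from this hunk):

	/* Sketch only: wait for a conflicting multixact, honoring NOWAIT. */
	if (nowait)
	{
		if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
										status, NULL, infomask))
			ereport(ERROR,
					(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
					 errmsg("could not obtain lock on row in relation \"%s\"",
							RelationGetRelationName(relation))));
	}
	else
		MultiXactIdWait((MultiXactId) xwait, status, NULL, infomask);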
+/*
* heap_tuple_needs_freeze
*
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
- * are older than the specified cutoff XID. If so, return TRUE.
+ * are older than the specified cutoff XID or MultiXactId. If so, return TRUE.
*
* It doesn't matter whether the tuple is alive or dead, we are checking
* to see if a tuple needs to be removed or frozen to avoid wraparound.
*/
bool
heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
- Buffer buf)
+ MultiXactId cutoff_multi, Buffer buf)
{
TransactionId xid;
@@ -4135,12 +5415,23 @@ heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
TransactionIdPrecedes(xid, cutoff_xid))
return true;
- if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+ if (!(tuple->t_infomask & HEAP_XMAX_INVALID))
{
- xid = HeapTupleHeaderGetXmax(tuple);
- if (TransactionIdIsNormal(xid) &&
- TransactionIdPrecedes(xid, cutoff_xid))
- return true;
+ if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+ {
+ xid = HeapTupleHeaderGetRawXmax(tuple);
+ if (TransactionIdIsNormal(xid) &&
+ TransactionIdPrecedes(xid, cutoff_xid))
+ return true;
+ }
+ else
+ {
+ MultiXactId multi;
+
+ multi = HeapTupleHeaderGetRawXmax(tuple);
+ if (MultiXactIdPrecedes(multi, cutoff_multi))
+ return true;
+ }
}
if (tuple->t_infomask & HEAP_MOVED)
@@ -4231,7 +5522,7 @@ HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
TransactionId *latestRemovedXid)
{
TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
- TransactionId xmax = HeapTupleHeaderGetXmax(tuple);
+ TransactionId xmax = HeapTupleHeaderGetUpdateXid(tuple);
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
if (tuple->t_infomask & HEAP_MOVED)
@@ -4387,7 +5678,7 @@ log_heap_clean(Relation reln, Buffer buffer,
*/
XLogRecPtr
log_heap_freeze(Relation reln, Buffer buffer,
- TransactionId cutoff_xid,
+ TransactionId cutoff_xid, MultiXactId cutoff_multi,
OffsetNumber *offsets, int offcnt)
{
xl_heap_freeze xlrec;
@@ -4402,6 +5693,7 @@ log_heap_freeze(Relation reln, Buffer buffer,
xlrec.node = reln->rd_node;
xlrec.block = BufferGetBlockNumber(buffer);
xlrec.cutoff_xid = cutoff_xid;
+ xlrec.cutoff_multi = cutoff_multi;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapFreeze;
@@ -4463,8 +5755,8 @@ log_heap_visible(RelFileNode rnode, BlockNumber block, Buffer vm_buffer,
* have modified the buffer(s) and marked them dirty.
*/
static XLogRecPtr
-log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
- Buffer newbuf, HeapTuple newtup,
+log_heap_update(Relation reln, Buffer oldbuf,
+ Buffer newbuf, HeapTuple oldtup, HeapTuple newtup,
bool all_visible_cleared, bool new_all_visible_cleared)
{
xl_heap_update xlrec;
@@ -4483,7 +5775,11 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
info = XLOG_HEAP_UPDATE;
xlrec.target.node = reln->rd_node;
- xlrec.target.tid = from;
+ xlrec.target.tid = oldtup->t_self;
+ xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+ xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2);
+ xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
xlrec.all_visible_cleared = all_visible_cleared;
xlrec.newtid = newtup->t_self;
xlrec.new_all_visible_cleared = new_all_visible_cleared;
@@ -4748,6 +6044,7 @@ heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
{
xl_heap_freeze *xlrec = (xl_heap_freeze *) XLogRecGetData(record);
TransactionId cutoff_xid = xlrec->cutoff_xid;
+ MultiXactId cutoff_multi = xlrec->cutoff_multi;
Buffer buffer;
Page page;
@@ -4790,7 +6087,7 @@ heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
ItemId lp = PageGetItemId(page, *offsets);
HeapTupleHeader tuple = (HeapTupleHeader) PageGetItem(page, lp);
- (void) heap_freeze_tuple(tuple, cutoff_xid);
+ (void) heap_freeze_tuple(tuple, cutoff_xid, cutoff_multi);
offsets++;
}
}
@@ -4937,6 +6234,33 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(buffer);
}
+/*
+ * Given an "infobits" field from an XLog record, set the correct bits in the
+ * given infomask and infomask2 for the tuple touched by the record.
+ *
+ * (This is the reverse of compute_infobits.)
+ */
+static void
+fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
+{
+ *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
+ HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
+ *infomask2 &= ~HEAP_KEYS_UPDATED;
+
+ if (infobits & XLHL_XMAX_IS_MULTI)
+ *infomask |= HEAP_XMAX_IS_MULTI;
+ if (infobits & XLHL_XMAX_LOCK_ONLY)
+ *infomask |= HEAP_XMAX_LOCK_ONLY;
+ if (infobits & XLHL_XMAX_EXCL_LOCK)
+ *infomask |= HEAP_XMAX_EXCL_LOCK;
+ /* note HEAP_XMAX_SHR_LOCK isn't considered here */
+ if (infobits & XLHL_XMAX_KEYSHR_LOCK)
+ *infomask |= HEAP_XMAX_KEYSHR_LOCK;
+
+ if (infobits & XLHL_KEYS_UPDATED)
+ *infomask2 |= HEAP_KEYS_UPDATED;
+}
+
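fix_infomask_from_infobits above is described as the reverse of compute_infobits, which none of the hunks shown here include. Given the bits handled above, the forward mapping is presumably the obvious one; a sketch, reusing the XLHL_* flags from the WAL-record definitions:

	/* Sketch of the assumed forward mapping; the authoritative version is
	 * defined earlier in this file, outside the hunks shown here. */
	static uint8
	compute_infobits(uint16 infomask, uint16 infomask2)
	{
		return
			((infomask & HEAP_XMAX_IS_MULTI) != 0 ? XLHL_XMAX_IS_MULTI : 0) |
			((infomask & HEAP_XMAX_LOCK_ONLY) != 0 ? XLHL_XMAX_LOCK_ONLY : 0) |
			((infomask & HEAP_XMAX_EXCL_LOCK) != 0 ? XLHL_XMAX_EXCL_LOCK : 0) |
			/* note we ignore HEAP_XMAX_SHR_LOCK here */
			((infomask & HEAP_XMAX_KEYSHR_LOCK) != 0 ? XLHL_XMAX_KEYSHR_LOCK : 0) |
			((infomask2 & HEAP_KEYS_UPDATED) != 0 ? XLHL_KEYS_UPDATED : 0);
	}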
static void
heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
{
@@ -4992,13 +6316,12 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
htup = (HeapTupleHeader) PageGetItem(page, lp);
- htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
HeapTupleHeaderClearHotUpdated(htup);
- HeapTupleHeaderSetXmax(htup, record->xl_xid);
+ fix_infomask_from_infobits(xlrec->infobits_set,
+ &htup->t_infomask, &htup->t_infomask2);
+ HeapTupleHeaderSetXmax(htup, xlrec->xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Mark the page as a candidate for pruning */
@@ -5368,16 +6691,15 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
htup = (HeapTupleHeader) PageGetItem(page, lp);
- htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
if (hot_update)
HeapTupleHeaderSetHotUpdated(htup);
else
HeapTupleHeaderClearHotUpdated(htup);
- HeapTupleHeaderSetXmax(htup, record->xl_xid);
+ fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
+ &htup->t_infomask2);
+ HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Set forward chain link in t_ctid */
htup->t_ctid = xlrec->newtid;
@@ -5484,6 +6806,7 @@ newsame:;
HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
+ HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = xlrec->newtid;
@@ -5564,17 +6887,8 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
htup = (HeapTupleHeader) PageGetItem(page, lp);
- htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
- if (xlrec->xid_is_mxact)
- htup->t_infomask |= HEAP_XMAX_IS_MULTI;
- if (xlrec->shared_lock)
- htup->t_infomask |= HEAP_XMAX_SHARED_LOCK;
- else
- htup->t_infomask |= HEAP_XMAX_EXCL_LOCK;
+ fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
+ &htup->t_infomask2);
HeapTupleHeaderClearHotUpdated(htup);
HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
@@ -5587,6 +6901,56 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
}
static void
+heap_xlog_lock_updated(XLogRecPtr lsn, XLogRecord *record)
+{
+ xl_heap_lock_updated *xlrec =
+ (xl_heap_lock_updated *) XLogRecGetData(record);
+ Buffer buffer;
+ Page page;
+ OffsetNumber offnum;
+ ItemId lp = NULL;
+ HeapTupleHeader htup;
+
+ /* If we have a full-page image, restore it and we're done */
+ if (record->xl_info & XLR_BKP_BLOCK(0))
+ {
+ (void) RestoreBackupBlock(lsn, record, 0, false, false);
+ return;
+ }
+
+ buffer = XLogReadBuffer(xlrec->target.node,
+ ItemPointerGetBlockNumber(&(xlrec->target.tid)),
+ false);
+ if (!BufferIsValid(buffer))
+ return;
+ page = (Page) BufferGetPage(buffer);
+
+ if (lsn <= PageGetLSN(page)) /* changes are applied */
+ {
+ UnlockReleaseBuffer(buffer);
+ return;
+ }
+
+ offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ if (PageGetMaxOffsetNumber(page) >= offnum)
+ lp = PageGetItemId(page, offnum);
+
+ if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ elog(PANIC, "heap_xlog_lock_updated: invalid lp");
+
+ htup = (HeapTupleHeader) PageGetItem(page, lp);
+
+ fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
+ &htup->t_infomask2);
+ HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
+ MarkBufferDirty(buffer);
+ UnlockReleaseBuffer(buffer);
+}
+
+static void
heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
{
xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
@@ -5702,6 +7066,9 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record)
case XLOG_HEAP2_MULTI_INSERT:
heap_xlog_multi_insert(lsn, record);
break;
+ case XLOG_HEAP2_LOCK_UPDATED:
+ heap_xlog_lock_updated(lsn, record);
+ break;
default:
elog(PANIC, "heap2_redo: unknown op code %u", info);
}