diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2005-08-25 22:07:21 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2005-08-25 22:07:21 +0000 |
commit | 2ba05400242c31604c14a1d0276abeabc7d7ed77 (patch) | |
tree | 05477f5f8d56e3f28da62e254349d486ab035dc3 /src/backend/access | |
parent | c9e69d71bda9a4e1b41bd2c08a2a57b70b97b334 (diff) | |
download | postgresql-2ba05400242c31604c14a1d0276abeabc7d7ed77.tar.gz postgresql-2ba05400242c31604c14a1d0276abeabc7d7ed77.zip |
Back-patch fixes for problems with VACUUM destroying t_ctid chains too soon,
and with insufficient paranoia in code that follows t_ctid links.
This patch covers the 7.4 branch.
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/heap/heapam.c | 251 |
1 file changed, 172 insertions, 79 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index fe44363a929..5cb3a1b4b95 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.157.2.1 2004/10/13 22:22:02 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.157.2.2 2005/08/25 22:07:11 tgl Exp $ * * * INTERFACE ROUTINES @@ -961,90 +961,136 @@ heap_fetch(Relation relation, /* * heap_get_latest_tid - get the latest tid of a specified tuple + * + * Actually, this gets the latest version that is visible according to + * the passed snapshot. You can pass SnapshotDirty to get the very latest, + * possibly uncommitted version. + * + * *tid is both an input and an output parameter: it is updated to + * show the latest version of the row. Note that it will not be changed + * if no version of the row passes the snapshot test. */ -ItemPointer +void heap_get_latest_tid(Relation relation, Snapshot snapshot, ItemPointer tid) { - ItemId lp = NULL; - Buffer buffer; - PageHeader dp; - OffsetNumber offnum; - HeapTupleData tp; - HeapTupleHeader t_data; + BlockNumber blk; ItemPointerData ctid; - bool invalidBlock, - linkend, - valid; + TransactionId priorXmax; + + /* this is to avoid Assert failures on bad input */ + if (!ItemPointerIsValid(tid)) + return; /* - * get the buffer from the relation descriptor Note that this does a - * buffer pin. + * Since this can be called with user-supplied TID, don't trust the + * input too much. (RelationGetNumberOfBlocks is an expensive check, + * so we don't check t_ctid links again this way. Note that it would + * not do to call it just once and save the result, either.) 
*/ - - buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); - - if (!BufferIsValid(buffer)) - elog(ERROR, "ReadBuffer(\"%s\", %lu) failed", - RelationGetRelationName(relation), - (unsigned long) ItemPointerGetBlockNumber(tid)); - - LockBuffer(buffer, BUFFER_LOCK_SHARE); + blk = ItemPointerGetBlockNumber(tid); + if (blk >= RelationGetNumberOfBlocks(relation)) + elog(ERROR, "block number %u is out of range for relation \"%s\"", + blk, RelationGetRelationName(relation)); /* - * get the item line pointer corresponding to the requested tid + * Loop to chase down t_ctid links. At top of loop, ctid is the + * tuple we need to examine, and *tid is the TID we will return if + * ctid turns out to be bogus. + * + * Note that we will loop until we reach the end of the t_ctid chain. + * Depending on the snapshot passed, there might be at most one visible + * version of the row, but we don't try to optimize for that. */ - dp = (PageHeader) BufferGetPage(buffer); - offnum = ItemPointerGetOffsetNumber(tid); - invalidBlock = true; - if (!PageIsNew(dp)) - { - lp = PageGetItemId(dp, offnum); - if (ItemIdIsUsed(lp)) - invalidBlock = false; - } - if (invalidBlock) + ctid = *tid; + priorXmax = InvalidTransactionId; /* cannot check first XMIN */ + for (;;) { - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return NULL; - } + Buffer buffer; + PageHeader dp; + OffsetNumber offnum; + ItemId lp; + HeapTupleData tp; + bool valid; - /* - * more sanity checks - */ + /* + * Read, pin, and lock the page. 
+ */ + buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid)); - tp.t_datamcxt = NULL; - t_data = tp.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); - tp.t_len = ItemIdGetLength(lp); - tp.t_self = *tid; - ctid = tp.t_data->t_ctid; + if (!BufferIsValid(buffer)) + elog(ERROR, "ReadBuffer(\"%s\", %lu) failed", + RelationGetRelationName(relation), + (unsigned long) ItemPointerGetBlockNumber(&ctid)); - /* - * check time qualification of tid - */ + LockBuffer(buffer, BUFFER_LOCK_SHARE); + dp = (PageHeader) BufferGetPage(buffer); - HeapTupleSatisfies(&tp, relation, buffer, dp, - snapshot, 0, (ScanKey) NULL, valid); + /* + * Check for bogus item number. This is not treated as an error + * condition because it can happen while following a t_ctid link. + * We just assume that the prior tid is OK and return it unchanged. + */ + offnum = ItemPointerGetOffsetNumber(&ctid); + if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp)) + { + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); + break; + } + lp = PageGetItemId(dp, offnum); + if (!ItemIdIsUsed(lp)) + { + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); + break; + } - linkend = true; - if ((t_data->t_infomask & HEAP_XMIN_COMMITTED) != 0 && - !ItemPointerEquals(tid, &ctid)) - linkend = false; + /* OK to access the tuple */ + tp.t_self = ctid; + tp.t_datamcxt = NULL; + tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp); + tp.t_len = ItemIdGetLength(lp); - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); + /* + * After following a t_ctid link, we might arrive at an unrelated + * tuple. Check for XMIN match. 
+ */ + if (TransactionIdIsValid(priorXmax) && + !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data))) + { + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); + break; + } - if (!valid) - { - if (linkend) - return NULL; - heap_get_latest_tid(relation, snapshot, &ctid); - *tid = ctid; - } + /* + * Check time qualification of tuple; if visible, set it as the new + * result candidate. + */ + HeapTupleSatisfies(&tp, relation, buffer, dp, + snapshot, 0, NULL, valid); + if (valid) + *tid = ctid; - return tid; + /* + * If there's a valid t_ctid link, follow it, else we're done. + */ + if ((tp.t_data->t_infomask & (HEAP_XMAX_INVALID | + HEAP_MARKED_FOR_UPDATE)) || + ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) + { + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); + break; + } + + ctid = tp.t_data->t_ctid; + priorXmax = HeapTupleHeaderGetXmax(tp.t_data); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); + } /* end of loop */ } /* @@ -1210,6 +1256,7 @@ simple_heap_insert(Relation relation, HeapTuple tup) * relation - table to be modified * tid - TID of tuple to be deleted * ctid - output parameter, used only for failure case (see below) + * update_xmax - output parameter, used only for failure case (see below) * cid - delete command ID to use in verifying tuple visibility * crosscheck - if not SnapshotAny, also check tuple against this * wait - true if should wait for any conflicting update to commit/abort @@ -1217,13 +1264,17 @@ simple_heap_insert(Relation relation, HeapTuple tup) * Normal, successful return value is HeapTupleMayBeUpdated, which * actually means we did delete it. Failure return codes are * HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated - * (the last only possible if wait == false). On a failure return, - * *ctid is set to the ctid link of the target tuple (possibly a later - * version of the row). + * (the last only possible if wait == false). 
+ * + * In the failure cases, the routine returns the tuple's t_ctid and t_xmax. + * If t_ctid is the same as tid, the tuple was deleted; if different, the + * tuple was updated, and t_ctid is the location of the replacement tuple. + * (t_xmax is needed to verify that the replacement tuple matches.) */ int heap_delete(Relation relation, ItemPointer tid, - ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait) + ItemPointer ctid, TransactionId *update_xmax, + CommandId cid, Snapshot crosscheck, bool wait) { ItemId lp; HeapTupleData tp; @@ -1243,11 +1294,11 @@ heap_delete(Relation relation, ItemPointer tid, dp = (PageHeader) BufferGetPage(buffer); lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid)); + tp.t_datamcxt = NULL; - tp.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); + tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; - tp.t_tableOid = relation->rd_id; l1: sv_infomask = tp.t_data->t_infomask; @@ -1307,7 +1358,9 @@ l1: Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated || result == HeapTupleBeingUpdated); + Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID)); *ctid = tp.t_data->t_ctid; + *update_xmax = HeapTupleHeaderGetXmax(tp.t_data); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); return result; @@ -1397,11 +1450,12 @@ l1: void simple_heap_delete(Relation relation, ItemPointer tid) { - ItemPointerData ctid; int result; + ItemPointerData update_ctid; + TransactionId update_xmax; result = heap_delete(relation, tid, - &ctid, + &update_ctid, &update_xmax, GetCurrentCommandId(), SnapshotAny, true /* wait for commit */); switch (result) @@ -1435,6 +1489,7 @@ simple_heap_delete(Relation relation, ItemPointer tid) * otid - TID of old tuple to be replaced * newtup - newly constructed tuple data to store * ctid - output parameter, used only for failure case (see below) + * update_xmax - output parameter, used only for failure case (see below) * cid - update 
command ID to use in verifying old tuple visibility * crosscheck - if not SnapshotAny, also check old tuple against this * wait - true if should wait for any conflicting update to commit/abort @@ -1442,15 +1497,20 @@ simple_heap_delete(Relation relation, ItemPointer tid) * Normal, successful return value is HeapTupleMayBeUpdated, which * actually means we *did* update it. Failure return codes are * HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated - * (the last only possible if wait == false). On a failure return, - * *ctid is set to the ctid link of the old tuple (possibly a later - * version of the row). + * (the last only possible if wait == false). + * * On success, newtup->t_self is set to the TID where the new tuple * was inserted. + * + * In the failure cases, the routine returns the tuple's t_ctid and t_xmax. + * If t_ctid is the same as otid, the tuple was deleted; if different, the + * tuple was updated, and t_ctid is the location of the replacement tuple. + * (t_xmax is needed to verify that the replacement tuple matches.) 
*/ int heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, - ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait) + ItemPointer ctid, TransactionId *update_xmax, + CommandId cid, Snapshot crosscheck, bool wait) { ItemId lp; HeapTupleData oldtup; @@ -1544,7 +1604,9 @@ l2: Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated || result == HeapTupleBeingUpdated); + Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)); *ctid = oldtup.t_data->t_ctid; + *update_xmax = HeapTupleHeaderGetXmax(oldtup.t_data); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); return result; @@ -1764,11 +1826,12 @@ l2: void simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) { - ItemPointerData ctid; int result; + ItemPointerData update_ctid; + TransactionId update_xmax; result = heap_update(relation, otid, tup, - &ctid, + &update_ctid, &update_xmax, GetCurrentCommandId(), SnapshotAny, true /* wait for commit */); switch (result) @@ -1794,9 +1857,34 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) /* * heap_mark4update - mark a tuple for update + * + * Note that this acquires a buffer pin, which the caller must release. 
+ * + * Input parameters: + * relation: relation containing tuple (caller must hold suitable lock) + * tuple->t_self: TID of tuple to lock (rest of struct need not be valid) + * cid: current command ID (used for visibility test, and stored into + * tuple's cmax if lock is successful) + * + * Output parameters: + * *tuple: all fields filled in + * *buffer: set to buffer holding tuple (pinned but not locked at exit) + * *ctid: set to tuple's t_ctid, but only in failure cases + * *update_xmax: set to tuple's xmax, but only in failure cases + * + * Function result may be: + * HeapTupleMayBeUpdated: lock was successfully acquired + * HeapTupleSelfUpdated: lock failed because tuple updated by self + * HeapTupleUpdated: lock failed because tuple updated by other xact + * + * In the failure cases, the routine returns the tuple's t_ctid and t_xmax. + * If t_ctid is the same as t_self, the tuple was deleted; if different, the + * tuple was updated, and t_ctid is the location of the replacement tuple. + * (t_xmax is needed to verify that the replacement tuple matches.) 
*/ int heap_mark4update(Relation relation, HeapTuple tuple, Buffer *buffer, + ItemPointer ctid, TransactionId *update_xmax, CommandId cid) { ItemPointer tid = &(tuple->t_self); @@ -1814,9 +1902,12 @@ heap_mark4update(Relation relation, HeapTuple tuple, Buffer *buffer, dp = (PageHeader) BufferGetPage(*buffer); lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid)); + Assert(ItemIdIsUsed(lp)); + tuple->t_datamcxt = NULL; tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); tuple->t_len = ItemIdGetLength(lp); + tuple->t_tableOid = RelationGetRelid(relation); l3: sv_infomask = tuple->t_data->t_infomask; @@ -1863,7 +1954,9 @@ l3: if (result != HeapTupleMayBeUpdated) { Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated); - tuple->t_self = tuple->t_data->t_ctid; + Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)); + *ctid = tuple->t_data->t_ctid; + *update_xmax = HeapTupleHeaderGetXmax(tuple->t_data); LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); return result; } |