about | summary | refs | log | tree | commit | diff
path: root/src/backend/access
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2005-08-25 22:07:21 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2005-08-25 22:07:21 +0000
commit 2ba05400242c31604c14a1d0276abeabc7d7ed77 (patch)
tree 05477f5f8d56e3f28da62e254349d486ab035dc3 /src/backend/access
parent c9e69d71bda9a4e1b41bd2c08a2a57b70b97b334 (diff)
download postgresql-2ba05400242c31604c14a1d0276abeabc7d7ed77.tar.gz
postgresql-2ba05400242c31604c14a1d0276abeabc7d7ed77.zip
Back-patch fixes for problems with VACUUM destroying t_ctid chains too soon,
and with insufficient paranoia in code that follows t_ctid links. This patch covers the 7.4 branch.
Diffstat (limited to 'src/backend/access')
-rw-r--r-- src/backend/access/heap/heapam.c 251
1 files changed, 172 insertions, 79 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index fe44363a929..5cb3a1b4b95 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.157.2.1 2004/10/13 22:22:02 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.157.2.2 2005/08/25 22:07:11 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -961,90 +961,136 @@ heap_fetch(Relation relation,
/*
* heap_get_latest_tid - get the latest tid of a specified tuple
+ *
+ * Actually, this gets the latest version that is visible according to
+ * the passed snapshot. You can pass SnapshotDirty to get the very latest,
+ * possibly uncommitted version.
+ *
+ * *tid is both an input and an output parameter: it is updated to
+ * show the latest version of the row. Note that it will not be changed
+ * if no version of the row passes the snapshot test.
*/
-ItemPointer
+void
heap_get_latest_tid(Relation relation,
Snapshot snapshot,
ItemPointer tid)
{
- ItemId lp = NULL;
- Buffer buffer;
- PageHeader dp;
- OffsetNumber offnum;
- HeapTupleData tp;
- HeapTupleHeader t_data;
+ BlockNumber blk;
ItemPointerData ctid;
- bool invalidBlock,
- linkend,
- valid;
+ TransactionId priorXmax;
+
+ /* this is to avoid Assert failures on bad input */
+ if (!ItemPointerIsValid(tid))
+ return;
/*
- * get the buffer from the relation descriptor Note that this does a
- * buffer pin.
+ * Since this can be called with user-supplied TID, don't trust the
+ * input too much. (RelationGetNumberOfBlocks is an expensive check,
+ * so we don't check t_ctid links again this way. Note that it would
+ * not do to call it just once and save the result, either.)
*/
-
- buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
-
- if (!BufferIsValid(buffer))
- elog(ERROR, "ReadBuffer(\"%s\", %lu) failed",
- RelationGetRelationName(relation),
- (unsigned long) ItemPointerGetBlockNumber(tid));
-
- LockBuffer(buffer, BUFFER_LOCK_SHARE);
+ blk = ItemPointerGetBlockNumber(tid);
+ if (blk >= RelationGetNumberOfBlocks(relation))
+ elog(ERROR, "block number %u is out of range for relation \"%s\"",
+ blk, RelationGetRelationName(relation));
/*
- * get the item line pointer corresponding to the requested tid
+ * Loop to chase down t_ctid links. At top of loop, ctid is the
+ * tuple we need to examine, and *tid is the TID we will return if
+ * ctid turns out to be bogus.
+ *
+ * Note that we will loop until we reach the end of the t_ctid chain.
+ * Depending on the snapshot passed, there might be at most one visible
+ * version of the row, but we don't try to optimize for that.
*/
- dp = (PageHeader) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(tid);
- invalidBlock = true;
- if (!PageIsNew(dp))
- {
- lp = PageGetItemId(dp, offnum);
- if (ItemIdIsUsed(lp))
- invalidBlock = false;
- }
- if (invalidBlock)
+ ctid = *tid;
+ priorXmax = InvalidTransactionId; /* cannot check first XMIN */
+ for (;;)
{
- LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- ReleaseBuffer(buffer);
- return NULL;
- }
+ Buffer buffer;
+ PageHeader dp;
+ OffsetNumber offnum;
+ ItemId lp;
+ HeapTupleData tp;
+ bool valid;
- /*
- * more sanity checks
- */
+ /*
+ * Read, pin, and lock the page.
+ */
+ buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
- tp.t_datamcxt = NULL;
- t_data = tp.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
- tp.t_len = ItemIdGetLength(lp);
- tp.t_self = *tid;
- ctid = tp.t_data->t_ctid;
+ if (!BufferIsValid(buffer))
+ elog(ERROR, "ReadBuffer(\"%s\", %lu) failed",
+ RelationGetRelationName(relation),
+ (unsigned long) ItemPointerGetBlockNumber(&ctid));
- /*
- * check time qualification of tid
- */
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
+ dp = (PageHeader) BufferGetPage(buffer);
- HeapTupleSatisfies(&tp, relation, buffer, dp,
- snapshot, 0, (ScanKey) NULL, valid);
+ /*
+ * Check for bogus item number. This is not treated as an error
+ * condition because it can happen while following a t_ctid link.
+ * We just assume that the prior tid is OK and return it unchanged.
+ */
+ offnum = ItemPointerGetOffsetNumber(&ctid);
+ if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
+ {
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ break;
+ }
+ lp = PageGetItemId(dp, offnum);
+ if (!ItemIdIsUsed(lp))
+ {
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ break;
+ }
- linkend = true;
- if ((t_data->t_infomask & HEAP_XMIN_COMMITTED) != 0 &&
- !ItemPointerEquals(tid, &ctid))
- linkend = false;
+ /* OK to access the tuple */
+ tp.t_self = ctid;
+ tp.t_datamcxt = NULL;
+ tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
+ tp.t_len = ItemIdGetLength(lp);
- LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- ReleaseBuffer(buffer);
+ /*
+ * After following a t_ctid link, we might arrive at an unrelated
+ * tuple. Check for XMIN match.
+ */
+ if (TransactionIdIsValid(priorXmax) &&
+ !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
+ {
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ break;
+ }
- if (!valid)
- {
- if (linkend)
- return NULL;
- heap_get_latest_tid(relation, snapshot, &ctid);
- *tid = ctid;
- }
+ /*
+ * Check time qualification of tuple; if visible, set it as the new
+ * result candidate.
+ */
+ HeapTupleSatisfies(&tp, relation, buffer, dp,
+ snapshot, 0, NULL, valid);
+ if (valid)
+ *tid = ctid;
- return tid;
+ /*
+ * If there's a valid t_ctid link, follow it, else we're done.
+ */
+ if ((tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
+ HEAP_MARKED_FOR_UPDATE)) ||
+ ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
+ {
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ break;
+ }
+
+ ctid = tp.t_data->t_ctid;
+ priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ } /* end of loop */
}
/*
@@ -1210,6 +1256,7 @@ simple_heap_insert(Relation relation, HeapTuple tup)
* relation - table to be modified
* tid - TID of tuple to be deleted
* ctid - output parameter, used only for failure case (see below)
+ * update_xmax - output parameter, used only for failure case (see below)
* cid - delete command ID to use in verifying tuple visibility
* crosscheck - if not SnapshotAny, also check tuple against this
* wait - true if should wait for any conflicting update to commit/abort
@@ -1217,13 +1264,17 @@ simple_heap_insert(Relation relation, HeapTuple tup)
* Normal, successful return value is HeapTupleMayBeUpdated, which
* actually means we did delete it. Failure return codes are
* HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated
- * (the last only possible if wait == false). On a failure return,
- * *ctid is set to the ctid link of the target tuple (possibly a later
- * version of the row).
+ * (the last only possible if wait == false).
+ *
+ * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
+ * If t_ctid is the same as tid, the tuple was deleted; if different, the
+ * tuple was updated, and t_ctid is the location of the replacement tuple.
+ * (t_xmax is needed to verify that the replacement tuple matches.)
*/
int
heap_delete(Relation relation, ItemPointer tid,
- ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait)
+ ItemPointer ctid, TransactionId *update_xmax,
+ CommandId cid, Snapshot crosscheck, bool wait)
{
ItemId lp;
HeapTupleData tp;
@@ -1243,11 +1294,11 @@ heap_delete(Relation relation, ItemPointer tid,
dp = (PageHeader) BufferGetPage(buffer);
lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
+
tp.t_datamcxt = NULL;
- tp.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+ tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_self = *tid;
- tp.t_tableOid = relation->rd_id;
l1:
sv_infomask = tp.t_data->t_infomask;
@@ -1307,7 +1358,9 @@ l1:
Assert(result == HeapTupleSelfUpdated ||
result == HeapTupleUpdated ||
result == HeapTupleBeingUpdated);
+ Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
*ctid = tp.t_data->t_ctid;
+ *update_xmax = HeapTupleHeaderGetXmax(tp.t_data);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
return result;
@@ -1397,11 +1450,12 @@ l1:
void
simple_heap_delete(Relation relation, ItemPointer tid)
{
- ItemPointerData ctid;
int result;
+ ItemPointerData update_ctid;
+ TransactionId update_xmax;
result = heap_delete(relation, tid,
- &ctid,
+ &update_ctid, &update_xmax,
GetCurrentCommandId(), SnapshotAny,
true /* wait for commit */);
switch (result)
@@ -1435,6 +1489,7 @@ simple_heap_delete(Relation relation, ItemPointer tid)
* otid - TID of old tuple to be replaced
* newtup - newly constructed tuple data to store
* ctid - output parameter, used only for failure case (see below)
+ * update_xmax - output parameter, used only for failure case (see below)
* cid - update command ID to use in verifying old tuple visibility
* crosscheck - if not SnapshotAny, also check old tuple against this
* wait - true if should wait for any conflicting update to commit/abort
@@ -1442,15 +1497,20 @@ simple_heap_delete(Relation relation, ItemPointer tid)
* Normal, successful return value is HeapTupleMayBeUpdated, which
* actually means we *did* update it. Failure return codes are
* HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated
- * (the last only possible if wait == false). On a failure return,
- * *ctid is set to the ctid link of the old tuple (possibly a later
- * version of the row).
+ * (the last only possible if wait == false).
+ *
* On success, newtup->t_self is set to the TID where the new tuple
* was inserted.
+ *
+ * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
+ * If t_ctid is the same as otid, the tuple was deleted; if different, the
+ * tuple was updated, and t_ctid is the location of the replacement tuple.
+ * (t_xmax is needed to verify that the replacement tuple matches.)
*/
int
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
- ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait)
+ ItemPointer ctid, TransactionId *update_xmax,
+ CommandId cid, Snapshot crosscheck, bool wait)
{
ItemId lp;
HeapTupleData oldtup;
@@ -1544,7 +1604,9 @@ l2:
Assert(result == HeapTupleSelfUpdated ||
result == HeapTupleUpdated ||
result == HeapTupleBeingUpdated);
+ Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
*ctid = oldtup.t_data->t_ctid;
+ *update_xmax = HeapTupleHeaderGetXmax(oldtup.t_data);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
return result;
@@ -1764,11 +1826,12 @@ l2:
void
simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
{
- ItemPointerData ctid;
int result;
+ ItemPointerData update_ctid;
+ TransactionId update_xmax;
result = heap_update(relation, otid, tup,
- &ctid,
+ &update_ctid, &update_xmax,
GetCurrentCommandId(), SnapshotAny,
true /* wait for commit */);
switch (result)
@@ -1794,9 +1857,34 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
/*
* heap_mark4update - mark a tuple for update
+ *
+ * Note that this acquires a buffer pin, which the caller must release.
+ *
+ * Input parameters:
+ * relation: relation containing tuple (caller must hold suitable lock)
+ * tuple->t_self: TID of tuple to lock (rest of struct need not be valid)
+ * cid: current command ID (used for visibility test, and stored into
+ * tuple's cmax if lock is successful)
+ *
+ * Output parameters:
+ * *tuple: all fields filled in
+ * *buffer: set to buffer holding tuple (pinned but not locked at exit)
+ * *ctid: set to tuple's t_ctid, but only in failure cases
+ * *update_xmax: set to tuple's xmax, but only in failure cases
+ *
+ * Function result may be:
+ * HeapTupleMayBeUpdated: lock was successfully acquired
+ * HeapTupleSelfUpdated: lock failed because tuple updated by self
+ * HeapTupleUpdated: lock failed because tuple updated by other xact
+ *
+ * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
+ * If t_ctid is the same as t_self, the tuple was deleted; if different, the
+ * tuple was updated, and t_ctid is the location of the replacement tuple.
+ * (t_xmax is needed to verify that the replacement tuple matches.)
*/
int
heap_mark4update(Relation relation, HeapTuple tuple, Buffer *buffer,
+ ItemPointer ctid, TransactionId *update_xmax,
CommandId cid)
{
ItemPointer tid = &(tuple->t_self);
@@ -1814,9 +1902,12 @@ heap_mark4update(Relation relation, HeapTuple tuple, Buffer *buffer,
dp = (PageHeader) BufferGetPage(*buffer);
lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
+ Assert(ItemIdIsUsed(lp));
+
tuple->t_datamcxt = NULL;
tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
tuple->t_len = ItemIdGetLength(lp);
+ tuple->t_tableOid = RelationGetRelid(relation);
l3:
sv_infomask = tuple->t_data->t_infomask;
@@ -1863,7 +1954,9 @@ l3:
if (result != HeapTupleMayBeUpdated)
{
Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
- tuple->t_self = tuple->t_data->t_ctid;
+ Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
+ *ctid = tuple->t_data->t_ctid;
+ *update_xmax = HeapTupleHeaderGetXmax(tuple->t_data);
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
return result;
}