diff options
author | Robert Haas <rhaas@postgresql.org> | 2020-10-22 08:44:18 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2020-10-22 08:44:18 -0400 |
commit | 866e24d47db1743dfcff5bd595b57e3a143f2cb1 (patch) | |
tree | 8a0ed0502ebba60526dcb77f13522c6cae4684e9 /contrib/amcheck/verify_heapam.c | |
parent | f8721bd752790859df747905bc44fb5ad8dbf07d (diff) | |
download | postgresql-866e24d47db1743dfcff5bd595b57e3a143f2cb1.tar.gz postgresql-866e24d47db1743dfcff5bd595b57e3a143f2cb1.zip |
Extend amcheck to check heap pages.
Mark Dilger, reviewed by Peter Geoghegan, Andres Freund, Álvaro Herrera,
Michael Paquier, Amul Sul, and by me. Some last-minute cosmetic
revisions by me.
Discussion: http://postgr.es/m/12ED3DA8-25F0-4B68-937D-D907CFBF08E7@enterprisedb.com
Diffstat (limited to 'contrib/amcheck/verify_heapam.c')
-rw-r--r-- | contrib/amcheck/verify_heapam.c | 1447 |
1 file changed, 1447 insertions, 0 deletions
diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c new file mode 100644 index 00000000000..0156c1e74aa --- /dev/null +++ b/contrib/amcheck/verify_heapam.c @@ -0,0 +1,1447 @@ +/*------------------------------------------------------------------------- + * + * verify_heapam.c + * Functions to check postgresql heap relations for corruption + * + * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * + * contrib/amcheck/verify_heapam.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/detoast.h" +#include "access/genam.h" +#include "access/heapam.h" +#include "access/heaptoast.h" +#include "access/multixact.h" +#include "access/toast_internals.h" +#include "access/visibilitymap.h" +#include "catalog/pg_am.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/procarray.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" + +PG_FUNCTION_INFO_V1(verify_heapam); + +/* The number of columns in tuples returned by verify_heapam */ +#define HEAPCHECK_RELATION_COLS 4 + +/* + * Despite the name, we use this for reporting problems with both XIDs and + * MXIDs. + */ +typedef enum XidBoundsViolation +{ + XID_INVALID, + XID_IN_FUTURE, + XID_PRECEDES_CLUSTERMIN, + XID_PRECEDES_RELMIN, + XID_BOUNDS_OK +} XidBoundsViolation; + +typedef enum XidCommitStatus +{ + XID_COMMITTED, + XID_IN_PROGRESS, + XID_ABORTED +} XidCommitStatus; + +typedef enum SkipPages +{ + SKIP_PAGES_ALL_FROZEN, + SKIP_PAGES_ALL_VISIBLE, + SKIP_PAGES_NONE +} SkipPages; + +/* + * Struct holding the running context information during + * a lifetime of a verify_heapam execution. + */ +typedef struct HeapCheckContext +{ + /* + * Cached copies of values from ShmemVariableCache and computed values + * from them. 
+ */ + FullTransactionId next_fxid; /* ShmemVariableCache->nextXid */ + TransactionId next_xid; /* 32-bit version of next_fxid */ + TransactionId oldest_xid; /* ShmemVariableCache->oldestXid */ + FullTransactionId oldest_fxid; /* 64-bit version of oldest_xid, computed + * relative to next_fxid */ + + /* + * Cached copy of value from MultiXactState + */ + MultiXactId next_mxact; /* MultiXactState->nextMXact */ + MultiXactId oldest_mxact; /* MultiXactState->oldestMultiXactId */ + + /* + * Cached copies of the most recently checked xid and its status. + */ + TransactionId cached_xid; + XidCommitStatus cached_status; + + /* Values concerning the heap relation being checked */ + Relation rel; + TransactionId relfrozenxid; + FullTransactionId relfrozenfxid; + TransactionId relminmxid; + Relation toast_rel; + Relation *toast_indexes; + Relation valid_toast_index; + int num_toast_indexes; + + /* Values for iterating over pages in the relation */ + BlockNumber blkno; + BufferAccessStrategy bstrategy; + Buffer buffer; + Page page; + + /* Values for iterating over tuples within a page */ + OffsetNumber offnum; + ItemId itemid; + uint16 lp_len; + HeapTupleHeader tuphdr; + int natts; + + /* Values for iterating over attributes within the tuple */ + uint32 offset; /* offset in tuple data */ + AttrNumber attnum; + + /* Values for iterating over toast for the attribute */ + int32 chunkno; + int32 attrsize; + int32 endchunk; + int32 totalchunks; + + /* Whether verify_heapam has yet encountered any corrupt tuples */ + bool is_corrupt; + + /* The descriptor and tuplestore for verify_heapam's result tuples */ + TupleDesc tupdesc; + Tuplestorestate *tupstore; +} HeapCheckContext; + +/* Internal implementation */ +static void sanity_check_relation(Relation rel); +static void check_tuple(HeapCheckContext *ctx); +static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx); + +static bool check_tuple_attribute(HeapCheckContext *ctx); +static bool 
check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, + HeapCheckContext *ctx); + +static void report_corruption(HeapCheckContext *ctx, char *msg); +static TupleDesc verify_heapam_tupdesc(void); +static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid, + const HeapCheckContext *ctx); +static void update_cached_xid_range(HeapCheckContext *ctx); +static void update_cached_mxid_range(HeapCheckContext *ctx); +static XidBoundsViolation check_mxid_in_range(MultiXactId mxid, + HeapCheckContext *ctx); +static XidBoundsViolation check_mxid_valid_in_rel(MultiXactId mxid, + HeapCheckContext *ctx); +static XidBoundsViolation get_xid_status(TransactionId xid, + HeapCheckContext *ctx, + XidCommitStatus *status); + +/* + * Scan and report corruption in heap pages, optionally reconciling toasted + * attributes with entries in the associated toast table. Intended to be + * called from SQL with the following parameters: + * + * relation: + * The Oid of the heap relation to be checked. + * + * on_error_stop: + * Whether to stop at the end of the first page for which errors are + * detected. Note that multiple rows may be returned. + * + * check_toast: + * Whether to check each toasted attribute against the toast table to + * verify that it can be found there. + * + * skip: + * What kinds of pages in the heap relation should be skipped. Valid + * options are "all-visible", "all-frozen", and "none". + * + * Returns to the SQL caller a set of tuples, each containing the location + * and a description of a corruption found in the heap. + * + * This code goes to some trouble to avoid crashing the server even if the + * table pages are badly corrupted, but it's probably not perfect. If + * check_toast is true, we'll use regular index lookups to try to fetch TOAST + * tuples, which can certainly cause crashes if the right kind of corruption + * exists in the toast table or index. 
No matter what parameters you pass, + * we can't protect against crashes that might occur trying to look up the + * commit status of transaction IDs (though we avoid trying to do such lookups + * for transaction IDs that can't legally appear in the table). + */ +Datum +verify_heapam(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + MemoryContext old_context; + bool random_access; + HeapCheckContext ctx; + Buffer vmbuffer = InvalidBuffer; + Oid relid; + bool on_error_stop; + bool check_toast; + SkipPages skip_option = SKIP_PAGES_NONE; + BlockNumber first_block; + BlockNumber last_block; + BlockNumber nblocks; + const char *skip; + + /* Check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Check supplied arguments */ + if (PG_ARGISNULL(0)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("relation cannot be null"))); + relid = PG_GETARG_OID(0); + + if (PG_ARGISNULL(1)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("on_error_stop cannot be null"))); + on_error_stop = PG_GETARG_BOOL(1); + + if (PG_ARGISNULL(2)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("check_toast cannot be null"))); + check_toast = PG_GETARG_BOOL(2); + + if (PG_ARGISNULL(3)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("skip cannot be null"))); + skip = text_to_cstring(PG_GETARG_TEXT_PP(3)); + if (pg_strcasecmp(skip, "all-visible") == 0) + skip_option = SKIP_PAGES_ALL_VISIBLE; + else if (pg_strcasecmp(skip, "all-frozen") == 0) + skip_option = SKIP_PAGES_ALL_FROZEN; + else if 
(pg_strcasecmp(skip, "none") == 0) + skip_option = SKIP_PAGES_NONE; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid skip option"), + errhint("Valid skip options are \"all-visible\", \"all-frozen\", and \"none\"."))); + + memset(&ctx, 0, sizeof(HeapCheckContext)); + ctx.cached_xid = InvalidTransactionId; + + /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */ + old_context = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); + random_access = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0; + ctx.tupdesc = verify_heapam_tupdesc(); + ctx.tupstore = tuplestore_begin_heap(random_access, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = ctx.tupstore; + rsinfo->setDesc = ctx.tupdesc; + MemoryContextSwitchTo(old_context); + + /* Open relation, check relkind and access method, and check privileges */ + ctx.rel = relation_open(relid, AccessShareLock); + sanity_check_relation(ctx.rel); + + /* Early exit if the relation is empty */ + nblocks = RelationGetNumberOfBlocks(ctx.rel); + if (!nblocks) + { + relation_close(ctx.rel, AccessShareLock); + PG_RETURN_NULL(); + } + + ctx.bstrategy = GetAccessStrategy(BAS_BULKREAD); + ctx.buffer = InvalidBuffer; + ctx.page = NULL; + + /* Validate block numbers, or handle nulls. */ + if (PG_ARGISNULL(4)) + first_block = 0; + else + { + int64 fb = PG_GETARG_INT64(4); + + if (fb < 0 || fb >= nblocks) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("starting block number must be between 0 and %u", + nblocks - 1))); + first_block = (BlockNumber) fb; + } + if (PG_ARGISNULL(5)) + last_block = nblocks - 1; + else + { + int64 lb = PG_GETARG_INT64(5); + + if (lb < 0 || lb >= nblocks) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ending block number must be between 0 and %u", + nblocks - 1))); + last_block = (BlockNumber) lb; + } + + /* Optionally open the toast relation, if any. 
*/ + if (ctx.rel->rd_rel->reltoastrelid && check_toast) + { + int offset; + + /* Main relation has associated toast relation */ + ctx.toast_rel = table_open(ctx.rel->rd_rel->reltoastrelid, + AccessShareLock); + offset = toast_open_indexes(ctx.toast_rel, + AccessShareLock, + &(ctx.toast_indexes), + &(ctx.num_toast_indexes)); + ctx.valid_toast_index = ctx.toast_indexes[offset]; + } + else + { + /* + * Main relation has no associated toast relation, or we're + * intentionally skipping it. + */ + ctx.toast_rel = NULL; + ctx.toast_indexes = NULL; + ctx.num_toast_indexes = 0; + } + + update_cached_xid_range(&ctx); + update_cached_mxid_range(&ctx); + ctx.relfrozenxid = ctx.rel->rd_rel->relfrozenxid; + ctx.relfrozenfxid = FullTransactionIdFromXidAndCtx(ctx.relfrozenxid, &ctx); + ctx.relminmxid = ctx.rel->rd_rel->relminmxid; + + if (TransactionIdIsNormal(ctx.relfrozenxid)) + ctx.oldest_xid = ctx.relfrozenxid; + + for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++) + { + OffsetNumber maxoff; + + /* Optionally skip over all-frozen or all-visible blocks */ + if (skip_option != SKIP_PAGES_NONE) + { + int32 mapbits; + + mapbits = (int32) visibilitymap_get_status(ctx.rel, ctx.blkno, + &vmbuffer); + if (skip_option == SKIP_PAGES_ALL_FROZEN) + { + if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0) + continue; + } + + if (skip_option == SKIP_PAGES_ALL_VISIBLE) + { + if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0) + continue; + } + } + + /* Read and lock the next page. 
*/ + ctx.buffer = ReadBufferExtended(ctx.rel, MAIN_FORKNUM, ctx.blkno, + RBM_NORMAL, ctx.bstrategy); + LockBuffer(ctx.buffer, BUFFER_LOCK_SHARE); + ctx.page = BufferGetPage(ctx.buffer); + + /* Perform tuple checks */ + maxoff = PageGetMaxOffsetNumber(ctx.page); + for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff; + ctx.offnum = OffsetNumberNext(ctx.offnum)) + { + ctx.itemid = PageGetItemId(ctx.page, ctx.offnum); + + /* Skip over unused/dead line pointers */ + if (!ItemIdIsUsed(ctx.itemid) || ItemIdIsDead(ctx.itemid)) + continue; + + /* + * If this line pointer has been redirected, check that it + * redirects to a valid offset within the line pointer array. + */ + if (ItemIdIsRedirected(ctx.itemid)) + { + OffsetNumber rdoffnum = ItemIdGetRedirect(ctx.itemid); + ItemId rditem; + + if (rdoffnum < FirstOffsetNumber || rdoffnum > maxoff) + { + report_corruption(&ctx, + psprintf("line pointer redirection to item at offset %u exceeds maximum offset %u", + (unsigned) rdoffnum, + (unsigned) maxoff)); + continue; + } + rditem = PageGetItemId(ctx.page, rdoffnum); + if (!ItemIdIsUsed(rditem)) + report_corruption(&ctx, + psprintf("line pointer redirection to unused item at offset %u", + (unsigned) rdoffnum)); + continue; + } + + /* Set up context information about this next tuple */ + ctx.lp_len = ItemIdGetLength(ctx.itemid); + ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid); + ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr); + + /* Ok, ready to check this next tuple */ + check_tuple(&ctx); + } + + /* clean up */ + UnlockReleaseBuffer(ctx.buffer); + + if (on_error_stop && ctx.is_corrupt) + break; + } + + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + + /* Close the associated toast table and indexes, if any. 
*/ + if (ctx.toast_indexes) + toast_close_indexes(ctx.toast_indexes, ctx.num_toast_indexes, + AccessShareLock); + if (ctx.toast_rel) + table_close(ctx.toast_rel, AccessShareLock); + + /* Close the main relation */ + relation_close(ctx.rel, AccessShareLock); + + PG_RETURN_NULL(); +} + +/* + * Check that a relation's relkind and access method are both supported, + * and that the caller has select privilege on the relation. + */ +static void +sanity_check_relation(Relation rel) +{ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW && + rel->rd_rel->relkind != RELKIND_TOASTVALUE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table, materialized view, or TOAST table", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); +} + +/* + * Record a single corruption found in the table. The values in ctx should + * reflect the location of the corruption, and the msg argument should contain + * a human readable description of the corruption. + * + * The msg argument is pfree'd by this function. + */ +static void +report_corruption(HeapCheckContext *ctx, char *msg) +{ + Datum values[HEAPCHECK_RELATION_COLS]; + bool nulls[HEAPCHECK_RELATION_COLS]; + HeapTuple tuple; + + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int64GetDatum(ctx->blkno); + values[1] = Int32GetDatum(ctx->offnum); + values[2] = Int32GetDatum(ctx->attnum); + nulls[2] = (ctx->attnum < 0); + values[3] = CStringGetTextDatum(msg); + + /* + * In principle, there is nothing to prevent a scan over a large, highly + * corrupted table from using work_mem worth of memory building up the + * tuplestore. That's ok, but if we also leak the msg argument memory + * until the end of the query, we could exceed work_mem by more than a + * trivial amount. 
Therefore, free the msg argument each time we are + * called rather than waiting for our current memory context to be freed. + */ + pfree(msg); + + tuple = heap_form_tuple(ctx->tupdesc, values, nulls); + tuplestore_puttuple(ctx->tupstore, tuple); + ctx->is_corrupt = true; +} + +/* + * Construct the TupleDesc used to report messages about corruptions found + * while scanning the heap. + */ +static TupleDesc +verify_heapam_tupdesc(void) +{ + TupleDesc tupdesc; + AttrNumber a = 0; + + tupdesc = CreateTemplateTupleDesc(HEAPCHECK_RELATION_COLS); + TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, ++a, "offnum", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, ++a, "attnum", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, ++a, "msg", TEXTOID, -1, 0); + Assert(a == HEAPCHECK_RELATION_COLS); + + return BlessTupleDesc(tupdesc); +} + +/* + * Check for tuple header corruption and tuple visibility. + * + * Since we do not hold a snapshot, tuple visibility is not a question of + * whether we should be able to see the tuple relative to any particular + * snapshot, but rather a question of whether it is safe and reasonable to + * to check the tuple attributes. + * + * Some kinds of corruption make it unsafe to check the tuple attributes, for + * example when the line pointer refers to a range of bytes outside the page. + * In such cases, we return false (not visible) after recording appropriate + * corruption messages. + * + * Some other kinds of tuple header corruption confuse the question of where + * the tuple attributes begin, or how long the nulls bitmap is, etc., making it + * unreasonable to attempt to check attributes, even if all candidate answers + * to those questions would not result in reading past the end of the line + * pointer or page. In such cases, like above, we record corruption messages + * about the header and then return false. 
+ * + * Other kinds of tuple header corruption do not bear on the question of + * whether the tuple attributes can be checked, so we record corruption + * messages for them but do not base our visibility determination on them. (In + * other words, we do not return false merely because we detected them.) + * + * For visibility determination not specifically related to corruption, what we + * want to know is if a tuple is potentially visible to any running + * transaction. If you are tempted to replace this function's visibility logic + * with a call to another visibility checking function, keep in mind that this + * function does not update hint bits, as it seems imprudent to write hint bits + * (or anything at all) to a table during a corruption check. Nor does this + * function bother classifying tuple visibility beyond a boolean visible vs. + * not visible. + * + * The caller should already have checked that xmin and xmax are not out of + * bounds for the relation. + * + * Returns whether the tuple is both visible and sufficiently sensible to + * undergo attribute checks. 
+ */ +static bool +check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx) +{ + uint16 infomask = tuphdr->t_infomask; + bool header_garbled = false; + unsigned expected_hoff;; + + if (ctx->tuphdr->t_hoff > ctx->lp_len) + { + report_corruption(ctx, + psprintf("data begins at offset %u beyond the tuple length %u", + ctx->tuphdr->t_hoff, ctx->lp_len)); + header_garbled = true; + } + if ((ctx->tuphdr->t_infomask & HEAP_XMAX_LOCK_ONLY) && + (ctx->tuphdr->t_infomask2 & HEAP_KEYS_UPDATED)) + { + report_corruption(ctx, + pstrdup("tuple is marked as only locked, but also claims key columns were updated")); + header_garbled = true; + } + + if ((ctx->tuphdr->t_infomask & HEAP_XMAX_COMMITTED) && + (ctx->tuphdr->t_infomask & HEAP_XMAX_IS_MULTI)) + { + report_corruption(ctx, + pstrdup("multixact should not be marked committed")); + + /* + * This condition is clearly wrong, but we do not consider the header + * garbled, because we don't rely on this property for determining if + * the tuple is visible or for interpreting other relevant header + * fields. 
+ */ + } + + if (infomask & HEAP_HASNULL) + expected_hoff = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(ctx->natts)); + else + expected_hoff = MAXALIGN(SizeofHeapTupleHeader); + if (ctx->tuphdr->t_hoff != expected_hoff) + { + if ((infomask & HEAP_HASNULL) && ctx->natts == 1) + report_corruption(ctx, + psprintf("tuple data should begin at byte %u, but actually begins at byte %u (1 attribute, has nulls)", + expected_hoff, ctx->tuphdr->t_hoff)); + else if ((infomask & HEAP_HASNULL)) + report_corruption(ctx, + psprintf("tuple data should begin at byte %u, but actually begins at byte %u (%u attributes, has nulls)", + expected_hoff, ctx->tuphdr->t_hoff, ctx->natts)); + else if (ctx->natts == 1) + report_corruption(ctx, + psprintf("tuple data should begin at byte %u, but actually begins at byte %u (1 attribute, no nulls)", + expected_hoff, ctx->tuphdr->t_hoff)); + else + report_corruption(ctx, + psprintf("tuple data should begin at byte %u, but actually begins at byte %u (%u attributes, no nulls)", + expected_hoff, ctx->tuphdr->t_hoff, ctx->natts)); + header_garbled = true; + } + + if (header_garbled) + return false; /* checking of this tuple should not continue */ + + /* + * Ok, we can examine the header for tuple visibility purposes, though we + * still need to be careful about a few remaining types of header + * corruption. This logic roughly follows that of + * HeapTupleSatisfiesVacuum. Where possible the comments indicate which + * HTSV_Result we think that function might return for this tuple. 
+ */ + if (!HeapTupleHeaderXminCommitted(tuphdr)) + { + TransactionId raw_xmin = HeapTupleHeaderGetRawXmin(tuphdr); + + if (HeapTupleHeaderXminInvalid(tuphdr)) + return false; /* HEAPTUPLE_DEAD */ + /* Used by pre-9.0 binary upgrades */ + else if (infomask & HEAP_MOVED_OFF || + infomask & HEAP_MOVED_IN) + { + XidCommitStatus status; + TransactionId xvac = HeapTupleHeaderGetXvac(tuphdr); + + switch (get_xid_status(xvac, ctx, &status)) + { + case XID_INVALID: + report_corruption(ctx, + pstrdup("old-style VACUUM FULL transaction ID is invalid")); + return false; /* corrupt */ + case XID_IN_FUTURE: + report_corruption(ctx, + psprintf("old-style VACUUM FULL transaction ID %u equals or exceeds next valid transaction ID %u:%u", + xvac, + EpochFromFullTransactionId(ctx->next_fxid), + XidFromFullTransactionId(ctx->next_fxid))); + return false; /* corrupt */ + case XID_PRECEDES_RELMIN: + report_corruption(ctx, + psprintf("old-style VACUUM FULL transaction ID %u precedes relation freeze threshold %u:%u", + xvac, + EpochFromFullTransactionId(ctx->relfrozenfxid), + XidFromFullTransactionId(ctx->relfrozenfxid))); + return false; /* corrupt */ + break; + case XID_PRECEDES_CLUSTERMIN: + report_corruption(ctx, + psprintf("old-style VACUUM FULL transaction ID %u precedes oldest valid transaction ID %u:%u", + xvac, + EpochFromFullTransactionId(ctx->oldest_fxid), + XidFromFullTransactionId(ctx->oldest_fxid))); + return false; /* corrupt */ + break; + case XID_BOUNDS_OK: + switch (status) + { + case XID_IN_PROGRESS: + return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */ + case XID_COMMITTED: + case XID_ABORTED: + return false; /* HEAPTUPLE_DEAD */ + } + } + } + else + { + XidCommitStatus status; + + switch (get_xid_status(raw_xmin, ctx, &status)) + { + case XID_INVALID: + report_corruption(ctx, + pstrdup("raw xmin is invalid")); + return false; + case XID_IN_FUTURE: + report_corruption(ctx, + psprintf("raw xmin %u equals or exceeds next valid transaction ID %u:%u", + raw_xmin, + 
EpochFromFullTransactionId(ctx->next_fxid), + XidFromFullTransactionId(ctx->next_fxid))); + return false; /* corrupt */ + case XID_PRECEDES_RELMIN: + report_corruption(ctx, + psprintf("raw xmin %u precedes relation freeze threshold %u:%u", + raw_xmin, + EpochFromFullTransactionId(ctx->relfrozenfxid), + XidFromFullTransactionId(ctx->relfrozenfxid))); + return false; /* corrupt */ + case XID_PRECEDES_CLUSTERMIN: + report_corruption(ctx, + psprintf("raw xmin %u precedes oldest valid transaction ID %u:%u", + raw_xmin, + EpochFromFullTransactionId(ctx->oldest_fxid), + XidFromFullTransactionId(ctx->oldest_fxid))); + return false; /* corrupt */ + case XID_BOUNDS_OK: + switch (status) + { + case XID_COMMITTED: + break; + case XID_IN_PROGRESS: + return true; /* insert or delete in progress */ + case XID_ABORTED: + return false; /* HEAPTUPLE_DEAD */ + } + } + } + } + + if (!(infomask & HEAP_XMAX_INVALID) && !HEAP_XMAX_IS_LOCKED_ONLY(infomask)) + { + if (infomask & HEAP_XMAX_IS_MULTI) + { + XidCommitStatus status; + TransactionId xmax = HeapTupleGetUpdateXid(tuphdr); + + switch (get_xid_status(xmax, ctx, &status)) + { + /* not LOCKED_ONLY, so it has to have an xmax */ + case XID_INVALID: + report_corruption(ctx, + pstrdup("xmax is invalid")); + return false; /* corrupt */ + case XID_IN_FUTURE: + report_corruption(ctx, + psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u", + xmax, + EpochFromFullTransactionId(ctx->next_fxid), + XidFromFullTransactionId(ctx->next_fxid))); + return false; /* corrupt */ + case XID_PRECEDES_RELMIN: + report_corruption(ctx, + psprintf("xmax %u precedes relation freeze threshold %u:%u", + xmax, + EpochFromFullTransactionId(ctx->relfrozenfxid), + XidFromFullTransactionId(ctx->relfrozenfxid))); + return false; /* corrupt */ + case XID_PRECEDES_CLUSTERMIN: + report_corruption(ctx, + psprintf("xmax %u precedes oldest valid transaction ID %u:%u", + xmax, + EpochFromFullTransactionId(ctx->oldest_fxid), + 
XidFromFullTransactionId(ctx->oldest_fxid))); + return false; /* corrupt */ + case XID_BOUNDS_OK: + switch (status) + { + case XID_IN_PROGRESS: + return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */ + case XID_COMMITTED: + case XID_ABORTED: + return false; /* HEAPTUPLE_RECENTLY_DEAD or + * HEAPTUPLE_DEAD */ + } + } + + /* Ok, the tuple is live */ + } + else if (!(infomask & HEAP_XMAX_COMMITTED)) + return true; /* HEAPTUPLE_DELETE_IN_PROGRESS or + * HEAPTUPLE_LIVE */ + else + return false; /* HEAPTUPLE_RECENTLY_DEAD or HEAPTUPLE_DEAD */ + } + return true; /* not dead */ +} + +/* + * Check the current toast tuple against the state tracked in ctx, recording + * any corruption found in ctx->tupstore. + * + * This is not equivalent to running verify_heapam on the toast table itself, + * and is not hardened against corruption of the toast table. Rather, when + * validating a toasted attribute in the main table, the sequence of toast + * tuples that store the toasted value are retrieved and checked in order, with + * each toast tuple being checked against where we are in the sequence, as well + * as each toast tuple having its varlena structure sanity checked. 
+ */ +static void +check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx) +{ + int32 curchunk; + Pointer chunk; + bool isnull; + int32 chunksize; + int32 expected_size; + + /* + * Have a chunk, extract the sequence number and the data + */ + curchunk = DatumGetInt32(fastgetattr(toasttup, 2, + ctx->toast_rel->rd_att, &isnull)); + if (isnull) + { + report_corruption(ctx, + pstrdup("toast chunk sequence number is null")); + return; + } + chunk = DatumGetPointer(fastgetattr(toasttup, 3, + ctx->toast_rel->rd_att, &isnull)); + if (isnull) + { + report_corruption(ctx, + pstrdup("toast chunk data is null")); + return; + } + if (!VARATT_IS_EXTENDED(chunk)) + chunksize = VARSIZE(chunk) - VARHDRSZ; + else if (VARATT_IS_SHORT(chunk)) + { + /* + * could happen due to heap_form_tuple doing its thing + */ + chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; + } + else + { + /* should never happen */ + uint32 header = ((varattrib_4b *) chunk)->va_4byte.va_header; + + report_corruption(ctx, + psprintf("corrupt extended toast chunk has invalid varlena header: %0x (sequence number %d)", + header, curchunk)); + return; + } + + /* + * Some checks on the data we've found + */ + if (curchunk != ctx->chunkno) + { + report_corruption(ctx, + psprintf("toast chunk sequence number %u does not match the expected sequence number %u", + curchunk, ctx->chunkno)); + return; + } + if (curchunk > ctx->endchunk) + { + report_corruption(ctx, + psprintf("toast chunk sequence number %u exceeds the end chunk sequence number %u", + curchunk, ctx->endchunk)); + return; + } + + expected_size = curchunk < ctx->totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE + : ctx->attrsize - ((ctx->totalchunks - 1) * TOAST_MAX_CHUNK_SIZE); + if (chunksize != expected_size) + { + report_corruption(ctx, + psprintf("toast chunk size %u differs from the expected size %u", + chunksize, expected_size)); + return; + } +} + +/* + * Check the current attribute as tracked in ctx, recording any corruption + * found in ctx->tupstore. 
 *
 * This function follows the logic performed by heap_deform_tuple(), and in the
 * case of a toasted value, optionally continues along the logic of
 * detoast_external_attr(), checking for any conditions that would result in
 * either of those functions Asserting or crashing the backend.  The checks
 * performed by Asserts present in those two functions are also performed here.
 * In cases where those two functions are a bit cavalier in their assumptions
 * about data being correct, we perform additional checks not present in either
 * of those two functions.  Where some condition is checked in both of those
 * functions, we perform it here twice, as we parallel the logical flow of
 * those two functions.  The presence of duplicate checks seems a reasonable
 * price to pay for keeping this code tightly coupled with the code it
 * protects.
 *
 * Returns true if the tuple attribute is sane enough for processing to
 * continue on to the next attribute, false otherwise.
 */
static bool
check_tuple_attribute(HeapCheckContext *ctx)
{
	struct varatt_external toast_pointer;
	ScanKeyData toastkey;
	SysScanDesc toastscan;
	SnapshotData SnapshotToast;
	HeapTuple	toasttup;
	bool		found_toasttup;
	Datum		attdatum;
	struct varlena *attr;
	char	   *tp;				/* pointer to the tuple data */
	uint16		infomask;
	Form_pg_attribute thisatt;

	infomask = ctx->tuphdr->t_infomask;
	thisatt = TupleDescAttr(RelationGetDescr(ctx->rel), ctx->attnum);

	tp = (char *) ctx->tuphdr + ctx->tuphdr->t_hoff;

	/* The attribute's starting offset must lie within the tuple */
	if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
	{
		report_corruption(ctx,
						  psprintf("attribute %u with length %u starts at offset %u beyond total tuple length %u",
								   ctx->attnum,
								   thisatt->attlen,
								   ctx->tuphdr->t_hoff + ctx->offset,
								   ctx->lp_len));
		return false;
	}

	/* Skip null values */
	if (infomask & HEAP_HASNULL && att_isnull(ctx->attnum, ctx->tuphdr->t_bits))
		return true;

	/* Skip non-varlena values, but update offset first */
	if (thisatt->attlen != -1)
	{
		ctx->offset = att_align_nominal(ctx->offset, thisatt->attalign);
		ctx->offset = att_addlength_pointer(ctx->offset, thisatt->attlen,
											tp + ctx->offset);
		/* Advancing past the value must not run off the end of the tuple */
		if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
		{
			report_corruption(ctx,
							  psprintf("attribute %u with length %u ends at offset %u beyond total tuple length %u",
									   ctx->attnum,
									   thisatt->attlen,
									   ctx->tuphdr->t_hoff + ctx->offset,
									   ctx->lp_len));
			return false;
		}
		return true;
	}

	/* Ok, we're looking at a varlena attribute. */
	ctx->offset = att_align_pointer(ctx->offset, thisatt->attalign, -1,
									tp + ctx->offset);

	/* Get the (possibly corrupt) varlena datum */
	attdatum = fetchatt(thisatt, tp + ctx->offset);

	/*
	 * We have the datum, but we cannot decode it carelessly, as it may still
	 * be corrupt.
	 */

	/*
	 * Check that VARTAG_SIZE won't hit a TrapMacro on a corrupt va_tag before
	 * risking a call into att_addlength_pointer
	 */
	if (VARATT_IS_EXTERNAL(tp + ctx->offset))
	{
		uint8		va_tag = VARTAG_EXTERNAL(tp + ctx->offset);

		if (va_tag != VARTAG_ONDISK)
		{
			report_corruption(ctx,
							  psprintf("toasted attribute %u has unexpected TOAST tag %u",
									   ctx->attnum,
									   va_tag));
			/* We can't know where the next attribute begins */
			return false;
		}
	}

	/* Ok, should be safe now */
	ctx->offset = att_addlength_pointer(ctx->offset, thisatt->attlen,
										tp + ctx->offset);

	/* As above, the varlena value must end within the tuple */
	if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
	{
		report_corruption(ctx,
						  psprintf("attribute %u with length %u ends at offset %u beyond total tuple length %u",
								   ctx->attnum,
								   thisatt->attlen,
								   ctx->tuphdr->t_hoff + ctx->offset,
								   ctx->lp_len));

		return false;
	}

	/*
	 * heap_deform_tuple would be done with this attribute at this point,
	 * having stored it in values[], and would continue to the next attribute.
	 * We go further, because we need to check if the toast datum is corrupt.
	 */

	attr = (struct varlena *) DatumGetPointer(attdatum);

	/*
	 * Now we follow the logic of detoast_external_attr(), with the same
	 * caveats about being paranoid about corruption.
	 */

	/* Skip values that are not external */
	if (!VARATT_IS_EXTERNAL(attr))
		return true;

	/* It is external, and we're looking at a page on disk */

	/* The tuple header better claim to contain toasted values */
	if (!(infomask & HEAP_HASEXTERNAL))
	{
		report_corruption(ctx,
						  psprintf("attribute %u is external but tuple header flag HEAP_HASEXTERNAL not set",
								   ctx->attnum));
		return true;
	}

	/* The relation better have a toast table */
	if (!ctx->rel->rd_rel->reltoastrelid)
	{
		report_corruption(ctx,
						  psprintf("attribute %u is external but relation has no toast relation",
								   ctx->attnum));
		return true;
	}

	/* If we were told to skip toast checking, then we're done. */
	if (ctx->toast_rel == NULL)
		return true;

	/*
	 * Must copy attr into toast_pointer for alignment considerations
	 */
	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);

	/* Compute the number of chunks we expect to find in the toast table */
	ctx->attrsize = toast_pointer.va_extsize;
	ctx->endchunk = (ctx->attrsize - 1) / TOAST_MAX_CHUNK_SIZE;
	ctx->totalchunks = ctx->endchunk + 1;

	/*
	 * Setup a scan key to find chunks in toast table with matching va_valueid
	 */
	ScanKeyInit(&toastkey,
				(AttrNumber) 1,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(toast_pointer.va_valueid));

	/*
	 * Check if any chunks for this toasted object exist in the toast table,
	 * accessible via the index.
	 */
	init_toast_snapshot(&SnapshotToast);
	toastscan = systable_beginscan_ordered(ctx->toast_rel,
										   ctx->valid_toast_index,
										   &SnapshotToast, 1,
										   &toastkey);
	ctx->chunkno = 0;
	found_toasttup = false;
	while ((toasttup =
			systable_getnext_ordered(toastscan,
									 ForwardScanDirection)) != NULL)
	{
		found_toasttup = true;
		/* check_toast_tuple reports per-chunk corruption as it goes */
		check_toast_tuple(toasttup, ctx);
		ctx->chunkno++;
	}
	if (ctx->chunkno != (ctx->endchunk + 1))
		report_corruption(ctx,
						  psprintf("final toast chunk number %u differs from expected value %u",
								   ctx->chunkno, (ctx->endchunk + 1)));
	if (!found_toasttup)
		report_corruption(ctx,
						  psprintf("toasted value for attribute %u missing from toast table",
								   ctx->attnum));
	systable_endscan_ordered(toastscan);

	return true;
}

/*
 * Check the current tuple as tracked in ctx, recording any corruption found in
 * ctx->tupstore.
 */
static void
check_tuple(HeapCheckContext *ctx)
{
	TransactionId xmin;
	TransactionId xmax;
	bool		fatal = false;	/* set if further checks would be unsafe */
	uint16		infomask = ctx->tuphdr->t_infomask;

	/*
	 * If we report corruption before iterating over individual attributes, we
	 * need attnum to be reported as NULL.  Set that up before any corruption
	 * reporting might happen.
	 */
	ctx->attnum = -1;

	/*
	 * If the line pointer for this tuple does not reserve enough space for a
	 * complete tuple header, we dare not read the tuple header.
	 */
	if (ctx->lp_len < MAXALIGN(SizeofHeapTupleHeader))
	{
		report_corruption(ctx,
						  psprintf("line pointer length %u is less than the minimum tuple header size %u",
								   ctx->lp_len,
								   (uint32) MAXALIGN(SizeofHeapTupleHeader)));
		return;
	}

	/* If xmin is normal, it should be within valid range */
	xmin = HeapTupleHeaderGetXmin(ctx->tuphdr);
	switch (get_xid_status(xmin, ctx, NULL))
	{
		case XID_INVALID:
		case XID_BOUNDS_OK:
			break;
		case XID_IN_FUTURE:
			report_corruption(ctx,
							  psprintf("xmin %u equals or exceeds next valid transaction ID %u:%u",
									   xmin,
									   EpochFromFullTransactionId(ctx->next_fxid),
									   XidFromFullTransactionId(ctx->next_fxid)));
			fatal = true;
			break;
		case XID_PRECEDES_CLUSTERMIN:
			report_corruption(ctx,
							  psprintf("xmin %u precedes oldest valid transaction ID %u:%u",
									   xmin,
									   EpochFromFullTransactionId(ctx->oldest_fxid),
									   XidFromFullTransactionId(ctx->oldest_fxid)));
			fatal = true;
			break;
		case XID_PRECEDES_RELMIN:
			report_corruption(ctx,
							  psprintf("xmin %u precedes relation freeze threshold %u:%u",
									   xmin,
									   EpochFromFullTransactionId(ctx->relfrozenfxid),
									   XidFromFullTransactionId(ctx->relfrozenfxid)));
			fatal = true;
			break;
	}

	/* Raw xmax: may be a plain xid or, per HEAP_XMAX_IS_MULTI, a multixact */
	xmax = HeapTupleHeaderGetRawXmax(ctx->tuphdr);

	if (infomask & HEAP_XMAX_IS_MULTI)
	{
		/* xmax is a multixact, so it should be within valid MXID range */
		switch (check_mxid_valid_in_rel(xmax, ctx))
		{
			case XID_INVALID:
				report_corruption(ctx,
								  pstrdup("multitransaction ID is invalid"));
				fatal = true;
				break;
			case XID_PRECEDES_RELMIN:
				report_corruption(ctx,
								  psprintf("multitransaction ID %u precedes relation minimum multitransaction ID threshold %u",
										   xmax, ctx->relminmxid));
				fatal = true;
				break;
			case XID_PRECEDES_CLUSTERMIN:
				report_corruption(ctx,
								  psprintf("multitransaction ID %u precedes oldest valid multitransaction ID threshold %u",
										   xmax, ctx->oldest_mxact));
				fatal = true;
				break;
			case XID_IN_FUTURE:
				report_corruption(ctx,
								  psprintf("multitransaction ID %u equals or exceeds next valid multitransaction ID %u",
										   xmax,
										   ctx->next_mxact));
				fatal = true;
				break;
			case XID_BOUNDS_OK:
				break;
		}
	}
	else
	{
		/*
		 * xmax is not a multixact and is normal, so it should be within the
		 * valid XID range.
		 */
		switch (get_xid_status(xmax, ctx, NULL))
		{
			case XID_INVALID:
			case XID_BOUNDS_OK:
				break;
			case XID_IN_FUTURE:
				report_corruption(ctx,
								  psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u",
										   xmax,
										   EpochFromFullTransactionId(ctx->next_fxid),
										   XidFromFullTransactionId(ctx->next_fxid)));
				fatal = true;
				break;
			case XID_PRECEDES_CLUSTERMIN:
				report_corruption(ctx,
								  psprintf("xmax %u precedes oldest valid transaction ID %u:%u",
										   xmax,
										   EpochFromFullTransactionId(ctx->oldest_fxid),
										   XidFromFullTransactionId(ctx->oldest_fxid)));
				fatal = true;
				break;
			case XID_PRECEDES_RELMIN:
				report_corruption(ctx,
								  psprintf("xmax %u precedes relation freeze threshold %u:%u",
										   xmax,
										   EpochFromFullTransactionId(ctx->relfrozenfxid),
										   XidFromFullTransactionId(ctx->relfrozenfxid)));
				fatal = true;
				/* (no break needed: this is the final case of the switch) */
		}
	}

	/*
	 * Cannot process tuple data if tuple header was corrupt, as the offsets
	 * within the page cannot be trusted, leaving too much risk of reading
	 * garbage if we continue.
	 *
	 * We also cannot process the tuple if the xmin or xmax were invalid
	 * relative to relfrozenxid or relminmxid, as clog entries for the xids
	 * may already be gone.
	 */
	if (fatal)
		return;

	/*
	 * Check various forms of tuple header corruption.  If the header is too
	 * corrupt to continue checking, or if the tuple is not visible to anyone,
	 * we cannot continue with other checks.
	 */
	if (!check_tuple_header_and_visibilty(ctx->tuphdr, ctx))
		return;

	/*
	 * The tuple is visible, so it must be compatible with the current version
	 * of the relation descriptor.  It might have fewer columns than are
	 * present in the relation descriptor, but it cannot have more.
	 */
	if (RelationGetDescr(ctx->rel)->natts < ctx->natts)
	{
		report_corruption(ctx,
						  psprintf("number of attributes %u exceeds maximum expected for table %u",
								   ctx->natts,
								   RelationGetDescr(ctx->rel)->natts));
		return;
	}

	/*
	 * Check each attribute unless we hit corruption that confuses what to do
	 * next, at which point we abort further attribute checks for this tuple.
	 * Note that we don't abort for all types of corruption, only for those
	 * types where we don't know how to continue.
	 */
	ctx->offset = 0;
	for (ctx->attnum = 0; ctx->attnum < ctx->natts; ctx->attnum++)
		if (!check_tuple_attribute(ctx))
			break;				/* cannot continue */
}

/*
 * Convert a TransactionId into a FullTransactionId using our cached values of
 * the valid transaction ID range.  It is the caller's responsibility to have
 * already updated the cached values, if necessary.
 */
static FullTransactionId
FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
{
	uint32		epoch;

	/* Special xids (invalid, bootstrap, frozen) always map to epoch 0 */
	if (!TransactionIdIsNormal(xid))
		return FullTransactionIdFromEpochAndXid(0, xid);
	epoch = EpochFromFullTransactionId(ctx->next_fxid);

	/*
	 * A normal xid numerically greater than the cached next_xid cannot
	 * belong to the current epoch; it must have originated in the previous
	 * one.
	 */
	if (xid > ctx->next_xid)
		epoch--;
	return FullTransactionIdFromEpochAndXid(epoch, xid);
}

/*
 * Update our cached range of valid transaction IDs.
 */
static void
update_cached_xid_range(HeapCheckContext *ctx)
{
	/* Make cached copies */
	LWLockAcquire(XidGenLock, LW_SHARED);
	ctx->next_fxid = ShmemVariableCache->nextXid;
	ctx->oldest_xid = ShmemVariableCache->oldestXid;
	LWLockRelease(XidGenLock);

	/* And compute alternate versions of the same */
	ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx);
	ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid);
}

/*
 * Update our cached range of valid multitransaction IDs.
+ */ +static void +update_cached_mxid_range(HeapCheckContext *ctx) +{ + ReadMultiXactIdRange(&ctx->oldest_mxact, &ctx->next_mxact); +} + +/* + * Return whether the given FullTransactionId is within our cached valid + * transaction ID range. + */ +static inline bool +fxid_in_cached_range(FullTransactionId fxid, const HeapCheckContext *ctx) +{ + return (FullTransactionIdPrecedesOrEquals(ctx->oldest_fxid, fxid) && + FullTransactionIdPrecedes(fxid, ctx->next_fxid)); +} + +/* + * Checks wheter a multitransaction ID is in the cached valid range, returning + * the nature of the range violation, if any. + */ +static XidBoundsViolation +check_mxid_in_range(MultiXactId mxid, HeapCheckContext *ctx) +{ + if (!TransactionIdIsValid(mxid)) + return XID_INVALID; + if (MultiXactIdPrecedes(mxid, ctx->relminmxid)) + return XID_PRECEDES_RELMIN; + if (MultiXactIdPrecedes(mxid, ctx->oldest_mxact)) + return XID_PRECEDES_CLUSTERMIN; + if (MultiXactIdPrecedesOrEquals(ctx->next_mxact, mxid)) + return XID_IN_FUTURE; + return XID_BOUNDS_OK; +} + +/* + * Checks whether the given mxid is valid to appear in the heap being checked, + * returning the nature of the range violation, if any. + * + * This function attempts to return quickly by caching the known valid mxid + * range in ctx. Callers should already have performed the initial setup of + * the cache prior to the first call to this function. + */ +static XidBoundsViolation +check_mxid_valid_in_rel(MultiXactId mxid, HeapCheckContext *ctx) +{ + XidBoundsViolation result; + + result = check_mxid_in_range(mxid, ctx); + if (result == XID_BOUNDS_OK) + return XID_BOUNDS_OK; + + /* The range may have advanced. Recheck. */ + update_cached_mxid_range(ctx); + return check_mxid_in_range(mxid, ctx); +} + +/* + * Checks whether the given transaction ID is (or was recently) valid to appear + * in the heap being checked, or whether it is too old or too new to appear in + * the relation, returning information about the nature of the bounds violation. 
+ * + * We cache the range of valid transaction IDs. If xid is in that range, we + * conclude that it is valid, even though concurrent changes to the table might + * invalidate it under certain corrupt conditions. (For example, if the table + * contains corrupt all-frozen bits, a concurrent vacuum might skip the page(s) + * containing the xid and then truncate clog and advance the relfrozenxid + * beyond xid.) Reporting the xid as valid under such conditions seems + * acceptable, since if we had checked it earlier in our scan it would have + * truly been valid at that time. + * + * If the status argument is not NULL, and if and only if the transaction ID + * appears to be valid in this relation, clog will be consulted and the commit + * status argument will be set with the status of the transaction ID. + */ +static XidBoundsViolation +get_xid_status(TransactionId xid, HeapCheckContext *ctx, + XidCommitStatus *status) +{ + XidBoundsViolation result; + FullTransactionId fxid; + FullTransactionId clog_horizon; + + /* Quick check for special xids */ + if (!TransactionIdIsValid(xid)) + result = XID_INVALID; + else if (xid == BootstrapTransactionId || xid == FrozenTransactionId) + result = XID_BOUNDS_OK; + else + { + /* Check if the xid is within bounds */ + fxid = FullTransactionIdFromXidAndCtx(xid, ctx); + if (!fxid_in_cached_range(fxid, ctx)) + { + /* + * We may have been checking against stale values. Update the + * cached range to be sure, and since we relied on the cached + * range when we performed the full xid conversion, reconvert. 
+ */ + update_cached_xid_range(ctx); + fxid = FullTransactionIdFromXidAndCtx(xid, ctx); + } + + if (FullTransactionIdPrecedesOrEquals(ctx->next_fxid, fxid)) + result = XID_IN_FUTURE; + else if (FullTransactionIdPrecedes(fxid, ctx->oldest_fxid)) + result = XID_PRECEDES_CLUSTERMIN; + else if (FullTransactionIdPrecedes(fxid, ctx->relfrozenfxid)) + result = XID_PRECEDES_RELMIN; + else + result = XID_BOUNDS_OK; + } + + /* + * Early return if the caller does not request clog checking, or if the + * xid is already known to be out of bounds. We dare not check clog for + * out of bounds transaction IDs. + */ + if (status == NULL || result != XID_BOUNDS_OK) + return result; + + /* Early return if we just checked this xid in a prior call */ + if (xid == ctx->cached_xid) + { + *status = ctx->cached_status; + return result; + } + + *status = XID_COMMITTED; + LWLockAcquire(XactTruncationLock, LW_SHARED); + clog_horizon = + FullTransactionIdFromXidAndCtx(ShmemVariableCache->oldestClogXid, + ctx); + if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid)) + { + if (TransactionIdIsCurrentTransactionId(xid)) + *status = XID_IN_PROGRESS; + else if (TransactionIdDidCommit(xid)) + *status = XID_COMMITTED; + else if (TransactionIdDidAbort(xid)) + *status = XID_ABORTED; + else + *status = XID_IN_PROGRESS; + } + LWLockRelease(XactTruncationLock); + ctx->cached_xid = xid; + ctx->cached_status = *status; + return result; +} |