diff options
author | Michael Paquier <michael@paquier.xyz> | 2020-11-04 10:21:46 +0900 |
---|---|---|
committer | Michael Paquier <michael@paquier.xyz> | 2020-11-04 10:21:46 +0900 |
commit | e152506adef4bc503ea7b8ebb4fedc0b8eebda81 (patch) | |
tree | 550e61c934bb4e562ec5b946baed8a5ab03d8fc5 /src/backend | |
parent | 90851d1d26f54ccb4d7b1bc49449138113d6ec83 (diff) | |
download | postgresql-e152506adef4bc503ea7b8ebb4fedc0b8eebda81.tar.gz postgresql-e152506adef4bc503ea7b8ebb4fedc0b8eebda81.zip |
Revert pg_relation_check_pages()
This reverts the following set of commits, following complaints about
the lack of portability of the central part of the code in bufmgr.c as
well as the use of partition mapping locks during page reads:
c780a7a9
f2b88396
b787d4ce
ce7f772c
60a51c6b
Per discussion with Andres Freund, Robert Haas and myself.
Bump catalog version.
Discussion: https://postgr.es/m/20201029181729.2nrub47u7yqncsv7@alap3.anarazel.de
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/catalog/system_views.sql | 9 | ||||
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 92 | ||||
-rw-r--r-- | src/backend/utils/adt/Makefile | 1 | ||||
-rw-r--r-- | src/backend/utils/adt/pagefuncs.c | 229 |
4 files changed, 0 insertions, 331 deletions
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 5171ea05c7e..2e4aa1c4b66 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1303,14 +1303,6 @@ LANGUAGE INTERNAL STRICT VOLATILE AS 'pg_create_logical_replication_slot'; -CREATE OR REPLACE FUNCTION pg_relation_check_pages( - IN relation regclass, IN fork text DEFAULT NULL, - OUT path text, OUT failed_block_num bigint) -RETURNS SETOF record -LANGUAGE internal -VOLATILE PARALLEL RESTRICTED -AS 'pg_relation_check_pages'; - CREATE OR REPLACE FUNCTION make_interval(years int4 DEFAULT 0, months int4 DEFAULT 0, weeks int4 DEFAULT 0, days int4 DEFAULT 0, hours int4 DEFAULT 0, mins int4 DEFAULT 0, @@ -1455,7 +1447,6 @@ AS 'unicode_is_normalized'; -- can later change who can access these functions, or leave them as only -- available to superuser / cluster owner, if they choose. -- -REVOKE EXECUTE ON FUNCTION pg_relation_check_pages(regclass, text) FROM public; REVOKE EXECUTE ON FUNCTION pg_start_backup(text, boolean, boolean) FROM public; REVOKE EXECUTE ON FUNCTION pg_stop_backup() FROM public; REVOKE EXECUTE ON FUNCTION pg_stop_backup(boolean, boolean) FROM public; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 0adf04814cd..ad0d1a9abc0 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -4585,95 +4585,3 @@ TestForOldSnapshot_impl(Snapshot snapshot, Relation relation) (errcode(ERRCODE_SNAPSHOT_TOO_OLD), errmsg("snapshot too old"))); } - - -/* - * CheckBuffer - * - * Check the state of a buffer without loading it into the shared buffers. To - * avoid torn pages and possible false positives when reading data, a shared - * LWLock is taken on the target buffer pool partition mapping, and we check - * if the page is in shared buffers or not. An I/O lock is taken on the block - * to prevent any concurrent activity from happening. - * - * If the page is found as dirty in the shared buffers, it is ignored as - * it will be flushed to disk either before the end of the next checkpoint - * or during recovery in the event of an unsafe shutdown. - * - * If the page is found in the shared buffers but is not dirty, we still - * check the state of its data on disk, as it could be possible that the - * page stayed in shared buffers for a rather long time while the on-disk - * data got corrupted. - * - * If the page is not found in shared buffers, the block is read from disk - * while holding the buffer pool partition mapping LWLock. - * - * The page data is stored in a private memory area local to this function - * while running the checks. - */ -bool -CheckBuffer(SMgrRelation smgr, ForkNumber forknum, BlockNumber blkno) -{ - char buffer[BLCKSZ]; - BufferTag buf_tag; /* identity of requested block */ - uint32 buf_hash; /* hash value for buf_tag */ - LWLock *partLock; /* buffer partition lock for the buffer */ - BufferDesc *bufdesc; - int buf_id; - - Assert(smgrexists(smgr, forknum)); - - /* create a tag so we can look after the buffer */ - INIT_BUFFERTAG(buf_tag, smgr->smgr_rnode.node, forknum, blkno); - - /* determine its hash code and partition lock ID */ - buf_hash = BufTableHashCode(&buf_tag); - partLock = BufMappingPartitionLock(buf_hash); - - /* see if the block is in the buffer pool or not */ - LWLockAcquire(partLock, LW_SHARED); - buf_id = BufTableLookup(&buf_tag, buf_hash); - if (buf_id >= 0) - { - uint32 buf_state; - - /* - * Found it. Now, retrieve its state to know what to do with it, and - * release the pin immediately. We do so to limit overhead as much as - * possible. We keep the shared LWLock on the target buffer mapping - * partition for now, so this buffer cannot be evicted, and we acquire - * an I/O Lock on the buffer as we may need to read its contents from - * disk. - */ - bufdesc = GetBufferDescriptor(buf_id); - - LWLockAcquire(BufferDescriptorGetIOLock(bufdesc), LW_SHARED); - buf_state = LockBufHdr(bufdesc); - UnlockBufHdr(bufdesc, buf_state); - - /* If the page is dirty or invalid, skip it */ - if ((buf_state & BM_DIRTY) != 0 || (buf_state & BM_TAG_VALID) == 0) - { - LWLockRelease(BufferDescriptorGetIOLock(bufdesc)); - LWLockRelease(partLock); - return true; - } - - /* Read the buffer from disk, with the I/O lock still held */ - smgrread(smgr, forknum, blkno, buffer); - LWLockRelease(BufferDescriptorGetIOLock(bufdesc)); - } - else - { - /* - * Simply read the buffer. There's no risk of modification on it as - * we are holding the buffer pool partition mapping lock. - */ - smgrread(smgr, forknum, blkno, buffer); - } - - /* buffer lookup done, so now do its check */ - LWLockRelease(partLock); - - return PageIsVerifiedExtended(buffer, blkno, PIV_REPORT_STAT); -} diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index e2279af1e52..b4d55e849b3 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -69,7 +69,6 @@ OBJS = \ oid.o \ oracle_compat.o \ orderedsetaggs.o \ - pagefuncs.o \ partitionfuncs.o \ pg_locale.o \ pg_lsn.o \ diff --git a/src/backend/utils/adt/pagefuncs.c b/src/backend/utils/adt/pagefuncs.c deleted file mode 100644 index 368ada515cf..00000000000 --- a/src/backend/utils/adt/pagefuncs.c +++ /dev/null @@ -1,229 +0,0 @@ -/*------------------------------------------------------------------------- - * - * pagefuncs.c - * Functions for features related to relation pages. - * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * src/backend/utils/adt/pagefuncs.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "access/relation.h" -#include "funcapi.h" -#include "miscadmin.h" -#include "storage/bufmgr.h" -#include "storage/lmgr.h" -#include "storage/smgr.h" -#include "utils/builtins.h" -#include "utils/syscache.h" - -static void check_one_relation(TupleDesc tupdesc, Tuplestorestate *tupstore, - Oid relid, ForkNumber single_forknum); -static void check_relation_fork(TupleDesc tupdesc, Tuplestorestate *tupstore, - Relation relation, ForkNumber forknum); - -/* - * callback arguments for check_pages_error_callback() - */ -typedef struct CheckPagesErrorInfo -{ - char *path; - BlockNumber blkno; -} CheckPagesErrorInfo; - -/* - * Error callback specific to check_relation_fork(). - */ -static void -check_pages_error_callback(void *arg) -{ - CheckPagesErrorInfo *errinfo = (CheckPagesErrorInfo *) arg; - - errcontext("while checking page %u of path %s", - errinfo->blkno, errinfo->path); -} - -/* - * pg_relation_check_pages - * - * Check the state of all the pages for one or more fork types in the given - * relation. - */ -Datum -pg_relation_check_pages(PG_FUNCTION_ARGS) -{ - Oid relid; - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; - TupleDesc tupdesc; - Tuplestorestate *tupstore; - MemoryContext per_query_ctx; - MemoryContext oldcontext; - ForkNumber forknum; - - /* Switch into long-lived context to construct returned data structures */ - per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; - oldcontext = MemoryContextSwitchTo(per_query_ctx); - - /* Build a tuple descriptor for our result type */ - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - elog(ERROR, "return type must be a row type"); - - tupstore = tuplestore_begin_heap(true, false, work_mem); - rsinfo->returnMode = SFRM_Materialize; - rsinfo->setResult = tupstore; - rsinfo->setDesc = tupdesc; - - MemoryContextSwitchTo(oldcontext); - - /* handle arguments */ - if (PG_ARGISNULL(0)) - { - /* Just leave if nothing is defined */ - PG_RETURN_VOID(); - } - - /* By default all the forks of a relation are checked */ - if (PG_ARGISNULL(1)) - forknum = InvalidForkNumber; - else - { - const char *forkname = TextDatumGetCString(PG_GETARG_TEXT_PP(1)); - - forknum = forkname_to_number(forkname); - } - - relid = PG_GETARG_OID(0); - - check_one_relation(tupdesc, tupstore, relid, forknum); - tuplestore_donestoring(tupstore); - - return (Datum) 0; -} - -/* - * Perform the check on a single relation, possibly filtered with a single - * fork. This function will check if the given relation exists or not, as - * a relation could be dropped after checking for the list of relations and - * before getting here, and we don't want to error out in this case. - */ -static void -check_one_relation(TupleDesc tupdesc, Tuplestorestate *tupstore, - Oid relid, ForkNumber single_forknum) -{ - Relation relation; - ForkNumber forknum; - - /* Check if relation exists. leaving if there is no such relation */ - if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid))) - return; - - relation = relation_open(relid, AccessShareLock); - - /* - * Sanity checks, returning no results if not supported. Temporary - * relations and relations without storage are out of scope. - */ - if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind) || - relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP) - { - relation_close(relation, AccessShareLock); - return; - } - - RelationOpenSmgr(relation); - - for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - { - if (single_forknum != InvalidForkNumber && single_forknum != forknum) - continue; - - if (smgrexists(relation->rd_smgr, forknum)) - check_relation_fork(tupdesc, tupstore, relation, forknum); - } - - relation_close(relation, AccessShareLock); -} - -/* - * For a given relation and fork, do the real work of iterating over all pages - * and doing the check. Caller must hold an AccessShareLock lock on the given - * relation. - */ -static void -check_relation_fork(TupleDesc tupdesc, Tuplestorestate *tupstore, - Relation relation, ForkNumber forknum) -{ - BlockNumber blkno, - nblocks; - SMgrRelation smgr = relation->rd_smgr; - char *path; - CheckPagesErrorInfo errinfo; - ErrorContextCallback errcallback; - - /* Number of output arguments in the SRF */ -#define PG_CHECK_RELATION_COLS 2 - - Assert(CheckRelationLockedByMe(relation, AccessShareLock, true)); - - /* - * We remember the number of blocks here. Since caller must hold a lock - * on the relation, we know that it won't be truncated while we are - * iterating over the blocks. Any block added after this function started - * will not be checked. - */ - nblocks = RelationGetNumberOfBlocksInFork(relation, forknum); - - path = relpathbackend(smgr->smgr_rnode.node, - smgr->smgr_rnode.backend, - forknum); - - /* - * Error context to print some information about blocks and relations - * impacted by corruptions. - */ - errinfo.path = pstrdup(path); - errinfo.blkno = 0; - errcallback.callback = check_pages_error_callback; - errcallback.arg = (void *) &errinfo; - errcallback.previous = error_context_stack; - error_context_stack = &errcallback; - - for (blkno = 0; blkno < nblocks; blkno++) - { - Datum values[PG_CHECK_RELATION_COLS]; - bool nulls[PG_CHECK_RELATION_COLS]; - int i = 0; - - /* Update block number for the error context */ - errinfo.blkno = blkno; - - CHECK_FOR_INTERRUPTS(); - - /* Check the given buffer */ - if (CheckBuffer(smgr, forknum, blkno)) - continue; - - memset(values, 0, sizeof(values)); - memset(nulls, 0, sizeof(nulls)); - - values[i++] = CStringGetTextDatum(path); - values[i++] = Int64GetDatum((int64) blkno); - - Assert(i == PG_CHECK_RELATION_COLS); - - /* Save the corrupted blocks in the tuplestore. */ - tuplestore_putvalues(tupstore, tupdesc, values, nulls); - } - - pfree(path); - - /* Pop the error context stack */ - error_context_stack = errcallback.previous; -} |