diff options
73 files changed, 305 insertions, 720 deletions
diff --git a/contrib/pg_buffercache/Makefile b/contrib/pg_buffercache/Makefile index 4d88eba5e3a..d74b3e853c6 100644 --- a/contrib/pg_buffercache/Makefile +++ b/contrib/pg_buffercache/Makefile @@ -6,8 +6,8 @@ OBJS = \ pg_buffercache_pages.o EXTENSION = pg_buffercache -DATA = pg_buffercache--1.0--1.1.sql pg_buffercache--1.1--1.2.sql pg_buffercache--1.2.sql \ - pg_buffercache--1.2--1.3.sql pg_buffercache--1.3--1.4.sql +DATA = pg_buffercache--1.2.sql pg_buffercache--1.2--1.3.sql \ + pg_buffercache--1.1--1.2.sql pg_buffercache--1.0--1.1.sql PGFILEDESC = "pg_buffercache - monitoring of shared buffer cache in real-time" REGRESS = pg_buffercache diff --git a/contrib/pg_buffercache/meson.build b/contrib/pg_buffercache/meson.build index 9f34754d6c3..2c69eae3ea2 100644 --- a/contrib/pg_buffercache/meson.build +++ b/contrib/pg_buffercache/meson.build @@ -11,7 +11,6 @@ install_data( 'pg_buffercache--1.1--1.2.sql', 'pg_buffercache--1.2--1.3.sql', 'pg_buffercache--1.2.sql', - 'pg_buffercache--1.3--1.4.sql', 'pg_buffercache.control', kwargs: contrib_data_args, ) diff --git a/contrib/pg_buffercache/pg_buffercache--1.3--1.4.sql b/contrib/pg_buffercache/pg_buffercache--1.3--1.4.sql deleted file mode 100644 index 50956b195a8..00000000000 --- a/contrib/pg_buffercache/pg_buffercache--1.3--1.4.sql +++ /dev/null @@ -1,30 +0,0 @@ -/* contrib/pg_buffercache/pg_buffercache--1.3--1.4.sql */ - --- complain if script is sourced in psql, rather than via ALTER EXTENSION -\echo Use "ALTER EXTENSION pg_buffercache UPDATE TO '1.4'" to load this file. \quit - -/* First we have to remove them from the extension */ -ALTER EXTENSION pg_buffercache DROP VIEW pg_buffercache; -ALTER EXTENSION pg_buffercache DROP FUNCTION pg_buffercache_pages(); - -/* Then we can drop them */ -DROP VIEW pg_buffercache; -DROP FUNCTION pg_buffercache_pages(); - -/* Now redefine */ -CREATE FUNCTION pg_buffercache_pages() -RETURNS SETOF RECORD -AS 'MODULE_PATHNAME', 'pg_buffercache_pages_v1_4' -LANGUAGE C PARALLEL SAFE; - -CREATE VIEW pg_buffercache AS - SELECT P.* FROM pg_buffercache_pages() AS P - (bufferid integer, relfilenode int8, reltablespace oid, reldatabase oid, - relforknumber int2, relblocknumber int8, isdirty bool, usagecount int2, - pinning_backends int4); - --- Don't want these to be available to public. -REVOKE ALL ON FUNCTION pg_buffercache_pages() FROM PUBLIC; -REVOKE ALL ON pg_buffercache FROM PUBLIC; -GRANT EXECUTE ON FUNCTION pg_buffercache_pages() TO pg_monitor; -GRANT SELECT ON pg_buffercache TO pg_monitor; diff --git a/contrib/pg_buffercache/pg_buffercache.control b/contrib/pg_buffercache/pg_buffercache.control index a82ae5f9bb5..8c060ae9abf 100644 --- a/contrib/pg_buffercache/pg_buffercache.control +++ b/contrib/pg_buffercache/pg_buffercache.control @@ -1,5 +1,5 @@ # pg_buffercache extension comment = 'examine the shared buffer cache' -default_version = '1.4' +default_version = '1.3' module_pathname = '$libdir/pg_buffercache' relocatable = true diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c index a45f240499a..c5754ea9fa5 100644 --- a/contrib/pg_buffercache/pg_buffercache_pages.c +++ b/contrib/pg_buffercache/pg_buffercache_pages.c @@ -59,10 +59,9 @@ typedef struct * relation node/tablespace/database/blocknum and dirty indicator. */ PG_FUNCTION_INFO_V1(pg_buffercache_pages); -PG_FUNCTION_INFO_V1(pg_buffercache_pages_v1_4); -static Datum -pg_buffercache_pages_internal(PG_FUNCTION_ARGS, Oid rfn_typid) +Datum +pg_buffercache_pages(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; @@ -104,7 +103,7 @@ pg_buffercache_pages_internal(PG_FUNCTION_ARGS, Oid rfn_typid) TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid", INT4OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode", - rfn_typid, -1, 0); + OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase", @@ -210,24 +209,7 @@ pg_buffercache_pages_internal(PG_FUNCTION_ARGS, Oid rfn_typid) } else { - if (rfn_typid == INT8OID) - values[1] = - Int64GetDatum((int64) fctx->record[i].relfilenumber); - else - { - Assert(rfn_typid == OIDOID); - - if (fctx->record[i].relfilenumber > OID_MAX) - ereport(ERROR, - errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("relfilenode %llu is too large to be represented as an OID", - (unsigned long long) fctx->record[i].relfilenumber), - errhint("Upgrade the extension using ALTER EXTENSION pg_buffercache UPDATE")); - - values[1] = - ObjectIdGetDatum((Oid) fctx->record[i].relfilenumber); - } - + values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber); nulls[1] = false; values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace); nulls[2] = false; @@ -255,16 +237,3 @@ pg_buffercache_pages_internal(PG_FUNCTION_ARGS, Oid rfn_typid) else SRF_RETURN_DONE(funcctx); } - -/* entry point for old extension version */ -Datum -pg_buffercache_pages(PG_FUNCTION_ARGS) -{ - return pg_buffercache_pages_internal(fcinfo, OIDOID); -} - -Datum -pg_buffercache_pages_v1_4(PG_FUNCTION_ARGS) -{ - return pg_buffercache_pages_internal(fcinfo, INT8OID); -} diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index 31caf101a94..c8d673a20e3 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -345,7 +345,7 @@ apw_load_buffers(void) { unsigned forknum; - if (fscanf(file, "%u,%u," UINT64_FORMAT ",%u,%u\n", &blkinfo[i].database, + if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database, &blkinfo[i].tablespace, &blkinfo[i].filenumber, &forknum, &blkinfo[i].blocknum) != 5) ereport(ERROR, @@ -669,7 +669,7 @@ apw_dump_now(bool is_bgworker, bool dump_unlogged) { CHECK_FOR_INTERRUPTS(); - ret = fprintf(file, "%u,%u," UINT64_FORMAT ",%u,%u\n", + ret = fprintf(file, "%u,%u,%u,%u,%u\n", block_info_array[i].database, block_info_array[i].tablespace, block_info_array[i].filenumber, diff --git a/contrib/pg_walinspect/expected/pg_walinspect.out b/contrib/pg_walinspect/expected/pg_walinspect.out index e9b06ed3af2..a1ee743457c 100644 --- a/contrib/pg_walinspect/expected/pg_walinspect.out +++ b/contrib/pg_walinspect/expected/pg_walinspect.out @@ -54,9 +54,9 @@ SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1'); -- =================================================================== -- Test for filtering out WAL records of a particular table -- =================================================================== -SELECT relfilenode AS sample_tbl_relfilenode FROM pg_class WHERE relname = 'sample_tbl' \gset +SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2') - WHERE block_ref LIKE concat('%', :'sample_tbl_relfilenode', '%') AND resource_manager = 'Heap'; + WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap'; ok ---- t diff --git a/contrib/pg_walinspect/sql/pg_walinspect.sql b/contrib/pg_walinspect/sql/pg_walinspect.sql index 53938341257..1b265ea7bcc 100644 --- a/contrib/pg_walinspect/sql/pg_walinspect.sql +++ b/contrib/pg_walinspect/sql/pg_walinspect.sql @@ -39,10 +39,10 @@ SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1'); -- Test for filtering out WAL records of a particular table -- =================================================================== -SELECT relfilenode AS sample_tbl_relfilenode FROM pg_class WHERE relname = 'sample_tbl' \gset +SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2') - WHERE block_ref LIKE concat('%', :'sample_tbl_relfilenode', '%') AND resource_manager = 'Heap'; + WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap'; -- =================================================================== -- Test for filtering out WAL records based on resource_manager and diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 40d4e9c35e6..00f833d210e 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1984,7 +1984,7 @@ SCRAM-SHA-256$<replaceable><iteration count></replaceable>:<replaceable>&l <row> <entry role="catalog_table_entry"><para role="column_definition"> - <structfield>relfilenode</structfield> <type>int8</type> + <structfield>relfilenode</structfield> <type>oid</type> </para> <para> Name of the on-disk file of this relation; zero means this diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index d8718ed61e6..546213fa931 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -25210,11 +25210,6 @@ SELECT collation for ('foo' COLLATE "de_DE"); <entry><type>timestamp with time zone</type></entry> </row> - <row> - <entry><structfield>next_relfilenumber</structfield></entry> - <entry><type>timestamp with time zone</type></entry> - </row> - </tbody> </tgroup> </table> diff --git a/doc/src/sgml/pgbuffercache.sgml b/doc/src/sgml/pgbuffercache.sgml index e2222655804..a06fd3e26de 100644 --- a/doc/src/sgml/pgbuffercache.sgml +++ b/doc/src/sgml/pgbuffercache.sgml @@ -62,7 +62,7 @@ <row> <entry role="catalog_table_entry"><para role="column_definition"> - <structfield>relfilenode</structfield> <type>int8</type> + <structfield>relfilenode</structfield> <type>oid</type> (references <link linkend="catalog-pg-class"><structname>pg_class</structname></link>.<structfield>relfilenode</structfield>) </para> <para> diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index d9e9b0f43ee..e5b9f3f1ffa 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -217,12 +217,11 @@ with the suffix <literal>_init</literal> (see <xref linkend="storage-init"/>). <caution> <para> -Note that a table's filenode will normally be different than the OID. For -system tables, the initial filenode will be equal to the table OID, but it will -be different if the table has ever been subjected to a rewriting operation, -such as <command>TRUNCATE</command>, <command>REINDEX</command>, -<command>CLUSTER</command> or some forms of <command>ALTER TABLE</command>. -For user tables, even the initial filenode will be different than the table OID. +Note that while a table's filenode often matches its OID, this is +<emphasis>not</emphasis> necessarily the case; some operations, like +<command>TRUNCATE</command>, <command>REINDEX</command>, <command>CLUSTER</command> and some forms +of <command>ALTER TABLE</command>, can change the filenode while preserving the OID. +Avoid assuming that filenode and table OID are the same. Also, for certain system catalogs including <structname>pg_class</structname> itself, <structname>pg_class</structname>.<structfield>relfilenode</structfield> contains zero. The actual filenode number of these catalogs is stored in a lower-level data diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index bc093f2a887..41b92115bff 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -100,7 +100,7 @@ ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rda BlockNumber blknum; BufferGetTag(buffer, &locator, &forknum, &blknum); - elog(ERROR, "failed to add item to index page in %u/%u/" UINT64_FORMAT, + elog(ERROR, "failed to add item to index page in %u/%u/%u", locator.spcOid, locator.dbOid, locator.relNumber); } } diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index d1c8a24d66f..7dd3c1d500f 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -26,7 +26,7 @@ out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec) static void out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec) { - appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; blk %u; latestRemovedXid %u:%u", + appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u:%u", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber, xlrec->block, EpochFromFullTransactionId(xlrec->latestRemovedFullXid), diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 70bd49303a9..923d3bc43df 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -169,7 +169,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record) { xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec; - appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; tid %u/%u", + appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u", xlrec->target_locator.spcOid, xlrec->target_locator.dbOid, xlrec->target_locator.relNumber, diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index 6192a7ba841..4843cd530df 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -100,7 +100,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) { xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec; - appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; latestRemovedXid %u:%u", + appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u:%u", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber, EpochFromFullTransactionId(xlrec->latestRemovedFullXid), diff --git a/src/backend/access/rmgrdesc/seqdesc.c b/src/backend/access/rmgrdesc/seqdesc.c index df72caf1768..b3845f93bff 100644 --- a/src/backend/access/rmgrdesc/seqdesc.c +++ b/src/backend/access/rmgrdesc/seqdesc.c @@ -25,7 +25,7 @@ seq_desc(StringInfo buf, XLogReaderState *record) xl_seq_rec *xlrec = (xl_seq_rec *) rec; if (info == XLOG_SEQ_LOG) - appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT, + appendStringInfo(buf, "rel %u/%u/%u", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber); } diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index b1cede74cf4..3fd7185f217 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -45,8 +45,8 @@ xlog_desc(StringInfo buf, XLogReaderState *record) CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "redo %X/%X; " - "tli %u; prev tli %u; fpw %s; xid %u:%u; relfilenumber " UINT64_FORMAT "; oid %u; " - "multi %u; offset %u; oldest xid %u in DB %u; oldest multi %u in DB %u; " + "tli %u; prev tli %u; fpw %s; xid %u:%u; oid %u; multi %u; offset %u; " + "oldest xid %u in DB %u; oldest multi %u in DB %u; " "oldest/newest commit timestamp xid: %u/%u; " "oldest running xid %u; %s", LSN_FORMAT_ARGS(checkpoint->redo), @@ -55,7 +55,6 @@ xlog_desc(StringInfo buf, XLogReaderState *record) checkpoint->fullPageWrites ? "true" : "false", EpochFromFullTransactionId(checkpoint->nextXid), XidFromFullTransactionId(checkpoint->nextXid), - checkpoint->nextRelFileNumber, checkpoint->nextOid, checkpoint->nextMulti, checkpoint->nextMultiOffset, @@ -75,13 +74,6 @@ xlog_desc(StringInfo buf, XLogReaderState *record) memcpy(&nextOid, rec, sizeof(Oid)); appendStringInfo(buf, "%u", nextOid); } - else if (info == XLOG_NEXT_RELFILENUMBER) - { - RelFileNumber nextRelFileNumber; - - memcpy(&nextRelFileNumber, rec, sizeof(RelFileNumber)); - appendStringInfo(buf, UINT64_FORMAT, nextRelFileNumber); - } else if (info == XLOG_RESTORE_POINT) { xl_restore_point *xlrec = (xl_restore_point *) rec; @@ -177,9 +169,6 @@ xlog_identify(uint8 info) case XLOG_NEXTOID: id = "NEXTOID"; break; - case XLOG_NEXT_RELFILENUMBER: - id = "NEXT_RELFILENUMBER"; - break; case XLOG_SWITCH: id = "SWITCH"; break; @@ -248,7 +237,7 @@ XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty, appendStringInfoChar(buf, ' '); appendStringInfo(buf, - "blkref #%d: rel %u/%u/" UINT64_FORMAT " fork %s blk %u", + "blkref #%d: rel %u/%u/%u fork %s blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, forkNames[forknum], @@ -308,7 +297,7 @@ XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty, if (forknum != MAIN_FORKNUM) { appendStringInfo(buf, - ", blkref #%d: rel %u/%u/" UINT64_FORMAT " fork %s blk %u", + ", blkref #%d: rel %u/%u/%u fork %s blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, forkNames[forknum], @@ -317,7 +306,7 @@ XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty, else { appendStringInfo(buf, - ", blkref #%d: rel %u/%u/" UINT64_FORMAT " blk %u", + ", blkref #%d: rel %u/%u/%u blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blk); diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README index 91c2578f7a9..72af6560600 100644 --- a/src/backend/access/transam/README +++ b/src/backend/access/transam/README @@ -692,9 +692,8 @@ by having database restart search for files that don't have any committed entry in pg_class, but that currently isn't done because of the possibility of deleting data that is useful for forensic analysis of the crash. Orphan files are harmless --- at worst they waste a bit of disk space --- -because the relfilenumber counter is monotonically increasing. The maximum -value is 2^56-1, and there is no provision for wraparound. Thus, on-disk -collisions aren't possible. +because we check for on-disk collisions when allocating new relfilenumber +OIDs. So cleaning up isn't really necessary. 3. Deleting a table, which requires an unlink() that could fail. diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 89da2f44590..849a7ce9d6d 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -13,16 +13,12 @@ #include "postgres.h" -#include <unistd.h> - #include "access/clog.h" #include "access/commit_ts.h" #include "access/subtrans.h" #include "access/transam.h" #include "access/xact.h" #include "access/xlogutils.h" -#include "catalog/pg_class.h" -#include "catalog/pg_tablespace.h" #include "commands/dbcommands.h" #include "miscadmin.h" #include "postmaster/autovacuum.h" @@ -34,15 +30,6 @@ /* Number of OIDs to prefetch (preallocate) per XLOG write */ #define VAR_OID_PREFETCH 8192 -/* Number of RelFileNumbers to be logged per XLOG write */ -#define VAR_RELNUMBER_PER_XLOG 512 - -/* - * Need to log more if remaining logged RelFileNumbers are less than the - * threshold. Valid range could be between 0 to VAR_RELNUMBER_PER_XLOG - 1. - */ -#define VAR_RELNUMBER_NEW_XLOG_THRESHOLD 256 - /* pointer to "variable cache" in shared memory (set up by shmem.c) */ VariableCache ShmemVariableCache = NULL; @@ -534,7 +521,8 @@ ForceTransactionIdLimitUpdate(void) * wide, counter wraparound will occur eventually, and therefore it is unwise * to assume they are unique unless precautions are taken to make them so. * Hence, this routine should generally not be used directly. The only direct - * caller should be GetNewOidWithIndex() in catalog/catalog.c. + * callers should be GetNewOidWithIndex() and GetNewRelFileNumber() in + * catalog/catalog.c. */ Oid GetNewObjectId(void) @@ -625,199 +613,6 @@ SetNextObjectId(Oid nextOid) } /* - * GetNewRelFileNumber - * - * Similar to GetNewObjectId but instead of new Oid it generates new - * relfilenumber. - */ -RelFileNumber -GetNewRelFileNumber(Oid reltablespace, char relpersistence) -{ - RelFileNumber result; - RelFileNumber nextRelFileNumber, - loggedRelFileNumber, - flushedRelFileNumber; - - StaticAssertStmt(VAR_RELNUMBER_NEW_XLOG_THRESHOLD < VAR_RELNUMBER_PER_XLOG, - "VAR_RELNUMBER_NEW_XLOG_THRESHOLD must be smaller than VAR_RELNUMBER_PER_XLOG"); - - /* safety check, we should never get this far in a HS standby */ - if (RecoveryInProgress()) - elog(ERROR, "cannot assign RelFileNumber during recovery"); - - if (IsBinaryUpgrade) - elog(ERROR, "cannot assign RelFileNumber during binary upgrade"); - - LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); - - nextRelFileNumber = ShmemVariableCache->nextRelFileNumber; - loggedRelFileNumber = ShmemVariableCache->loggedRelFileNumber; - flushedRelFileNumber = ShmemVariableCache->flushedRelFileNumber; - - Assert(nextRelFileNumber <= flushedRelFileNumber); - Assert(flushedRelFileNumber <= loggedRelFileNumber); - - /* check for the wraparound for the relfilenumber counter */ - if (unlikely(nextRelFileNumber > MAX_RELFILENUMBER)) - elog(ERROR, "relfilenumber is too large"); - - /* - * If the remaining logged relfilenumbers values are less than the - * threshold value then log more. Ideally, we can wait until all - * relfilenumbers have been consumed before logging more. Nevertheless, if - * we do that, we must immediately flush the logged wal record because we - * want to ensure that the nextRelFileNumber is always larger than any - * relfilenumber already in use on disk. And, to maintain that invariant, - * we must make sure that the record we log reaches the disk before any new - * files are created with the newly logged range. - * - * So in order to avoid flushing the wal immediately, we always log before - * consuming all the relfilenumber, and now we only have to flush the newly - * logged relfilenumber wal before consuming the relfilenumber from this - * new range. By the time we need to flush this wal, hopefully, those have - * already been flushed with some other XLogFlush operation. - */ - if (loggedRelFileNumber - nextRelFileNumber <= - VAR_RELNUMBER_NEW_XLOG_THRESHOLD) - { - XLogRecPtr recptr; - - loggedRelFileNumber = loggedRelFileNumber + VAR_RELNUMBER_PER_XLOG; - recptr = LogNextRelFileNumber(loggedRelFileNumber); - ShmemVariableCache->loggedRelFileNumber = loggedRelFileNumber; - - /* remember for the future flush */ - ShmemVariableCache->loggedRelFileNumberRecPtr = recptr; - } - - /* - * If the nextRelFileNumber is already reached to the already flushed - * relfilenumber then flush the WAL for previously logged relfilenumber. - */ - if (nextRelFileNumber >= flushedRelFileNumber) - { - XLogFlush(ShmemVariableCache->loggedRelFileNumberRecPtr); - ShmemVariableCache->flushedRelFileNumber = loggedRelFileNumber; - } - - result = ShmemVariableCache->nextRelFileNumber; - - /* we should never be using any relfilenumber outside the flushed range */ - Assert(result <= ShmemVariableCache->flushedRelFileNumber); - - (ShmemVariableCache->nextRelFileNumber)++; - - LWLockRelease(RelFileNumberGenLock); - - /* - * Because the RelFileNumber counter only ever increases and never wraps - * around, it should be impossible for the newly-allocated RelFileNumber to - * already be in use. But, if Asserts are enabled, double check that - * there's no main-fork relation file with the new RelFileNumber already on - * disk. - */ -#ifdef USE_ASSERT_CHECKING - { - RelFileLocatorBackend rlocator; - char *rpath; - BackendId backend; - - switch (relpersistence) - { - case RELPERSISTENCE_TEMP: - backend = BackendIdForTempRelations(); - break; - case RELPERSISTENCE_UNLOGGED: - case RELPERSISTENCE_PERMANENT: - backend = InvalidBackendId; - break; - default: - elog(ERROR, "invalid relpersistence: %c", relpersistence); - } - - /* this logic should match RelationInitPhysicalAddr */ - rlocator.locator.spcOid = - reltablespace ? reltablespace : MyDatabaseTableSpace; - rlocator.locator.dbOid = (reltablespace == GLOBALTABLESPACE_OID) ? - InvalidOid : MyDatabaseId; - rlocator.locator.relNumber = result; - - /* - * The relpath will vary based on the backend ID, so we must - * initialize that properly here to make sure that any collisions - * based on filename are properly detected. - */ - rlocator.backend = backend; - - /* check for existing file of same name. */ - rpath = relpath(rlocator, MAIN_FORKNUM); - Assert(access(rpath, F_OK) != 0); - } -#endif - - return result; -} - -/* - * SetNextRelFileNumber - * - * This may only be called during pg_upgrade; it advances the RelFileNumber - * counter to the specified value if the current value is smaller than the - * input value. - */ -void -SetNextRelFileNumber(RelFileNumber relnumber) -{ - /* safety check, we should never get this far in a HS standby */ - if (RecoveryInProgress()) - elog(ERROR, "cannot set RelFileNumber during recovery"); - - if (!IsBinaryUpgrade) - elog(ERROR, "RelFileNumber can be set only during binary upgrade"); - - LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); - - /* - * If previous assigned value of the nextRelFileNumber is already higher - * than the current value then nothing to be done. This is possible - * because during upgrade the objects are not created in relfilenumber - * order. - */ - if (relnumber <= ShmemVariableCache->nextRelFileNumber) - { - LWLockRelease(RelFileNumberGenLock); - return; - } - - /* - * If the new relfilenumber to be set is greater than or equal to already - * flushed relfilenumber then log more and flush immediately. - * - * (This is less efficient than GetNewRelFileNumber, which arranges to - * log some new relfilenumbers before the old batch is exhausted in the - * hope that a flush will happen in the background before any values are - * needed from the new batch. However, since this is only used during - * binary upgrade, it shouldn't really matter.) - */ - if (relnumber >= ShmemVariableCache->flushedRelFileNumber) - { - RelFileNumber newlogrelnum; - - newlogrelnum = relnumber + VAR_RELNUMBER_PER_XLOG; - XLogFlush(LogNextRelFileNumber(newlogrelnum)); - - /* we have flushed whatever we have logged so no pending flush */ - ShmemVariableCache->loggedRelFileNumber = newlogrelnum; - ShmemVariableCache->flushedRelFileNumber = newlogrelnum; - ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr; - } - - ShmemVariableCache->nextRelFileNumber = relnumber; - - LWLockRelease(RelFileNumberGenLock); -} - -/* * StopGeneratingPinnedObjectIds * * This is called once during initdb to force the OID counter up to diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 35fac945cb1..00992a11b9e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4712,7 +4712,6 @@ BootStrapXLOG(void) checkPoint.nextXid = FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); checkPoint.nextOid = FirstGenbkiObjectId; - checkPoint.nextRelFileNumber = FirstNormalRelFileNumber; checkPoint.nextMulti = FirstMultiXactId; checkPoint.nextMultiOffset = 0; checkPoint.oldestXid = FirstNormalTransactionId; @@ -4726,11 +4725,7 @@ BootStrapXLOG(void) ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; - ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber; ShmemVariableCache->oidCount = 0; - ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr; MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); @@ -5196,10 +5191,7 @@ StartupXLOG(void) /* initialize shared memory variables from the checkpoint record */ ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; - ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber; ShmemVariableCache->oidCount = 0; - ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber; MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); @@ -6671,24 +6663,6 @@ CreateCheckPoint(int flags) checkPoint.nextOid += ShmemVariableCache->oidCount; LWLockRelease(OidGenLock); - /* - * If this is a shutdown checkpoint then we can safely start allocating - * relfilenumber from the nextRelFileNumber value after the restart because - * no one one else can use the relfilenumber beyond that number before the - * shutdown. OTOH, if it is a normal checkpoint then if there is a crash - * after this point then we might end up reusing the same relfilenumbers - * after the restart so we need to set the nextRelFileNumber to the already - * logged relfilenumber as no one will use number beyond this limit without - * logging again. - */ - LWLockAcquire(RelFileNumberGenLock, LW_SHARED); - if (shutdown) - checkPoint.nextRelFileNumber = ShmemVariableCache->nextRelFileNumber; - else - checkPoint.nextRelFileNumber = ShmemVariableCache->loggedRelFileNumber; - - LWLockRelease(RelFileNumberGenLock); - MultiXactGetCheckptMulti(shutdown, &checkPoint.nextMulti, &checkPoint.nextMultiOffset, @@ -7567,24 +7541,6 @@ XLogPutNextOid(Oid nextOid) } /* - * Similar to the XLogPutNextOid but instead of writing NEXTOID log record it - * writes a NEXT_RELFILENUMBER log record. It also returns the XLogRecPtr of - * the currently logged relfilenumber record, so that the caller can flush it - * at the appropriate time. - */ -XLogRecPtr -LogNextRelFileNumber(RelFileNumber nextrelnumber) -{ - XLogRecPtr recptr; - - XLogBeginInsert(); - XLogRegisterData((char *) (&nextrelnumber), sizeof(RelFileNumber)); - recptr = XLogInsert(RM_XLOG_ID, XLOG_NEXT_RELFILENUMBER); - - return recptr; -} - -/* * Write an XLOG SWITCH record. * * Here we just blindly issue an XLogInsert request for the record. @@ -7799,17 +7755,6 @@ xlog_redo(XLogReaderState *record) ShmemVariableCache->oidCount = 0; LWLockRelease(OidGenLock); } - if (info == XLOG_NEXT_RELFILENUMBER) - { - RelFileNumber nextRelFileNumber; - - memcpy(&nextRelFileNumber, XLogRecGetData(record), sizeof(RelFileNumber)); - LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); - ShmemVariableCache->nextRelFileNumber = nextRelFileNumber; - ShmemVariableCache->loggedRelFileNumber = nextRelFileNumber; - ShmemVariableCache->flushedRelFileNumber = nextRelFileNumber; - LWLockRelease(RelFileNumberGenLock); - } else if (info == XLOG_CHECKPOINT_SHUTDOWN) { CheckPoint checkPoint; @@ -7824,11 +7769,6 @@ xlog_redo(XLogReaderState *record) ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; LWLockRelease(OidGenLock); - LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE); - ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber; - ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber; - LWLockRelease(RelFileNumberGenLock); MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); diff --git a/src/backend/access/transam/xlogprefetcher.c b/src/backend/access/transam/xlogprefetcher.c index cea38eccea6..8f5d4253320 100644 --- a/src/backend/access/transam/xlogprefetcher.c +++ b/src/backend/access/transam/xlogprefetcher.c @@ -613,7 +613,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " until %X/%X is replayed, which creates the relation", + "suppressing prefetch in relation %u/%u/%u until %X/%X is replayed, which creates the relation", xlrec->rlocator.spcOid, xlrec->rlocator.dbOid, xlrec->rlocator.relNumber, @@ -636,7 +636,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " from block %u until %X/%X is replayed, which truncates the relation", + "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, which truncates the relation", xlrec->rlocator.spcOid, xlrec->rlocator.dbOid, xlrec->rlocator.relNumber, @@ -735,7 +735,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing all prefetch in relation %u/%u/" UINT64_FORMAT " until %X/%X is replayed, because the relation does not exist on disk", + "suppressing all prefetch in relation %u/%u/%u until %X/%X is replayed, because the relation does not exist on disk", reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.relNumber, @@ -756,7 +756,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " from block %u until %X/%X is replayed, because the relation is too small", + "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, because the relation is too small", reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.relNumber, @@ -795,7 +795,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) * truncated beneath our feet? */ elog(ERROR, - "could not prefetch relation %u/%u/" UINT64_FORMAT " block %u", + "could not prefetch relation %u/%u/%u block %u", reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.relNumber, @@ -934,7 +934,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator, { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "prefetch of %u/%u/" UINT64_FORMAT " block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)", + "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)", rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno, LSN_FORMAT_ARGS(filter->filter_until_replayed), filter->filter_from_block); @@ -950,7 +950,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator, { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "prefetch of %u/%u/" UINT64_FORMAT " block %u suppressed; filtering until LSN %X/%X is replayed (whole database)", + "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (whole database)", rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno, LSN_FORMAT_ARGS(filter->filter_until_replayed)); #endif diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 1026ce5dcf7..b41e6826643 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -2228,14 +2228,14 @@ xlog_block_info(StringInfo buf, XLogReaderState *record) continue; if (forknum != MAIN_FORKNUM) - appendStringInfo(buf, "; blkref #%d: rel %u/%u/" UINT64_FORMAT ", fork %u, blk %u", + appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, forknum, blk); else - appendStringInfo(buf, "; blkref #%d: rel %u/%u/" UINT64_FORMAT ", blk %u", + appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u", block_id, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, @@ -2433,7 +2433,7 @@ verifyBackupPageConsistency(XLogReaderState *record) if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0) { elog(FATAL, - "inconsistent page found, rel %u/%u/" UINT64_FORMAT ", forknum %u, blkno %u", + "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u", rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, forknum, blkno); } diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index ffda2c210b7..563cba258dd 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -619,17 +619,17 @@ CreateFakeRelcacheEntry(RelFileLocator rlocator) rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT; /* We don't know the name of the relation; use relfilenumber instead */ - sprintf(RelationGetRelationName(rel), UINT64_FORMAT, rlocator.relNumber); + sprintf(RelationGetRelationName(rel), "%u", rlocator.relNumber); /* * We set up the lockRelId in case anything tries to lock the dummy - * relation. Note that this is fairly bogus since relNumber are completely + * relation. Note that this is fairly bogus since relNumber may be * different from the relation's OID. It shouldn't really matter though. * In recovery, we are running by ourselves and can't have any lock * conflicts. While syncing, we already hold AccessExclusiveLock. */ rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid; - rel->rd_lockInfo.lockRelId.relId = (Oid) rlocator.relNumber; + rel->rd_lockInfo.lockRelId.relId = rlocator.relNumber; rel->rd_smgr = NULL; diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index 1434bcdd85c..411cac9be3f 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -1246,7 +1246,7 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly, if (relForkNum != INIT_FORKNUM) { char initForkFile[MAXPGPATH]; - char relNumber[RELNUMBERCHARS + 1]; + char relNumber[OIDCHARS + 1]; /* * If any other type of fork, check if there is an init fork diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index a9bd8ae008e..2abd6b007a2 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -483,6 +483,101 @@ GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn) } /* + * GetNewRelFileNumber + * Generate a new relfilenumber that is unique within the + * database of the given tablespace. + * + * If the relfilenumber will also be used as the relation's OID, pass the + * opened pg_class catalog, and this routine will guarantee that the result + * is also an unused OID within pg_class. If the result is to be used only + * as a relfilenumber for an existing relation, pass NULL for pg_class. + * + * As with GetNewOidWithIndex(), there is some theoretical risk of a race + * condition, but it doesn't seem worth worrying about. + * + * Note: we don't support using this in bootstrap mode. All relations + * created by bootstrap have preassigned OIDs, so there's no need. + */ +RelFileNumber +GetNewRelFileNumber(Oid reltablespace, Relation pg_class, char relpersistence) +{ + RelFileLocatorBackend rlocator; + char *rpath; + bool collides; + BackendId backend; + + /* + * If we ever get here during pg_upgrade, there's something wrong; all + * relfilenumber assignments during a binary-upgrade run should be + * determined by commands in the dump script. + */ + Assert(!IsBinaryUpgrade); + + switch (relpersistence) + { + case RELPERSISTENCE_TEMP: + backend = BackendIdForTempRelations(); + break; + case RELPERSISTENCE_UNLOGGED: + case RELPERSISTENCE_PERMANENT: + backend = InvalidBackendId; + break; + default: + elog(ERROR, "invalid relpersistence: %c", relpersistence); + return InvalidRelFileNumber; /* placate compiler */ + } + + /* This logic should match RelationInitPhysicalAddr */ + rlocator.locator.spcOid = reltablespace ? reltablespace : MyDatabaseTableSpace; + rlocator.locator.dbOid = + (rlocator.locator.spcOid == GLOBALTABLESPACE_OID) ? + InvalidOid : MyDatabaseId; + + /* + * The relpath will vary based on the backend ID, so we must initialize + * that properly here to make sure that any collisions based on filename + * are properly detected. + */ + rlocator.backend = backend; + + do + { + CHECK_FOR_INTERRUPTS(); + + /* Generate the OID */ + if (pg_class) + rlocator.locator.relNumber = GetNewOidWithIndex(pg_class, ClassOidIndexId, + Anum_pg_class_oid); + else + rlocator.locator.relNumber = GetNewObjectId(); + + /* Check for existing file of same name */ + rpath = relpath(rlocator, MAIN_FORKNUM); + + if (access(rpath, F_OK) == 0) + { + /* definite collision */ + collides = true; + } + else + { + /* + * Here we have a little bit of a dilemma: if errno is something + * other than ENOENT, should we declare a collision and loop? In + * practice it seems best to go ahead regardless of the errno. If + * there is a colliding file we will get an smgr failure when we + * attempt to create the new relation file. + */ + collides = false; + } + + pfree(rpath); + } while (collides); + + return rlocator.locator.relNumber; +} + +/* * SQL callable interface for GetNewOidWithIndex(). Outside of initdb's * direct insertions into catalog tables, and recovering from corruption, this * should rarely be needed. diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index de01da198e3..9a80ccdccdf 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -341,19 +341,11 @@ heap_create(const char *relname, else { /* - * If relfilenumber is unspecified by the caller then allocate a new - * one, except for system tables, for which we make the initial - * relfilenumber the same as the table OID. See the comments for - * FirstNormalRelFileNumber for an explanation of why we do this. + * If relfilenumber is unspecified by the caller then create storage + * with oid same as relid. */ if (!RelFileNumberIsValid(relfilenumber)) - { - if (relid < FirstNormalObjectId) - relfilenumber = relid; - else - relfilenumber = GetNewRelFileNumber(reltablespace, - relpersistence); - } + relfilenumber = relid; } /* @@ -909,7 +901,7 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_reloftype - 1] = ObjectIdGetDatum(rd_rel->reloftype); values[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(rd_rel->relowner); values[Anum_pg_class_relam - 1] = ObjectIdGetDatum(rd_rel->relam); - values[Anum_pg_class_relfilenode - 1] = Int64GetDatum(rd_rel->relfilenode); + values[Anum_pg_class_relfilenode - 1] = ObjectIdGetDatum(rd_rel->relfilenode); values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace); values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages); values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples); @@ -1181,7 +1173,12 @@ heap_create_with_catalog(const char *relname, if (shared_relation && reltablespace != GLOBALTABLESPACE_OID) elog(ERROR, "shared relations must be placed in pg_global tablespace"); - /* Allocate an OID for the relation, unless we were told what to use. */ + /* + * Allocate an OID for the relation, unless we were told what to use. + * + * The OID will be the relfilenumber as well, so make sure it doesn't + * collide with either pg_class OIDs or existing physical files. + */ if (!OidIsValid(relid)) { /* Use binary-upgrade override for pg_class.oid and relfilenumber */ @@ -1235,8 +1232,8 @@ heap_create_with_catalog(const char *relname, } if (!OidIsValid(relid)) - relid = GetNewOidWithIndex(pg_class_desc, ClassOidIndexId, - Anum_pg_class_oid); + relid = GetNewRelFileNumber(reltablespace, pg_class_desc, + relpersistence); } /* diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 1fd40c42a3a..61f1d3926a9 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -898,7 +898,12 @@ index_create(Relation heapRelation, collationObjectId, classObjectId); - /* Allocate an OID for the index, unless we were told what to use. */ + /* + * Allocate an OID for the index, unless we were told what to use. + * + * The OID will be the relfilenumber as well, so make sure it doesn't + * collide with either pg_class OIDs or existing physical files. + */ if (!OidIsValid(indexRelationId)) { /* Use binary-upgrade override for pg_class.oid and relfilenumber */ @@ -930,8 +935,8 @@ index_create(Relation heapRelation, } else { - indexRelationId = GetNewOidWithIndex(pg_class, ClassOidIndexId, - Anum_pg_class_oid); + indexRelationId = + GetNewRelFileNumber(tableSpaceId, pg_class, relpersistence); } } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 38bbe325509..d708af19ed2 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -968,10 +968,6 @@ smgr_redo(XLogReaderState *record) xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record); SMgrRelation reln; - if (xlrec->rlocator.relNumber > ShmemVariableCache->nextRelFileNumber) - elog(ERROR, "unexpected relnumber " UINT64_FORMAT " is bigger than nextRelFileNumber " UINT64_FORMAT, - xlrec->rlocator.relNumber, ShmemVariableCache->nextRelFileNumber); - reln = smgropen(xlrec->rlocator, InvalidBackendId); smgrcreate(reln, xlrec->forkNum, true); } @@ -985,10 +981,6 @@ smgr_redo(XLogReaderState *record) int nforks = 0; bool need_fsm_vacuum = false; - if (xlrec->rlocator.relNumber > ShmemVariableCache->nextRelFileNumber) - elog(ERROR, "unexpected relnumber " UINT64_FORMAT " is bigger than nextRelFileNumber " UINT64_FORMAT, - xlrec->rlocator.relNumber, ShmemVariableCache->nextRelFileNumber); - reln = smgropen(xlrec->rlocator, InvalidBackendId); /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 1b8e6d57294..7d8a75d23c2 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -14375,14 +14375,10 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) } /* - * Generate a new relfilenumber. We cannot reuse the old relfilenumber - * because of the possibility that that relation will be moved back to the - * original tablespace before the next checkpoint. At that point, the - * first segment of the main fork won't have been unlinked yet, and an - * attempt to create new relation storage with that same relfilenumber - * will fail. - */ - newrelfilenumber = GetNewRelFileNumber(newTableSpace, + * Relfilenumbers are not unique in databases across tablespaces, so we + * need to allocate a new one in the new tablespace. + */ + newrelfilenumber = GetNewRelFileNumber(newTableSpace, NULL, rel->rd_rel->relpersistence); /* Open old and new relation */ diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index cdd7986dfc3..b69ff37dbbd 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -267,7 +267,7 @@ CreateTableSpace(CreateTableSpaceStmt *stmt) * parts. */ if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + - OIDCHARS + 1 + RELNUMBERCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH) + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("tablespace location \"%s\" is too long", diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index f1fa8945135..81b8c184a90 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -961,12 +961,12 @@ _read${n}(void) print $off "\tWRITE_UINT_FIELD($f);\n"; print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read; } - elsif ($t eq 'uint64' || $t eq 'RelFileNumber') + elsif ($t eq 'uint64') { print $off "\tWRITE_UINT64_FIELD($f);\n"; print $rff "\tREAD_UINT64_FIELD($f);\n" unless $no_read; } - elsif ($t eq 'Oid') + elsif ($t eq 'Oid' || $t eq 'RelFileNumber') { print $off "\tWRITE_OID_FIELD($f);\n"; print $rff "\tREAD_OID_FIELD($f);\n" unless $no_read; diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index cdf19a9c204..2cc0ac9eb09 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -154,7 +154,6 @@ xlog_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) break; case XLOG_NOOP: case XLOG_NEXTOID: - case XLOG_NEXT_RELFILENUMBER: case XLOG_SWITCH: case XLOG_BACKUP_END: case XLOG_PARAMETER_CHANGE: diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index a0f398b458a..03d9c9c86a2 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -4932,7 +4932,7 @@ DisplayMapping(HTAB *tuplecid_data) hash_seq_init(&hstat, tuplecid_data); while ((ent = (ReorderBufferTupleCidEnt *) hash_seq_search(&hstat)) != NULL) { - elog(DEBUG3, "mapping: node: %u/%u/" UINT64_FORMAT " tid: %u/%u cmin: %u, cmax: %u", + elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u", ent->key.rlocator.dbOid, ent->key.rlocator.spcOid, ent->key.rlocator.relNumber, diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index c3faa68126a..647c458b52e 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -31,7 +31,7 @@ static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, typedef struct { - RelFileNumber relnumber; /* hash key */ + Oid reloid; /* hash key */ } unlogged_relation_entry; /* @@ -184,10 +184,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) * need to be reset. Otherwise, this cleanup operation would be * O(n^2). */ - ctl.keysize = sizeof(RelFileNumber); + ctl.keysize = sizeof(Oid); ctl.entrysize = sizeof(unlogged_relation_entry); ctl.hcxt = CurrentMemoryContext; - hash = hash_create("unlogged relation RelFileNumbers", 32, &ctl, + hash = hash_create("unlogged relation OIDs", 32, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); /* Scan the directory. */ @@ -208,10 +208,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) continue; /* - * Put the RELFILENUMBER portion of the name into the hash table, - * if it isn't already. + * Put the OID portion of the name into the hash table, if it + * isn't already. */ - ent.relnumber = atorelnumber(de->d_name); + ent.reloid = atooid(de->d_name); (void) hash_search(hash, &ent, HASH_ENTER, NULL); } @@ -248,10 +248,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) continue; /* - * See whether the RELFILENUMBER portion of the name shows up in - * the hash table. If so, nuke it! + * See whether the OID portion of the name shows up in the hash + * table. If so, nuke it! */ - ent.relnumber = atorelnumber(de->d_name); + ent.reloid = atooid(de->d_name); if (hash_search(hash, &ent, HASH_FIND, NULL)) { snprintf(rm_path, sizeof(rm_path), "%s/%s", @@ -286,7 +286,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) { ForkNumber forkNum; int relnumchars; - char relnumbuf[RELNUMBERCHARS + 1]; + char relnumbuf[OIDCHARS + 1]; char srcpath[MAXPGPATH * 2]; char dstpath[MAXPGPATH]; @@ -329,7 +329,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) { ForkNumber forkNum; int relnumchars; - char relnumbuf[RELNUMBERCHARS + 1]; + char relnumbuf[OIDCHARS + 1]; char mainpath[MAXPGPATH]; /* Skip anything that doesn't look like a relation data file. */ @@ -372,8 +372,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) * for a non-temporary relation and false otherwise. * * NB: If this function returns true, the caller is entitled to assume that - * *relnumchars has been set to a value no more than RELNUMBERCHARS, and thus - * that a buffer of RELNUMBERCHARS+1 characters is sufficient to hold the + * *relnumchars has been set to a value no more than OIDCHARS, and thus + * that a buffer of OIDCHARS+1 characters is sufficient to hold the * RelFileNumber portion of the filename. This is critical to protect against * a possible buffer overrun. */ @@ -386,7 +386,7 @@ parse_filename_for_nontemp_relation(const char *name, int *relnumchars, /* Look for a non-empty string of digits (that isn't too long). */ for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) ; - if (pos == 0 || pos > RELNUMBERCHARS) + if (pos == 0 || pos > OIDCHARS) return false; *relnumchars = pos; diff --git a/src/backend/storage/freespace/fsmpage.c b/src/backend/storage/freespace/fsmpage.c index 1210be7470b..af4dab7d2c7 100644 --- a/src/backend/storage/freespace/fsmpage.c +++ b/src/backend/storage/freespace/fsmpage.c @@ -273,7 +273,7 @@ restart: BlockNumber blknum; BufferGetTag(buf, &rlocator, &forknum, &blknum); - elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/" UINT64_FORMAT, + elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u", blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber); /* make sure we hold an exclusive lock */ diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index 3c5d0410795..6c7cf6c2956 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -53,4 +53,3 @@ XactTruncationLock 44 # 45 was XactTruncationLock until removal of BackendRandomLock WrapLimitsVacuumLock 46 NotifyQueueTailLock 47 -RelFileNumberGenLock 48
\ No newline at end of file diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index bed47f07d73..a515bb36ac1 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -257,13 +257,6 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) * next checkpoint, we prevent reassignment of the relfilenumber until it's * safe, because relfilenumber assignment skips over any existing file. * - * XXX. Although all of this was true when relfilenumbers were 32 bits wide, - * they are now 56 bits wide and do not wrap around, so in the future we can - * change the code to immediately unlink the first segment of the relation - * along with all the others. We still do reuse relfilenumbers when createdb() - * is performed using the file-copy method or during movedb(), but the scenario - * described above can only happen when creating a new relation. - * * We do not need to go through this dance for temp relations, though, because * we never make WAL entries for temp rels, and so a temp rel poses no threat * to the health of a regular rel that has taken over its relfilenumber. diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index ed46ac3f44e..c1a5febcbfd 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -154,7 +154,7 @@ smgropen(RelFileLocator rlocator, BackendId backend) /* First time through: initialize the hash table */ HASHCTL ctl; - ctl.keysize = SizeOfRelFileLocatorBackend; + ctl.keysize = sizeof(RelFileLocatorBackend); ctl.entrysize = sizeof(SMgrRelationData); SMgrRelationHash = hash_create("smgr relation table", 400, &ctl, HASH_ELEM | HASH_BLOBS); diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 9f70f3526c9..34efa121b40 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -878,7 +878,7 @@ pg_relation_filenode(PG_FUNCTION_ARGS) if (!RelFileNumberIsValid(result)) PG_RETURN_NULL(); - PG_RETURN_INT64(result); + PG_RETURN_OID(result); } /* @@ -898,12 +898,9 @@ Datum pg_filenode_relation(PG_FUNCTION_ARGS) { Oid reltablespace = PG_GETARG_OID(0); - RelFileNumber relfilenumber = PG_GETARG_INT64(1); + RelFileNumber relfilenumber = PG_GETARG_OID(1); Oid heaprel; - /* check whether the relfilenumber is within a valid range */ - CHECK_RELFILENUMBER_RANGE(relfilenumber); - /* test needed so RelidByRelfilenumber doesn't misbehave */ if (!RelFileNumberIsValid(relfilenumber)) PG_RETURN_NULL(); diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c index fc2faed9a7d..797f5f539af 100644 --- a/src/backend/utils/adt/pg_upgrade_support.c +++ b/src/backend/utils/adt/pg_upgrade_support.c @@ -17,7 +17,6 @@ #include "catalog/pg_type.h" #include "commands/extension.h" #include "miscadmin.h" -#include "storage/relfilelocator.h" #include "utils/array.h" #include "utils/builtins.h" @@ -99,12 +98,10 @@ binary_upgrade_set_next_heap_pg_class_oid(PG_FUNCTION_ARGS) Datum binary_upgrade_set_next_heap_relfilenode(PG_FUNCTION_ARGS) { - RelFileNumber relfilenumber = PG_GETARG_INT64(0); + RelFileNumber relfilenumber = PG_GETARG_OID(0); CHECK_IS_BINARY_UPGRADE; - CHECK_RELFILENUMBER_RANGE(relfilenumber); binary_upgrade_next_heap_pg_class_relfilenumber = relfilenumber; - SetNextRelFileNumber(relfilenumber + 1); PG_RETURN_VOID(); } @@ -123,12 +120,10 @@ binary_upgrade_set_next_index_pg_class_oid(PG_FUNCTION_ARGS) Datum binary_upgrade_set_next_index_relfilenode(PG_FUNCTION_ARGS) { - RelFileNumber relfilenumber = PG_GETARG_INT64(0); + RelFileNumber relfilenumber = PG_GETARG_OID(0); CHECK_IS_BINARY_UPGRADE; - CHECK_RELFILENUMBER_RANGE(relfilenumber); binary_upgrade_next_index_pg_class_relfilenumber = relfilenumber; - SetNextRelFileNumber(relfilenumber + 1); PG_RETURN_VOID(); } @@ -147,12 +142,10 @@ binary_upgrade_set_next_toast_pg_class_oid(PG_FUNCTION_ARGS) Datum binary_upgrade_set_next_toast_relfilenode(PG_FUNCTION_ARGS) { - RelFileNumber relfilenumber = PG_GETARG_INT64(0); + RelFileNumber relfilenumber = PG_GETARG_OID(0); CHECK_IS_BINARY_UPGRADE; - CHECK_RELFILENUMBER_RANGE(relfilenumber); binary_upgrade_next_toast_pg_class_relfilenumber = relfilenumber; - SetNextRelFileNumber(relfilenumber + 1); PG_RETURN_VOID(); } diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index fecbf06a04d..eb5782f82a4 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -663,9 +663,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) */ RelFileLocatorBackend rlocator; - rlocator.locator.dbOid = msg->sm.dbOid; - rlocator.locator.spcOid = msg->sm.spcOid; - rlocator.locator.relNumber = (((uint64) msg->sm.relNumber_hi) << 32) | msg->sm.relNumber_lo; + rlocator.locator = msg->sm.rlocator; rlocator.backend = (msg->sm.backend_hi << 16) | (int) msg->sm.backend_lo; smgrcloserellocator(rlocator); } @@ -1468,10 +1466,7 @@ CacheInvalidateSmgr(RelFileLocatorBackend rlocator) msg.sm.id = SHAREDINVALSMGR_ID; msg.sm.backend_hi = rlocator.backend >> 16; msg.sm.backend_lo = rlocator.backend & 0xffff; - msg.sm.dbOid = rlocator.locator.dbOid; - msg.sm.spcOid = rlocator.locator.spcOid; - msg.sm.relNumber_hi = rlocator.locator.relNumber >> 32; - msg.sm.relNumber_lo = rlocator.locator.relNumber & 0xffffffff; + msg.sm.rlocator = rlocator.locator; /* check AddCatcacheInvalidationMessage() for an explanation */ VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg)); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 6f4e96dd33b..00dc0f24037 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3712,7 +3712,7 @@ RelationSetNewRelfilenumber(Relation relation, char persistence) { /* Allocate a new relfilenumber */ newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace, - persistence); + NULL, persistence); } else if (relation->rd_rel->relkind == RELKIND_INDEX) { diff --git a/src/backend/utils/cache/relfilenumbermap.c b/src/backend/utils/cache/relfilenumbermap.c index 11427ba97eb..c4245d5ccdd 100644 --- a/src/backend/utils/cache/relfilenumbermap.c +++ b/src/backend/utils/cache/relfilenumbermap.c @@ -88,6 +88,7 @@ static void InitializeRelfilenumberMap(void) { HASHCTL ctl; + int i; /* Make sure we've initialized CacheMemoryContext. */ if (CacheMemoryContext == NULL) @@ -96,20 +97,17 @@ InitializeRelfilenumberMap(void) /* build skey */ MemSet(&relfilenumber_skey, 0, sizeof(relfilenumber_skey)); - fmgr_info_cxt(F_OIDEQ, - &relfilenumber_skey[0].sk_func, - CacheMemoryContext); - relfilenumber_skey[0].sk_strategy = BTEqualStrategyNumber; - relfilenumber_skey[0].sk_subtype = InvalidOid; - relfilenumber_skey[0].sk_collation = InvalidOid; - relfilenumber_skey[0].sk_attno = Anum_pg_class_reltablespace; + for (i = 0; i < 2; i++) + { + fmgr_info_cxt(F_OIDEQ, + &relfilenumber_skey[i].sk_func, + CacheMemoryContext); + relfilenumber_skey[i].sk_strategy = BTEqualStrategyNumber; + relfilenumber_skey[i].sk_subtype = InvalidOid; + relfilenumber_skey[i].sk_collation = InvalidOid; + } - fmgr_info_cxt(F_INT8EQ, - &relfilenumber_skey[1].sk_func, - CacheMemoryContext); - relfilenumber_skey[1].sk_strategy = BTEqualStrategyNumber; - relfilenumber_skey[1].sk_subtype = InvalidOid; - relfilenumber_skey[1].sk_collation = InvalidOid; + relfilenumber_skey[0].sk_attno = Anum_pg_class_reltablespace; relfilenumber_skey[1].sk_attno = Anum_pg_class_relfilenode; /* @@ -198,7 +196,7 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber) /* set scan arguments */ skey[0].sk_argument = ObjectIdGetDatum(reltablespace); - skey[1].sk_argument = Int64GetDatum((int64) relfilenumber); + skey[1].sk_argument = ObjectIdGetDatum(relfilenumber); scandesc = systable_beginscan(relation, ClassTblspcRelfilenodeIndexId, @@ -215,7 +213,7 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber) if (found) elog(ERROR, - "unexpected duplicate for tablespace %u, relfilenumber " UINT64_FORMAT, + "unexpected duplicate for tablespace %u, relfilenumber %u", reltablespace, relfilenumber); found = true; diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index d441cd97e2f..781f8b87580 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -79,8 +79,8 @@ pg_control_system(PG_FUNCTION_ARGS) Datum pg_control_checkpoint(PG_FUNCTION_ARGS) { - Datum values[19]; - bool nulls[19]; + Datum values[18]; + bool nulls[18]; TupleDesc tupdesc; HeapTuple htup; ControlFileData *ControlFile; @@ -129,8 +129,6 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 18, "checkpoint_time", TIMESTAMPTZOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 19, "next_relfilenumber", - INT8OID, -1, 0); tupdesc = BlessTupleDesc(tupdesc); /* Read the control file. */ @@ -204,9 +202,6 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) values[17] = TimestampTzGetDatum(time_t_to_timestamptz(ControlFile->checkPointCopy.time)); nulls[17] = false; - values[18] = Int64GetDatum((int64) ControlFile->checkPointCopy.nextRelFileNumber); - nulls[18] = false; - htup = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(htup)); diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index ddb5ec117f2..324ccf77834 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -485,7 +485,9 @@ main(int argc, char *argv[]) mode = PG_MODE_ENABLE; break; case 'f': - if (!option_parse_relfilenumber(optarg, "-f/--filenode")) + if (!option_parse_int(optarg, "-f/--filenode", 0, + INT_MAX, + NULL)) exit(1); only_filenode = pstrdup(optarg); break; diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 2f0e91fc2f9..c390ec51ce9 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -250,8 +250,6 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint's NextXID: %u:%u\n"), EpochFromFullTransactionId(ControlFile->checkPointCopy.nextXid), XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)); - printf(_("Latest checkpoint's NextRelFileNumber:%llu\n"), - (unsigned long long) ControlFile->checkPointCopy.nextRelFileNumber); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile->checkPointCopy.nextOid); printf(_("Latest checkpoint's NextMultiXactId: %u\n"), diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 9f78971cab5..bd9b066e4eb 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -3184,15 +3184,15 @@ dumpDatabase(Archive *fout) atooid(PQgetvalue(lo_res, i, ii_oid))); oid = atooid(PQgetvalue(lo_res, i, ii_oid)); - relfilenumber = atorelnumber(PQgetvalue(lo_res, i, ii_relfilenode)); + relfilenumber = atooid(PQgetvalue(lo_res, i, ii_relfilenode)); if (oid == LargeObjectRelationId) appendPQExpBuffer(loOutQry, - "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", + "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n", relfilenumber); else if (oid == LargeObjectLOidPNIndexId) appendPQExpBuffer(loOutQry, - "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", + "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", relfilenumber); } @@ -4877,16 +4877,16 @@ binary_upgrade_set_pg_class_oids(Archive *fout, relkind = *PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "relkind")); - relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "relfilenode"))); + relfilenumber = atooid(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "relfilenode"))); toast_oid = atooid(PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "reltoastrelid"))); - toast_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "toast_relfilenode"))); + toast_relfilenumber = atooid(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "toast_relfilenode"))); toast_index_oid = atooid(PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "indexrelid"))); - toast_index_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "toast_index_relfilenode"))); + toast_index_relfilenumber = atooid(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "toast_index_relfilenode"))); appendPQExpBufferStr(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n"); @@ -4904,7 +4904,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, */ if (RelFileNumberIsValid(relfilenumber) && relkind != RELKIND_PARTITIONED_TABLE) appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", + "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n", relfilenumber); /* @@ -4918,7 +4918,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%u'::pg_catalog.oid);\n", toast_oid); appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", + "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('%u'::pg_catalog.oid);\n", toast_relfilenumber); /* every toast table has an index */ @@ -4926,7 +4926,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", toast_index_oid); appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", + "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", toast_index_relfilenumber); } @@ -4939,7 +4939,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", pg_class_oid); appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", + "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", relfilenumber); } diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index 197ec0eac91..269ed6446e6 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -538,7 +538,7 @@ isRelDataFile(const char *path) segNo = 0; matched = false; - nmatch = sscanf(path, "global/" UINT64_FORMAT ".%u", &rlocator.relNumber, &segNo); + nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo); if (nmatch == 1 || nmatch == 2) { rlocator.spcOid = GLOBALTABLESPACE_OID; @@ -547,7 +547,7 @@ isRelDataFile(const char *path) } else { - nmatch = sscanf(path, "base/%u/" UINT64_FORMAT ".%u", + nmatch = sscanf(path, "base/%u/%u.%u", &rlocator.dbOid, &rlocator.relNumber, &segNo); if (nmatch == 2 || nmatch == 3) { @@ -556,7 +556,7 @@ isRelDataFile(const char *path) } else { - nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/" UINT64_FORMAT ".%u", + nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u", &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber, &segNo); if (nmatch == 3 || nmatch == 4) diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index 0c712a62669..f18cf971202 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -527,8 +527,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) relname = PQgetvalue(res, relnum, i_relname); curr->relname = pg_strdup(relname); - curr->relfilenumber = - atorelnumber(PQgetvalue(res, relnum, i_relfilenumber)); + curr->relfilenumber = atooid(PQgetvalue(res, relnum, i_relfilenumber)); curr->tblsp_alloc = false; /* Is the tablespace oid non-default? */ diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 7ab1bcc9c8d..115faa222e3 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -15,8 +15,10 @@ * oids are the same between old and new clusters. This is important * because toast oids are stored as toast pointers in user tables. * - * We control assignments of pg_class.relfilenode because we want the - * filenames to match between the old and new cluster. + * While pg_class.oid and pg_class.relfilenode are initially the same in a + * cluster, they can diverge due to CLUSTER, REINDEX, or VACUUM FULL. We + * control assignments of pg_class.relfilenode because we want the filenames + * to match between the old and new cluster. * * We control assignment of pg_tablespace.oid because we want the oid to match * between the old and new cluster. diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c index 529267d670a..c3f3d6bc0af 100644 --- a/src/bin/pg_upgrade/relfilenumber.c +++ b/src/bin/pg_upgrade/relfilenumber.c @@ -190,14 +190,14 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro else snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno); - snprintf(old_file, sizeof(old_file), "%s%s/%u/" UINT64_FORMAT "%s%s", + snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s", map->old_tablespace, map->old_tablespace_suffix, map->db_oid, map->relfilenumber, type_suffix, extent_suffix); - snprintf(new_file, sizeof(new_file), "%s%s/%u/" UINT64_FORMAT "%s%s", + snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s", map->new_tablespace, map->new_tablespace_suffix, map->db_oid, diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 6fdc7dcf529..9993378ca58 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -884,7 +884,7 @@ main(int argc, char **argv) } break; case 'R': - if (sscanf(optarg, "%u/%u/" UINT64_FORMAT, + if (sscanf(optarg, "%u/%u/%u", &config.filter_by_relation.spcOid, &config.filter_by_relation.dbOid, &config.filter_by_relation.relNumber) != 3 || diff --git a/src/bin/scripts/t/090_reindexdb.pl b/src/bin/scripts/t/090_reindexdb.pl index de5cee6fa08..e706d686e39 100644 --- a/src/bin/scripts/t/090_reindexdb.pl +++ b/src/bin/scripts/t/090_reindexdb.pl @@ -40,7 +40,7 @@ my $toast_index = $node->safe_psql('postgres', # REINDEX operations. A set of relfilenodes is saved from the catalogs # and then compared with pg_class. $node->safe_psql('postgres', - 'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode int8);' + 'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode oid);' ); # Save the relfilenode of a set of toast indexes, one from the catalog # pg_constraint and one from the test table. diff --git a/src/common/relpath.c b/src/common/relpath.c index d0d83e593b5..1b6b620ce83 100644 --- a/src/common/relpath.c +++ b/src/common/relpath.c @@ -149,10 +149,10 @@ GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber, Assert(dbOid == 0); Assert(backendId == InvalidBackendId); if (forkNumber != MAIN_FORKNUM) - path = psprintf("global/" UINT64_FORMAT "_%s", + path = psprintf("global/%u_%s", relNumber, forkNames[forkNumber]); else - path = psprintf("global/" UINT64_FORMAT, relNumber); + path = psprintf("global/%u", relNumber); } else if (spcOid == DEFAULTTABLESPACE_OID) { @@ -160,21 +160,21 @@ GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber, if (backendId == InvalidBackendId) { if (forkNumber != MAIN_FORKNUM) - path = psprintf("base/%u/" UINT64_FORMAT "_%s", + path = psprintf("base/%u/%u_%s", dbOid, relNumber, forkNames[forkNumber]); else - path = psprintf("base/%u/" UINT64_FORMAT, + path = psprintf("base/%u/%u", dbOid, relNumber); } else { if (forkNumber != MAIN_FORKNUM) - path = psprintf("base/%u/t%d_" UINT64_FORMAT "_%s", + path = psprintf("base/%u/t%d_%u_%s", dbOid, backendId, relNumber, forkNames[forkNumber]); else - path = psprintf("base/%u/t%d_" UINT64_FORMAT, + path = psprintf("base/%u/t%d_%u", dbOid, backendId, relNumber); } } @@ -184,24 +184,24 @@ GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber, if (backendId == InvalidBackendId) { if (forkNumber != MAIN_FORKNUM) - path = psprintf("pg_tblspc/%u/%s/%u/" UINT64_FORMAT "_%s", + path = psprintf("pg_tblspc/%u/%s/%u/%u_%s", spcOid, TABLESPACE_VERSION_DIRECTORY, dbOid, relNumber, forkNames[forkNumber]); else - path = psprintf("pg_tblspc/%u/%s/%u/" UINT64_FORMAT, + path = psprintf("pg_tblspc/%u/%s/%u/%u", spcOid, TABLESPACE_VERSION_DIRECTORY, dbOid, relNumber); } else { if (forkNumber != MAIN_FORKNUM) - path = psprintf("pg_tblspc/%u/%s/%u/t%d_" UINT64_FORMAT "_%s", + path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u_%s", spcOid, TABLESPACE_VERSION_DIRECTORY, dbOid, backendId, relNumber, forkNames[forkNumber]); else - path = psprintf("pg_tblspc/%u/%s/%u/t%d_" UINT64_FORMAT, + path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u", spcOid, TABLESPACE_VERSION_DIRECTORY, dbOid, backendId, relNumber); } diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c index 32149d9be47..abea88154ca 100644 --- a/src/fe_utils/option_utils.c +++ b/src/fe_utils/option_utils.c @@ -13,7 +13,6 @@ #include "postgres_fe.h" #include "common/logging.h" -#include "common/relpath.h" #include "common/string.h" #include "fe_utils/option_utils.h" @@ -83,42 +82,3 @@ option_parse_int(const char *optarg, const char *optname, *result = val; return true; } - -/* - * option_parse_relfilenumber - * - * Parse relfilenumber value for an option. If the parsing is successful, - * returns true; if parsing fails, returns false. - */ -bool -option_parse_relfilenumber(const char *optarg, const char *optname) -{ - char *endptr; - uint64 val; - - errno = 0; - val = strtou64(optarg, &endptr, 10); - - /* - * Skip any trailing whitespace; if anything but whitespace remains before - * the terminating character, fail. - */ - while (*endptr != '\0' && isspace((unsigned char) *endptr)) - endptr++; - - if (*endptr != '\0') - { - pg_log_error("invalid value \"%s\" for option %s", - optarg, optname); - return false; - } - - if (val > MAX_RELFILENUMBER) - { - pg_log_error("%s must be in range " UINT64_FORMAT ".." UINT64_FORMAT, - optname, UINT64CONST(0), MAX_RELFILENUMBER); - return false; - } - - return true; -} diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 2aaad2b9d51..775471d2a7d 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -15,7 +15,6 @@ #define TRANSAM_H #include "access/xlogdefs.h" -#include "common/relpath.h" /* ---------------- @@ -197,33 +196,6 @@ FullTransactionIdAdvance(FullTransactionId *dest) #define FirstUnpinnedObjectId 12000 #define FirstNormalObjectId 16384 -/* ---------- - * RelFileNumbers are normally assigned sequentially beginning with - * FirstNormalRelFileNumber, but for system tables the initial RelFileNumber - * is equal to the table OID. This scheme allows pg_upgrade to work: we expect - * that the new cluster will contain only system tables, and that none of those - * will have previously been rewritten, so any RelFileNumber which is in use - * in both the old and new clusters will be used for the same relation in both - * places. - * - * This is important because pg_upgrade can't reactively move conflicting - * relations out of the way. If it tries to set the RelFileNumber for a - * relation to some value that's already in use by a different relation, the - * upgrade will just fail. It's OK if the same RelFileNumber is used for the - * same relation, though, since then nothing needs to be changed. - * ---------- - */ -#define FirstNormalRelFileNumber ((RelFileNumber) 100000) - -#define CHECK_RELFILENUMBER_RANGE(relfilenumber) \ -do { \ - if ((relfilenumber) < 0 || (relfilenumber) > MAX_RELFILENUMBER) \ - ereport(ERROR, \ - errcode(ERRCODE_INVALID_PARAMETER_VALUE), \ - errmsg("relfilenumber %llu is out of range", \ - (unsigned long long) (relfilenumber))); \ -} while (0) - /* * VariableCache is a data structure in shared memory that is used to track * OID and XID assignment state. For largely historical reasons, there is @@ -243,15 +215,6 @@ typedef struct VariableCacheData uint32 oidCount; /* OIDs available before must do XLOG work */ /* - * These fields are protected by RelFileNumberGenLock. - */ - RelFileNumber nextRelFileNumber; /* next relfilenumber to assign */ - RelFileNumber loggedRelFileNumber; /* last logged relfilenumber */ - RelFileNumber flushedRelFileNumber; /* last flushed relfilenumber */ - XLogRecPtr loggedRelFileNumberRecPtr; /* xlog record pointer w.r.t. - * loggedRelFileNumber */ - - /* * These fields are protected by XidGenLock. */ FullTransactionId nextXid; /* next XID to assign */ @@ -330,9 +293,6 @@ extern void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid); extern bool ForceTransactionIdLimitUpdate(void); extern Oid GetNewObjectId(void); -extern RelFileNumber GetNewRelFileNumber(Oid reltablespace, - char relpersistence); -extern void SetNextRelFileNumber(RelFileNumber relnumber); extern void StopGeneratingPinnedObjectIds(void); #ifdef USE_ASSERT_CHECKING diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 53375865dfd..dce265098e3 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -236,7 +236,6 @@ extern void CreateCheckPoint(int flags); extern bool CreateRestartPoint(int flags); extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN); extern void XLogPutNextOid(Oid nextOid); -extern XLogRecPtr LogNextRelFileNumber(RelFileNumber nextrelnumber); extern XLogRecPtr XLogRestorePoint(const char *rpName); extern void UpdateFullPageWrites(void); extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p); diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index b45253045e7..e1c85f98550 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -38,5 +38,8 @@ extern bool IsPinnedObject(Oid classId, Oid objectId); extern Oid GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn); +extern RelFileNumber GetNewRelFileNumber(Oid reltablespace, + Relation pg_class, + char relpersistence); #endif /* CATALOG_H */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 8ba25e4dc8e..95e7c249ed8 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202209271 +#define CATALOG_VERSION_NO 202209261 #endif diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 4768e5ebda5..e1f4eefa220 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -34,13 +34,6 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* oid */ Oid oid; - /* access method; 0 if not a table / index */ - Oid relam BKI_DEFAULT(heap) BKI_LOOKUP_OPT(pg_am); - - /* identifier of physical storage file */ - /* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */ - int64 relfilenode BKI_DEFAULT(0); - /* class name */ NameData relname; @@ -56,6 +49,13 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* class owner */ Oid relowner BKI_DEFAULT(POSTGRES) BKI_LOOKUP(pg_authid); + /* access method; 0 if not a table / index */ + Oid relam BKI_DEFAULT(heap) BKI_LOOKUP_OPT(pg_am); + + /* identifier of physical storage file */ + /* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */ + Oid relfilenode BKI_DEFAULT(0); + /* identifier of table space for relation (0 means default for database) */ Oid reltablespace BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_tablespace); @@ -154,7 +154,7 @@ typedef FormData_pg_class *Form_pg_class; DECLARE_UNIQUE_INDEX_PKEY(pg_class_oid_index, 2662, ClassOidIndexId, on pg_class using btree(oid oid_ops)); DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, ClassNameNspIndexId, on pg_class using btree(relname name_ops, relnamespace oid_ops)); -DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeIndexId, on pg_class using btree(reltablespace oid_ops, relfilenode int8_ops)); +DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeIndexId, on pg_class using btree(reltablespace oid_ops, relfilenode oid_ops)); #ifdef EXPOSE_TO_CLIENT_CODE diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 096222f1fe5..06368e23667 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -41,7 +41,6 @@ typedef struct CheckPoint * timeline (equals ThisTimeLineID otherwise) */ bool fullPageWrites; /* current full_page_writes */ FullTransactionId nextXid; /* next free transaction ID */ - RelFileNumber nextRelFileNumber; /* next relfilenumber */ Oid nextOid; /* next free OID */ MultiXactId nextMulti; /* next free MultiXactId */ MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ @@ -79,7 +78,6 @@ typedef struct CheckPoint #define XLOG_FPI 0xB0 /* 0xC0 is used in Postgres 9.5-11 */ #define XLOG_OVERWRITE_CONTRECORD 0xD0 -#define XLOG_NEXT_RELFILENUMBER 0xE0 /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8b72f8a215b..a07e737a337 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -7329,11 +7329,11 @@ proname => 'pg_indexes_size', provolatile => 'v', prorettype => 'int8', proargtypes => 'regclass', prosrc => 'pg_indexes_size' }, { oid => '2999', descr => 'filenode identifier of relation', - proname => 'pg_relation_filenode', provolatile => 's', prorettype => 'int8', + proname => 'pg_relation_filenode', provolatile => 's', prorettype => 'oid', proargtypes => 'regclass', prosrc => 'pg_relation_filenode' }, { oid => '3454', descr => 'relation OID for filenode and tablespace', proname => 'pg_filenode_relation', provolatile => 's', - prorettype => 'regclass', proargtypes => 'oid int8', + prorettype => 'regclass', proargtypes => 'oid oid', prosrc => 'pg_filenode_relation' }, { oid => '3034', descr => 'file path of relation', proname => 'pg_relation_filepath', provolatile => 's', prorettype => 'text', @@ -11125,15 +11125,15 @@ prosrc => 'binary_upgrade_set_missing_value' }, { oid => '4545', descr => 'for use by pg_upgrade', proname => 'binary_upgrade_set_next_heap_relfilenode', provolatile => 'v', - proparallel => 'u', prorettype => 'void', proargtypes => 'int8', + proparallel => 'u', prorettype => 'void', proargtypes => 'oid', prosrc => 'binary_upgrade_set_next_heap_relfilenode' }, { oid => '4546', descr => 'for use by pg_upgrade', proname => 'binary_upgrade_set_next_index_relfilenode', provolatile => 'v', - proparallel => 'u', prorettype => 'void', proargtypes => 'int8', + proparallel => 'u', prorettype => 'void', proargtypes => 'oid', prosrc => 'binary_upgrade_set_next_index_relfilenode' }, { oid => '4547', descr => 'for use by pg_upgrade', proname => 'binary_upgrade_set_next_toast_relfilenode', provolatile => 'v', - proparallel => 'u', prorettype => 'void', proargtypes => 'int8', + proparallel => 'u', prorettype => 'void', proargtypes => 'oid', prosrc => 'binary_upgrade_set_next_toast_relfilenode' }, { oid => '4548', descr => 'for use by pg_upgrade', proname => 'binary_upgrade_set_next_pg_tablespace_oid', provolatile => 'v', diff --git a/src/include/common/relpath.h b/src/include/common/relpath.h index 2d3b52fe0b8..4bbd94393c8 100644 --- a/src/include/common/relpath.h +++ b/src/include/common/relpath.h @@ -22,12 +22,10 @@ /* * RelFileNumber data type identifies the specific relation file name. */ -typedef uint64 RelFileNumber; -#define InvalidRelFileNumber ((RelFileNumber) 0) +typedef Oid RelFileNumber; +#define InvalidRelFileNumber ((RelFileNumber) InvalidOid) #define RelFileNumberIsValid(relnumber) \ ((bool) ((relnumber) != InvalidRelFileNumber)) -#define atorelnumber(x) ((RelFileNumber) strtou64((x), NULL, 10)) -#define MAX_RELFILENUMBER UINT64CONST(0x00FFFFFFFFFFFFFF) /* * Name of major-version-specific tablespace subdirectories @@ -37,7 +35,6 @@ typedef uint64 RelFileNumber; /* Characters to allow for an OID in a relation path */ #define OIDCHARS 10 /* max chars printed by %u */ -#define RELNUMBERCHARS 20 /* max chars printed by UINT64_FORMAT */ /* * Stuff for fork names. diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h index 2508a6193b0..03c09fd13a4 100644 --- a/src/include/fe_utils/option_utils.h +++ b/src/include/fe_utils/option_utils.h @@ -22,7 +22,5 @@ extern void handle_help_version_opts(int argc, char *argv[], extern bool option_parse_int(const char *optarg, const char *optname, int min_range, int max_range, int *result); -extern bool option_parse_relfilenumber(const char *optarg, - const char *optname); #endif /* OPTION_UTILS_H */ diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index d4dc9eb3429..406db6be783 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -92,66 +92,29 @@ typedef struct buftag { Oid spcOid; /* tablespace oid */ Oid dbOid; /* database oid */ - - /* - * relForkDetails[] stores the fork number in the high 8 bits of the first - * integer; the remaining 56 bits are used to store the relfilenmber. - * Expanding the relfilenumber to a full 64 bits would require widening - * the BufferTag, which is undesirable for performance reasons. We use - * two 32-bit values here rather than a single 64-bit value to avoid - * padding the struct out to a multiple of 8 bytes. - */ - uint32 relForkDetails[2]; + RelFileNumber relNumber; /* relation file number */ + ForkNumber forkNum; /* fork number */ BlockNumber blockNum; /* blknum relative to begin of reln */ } BufferTag; -/* High relNumber bits in relForkDetails[0] */ -#define BUFTAG_RELNUM_HIGH_BITS 24 - -/* Low relNumber bits in relForkDetails[1] */ -#define BUFTAG_RELNUM_LOW_BITS 32 - -/* Mask to fetch high bits of relNumber from relForkDetails[0] */ -#define BUFTAG_RELNUM_HIGH_MASK ((1U << BUFTAG_RELNUM_HIGH_BITS) - 1) - -/* Mask to fetch low bits of relNumber from relForkDetails[1] */ -#define BUFTAG_RELNUM_LOW_MASK 0XFFFFFFFF - static inline RelFileNumber BufTagGetRelNumber(const BufferTag *tag) { - uint64 relnum; - - relnum = ((uint64) tag->relForkDetails[0]) & BUFTAG_RELNUM_HIGH_MASK; - relnum = (relnum << BUFTAG_RELNUM_LOW_BITS) | tag->relForkDetails[1]; - - Assert(relnum <= MAX_RELFILENUMBER); - return (RelFileNumber) relnum; + return tag->relNumber; } static inline ForkNumber BufTagGetForkNum(const BufferTag *tag) { - ForkNumber ret; - - StaticAssertStmt(MAX_FORKNUM <= INT8_MAX, - "MAX_FORKNUM can't be greater than INT8_MAX"); - - ret = (ForkNumber) (tag->relForkDetails[0] >> BUFTAG_RELNUM_HIGH_BITS); - return ret; + return tag->forkNum; } static inline void BufTagSetRelForkDetails(BufferTag *tag, RelFileNumber relnumber, ForkNumber forknum) { - Assert(relnumber <= MAX_RELFILENUMBER); - Assert(forknum <= MAX_FORKNUM); - - tag->relForkDetails[0] = (relnumber >> BUFTAG_RELNUM_LOW_BITS) & - BUFTAG_RELNUM_HIGH_MASK; - tag->relForkDetails[0] |= (forknum << BUFTAG_RELNUM_HIGH_BITS); - tag->relForkDetails[1] = relnumber & BUFTAG_RELNUM_LOW_MASK; + tag->relNumber = relnumber; + tag->forkNum = forknum; } static inline RelFileLocator @@ -190,9 +153,9 @@ BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2) { return (tag1->spcOid == tag2->spcOid) && (tag1->dbOid == tag2->dbOid) && - (tag1->relForkDetails[0] == tag2->relForkDetails[0]) && - (tag1->relForkDetails[1] == tag2->relForkDetails[1]) && - (tag1->blockNum == tag2->blockNum); + (tag1->relNumber == tag2->relNumber) && + (tag1->blockNum == tag2->blockNum) && + (tag1->forkNum == tag2->forkNum); } static inline bool diff --git a/src/include/storage/relfilelocator.h b/src/include/storage/relfilelocator.h index ef904644fa4..10f41f3abb3 100644 --- a/src/include/storage/relfilelocator.h +++ b/src/include/storage/relfilelocator.h @@ -32,11 +32,10 @@ * Nonzero dbOid values correspond to pg_database.oid. * * relNumber identifies the specific relation. relNumber corresponds to - * pg_class.relfilenode. Notice that relNumber values are assigned by - * GetNewRelFileNumber(), which will only ever assign the same value once - * during the lifetime of a cluster. However, since CREATE DATABASE duplicates - * the relfilenumbers of the template database, the values are in practice only - * unique within a database, not globally. + * pg_class.relfilenode (NOT pg_class.oid, because we need to be able + * to assign new physical files to relations in some situations). + * Notice that relNumber is only unique within a database in a particular + * tablespace. * * Note: spcOid must be GLOBALTABLESPACE_OID if and only if dbOid is * zero. We support shared relations only in the "global" tablespace. @@ -76,9 +75,6 @@ typedef struct RelFileLocatorBackend BackendId backend; } RelFileLocatorBackend; -#define SizeOfRelFileLocatorBackend \ - (offsetof(RelFileLocatorBackend, backend) + sizeof(BackendId)) - #define RelFileLocatorBackendIsTemp(rlocator) \ ((rlocator).backend != InvalidBackendId) diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 4a267be935c..aca0347a3d3 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -86,14 +86,11 @@ typedef struct typedef struct { - /* note: field layout chosen to pack into 20 bytes */ + /* note: field layout chosen to pack into 16 bytes */ int8 id; /* type field --- must be first */ int8 backend_hi; /* high bits of backend ID, if temprel */ uint16 backend_lo; /* low bits of backend ID, if temprel */ - Oid dbOid; - Oid spcOid; - uint32 relNumber_hi; /* avoid 8 byte alignment requirement */ - uint32 relNumber_lo; + RelFileLocator rlocator; /* spcOid, dbOid, relNumber */ } SharedInvalSmgrMsg; #define SHAREDINVALRELMAP_ID (-4) diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 86666b83ae5..346f594ad02 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -2164,8 +2164,9 @@ select relname, c.oid = oldoid as orig_oid, case relfilenode when 0 then 'none' + when c.oid then 'own' when oldfilenode then 'orig' - else 'new' + else 'OTHER' end as storage, obj_description(c.oid, 'pg_class') as desc from pg_class c left join old_oids using (relname) @@ -2174,10 +2175,10 @@ select relname, relname | orig_oid | storage | desc ------------------------------+----------+---------+--------------- at_partitioned | t | none | - at_partitioned_0 | t | orig | - at_partitioned_0_id_name_key | t | orig | child 0 index - at_partitioned_1 | t | orig | - at_partitioned_1_id_name_key | t | orig | child 1 index + at_partitioned_0 | t | own | + at_partitioned_0_id_name_key | t | own | child 0 index + at_partitioned_1 | t | own | + at_partitioned_1_id_name_key | t | own | child 1 index at_partitioned_id_name_key | t | none | parent index (6 rows) @@ -2197,8 +2198,9 @@ select relname, c.oid = oldoid as orig_oid, case relfilenode when 0 then 'none' + when c.oid then 'own' when oldfilenode then 'orig' - else 'new' + else 'OTHER' end as storage, obj_description(c.oid, 'pg_class') as desc from pg_class c left join old_oids using (relname) @@ -2207,10 +2209,10 @@ select relname, relname | orig_oid | storage | desc ------------------------------+----------+---------+-------------- at_partitioned | t | none | - at_partitioned_0 | t | orig | - at_partitioned_0_id_name_key | f | new | parent index - at_partitioned_1 | t | orig | - at_partitioned_1_id_name_key | f | new | parent index + at_partitioned_0 | t | own | + at_partitioned_0_id_name_key | f | own | parent index + at_partitioned_1 | t | own | + at_partitioned_1_id_name_key | f | own | parent index at_partitioned_id_name_key | f | none | parent index (6 rows) @@ -2558,7 +2560,7 @@ CREATE FUNCTION check_ddl_rewrite(p_tablename regclass, p_ddl text) RETURNS boolean LANGUAGE plpgsql AS $$ DECLARE - v_relfilenode int8; + v_relfilenode oid; BEGIN v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename; diff --git a/src/test/regress/expected/fast_default.out b/src/test/regress/expected/fast_default.out index 0a35f333f63..91f25717b5a 100644 --- a/src/test/regress/expected/fast_default.out +++ b/src/test/regress/expected/fast_default.out @@ -3,8 +3,8 @@ -- SET search_path = fast_default; CREATE SCHEMA fast_default; -CREATE TABLE m(id BIGINT); -INSERT INTO m VALUES (NULL::BIGINT); +CREATE TABLE m(id OID); +INSERT INTO m VALUES (NULL::OID); CREATE FUNCTION set(tabname name) RETURNS VOID AS $$ BEGIN diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out index af57470f93e..215eb899be3 100644 --- a/src/test/regress/expected/oidjoins.out +++ b/src/test/regress/expected/oidjoins.out @@ -74,11 +74,11 @@ NOTICE: checking pg_type {typcollation} => pg_collation {oid} NOTICE: checking pg_attribute {attrelid} => pg_class {oid} NOTICE: checking pg_attribute {atttypid} => pg_type {oid} NOTICE: checking pg_attribute {attcollation} => pg_collation {oid} -NOTICE: checking pg_class {relam} => pg_am {oid} NOTICE: checking pg_class {relnamespace} => pg_namespace {oid} NOTICE: checking pg_class {reltype} => pg_type {oid} NOTICE: checking pg_class {reloftype} => pg_type {oid} NOTICE: checking pg_class {relowner} => pg_authid {oid} +NOTICE: checking pg_class {relam} => pg_am {oid} NOTICE: checking pg_class {reltablespace} => pg_tablespace {oid} NOTICE: checking pg_class {reltoastrelid} => pg_class {oid} NOTICE: checking pg_class {relrewrite} => pg_class {oid} diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index a67eb5f982a..9f773aeeb94 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -1478,8 +1478,9 @@ select relname, c.oid = oldoid as orig_oid, case relfilenode when 0 then 'none' + when c.oid then 'own' when oldfilenode then 'orig' - else 'new' + else 'OTHER' end as storage, obj_description(c.oid, 'pg_class') as desc from pg_class c left join old_oids using (relname) @@ -1498,8 +1499,9 @@ select relname, c.oid = oldoid as orig_oid, case relfilenode when 0 then 'none' + when c.oid then 'own' when oldfilenode then 'orig' - else 'new' + else 'OTHER' end as storage, obj_description(c.oid, 'pg_class') as desc from pg_class c left join old_oids using (relname) @@ -1639,7 +1641,7 @@ CREATE FUNCTION check_ddl_rewrite(p_tablename regclass, p_ddl text) RETURNS boolean LANGUAGE plpgsql AS $$ DECLARE - v_relfilenode int8; + v_relfilenode oid; BEGIN v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename; diff --git a/src/test/regress/sql/fast_default.sql b/src/test/regress/sql/fast_default.sql index 819ec40fdaf..16a3b7ca51d 100644 --- a/src/test/regress/sql/fast_default.sql +++ b/src/test/regress/sql/fast_default.sql @@ -4,8 +4,8 @@ SET search_path = fast_default; CREATE SCHEMA fast_default; -CREATE TABLE m(id BIGINT); -INSERT INTO m VALUES (NULL::BIGINT); +CREATE TABLE m(id OID); +INSERT INTO m VALUES (NULL::OID); CREATE FUNCTION set(tabname name) RETURNS VOID AS $$ |