From 9c08aea6a3090a396be334cc58c511edab05776a Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 29 Mar 2022 11:31:43 -0400 Subject: Add new block-by-block strategy for CREATE DATABASE. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because this strategy logs changes on a block-by-block basis, it avoids the need to checkpoint before and after the operation. However, because it logs each changed block individually, it might generate a lot of extra write-ahead logging if the template database is large. Therefore, the older strategy remains available via a new STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy option to createdb. Somewhat controversially, this patch assembles the list of relations to be copied to the new database by reading the pg_class relation of the template database. Cross-database access like this isn't normally possible, but it can be made to work here because there can't be any connections to the database being copied, nor can it contain any in-doubt transactions. Even so, we have to use lower-level interfaces than normal, since the table scan and relcache interfaces will not work for a database to which we're not connected. The advantage of this approach is that we do not need to rely on the filesystem to determine what ought to be copied, but instead on PostgreSQL's own knowledge of the database structure. This avoids, for example, copying stray files that happen to be located in the source database directory. Dilip Kumar, with a fairly large number of cosmetic changes by me. Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor, Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian, Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others. 
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com --- src/backend/utils/cache/relcache.c | 2 +- src/backend/utils/cache/relmapper.c | 64 +++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'src/backend/utils/cache') diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index d47fac7bb98..a15ce9edb13 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3746,7 +3746,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence) /* handle these directly, at least for now */ SMgrRelation srel; - srel = RelationCreateStorage(newrnode, persistence); + srel = RelationCreateStorage(newrnode, persistence, true); smgrclose(srel); } else diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 4d0718f0018..dee3387d026 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -251,6 +251,63 @@ RelationMapFilenodeToOid(Oid filenode, bool shared) return InvalidOid; } +/* + * RelationMapOidToFilenodeForDatabase + * + * Like RelationMapOidToFilenode, but reads the mapping from the indicated + * path instead of using the one for the current database. + */ +Oid +RelationMapOidToFilenodeForDatabase(char *dbpath, Oid relationId) +{ + RelMapFile map; + int i; + + /* Read the relmap file from the source database. */ + read_relmap_file(&map, dbpath, false, ERROR); + + /* Iterate over the relmap entries to find the input relation OID. */ + for (i = 0; i < map.num_mappings; i++) + { + if (relationId == map.mappings[i].mapoid) + return map.mappings[i].mapfilenode; + } + + return InvalidOid; +} + +/* + * RelationMapCopy + * + * Copy relmapfile from source db path to the destination db path and WAL log + * the operation. 
This is intended for use in creating a new relmap file + * for a database that doesn't have one yet, not for replacing an existing + * relmap file. + */ +void +RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath, char *dstdbpath) +{ + RelMapFile map; + + /* + * Read the relmap file from the source database. + */ + read_relmap_file(&map, srcdbpath, false, ERROR); + + /* + * Write the same data into the destination database's relmap file. + * + * No sinval is needed because no one can be connected to the destination + * database yet. For the same reason, there is no need to acquire + * RelationMappingLock. + * + * There's no point in trying to preserve files here. The new database + * isn't usable yet anyway, and won't ever be if we can't install a + * relmap file. + */ + write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath); +} + /* * RelationMapUpdateMap * @@ -1031,6 +1088,13 @@ relmap_redo(XLogReaderState *record) * * There shouldn't be anyone else updating relmaps during WAL replay, * but grab the lock to interlock against load_relmap_file(). + * + * Note that we use the same WAL record for updating the relmap of + * an existing database as we do for creating a new database. In + * the latter case, taking the relmap lock and sending sinval messages + * is unnecessary, but harmless. If we wanted to avoid it, we could + * add a flag to the WAL record to indicate which operation is being + * performed. */ LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE); write_relmap_file(&newmap, false, true, false, -- cgit v1.2.3