diff options
author | Noah Misch <noah@leadboat.com> | 2024-02-01 13:44:19 -0800 |
---|---|---|
committer | Noah Misch <noah@leadboat.com> | 2024-02-01 13:44:22 -0800 |
commit | 6d423e9ff95ac2e62655faa97633155d08dfdd64 (patch) | |
tree | f681a52c9638712a677716c1d3433fc1e141e82c /src/backend | |
parent | b4fb76fb5283dcd76de0e5b9ef8c4588ef8ee619 (diff) | |
download | postgresql-6d423e9ff95ac2e62655faa97633155d08dfdd64.tar.gz postgresql-6d423e9ff95ac2e62655faa97633155d08dfdd64.zip |
Handle interleavings between CREATE DATABASE steps and base backup.
Restoring a base backup taken in the middle of CreateDirAndVersionFile()
or write_relmap_file() would lose the function's effects. The symptom
was absence of the database directory, PG_VERSION file, or
pg_filenode.map. If the directory was missing, recovery would fail. A
missing PG_VERSION file or pg_filenode.map would not fail recovery but
would render the new database
unusable. Fix CreateDirAndVersionFile() with the transam/README "action
first and then write a WAL entry" strategy. That has a side benefit of
moving filesystem mutations out of a critical section, reducing the ways
to PANIC. Fix the write_relmap_file() call with a lock acquisition, so
it interacts with checkpoints like non-CREATE DATABASE calls do.
Back-patch to v15, where commit 9c08aea6a3090a396be334cc58c511edab05776a
introduced STRATEGY=WAL_LOG and made it the default.
Discussion: https://postgr.es/m/20240130195003.0a.nmisch@google.com
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/commands/dbcommands.c | 44 | ||||
-rw-r--r-- | src/backend/utils/cache/relmapper.c | 16 |
2 files changed, 32 insertions, 28 deletions
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 307729ab7ef..18a5868567b 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -462,35 +462,12 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo) char buf[16]; /* - * Prepare version data before starting a critical section. - * - * Note that we don't have to copy this from the source database; there's - * only one legal value. + * Note that we don't have to copy version data from the source database; + * there's only one legal value. */ sprintf(buf, "%s\n", PG_MAJORVERSION); nbytes = strlen(PG_MAJORVERSION) + 1; - /* If we are not in WAL replay then write the WAL. */ - if (!isRedo) - { - xl_dbase_create_wal_log_rec xlrec; - XLogRecPtr lsn; - - START_CRIT_SECTION(); - - xlrec.db_id = dbid; - xlrec.tablespace_id = tsid; - - XLogBeginInsert(); - XLogRegisterData((char *) (&xlrec), - sizeof(xl_dbase_create_wal_log_rec)); - - lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG); - - /* As always, WAL must hit the disk before the data update does. */ - XLogFlush(lsn); - } - /* Create database directory. */ if (MakePGDirectory(dbpath) < 0) { @@ -534,9 +511,24 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo) /* Close the version file. */ CloseTransientFile(fd); - /* Critical section done. */ + /* If we are not in WAL replay then write the WAL. 
*/ if (!isRedo) + { + xl_dbase_create_wal_log_rec xlrec; + + START_CRIT_SECTION(); + + xlrec.db_id = dbid; + xlrec.tablespace_id = tsid; + + XLogBeginInsert(); + XLogRegisterData((char *) (&xlrec), + sizeof(xl_dbase_create_wal_log_rec)); + + (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG); + END_CRIT_SECTION(); + } } /* diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 26575cae6c9..6790126277e 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -303,14 +303,15 @@ RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath, char *dstdbpath) * Write the same data into the destination database's relmap file. * * No sinval is needed because no one can be connected to the destination - * database yet. For the same reason, there is no need to acquire - * RelationMappingLock. + * database yet. * * There's no point in trying to preserve files here. The new database * isn't usable yet anyway, and won't ever be if we can't install a relmap * file. */ + LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE); write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath); + LWLockRelease(RelationMappingLock); } /* @@ -633,10 +634,12 @@ RelationMapFinishBootstrap(void) Assert(pending_local_updates.num_mappings == 0); /* Write the files; no WAL or sinval needed */ + LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE); write_relmap_file(&shared_map, false, false, false, InvalidOid, GLOBALTABLESPACE_OID, "global"); write_relmap_file(&local_map, false, false, false, MyDatabaseId, MyDatabaseTableSpace, DatabasePath); + LWLockRelease(RelationMappingLock); } /* @@ -892,6 +895,15 @@ write_relmap_file(RelMapFile *newmap, bool write_wal, bool send_sinval, char maptempfilename[MAXPGPATH]; /* + * Even without concurrent use of this map, CheckPointRelationMap() relies + * on this locking. 
Without it, a restore of a base backup taken after + * this function's XLogInsert() and before its durable_rename() would not + * have the changes. wal_level=minimal doesn't need the lock, but this + * isn't performance-critical enough for such a micro-optimization. + */ + Assert(LWLockHeldByMeInMode(RelationMappingLock, LW_EXCLUSIVE)); + + /* * Fill in the overhead fields and update CRC. */ newmap->magic = RELMAPPER_FILEMAGIC; |