diff options
author | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2022-07-28 08:40:06 +0200 |
---|---|---|
committer | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2022-07-28 08:40:06 +0200 |
commit | 9e4f914b5eba3f49ab99bdecdc4f96fac099571f (patch) | |
tree | 37d644b03c8d3284e5914cbb3ccbdd6dd664e2f4 /src/backend/commands/dbcommands.c | |
parent | d396606ebe9722baceabb156551169d83c85b8c8 (diff) | |
download | postgresql-9e4f914b5eba3f49ab99bdecdc4f96fac099571f.tar.gz postgresql-9e4f914b5eba3f49ab99bdecdc4f96fac099571f.zip |
Fix replay of create database records on standby
Crash recovery on standby may encounter missing directories
when replaying database-creation WAL records. Prior to this
patch, the standby would fail to recover in such a case;
however, the directories could be legitimately missing.
Consider the following sequence of commands:
CREATE DATABASE
DROP DATABASE
DROP TABLESPACE
If, after replaying the last WAL record and removing the
tablespace directory, the standby crashes and has to replay the
create database record again, crash recovery must be able to continue.
A fix for this problem was already attempted in 49d9cfc68bf4, but it
was reverted because of design issues. This new version is based
on Robert Haas' proposal: any missing tablespaces are created
during recovery before reaching consistency. Tablespaces
are created as real directories, and should be deleted
by later replay. CheckRecoveryConsistency ensures
they have disappeared.
The problems detected by this new code are reported as PANIC,
except when allow_in_place_tablespaces is set to ON, in which
case they are WARNING. Apart from making tests possible, this
gives users an escape hatch in case things don't go as planned.
Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Author: Asim R Praveen <apraveen@pivotal.io>
Author: Paul Guo <paulguo@gmail.com>
Reviewed-by: Anastasia Lubennikova <lubennikovaav@gmail.com> (older versions)
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com> (older versions)
Reviewed-by: Michaël Paquier <michael@paquier.xyz>
Diagnosed-by: Paul Guo <paulguo@gmail.com>
Discussion: https://postgr.es/m/CAEET0ZGx9AvioViLf7nbR_8tH9-=27DN5xWJ2P9-ROH16e4JUA@mail.gmail.com
Diffstat (limited to 'src/backend/commands/dbcommands.c')
-rw-r--r-- | src/backend/commands/dbcommands.c | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 099d369b2f4..95844bbb691 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -30,6 +30,7 @@ #include "access/tableam.h" #include "access/xact.h" #include "access/xloginsert.h" +#include "access/xlogrecovery.h" #include "access/xlogutils.h" #include "catalog/catalog.h" #include "catalog/dependency.h" @@ -47,6 +48,7 @@ #include "commands/defrem.h" #include "commands/seclabel.h" #include "commands/tablespace.h" +#include "common/file_perm.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "pgstat.h" @@ -62,6 +64,7 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/guc.h" #include "utils/pg_locale.h" #include "utils/relmapper.h" #include "utils/snapmgr.h" @@ -135,6 +138,7 @@ static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo); static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid, Oid dst_tsid); +static void recovery_create_dbdir(char *path, bool only_tblspc); /* * Create a new database using the WAL_LOG strategy. @@ -2996,6 +3000,45 @@ get_database_name(Oid dbid) } /* + * recovery_create_dbdir() + * + * During recovery, there's a case where we validly need to recover a missing + * tablespace directory so that recovery can continue. This happens when + * recovery wants to create a database but the holding tablespace has been + * removed before the server stopped. Since we expect that the directory will + * be gone before reaching recovery consistency, and we have no knowledge about + * the tablespace other than its OID here, we create a real directory under + * pg_tblspc here instead of restoring the symlink. 
+ * + * If only_tblspc is true, then the requested directory must be in pg_tblspc/ + */ +static void +recovery_create_dbdir(char *path, bool only_tblspc) +{ + struct stat st; + + Assert(RecoveryInProgress()); + + if (stat(path, &st) == 0) + return; + + if (only_tblspc && strstr(path, "pg_tblspc/") == NULL) + elog(PANIC, "requested to created invalid directory: %s", path); + + if (reachedConsistency && !allow_in_place_tablespaces) + ereport(PANIC, + errmsg("missing directory \"%s\"", path)); + + elog(reachedConsistency ? WARNING : DEBUG1, + "creating missing directory: %s", path); + + if (pg_mkdir_p(path, pg_dir_create_mode) != 0) + ereport(PANIC, + errmsg("could not create missing directory \"%s\": %m", path)); +} + + +/* * DATABASE resource manager's routines */ void @@ -3012,6 +3055,7 @@ dbase_redo(XLogReaderState *record) (xl_dbase_create_file_copy_rec *) XLogRecGetData(record); char *src_path; char *dst_path; + char *parent_path; struct stat st; src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id); @@ -3032,6 +3076,33 @@ dbase_redo(XLogReaderState *record) } /* + * If the parent of the target path doesn't exist, create it now. This + * enables us to create the target underneath later. + */ + parent_path = pstrdup(dst_path); + get_parent_directory(parent_path); + if (stat(parent_path, &st) < 0) + { + if (errno != ENOENT) + ereport(FATAL, + errmsg("could not stat directory \"%s\": %m", + dst_path)); + + /* create the parent directory if needed and valid */ + recovery_create_dbdir(parent_path, true); + } + pfree(parent_path); + + /* + * There's a case where the copy source directory is missing for the + * same reason above. Create the emtpy source directory so that + * copydir below doesn't fail. The directory will be dropped soon by + * recovery. + */ + if (stat(src_path, &st) < 0 && errno == ENOENT) + recovery_create_dbdir(src_path, false); + + /* * Force dirty buffers out to disk, to ensure source database is * up-to-date for the copy. 
*/ @@ -3055,9 +3126,15 @@ dbase_redo(XLogReaderState *record) xl_dbase_create_wal_log_rec *xlrec = (xl_dbase_create_wal_log_rec *) XLogRecGetData(record); char *dbpath; + char *parent_path; dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id); + /* create the parent directory if needed and valid */ + parent_path = pstrdup(dbpath); + get_parent_directory(parent_path); + recovery_create_dbdir(parent_path, true); + /* Create the database directory with the version file. */ CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id, true); |