aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands/dbcommands.c
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2022-07-28 08:40:06 +0200
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2022-07-28 08:40:06 +0200
commit9e4f914b5eba3f49ab99bdecdc4f96fac099571f (patch)
tree37d644b03c8d3284e5914cbb3ccbdd6dd664e2f4 /src/backend/commands/dbcommands.c
parentd396606ebe9722baceabb156551169d83c85b8c8 (diff)
downloadpostgresql-9e4f914b5eba3f49ab99bdecdc4f96fac099571f.tar.gz
postgresql-9e4f914b5eba3f49ab99bdecdc4f96fac099571f.zip
Fix replay of create database records on standby
Crash recovery on standby may encounter missing directories when replaying database-creation WAL records. Prior to this patch, the standby would fail to recover in such a case; however, the directories could be legitimately missing. Consider the following sequence of commands: CREATE DATABASE, DROP DATABASE, DROP TABLESPACE. If, after replaying the last WAL record and removing the tablespace directory, the standby crashes and has to replay the create database record again, crash recovery must be able to continue. A fix for this problem was already attempted in 49d9cfc68bf4, but it was reverted because of design issues. This new version is based on Robert Haas' proposal: any missing tablespaces are created during recovery before reaching consistency. Tablespaces are created as real directories, and should be deleted by later replay. CheckRecoveryConsistency ensures they have disappeared. The problems detected by this new code are reported as PANIC, except when allow_in_place_tablespaces is set to ON, in which case they are WARNING. Apart from making tests possible, this gives users an escape hatch in case things don't go as planned. Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Author: Asim R Praveen <apraveen@pivotal.io> Author: Paul Guo <paulguo@gmail.com> Reviewed-by: Anastasia Lubennikova <lubennikovaav@gmail.com> (older versions) Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com> (older versions) Reviewed-by: Michaël Paquier <michael@paquier.xyz> Diagnosed-by: Paul Guo <paulguo@gmail.com> Discussion: https://postgr.es/m/CAEET0ZGx9AvioViLf7nbR_8tH9-=27DN5xWJ2P9-ROH16e4JUA@mail.gmail.com
Diffstat (limited to 'src/backend/commands/dbcommands.c')
-rw-r--r--src/backend/commands/dbcommands.c77
1 files changed, 77 insertions, 0 deletions
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 099d369b2f4..95844bbb691 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -30,6 +30,7 @@
#include "access/tableam.h"
#include "access/xact.h"
#include "access/xloginsert.h"
+#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
@@ -47,6 +48,7 @@
#include "commands/defrem.h"
#include "commands/seclabel.h"
#include "commands/tablespace.h"
+#include "common/file_perm.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
@@ -62,6 +64,7 @@
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
+#include "utils/guc.h"
#include "utils/pg_locale.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
@@ -135,6 +138,7 @@ static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid,
bool isRedo);
static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid,
Oid dst_tsid);
+static void recovery_create_dbdir(char *path, bool only_tblspc);
/*
* Create a new database using the WAL_LOG strategy.
@@ -2996,6 +3000,45 @@ get_database_name(Oid dbid)
}
/*
+ * recovery_create_dbdir()
+ *
+ * During recovery, there's a case where we validly need to recover a missing
+ * tablespace directory so that recovery can continue. This happens when
+ * recovery wants to create a database but the holding tablespace has been
+ * removed before the server stopped. Since we expect that the directory will
+ * be gone before reaching recovery consistency, and we have no knowledge about
+ * the tablespace other than its OID here, we create a real directory under
+ * pg_tblspc here instead of restoring the symlink.
+ *
+ * If only_tblspc is true, then the requested directory must be in pg_tblspc/
+ */
+static void
+recovery_create_dbdir(char *path, bool only_tblspc)
+{
+ struct stat st;
+
+ Assert(RecoveryInProgress());
+
+ /* Nothing to do if the path can already be stat'ed. */
+ if (stat(path, &st) == 0)
+ return;
+
+ /* With only_tblspc, refuse any path that does not contain pg_tblspc/. */
+ if (only_tblspc && strstr(path, "pg_tblspc/") == NULL)
+ elog(PANIC, "requested to create invalid directory: %s", path);
+
+ /*
+ * Once consistency has been reached, a missing directory is reported as
+ * PANIC unless allow_in_place_tablespaces is set; in that case we only
+ * emit the WARNING below and go on to create the directory.
+ */
+ if (reachedConsistency && !allow_in_place_tablespaces)
+ ereport(PANIC,
+ errmsg("missing directory \"%s\"", path));
+
+ elog(reachedConsistency ? WARNING : DEBUG1,
+ "creating missing directory: %s", path);
+
+ /* Failure to create the directory is also reported as PANIC. */
+ if (pg_mkdir_p(path, pg_dir_create_mode) != 0)
+ ereport(PANIC,
+ errmsg("could not create missing directory \"%s\": %m", path));
+}
+
+
+/*
* DATABASE resource manager's routines
*/
void
@@ -3012,6 +3055,7 @@ dbase_redo(XLogReaderState *record)
(xl_dbase_create_file_copy_rec *) XLogRecGetData(record);
char *src_path;
char *dst_path;
+ char *parent_path;
struct stat st;
src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
@@ -3032,6 +3076,33 @@ dbase_redo(XLogReaderState *record)
}
/*
+ * If the parent of the target path doesn't exist, create it now. This
+ * enables us to create the target underneath later.
+ */
+ parent_path = pstrdup(dst_path);
+ get_parent_directory(parent_path);
+ if (stat(parent_path, &st) < 0)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ errmsg("could not stat directory \"%s\": %m",
+ dst_path));
+
+ /* create the parent directory if needed and valid */
+ recovery_create_dbdir(parent_path, true);
+ }
+ pfree(parent_path);
+
+ /*
+ * There's a case where the copy source directory is missing for the
+ * same reason as above. Create the empty source directory so that
+ * copydir below doesn't fail. The directory will be dropped soon by
+ * recovery.
+ */
+ if (stat(src_path, &st) < 0 && errno == ENOENT)
+ recovery_create_dbdir(src_path, false);
+
+ /*
* Force dirty buffers out to disk, to ensure source database is
* up-to-date for the copy.
*/
@@ -3055,9 +3126,15 @@ dbase_redo(XLogReaderState *record)
xl_dbase_create_wal_log_rec *xlrec =
(xl_dbase_create_wal_log_rec *) XLogRecGetData(record);
char *dbpath;
+ char *parent_path;
dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
+ /* create the parent directory if needed and valid */
+ parent_path = pstrdup(dbpath);
+ get_parent_directory(parent_path);
+ recovery_create_dbdir(parent_path, true);
+
/* Create the database directory with the version file. */
CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id,
true);