aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/transam/xlogrecovery.c50
-rw-r--r--src/backend/commands/dbcommands.c77
-rw-r--r--src/backend/commands/tablespace.c40
-rw-r--r--src/test/recovery/t/033_replay_tsp_drops.pl169
4 files changed, 305 insertions, 31 deletions
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index e383c2123a3..27e02fbfcda 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -42,6 +42,7 @@
#include "access/xlogutils.h"
#include "catalog/pg_control.h"
#include "commands/tablespace.h"
+#include "common/file_utils.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
@@ -2009,6 +2010,47 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
}
/*
+ * Verify that, in non-test mode, ./pg_tblspc doesn't contain any real
+ * directories.
+ *
+ * Replay of database creation XLOG records for databases that were later
+ * dropped can create fake directories in pg_tblspc. By the time consistency
+ * is reached these directories should have been removed; here we verify
+ * that this did indeed happen. This is to be called at the point where
+ * consistent state is reached.
+ *
+ * allow_in_place_tablespaces turns the PANIC into a WARNING, which is
+ * useful for testing purposes, and also allows for an escape hatch in case
+ * things go south.
+ */
+static void
+CheckTablespaceDirectory(void)
+{
+ DIR *dir;
+ struct dirent *de;
+
+ dir = AllocateDir("pg_tblspc");
+ while ((de = ReadDir(dir, "pg_tblspc")) != NULL)
+ {
+ char path[MAXPGPATH + 10];
+
+ /* Skip entries of non-oid names */
+ if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
+ continue;
+
+ snprintf(path, sizeof(path), "pg_tblspc/%s", de->d_name);
+
+ if (get_dirent_type(path, de, false, ERROR) != PGFILETYPE_LNK)
+ ereport(allow_in_place_tablespaces ? WARNING : PANIC,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("unexpected directory entry \"%s\" found in %s",
+ de->d_name, "pg_tblspc/"),
+ errdetail("All directory entries in pg_tblspc/ should be symbolic links."),
+ errhint("Remove those directories, or set allow_in_place_tablespaces to ON transiently to let recovery complete.")));
+ }
+}
+
+/*
* Checks if recovery has reached a consistent state. When consistency is
* reached and we have a valid starting standby snapshot, tell postmaster
* that it can start accepting read-only connections.
@@ -2068,6 +2110,14 @@ CheckRecoveryConsistency(void)
*/
XLogCheckInvalidPages();
+ /*
+ * Check that pg_tblspc doesn't contain any real directories. Replay
+ * of Database/CREATE_* records may have created ficticious tablespace
+ * directories that should have been removed by the time consistency
+ * was reached.
+ */
+ CheckTablespaceDirectory();
+
reachedConsistency = true;
ereport(LOG,
(errmsg("consistent recovery state reached at %X/%X",
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 099d369b2f4..95844bbb691 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -30,6 +30,7 @@
#include "access/tableam.h"
#include "access/xact.h"
#include "access/xloginsert.h"
+#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
@@ -47,6 +48,7 @@
#include "commands/defrem.h"
#include "commands/seclabel.h"
#include "commands/tablespace.h"
+#include "common/file_perm.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
@@ -62,6 +64,7 @@
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
+#include "utils/guc.h"
#include "utils/pg_locale.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
@@ -135,6 +138,7 @@ static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid,
bool isRedo);
static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid,
Oid dst_tsid);
+static void recovery_create_dbdir(char *path, bool only_tblspc);
/*
* Create a new database using the WAL_LOG strategy.
@@ -2996,6 +3000,45 @@ get_database_name(Oid dbid)
}
/*
+ * recovery_create_dbdir()
+ *
+ * During recovery, there's a case where we validly need to recover a missing
+ * tablespace directory so that recovery can continue. This happens when
+ * recovery wants to create a database but the holding tablespace has been
+ * removed before the server stopped. Since we expect that the directory will
+ * be gone before reaching recovery consistency, and we have no knowledge about
+ * the tablespace other than its OID here, we create a real directory under
+ * pg_tblspc here instead of restoring the symlink.
+ *
+ * If only_tblspc is true, then the requested directory must be in pg_tblspc/
+ */
+static void
+recovery_create_dbdir(char *path, bool only_tblspc)
+{
+ struct stat st;
+
+ Assert(RecoveryInProgress());
+
+ if (stat(path, &st) == 0)
+ return;
+
+ if (only_tblspc && strstr(path, "pg_tblspc/") == NULL)
+ elog(PANIC, "requested to created invalid directory: %s", path);
+
+ if (reachedConsistency && !allow_in_place_tablespaces)
+ ereport(PANIC,
+ errmsg("missing directory \"%s\"", path));
+
+ elog(reachedConsistency ? WARNING : DEBUG1,
+ "creating missing directory: %s", path);
+
+ if (pg_mkdir_p(path, pg_dir_create_mode) != 0)
+ ereport(PANIC,
+ errmsg("could not create missing directory \"%s\": %m", path));
+}
+
+
+/*
* DATABASE resource manager's routines
*/
void
@@ -3012,6 +3055,7 @@ dbase_redo(XLogReaderState *record)
(xl_dbase_create_file_copy_rec *) XLogRecGetData(record);
char *src_path;
char *dst_path;
+ char *parent_path;
struct stat st;
src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
@@ -3032,6 +3076,33 @@ dbase_redo(XLogReaderState *record)
}
/*
+ * If the parent of the target path doesn't exist, create it now. This
+ * enables us to create the target underneath later.
+ */
+ parent_path = pstrdup(dst_path);
+ get_parent_directory(parent_path);
+ if (stat(parent_path, &st) < 0)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ errmsg("could not stat directory \"%s\": %m",
+ dst_path));
+
+ /* create the parent directory if needed and valid */
+ recovery_create_dbdir(parent_path, true);
+ }
+ pfree(parent_path);
+
+ /*
+ * There's a case where the copy source directory is missing for the
+ * same reason above. Create the emtpy source directory so that
+ * copydir below doesn't fail. The directory will be dropped soon by
+ * recovery.
+ */
+ if (stat(src_path, &st) < 0 && errno == ENOENT)
+ recovery_create_dbdir(src_path, false);
+
+ /*
* Force dirty buffers out to disk, to ensure source database is
* up-to-date for the copy.
*/
@@ -3055,9 +3126,15 @@ dbase_redo(XLogReaderState *record)
xl_dbase_create_wal_log_rec *xlrec =
(xl_dbase_create_wal_log_rec *) XLogRecGetData(record);
char *dbpath;
+ char *parent_path;
dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
+ /* create the parent directory if needed and valid */
+ parent_path = pstrdup(dbpath);
+ get_parent_directory(parent_path);
+ recovery_create_dbdir(parent_path, true);
+
/* Create the database directory with the version file. */
CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id,
true);
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index cb7d46089a3..570ce3dbd54 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -156,8 +156,6 @@ TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
/* Directory creation failed? */
if (MakePGDirectory(dir) < 0)
{
- char *parentdir;
-
/* Failure other than not exists or not in WAL replay? */
if (errno != ENOENT || !isRedo)
ereport(ERROR,
@@ -166,36 +164,16 @@ TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
dir)));
/*
- * Parent directories are missing during WAL replay, so
- * continue by creating simple parent directories rather
- * than a symlink.
+ * During WAL replay, it's conceivable that several levels
+ * of directories are missing if tablespaces are dropped
+ * further ahead of the WAL stream than we're currently
+ * replaying. An easy way forward is to create them as
+ * plain directories and hope they are removed by further
+ * WAL replay if necessary. If this also fails, there is
+ * trouble we cannot get out of, so just report that and
+ * bail out.
*/
-
- /* create two parents up if not exist */
- parentdir = pstrdup(dir);
- get_parent_directory(parentdir);
- get_parent_directory(parentdir);
- /* Can't create parent and it doesn't already exist? */
- if (MakePGDirectory(parentdir) < 0 && errno != EEXIST)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not create directory \"%s\": %m",
- parentdir)));
- pfree(parentdir);
-
- /* create one parent up if not exist */
- parentdir = pstrdup(dir);
- get_parent_directory(parentdir);
- /* Can't create parent and it doesn't already exist? */
- if (MakePGDirectory(parentdir) < 0 && errno != EEXIST)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not create directory \"%s\": %m",
- parentdir)));
- pfree(parentdir);
-
- /* Create database directory */
- if (MakePGDirectory(dir) < 0)
+ if (pg_mkdir_p(dir, pg_dir_create_mode) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not create directory \"%s\": %m",
diff --git a/src/test/recovery/t/033_replay_tsp_drops.pl b/src/test/recovery/t/033_replay_tsp_drops.pl
new file mode 100644
index 00000000000..9b74cb09ac3
--- /dev/null
+++ b/src/test/recovery/t/033_replay_tsp_drops.pl
@@ -0,0 +1,169 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test replay of tablespace/database creation/drop
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+sub test_tablespace
+{
+ my ($strategy) = @_;
+
+ my $node_primary = PostgreSQL::Test::Cluster->new("primary1_$strategy");
+ $node_primary->init(allows_streaming => 1);
+ $node_primary->start;
+ $node_primary->psql(
+ 'postgres',
+ qq[
+ SET allow_in_place_tablespaces=on;
+ CREATE TABLESPACE dropme_ts1 LOCATION '';
+ CREATE TABLESPACE dropme_ts2 LOCATION '';
+ CREATE TABLESPACE source_ts LOCATION '';
+ CREATE TABLESPACE target_ts LOCATION '';
+ CREATE DATABASE template_db IS_TEMPLATE = true;
+ ]);
+ my $backup_name = 'my_backup';
+ $node_primary->backup($backup_name);
+
+ my $node_standby = PostgreSQL::Test::Cluster->new("standby2_$strategy");
+ $node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+ $node_standby->append_conf('postgresql.conf',
+ "allow_in_place_tablespaces = on");
+ $node_standby->start;
+
+ # Make sure connection is made
+ $node_primary->poll_query_until('postgres',
+ 'SELECT count(*) = 1 FROM pg_stat_replication');
+
+ $node_standby->safe_psql('postgres', 'CHECKPOINT');
+
+ # Do immediate shutdown just after a sequence of CREAT DATABASE / DROP
+ # DATABASE / DROP TABLESPACE. This causes CREATE DATABASE WAL records
+ # to be applied to already-removed directories.
+ my $query = q[
+ CREATE DATABASE dropme_db1 WITH TABLESPACE dropme_ts1 STRATEGY=<STRATEGY>;
+ CREATE TABLE t (a int) TABLESPACE dropme_ts2;
+ CREATE DATABASE dropme_db2 WITH TABLESPACE dropme_ts2 STRATEGY=<STRATEGY>;
+ CREATE DATABASE moveme_db TABLESPACE source_ts STRATEGY=<STRATEGY>;
+ ALTER DATABASE moveme_db SET TABLESPACE target_ts;
+ CREATE DATABASE newdb TEMPLATE template_db STRATEGY=<STRATEGY>;
+ ALTER DATABASE template_db IS_TEMPLATE = false;
+ DROP DATABASE dropme_db1;
+ DROP TABLE t;
+ DROP DATABASE dropme_db2; DROP TABLESPACE dropme_ts2;
+ DROP TABLESPACE source_ts;
+ DROP DATABASE template_db;
+ ];
+
+ $query =~ s/<STRATEGY>/$strategy/g;
+ $node_primary->safe_psql('postgres', $query);
+ $node_primary->wait_for_catchup($node_standby, 'replay',
+ $node_primary->lsn('write'));
+
+ # show "create missing directory" log message
+ $node_standby->safe_psql('postgres',
+ "ALTER SYSTEM SET log_min_messages TO debug1;");
+ $node_standby->stop('immediate');
+ # Should restart ignoring directory creation error.
+ is($node_standby->start(fail_ok => 1),
+ 1, "standby node started for $strategy");
+ $node_standby->stop('immediate');
+}
+
+test_tablespace("FILE_COPY");
+test_tablespace("WAL_LOG");
+
+# Ensure that a missing tablespace directory during create database
+# replay immediately causes panic if the standby has already reached
+# consistent state (archive recovery is in progress). This is
+# effective only for CREATE DATABASE WITH STRATEGY=FILE_COPY.
+
+my $node_primary = PostgreSQL::Test::Cluster->new('primary2');
+$node_primary->init(allows_streaming => 1);
+$node_primary->start;
+
+# Create tablespace
+$node_primary->safe_psql(
+ 'postgres', q[
+ SET allow_in_place_tablespaces=on;
+ CREATE TABLESPACE ts1 LOCATION ''
+ ]);
+$node_primary->safe_psql('postgres',
+ "CREATE DATABASE db1 WITH TABLESPACE ts1 STRATEGY=FILE_COPY");
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+my $node_standby = PostgreSQL::Test::Cluster->new('standby3');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby->append_conf('postgresql.conf',
+ "allow_in_place_tablespaces = on");
+$node_standby->start;
+
+# Make sure standby reached consistency and starts accepting connections
+$node_standby->poll_query_until('postgres', 'SELECT 1', '1');
+
+# Remove standby tablespace directory so it will be missing when
+# replay resumes.
+my $tspoid = $node_standby->safe_psql('postgres',
+ "SELECT oid FROM pg_tablespace WHERE spcname = 'ts1';");
+my $tspdir = $node_standby->data_dir . "/pg_tblspc/$tspoid";
+File::Path::rmtree($tspdir);
+
+my $logstart = get_log_size($node_standby);
+
+# Create a database in the tablespace and a table in default tablespace
+$node_primary->safe_psql(
+ 'postgres',
+ q[
+ CREATE TABLE should_not_replay_insertion(a int);
+ CREATE DATABASE db2 WITH TABLESPACE ts1 STRATEGY=FILE_COPY;
+ INSERT INTO should_not_replay_insertion VALUES (1);
+ ]);
+
+# Standby should fail and should not silently skip replaying the wal
+# In this test, PANIC turns into WARNING by allow_in_place_tablespaces.
+# Check the log messages instead of confirming standby failure.
+my $max_attempts = $PostgreSQL::Test::Utils::timeout_default;
+while ($max_attempts-- >= 0)
+{
+ last
+ if (
+ find_in_log(
+ $node_standby, "WARNING: creating missing directory: pg_tblspc/",
+ $logstart));
+ sleep 1;
+}
+ok($max_attempts > 0, "invalid directory creation is detected");
+
+done_testing();
+
+
+# return the size of logfile of $node in bytes
+sub get_log_size
+{
+ my ($node) = @_;
+
+ return (stat $node->logfile)[7];
+}
+
+# find $pat in logfile of $node after $off-th byte
+sub find_in_log
+{
+ my ($node, $pat, $off) = @_;
+
+ $off = 0 unless defined $off;
+ my $log = PostgreSQL::Test::Utils::slurp_file($node->logfile);
+ return 0 if (length($log) <= $off);
+
+ $log = substr($log, $off);
+
+ return $log =~ m/$pat/;
+}