aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/bin/pg_upgrade/TESTING6
-rw-r--r--src/bin/pg_upgrade/check.c29
-rw-r--r--src/bin/pg_upgrade/controldata.c21
-rw-r--r--src/bin/pg_upgrade/dump.c4
-rw-r--r--src/bin/pg_upgrade/file.c14
-rw-r--r--src/bin/pg_upgrade/info.c4
-rw-r--r--src/bin/pg_upgrade/option.c7
-rw-r--r--src/bin/pg_upgrade/pg_upgrade.c16
-rw-r--r--src/bin/pg_upgrade/pg_upgrade.h5
-rw-r--r--src/bin/pg_upgrade/relfilenumber.c384
-rw-r--r--src/bin/pg_upgrade/t/006_transfer_modes.pl10
-rw-r--r--src/common/file_utils.c14
-rw-r--r--src/include/common/file_utils.h1
13 files changed, 482 insertions, 33 deletions
diff --git a/src/bin/pg_upgrade/TESTING b/src/bin/pg_upgrade/TESTING
index 00842ac6ec3..c3d463c9c29 100644
--- a/src/bin/pg_upgrade/TESTING
+++ b/src/bin/pg_upgrade/TESTING
@@ -20,13 +20,13 @@ export oldinstall=...otherversion/ (old version's install base path)
See DETAILS below for more information about creation of the dump.
You can also test the different transfer modes (--copy, --link,
---clone, --copy-file-range) by setting the environment variable
+--clone, --copy-file-range, --swap) by setting the environment variable
PG_TEST_PG_UPGRADE_MODE to the respective command-line option, like
make check PG_TEST_PG_UPGRADE_MODE=--link
-The default is --copy. Note that the other modes are not supported on
-all operating systems.
+The default is --copy. Note that not all modes are supported on all
+operating systems.
DETAILS
-------
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 117f461d46a..02d9146e5ed 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -709,7 +709,34 @@ check_new_cluster(void)
check_copy_file_range();
break;
case TRANSFER_MODE_LINK:
- check_hard_link();
+ check_hard_link(TRANSFER_MODE_LINK);
+ break;
+ case TRANSFER_MODE_SWAP:
+
+ /*
+ * We do the hard link check for --swap, too, since it's an easy
+ * way to verify the clusters are in the same file system. This
+ * allows us to take some shortcuts in the file synchronization
+ * step. With some more effort, we could probably support the
+ * separate-file-system use case, but this mode is unlikely to
+ * offer much benefit if we have to copy the files across file
+ * system boundaries.
+ */
+ check_hard_link(TRANSFER_MODE_SWAP);
+
+ /*
+ * There are a few known issues with using --swap to upgrade from
+ * versions older than 10. For example, the sequence tuple format
+ * changed in v10, and the visibility map format changed in 9.6.
+ * While such problems are not insurmountable (and we may have to
+ * deal with similar problems in the future, anyway), it doesn't
+ * seem worth the effort to support swap mode for upgrades from
+ * long-unsupported versions.
+ */
+ if (GET_MAJOR_VERSION(old_cluster.major_version) < 1000)
+ pg_fatal("Swap mode can only upgrade clusters from PostgreSQL version %s and later.",
+ "10");
+
break;
}
diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c
index bd49ea867bf..47ee27ec835 100644
--- a/src/bin/pg_upgrade/controldata.c
+++ b/src/bin/pg_upgrade/controldata.c
@@ -751,7 +751,7 @@ check_control_data(ControlData *oldctrl,
void
-disable_old_cluster(void)
+disable_old_cluster(transferMode transfer_mode)
{
char old_path[MAXPGPATH],
new_path[MAXPGPATH];
@@ -766,10 +766,17 @@ disable_old_cluster(void)
old_path, new_path);
check_ok();
- pg_log(PG_REPORT, "\n"
- "If you want to start the old cluster, you will need to remove\n"
- "the \".old\" suffix from %s/global/pg_control.old.\n"
- "Because \"link\" mode was used, the old cluster cannot be safely\n"
- "started once the new cluster has been started.",
- old_cluster.pgdata);
+ if (transfer_mode == TRANSFER_MODE_LINK)
+ pg_log(PG_REPORT, "\n"
+ "If you want to start the old cluster, you will need to remove\n"
+ "the \".old\" suffix from %s/global/pg_control.old.\n"
+ "Because \"link\" mode was used, the old cluster cannot be safely\n"
+ "started once the new cluster has been started.",
+ old_cluster.pgdata);
+ else if (transfer_mode == TRANSFER_MODE_SWAP)
+ pg_log(PG_REPORT, "\n"
+ "Because \"swap\" mode was used, the old cluster can no longer be\n"
+ "safely started.");
+ else
+ pg_fatal("unrecognized transfer mode");
}
diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c
index b8fd0d0acee..23cb08e8347 100644
--- a/src/bin/pg_upgrade/dump.c
+++ b/src/bin/pg_upgrade/dump.c
@@ -52,9 +52,11 @@ generate_old_dump(void)
snprintf(log_file_name, sizeof(log_file_name), DB_DUMP_LOG_FILE_MASK, old_db->db_oid);
parallel_exec_prog(log_file_name, NULL,
- "\"%s/pg_dump\" %s --no-data %s --sequence-data --quote-all-identifiers "
+ "\"%s/pg_dump\" %s --no-data %s %s --quote-all-identifiers "
"--binary-upgrade --format=custom %s --no-sync --file=\"%s/%s\" %s",
new_cluster.bindir, cluster_conn_opts(&old_cluster),
+ (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ?
+ "" : "--sequence-data",
log_opts.verbose ? "--verbose" : "",
user_opts.do_statistics ? "" : "--no-statistics",
log_opts.dumpdir,
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 7fd1991204a..91ed16acb08 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -434,7 +434,7 @@ check_copy_file_range(void)
}
void
-check_hard_link(void)
+check_hard_link(transferMode transfer_mode)
{
char existing_file[MAXPGPATH];
char new_link_file[MAXPGPATH];
@@ -444,8 +444,16 @@ check_hard_link(void)
unlink(new_link_file); /* might fail */
if (link(existing_file, new_link_file) < 0)
- pg_fatal("could not create hard link between old and new data directories: %m\n"
- "In link mode the old and new data directories must be on the same file system.");
+ {
+ if (transfer_mode == TRANSFER_MODE_LINK)
+ pg_fatal("could not create hard link between old and new data directories: %m\n"
+ "In link mode the old and new data directories must be on the same file system.");
+ else if (transfer_mode == TRANSFER_MODE_SWAP)
+ pg_fatal("could not create hard link between old and new data directories: %m\n"
+ "In swap mode the old and new data directories must be on the same file system.");
+ else
+ pg_fatal("unrecognized transfer mode");
+ }
unlink(new_link_file);
}
diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index ad52de8b607..4b7a56f5b3b 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -490,7 +490,7 @@ get_rel_infos_query(void)
" FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n "
" ON c.relnamespace = n.oid "
" WHERE relkind IN (" CppAsString2(RELKIND_RELATION) ", "
- CppAsString2(RELKIND_MATVIEW) ") AND "
+ CppAsString2(RELKIND_MATVIEW) "%s) AND "
/* exclude possible orphaned temp tables */
" ((n.nspname !~ '^pg_temp_' AND "
" n.nspname !~ '^pg_toast_temp_' AND "
@@ -499,6 +499,8 @@ get_rel_infos_query(void)
" c.oid >= %u::pg_catalog.oid) OR "
" (n.nspname = 'pg_catalog' AND "
" relname IN ('pg_largeobject') ))), ",
+ (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ?
+ ", " CppAsString2(RELKIND_SEQUENCE) : "",
FirstNormalObjectId);
/*
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 188dd8d8a8b..7fd7f1d33fc 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -62,6 +62,7 @@ parseCommandLine(int argc, char *argv[])
{"sync-method", required_argument, NULL, 4},
{"no-statistics", no_argument, NULL, 5},
{"set-char-signedness", required_argument, NULL, 6},
+ {"swap", no_argument, NULL, 7},
{NULL, 0, NULL, 0}
};
@@ -228,6 +229,11 @@ parseCommandLine(int argc, char *argv[])
else
pg_fatal("invalid argument for option %s", "--set-char-signedness");
break;
+
+ case 7:
+ user_opts.transfer_mode = TRANSFER_MODE_SWAP;
+ break;
+
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
os_info.progname);
@@ -325,6 +331,7 @@ usage(void)
printf(_(" --no-statistics do not import statistics from old cluster\n"));
printf(_(" --set-char-signedness=OPTION set new cluster char signedness to \"signed\" or\n"
" \"unsigned\"\n"));
+ printf(_(" --swap move data directories to new cluster\n"));
printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 174cd920840..9295e46aed3 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -170,12 +170,14 @@ main(int argc, char **argv)
/*
* Most failures happen in create_new_objects(), which has completed at
- * this point. We do this here because it is just before linking, which
- * will link the old and new cluster data files, preventing the old
- * cluster from being safely started once the new cluster is started.
+ * this point. We do this here because it is just before file transfer,
+ * which for --link will make it unsafe to start the old cluster once the
+ * new cluster is started, and for --swap will make it unsafe to start the
+ * old cluster at all.
*/
- if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
- disable_old_cluster();
+ if (user_opts.transfer_mode == TRANSFER_MODE_LINK ||
+ user_opts.transfer_mode == TRANSFER_MODE_SWAP)
+ disable_old_cluster(user_opts.transfer_mode);
transfer_all_new_tablespaces(&old_cluster.dbarr, &new_cluster.dbarr,
old_cluster.pgdata, new_cluster.pgdata);
@@ -212,8 +214,10 @@ main(int argc, char **argv)
{
prep_status("Sync data directory to disk");
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
- "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+ "\"%s/initdb\" --sync-only %s \"%s\" --sync-method %s",
new_cluster.bindir,
+ (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ?
+ "--no-sync-data-files" : "",
new_cluster.pgdata,
user_opts.sync_method);
check_ok();
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 4c9d0172149..69c965bb7d0 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -262,6 +262,7 @@ typedef enum
TRANSFER_MODE_COPY,
TRANSFER_MODE_COPY_FILE_RANGE,
TRANSFER_MODE_LINK,
+ TRANSFER_MODE_SWAP,
} transferMode;
/*
@@ -391,7 +392,7 @@ void create_script_for_old_cluster_deletion(char **deletion_script_file_name);
void get_control_data(ClusterInfo *cluster);
void check_control_data(ControlData *oldctrl, ControlData *newctrl);
-void disable_old_cluster(void);
+void disable_old_cluster(transferMode transfer_mode);
/* dump.c */
@@ -423,7 +424,7 @@ void rewriteVisibilityMap(const char *fromfile, const char *tofile,
const char *schemaName, const char *relName);
void check_file_clone(void);
void check_copy_file_range(void);
-void check_hard_link(void);
+void check_hard_link(transferMode transfer_mode);
/* fopen_priv() is no longer different from fopen() */
#define fopen_priv(path, mode) fopen(path, mode)
diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c
index 8c23c583172..aa205aec51d 100644
--- a/src/bin/pg_upgrade/relfilenumber.c
+++ b/src/bin/pg_upgrade/relfilenumber.c
@@ -11,11 +11,92 @@
#include <sys/stat.h>
+#include "common/file_perm.h"
+#include "common/file_utils.h"
+#include "common/int.h"
+#include "common/logging.h"
#include "pg_upgrade.h"
static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
static void transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit);
+/*
+ * The following set of sync_queue_* functions are used for --swap to reduce
+ * the amount of time spent synchronizing the swapped catalog files. When a
+ * file is added to the queue, we also alert the file system that we'd like it
+ * to be persisted to disk in the near future (if that operation is supported
+ * by the current platform). Once the queue is full, all of the files are
+ * synchronized to disk. This strategy should generally be much faster than
+ * simply calling fsync() on the files right away.
+ *
+ * The general usage pattern should be something like:
+ *
+ * for (int i = 0; i < num_files; i++)
+ * sync_queue_push(files[i]);
+ *
+ * // be sure to sync any remaining files in the queue
+ * sync_queue_sync_all();
+ * sync_queue_destroy();
+ */
+
+#define SYNC_QUEUE_MAX_LEN (1024)
+
+static char *sync_queue[SYNC_QUEUE_MAX_LEN];
+static bool sync_queue_inited;
+static int sync_queue_len;
+
+/*
+ * Lazily allocate the path buffers backing the sync queue.  Calling this
+ * more than once is harmless: only the first call allocates.
+ */
+static inline void
+sync_queue_init(void)
+{
+	int			slot = 0;
+
+	/* An earlier call already allocated the slots; nothing more to do. */
+	if (sync_queue_inited)
+		return;
+
+	sync_queue_inited = true;
+
+	/* Give every slot its own MAXPGPATH-sized buffer. */
+	while (slot < SYNC_QUEUE_MAX_LEN)
+		sync_queue[slot++] = palloc(MAXPGPATH);
+}
+
+/*
+ * fsync every path currently held in the queue, then mark the queue empty.
+ * A failure to synchronize any file is fatal.  If the queue was never
+ * initialized this does nothing.
+ */
+static inline void
+sync_queue_sync_all(void)
+{
+	if (sync_queue_inited)
+	{
+		int			idx = 0;
+
+		while (idx < sync_queue_len)
+		{
+			char	   *queued = sync_queue[idx];
+
+			if (fsync_fname(queued, false) != 0)
+				pg_fatal("could not synchronize file \"%s\": %m", queued);
+			idx++;
+		}
+
+		/* Everything has been flushed; start filling from slot zero again. */
+		sync_queue_len = 0;
+	}
+}
+
+/*
+ * sync_queue_push()
+ *
+ * Add fname to the queue of files awaiting fsync.  The queue is initialized
+ * on first use.  pre_sync_fname() is called on the file first, and once the
+ * queue holds SYNC_QUEUE_MAX_LEN entries they are all synchronized via
+ * sync_queue_sync_all(), which also empties the queue.
+ *
+ * fname is copied, so the caller may reuse its buffer after this returns.
+ */
+static inline void
+sync_queue_push(const char *fname)
+{
+	sync_queue_init();
+
+	/* Only a hint to the OS, so its return value is not checked. */
+	(void) pre_sync_fname(fname, false);
+
+	/*
+	 * Use snprintf() rather than strncpy(): strncpy() leaves the destination
+	 * unterminated when the source fills the buffer, and it zero-pads all
+	 * MAXPGPATH bytes on every call.
+	 */
+	snprintf(sync_queue[sync_queue_len++], MAXPGPATH, "%s", fname);
+
+	if (sync_queue_len >= SYNC_QUEUE_MAX_LEN)
+		sync_queue_sync_all();
+}
+
+/*
+ * Release the queue's path buffers and return it to the uninitialized
+ * state.  Entries still in the queue are discarded without being
+ * synchronized.  Does nothing if the queue was never initialized.
+ */
+static inline void
+sync_queue_destroy(void)
+{
+	int			slot = 0;
+
+	if (!sync_queue_inited)
+		return;
+
+	sync_queue_inited = false;
+	sync_queue_len = 0;
+
+	while (slot < SYNC_QUEUE_MAX_LEN)
+	{
+		pfree(sync_queue[slot]);
+		sync_queue[slot] = NULL;
+		slot++;
+	}
+}
/*
* transfer_all_new_tablespaces()
@@ -41,6 +122,9 @@ transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
case TRANSFER_MODE_LINK:
prep_status_progress("Linking user relation files");
break;
+ case TRANSFER_MODE_SWAP:
+ prep_status_progress("Swapping data directories");
+ break;
}
/*
@@ -125,6 +209,287 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
/* We allocate something even for n_maps == 0 */
pg_free(mappings);
}
+
+ /*
+ * Make sure anything pending synchronization in swap mode is fully
+ * persisted to disk. This is a no-op for other transfer modes.
+ */
+ sync_queue_sync_all();
+ sync_queue_destroy();
+}
+
+/*
+ * prepare_for_swap()
+ *
+ * This function moves the database directory from the old cluster to the new
+ * cluster in preparation for moving the pg_restore-generated catalog files
+ * into place. Returns false if the database with the given OID does not have
+ * a directory in the given tablespace, otherwise returns true.
+ *
+ * This function will return paths in the following variables, which the caller
+ * must ensure are sized to MAXPGPATH bytes:
+ *
+ * old_catalog_dir: The directory for the old cluster's catalog files.
+ * new_db_dir: The new cluster's database directory for db_oid.
+ * moved_db_dir: Destination for the pg_restore-generated database directory.
+ */
+static bool
+prepare_for_swap(const char *old_tablespace, Oid db_oid,
+				 char *old_catalog_dir, char *new_db_dir, char *moved_db_dir)
+{
+	const char *new_tablespace;
+	const char *old_tblspc_suffix;
+	const char *new_tblspc_suffix;
+	char		old_tblspc[MAXPGPATH];
+	char		new_tblspc[MAXPGPATH];
+	char		moved_tblspc[MAXPGPATH];
+	char		old_db_dir[MAXPGPATH];
+	struct stat st;
+
+	if (strcmp(old_tablespace, old_cluster.pgdata) == 0)
+	{
+		/* The data directory itself: databases live under "base". */
+		new_tablespace = new_cluster.pgdata;
+		new_tblspc_suffix = "/base";
+		old_tblspc_suffix = "/base";
+	}
+	else
+	{
+		/*
+		 * XXX: The below line is a hack to deal with the fact that we
+		 * presently don't have an easy way to find the corresponding new
+		 * tablespace's path.  This will need to be fixed if/when we add
+		 * pg_upgrade support for in-place tablespaces.
+		 */
+		new_tablespace = old_tablespace;
+
+		new_tblspc_suffix = new_cluster.tablespace_suffix;
+		old_tblspc_suffix = old_cluster.tablespace_suffix;
+	}
+
+	/* Old and new cluster paths. */
+	snprintf(old_tblspc, sizeof(old_tblspc), "%s%s", old_tablespace, old_tblspc_suffix);
+	snprintf(new_tblspc, sizeof(new_tblspc), "%s%s", new_tablespace, new_tblspc_suffix);
+	snprintf(old_db_dir, sizeof(old_db_dir), "%s/%u", old_tblspc, db_oid);
+	snprintf(new_db_dir, MAXPGPATH, "%s/%u", new_tblspc, db_oid);
+
+	/*
+	 * Paths for "moved aside" stuff.  We intentionally put these in the old
+	 * cluster so that the delete_old_cluster.{sh,bat} script handles them.
+	 */
+	snprintf(moved_tblspc, sizeof(moved_tblspc), "%s/moved_for_upgrade", old_tblspc);
+	snprintf(old_catalog_dir, MAXPGPATH, "%s/%u_old_catalogs", moved_tblspc, db_oid);
+	snprintf(moved_db_dir, MAXPGPATH, "%s/%u", moved_tblspc, db_oid);
+
+	/*
+	 * Check that the database directory exists in the given tablespace.  A
+	 * missing directory is not an error (the caller just skips this
+	 * tablespace); any other stat() failure is fatal.
+	 */
+	if (stat(old_db_dir, &st) != 0)
+	{
+		if (errno != ENOENT)
+			pg_fatal("could not stat file \"%s\": %m", old_db_dir);
+		return false;
+	}
+
+	/*
+	 * Create directory for stuff that is moved aside.  It is shared by all
+	 * databases in the tablespace, so it may already exist.
+	 *
+	 * The failure reports below include %m so that the underlying OS error
+	 * is shown, like the other failure reports in this file.
+	 */
+	if (pg_mkdir_p(moved_tblspc, pg_dir_create_mode) != 0 && errno != EEXIST)
+		pg_fatal("could not create directory \"%s\": %m", moved_tblspc);
+
+	/* Create directory for old catalog files. */
+	if (pg_mkdir_p(old_catalog_dir, pg_dir_create_mode) != 0)
+		pg_fatal("could not create directory \"%s\": %m", old_catalog_dir);
+
+	/* Move the new cluster's database directory aside. */
+	if (rename(new_db_dir, moved_db_dir) != 0)
+		pg_fatal("could not rename \"%s\" to \"%s\": %m", new_db_dir, moved_db_dir);
+
+	/* Move the old cluster's database directory into place. */
+	if (rename(old_db_dir, new_db_dir) != 0)
+		pg_fatal("could not rename \"%s\" to \"%s\": %m", old_db_dir, new_db_dir);
+
+	return true;
+}
+
+/*
+ * FileNameMapCmp()
+ *
+ * qsort() comparator for FileNameMap that sorts by RelFileNumber.
+ */
+static int
+FileNameMapCmp(const void *a, const void *b)
+{
+	/* Only the relfilenumber field takes part in the ordering. */
+	RelFileNumber lhs = ((const FileNameMap *) a)->relfilenumber;
+	RelFileNumber rhs = ((const FileNameMap *) b)->relfilenumber;
+
+	return pg_cmp_u32(lhs, rhs);
+}
+
+/*
+ * parse_relfilenumber()
+ *
+ * Attempt to parse the RelFileNumber of the given file name. If we can't,
+ * return InvalidRelFileNumber. Note that this code snippet is lifted from
+ * parse_filename_for_nontemp_relation().
+ */
+static RelFileNumber
+parse_relfilenumber(const char *filename)
+{
+	char	   *stop;
+	unsigned long value;
+	bool		starts_with_nonzero_digit;
+
+	/* Names that do not begin with a digit 1-9 are rejected outright. */
+	starts_with_nonzero_digit = (filename[0] >= '1' && filename[0] <= '9');
+	if (!starts_with_nonzero_digit)
+		return InvalidRelFileNumber;
+
+	/* Clear errno first so a strtoul() failure can be told apart. */
+	errno = 0;
+	value = strtoul(filename, &stop, 10);
+
+	/*
+	 * Only the leading digits are examined; whatever follows them in the
+	 * file name is ignored.
+	 */
+	if (errno != 0 || stop == filename || value == 0 || value > PG_UINT32_MAX)
+		return InvalidRelFileNumber;
+
+	return (RelFileNumber) value;
+}
+
+/*
+ * swap_catalog_files()
+ *
+ * Moves the old catalog files aside, and moves the new catalog files into
+ * place. prepare_for_swap() should have already been called (and returned
+ * true) for the tablespace/database being transferred.
+ *
+ * The arguments for the following parameters should be the corresponding
+ * variables returned by prepare_for_swap():
+ *
+ * old_catalog_dir: The directory for the old cluster's catalog files.
+ * new_db_dir: New cluster's database directory (for DB being transferred).
+ * moved_db_dir: Moved-aside pg_restore-generated database directory.
+ */
+static void
+swap_catalog_files(FileNameMap *maps, int size, const char *old_catalog_dir,
+ const char *new_db_dir, const char *moved_db_dir)
+{
+ DIR *dir;
+ struct dirent *de;
+ char path[MAXPGPATH];
+ char dest[MAXPGPATH];
+ RelFileNumber rfn;
+
+ /*
+ * Move the old catalog files aside. Every regular file in new_db_dir
+ * whose name does not parse to a relfilenumber found in maps is renamed
+ * into old_catalog_dir; files that do match an entry in maps are left
+ * where they are.
+ */
+ dir = opendir(new_db_dir);
+ if (dir == NULL)
+ pg_fatal("could not open directory \"%s\": %m", new_db_dir);
+ /*
+ * errno is cleared before each readdir() so that the check after the
+ * loop can tell end-of-directory apart from a read error.
+ */
+ while (errno = 0, (de = readdir(dir)) != NULL)
+ {
+ snprintf(path, sizeof(path), "%s/%s", new_db_dir, de->d_name);
+ /* Anything that is not a regular file is skipped. */
+ if (get_dirent_type(path, de, false, PG_LOG_ERROR) != PGFILETYPE_REG)
+ continue;
+
+ rfn = parse_relfilenumber(de->d_name);
+ if (RelFileNumberIsValid(rfn))
+ {
+ FileNameMap key = {.relfilenumber = rfn};
+
+ /*
+ * bsearch() requires maps to be sorted consistently with
+ * FileNameMapCmp, i.e. by relfilenumber.
+ */
+ if (bsearch(&key, maps, size, sizeof(FileNameMap), FileNameMapCmp))
+ continue;
+ }
+
+ snprintf(dest, sizeof(dest), "%s/%s", old_catalog_dir, de->d_name);
+ if (rename(path, dest) != 0)
+ pg_fatal("could not rename \"%s\" to \"%s\": %m", path, dest);
+ }
+ if (errno)
+ pg_fatal("could not read directory \"%s\": %m", new_db_dir);
+ (void) closedir(dir);
+
+ /*
+ * Move the new catalog files into place. This applies the same filter
+ * as above to moved_db_dir: regular files that do not match an entry in
+ * maps are renamed into new_db_dir.
+ */
+ dir = opendir(moved_db_dir);
+ if (dir == NULL)
+ pg_fatal("could not open directory \"%s\": %m", moved_db_dir);
+ while (errno = 0, (de = readdir(dir)) != NULL)
+ {
+ snprintf(path, sizeof(path), "%s/%s", moved_db_dir, de->d_name);
+ if (get_dirent_type(path, de, false, PG_LOG_ERROR) != PGFILETYPE_REG)
+ continue;
+
+ rfn = parse_relfilenumber(de->d_name);
+ if (RelFileNumberIsValid(rfn))
+ {
+ FileNameMap key = {.relfilenumber = rfn};
+
+ if (bsearch(&key, maps, size, sizeof(FileNameMap), FileNameMapCmp))
+ continue;
+ }
+
+ snprintf(dest, sizeof(dest), "%s/%s", new_db_dir, de->d_name);
+ if (rename(path, dest) != 0)
+ pg_fatal("could not rename \"%s\" to \"%s\": %m", path, dest);
+
+ /*
+ * We don't fsync() the database files in the file synchronization
+ * stage of pg_upgrade in swap mode, so we need to synchronize them
+ * ourselves. We only do this for the catalog files because they were
+ * created during pg_restore with fsync=off. We assume that the user
+ * data files were properly persisted to disk when the user last
+ * shut it down.
+ */
+ if (user_opts.do_sync)
+ sync_queue_push(dest);
+ }
+ if (errno)
+ pg_fatal("could not read directory \"%s\": %m", moved_db_dir);
+ (void) closedir(dir);
+
+ /*
+ * Ensure the directory entries are persisted to disk.
+ *
+ * NOTE(review): unlike the per-file sync above, these directory syncs are
+ * not conditional on user_opts.do_sync -- confirm that is intended.
+ */
+ if (fsync_fname(new_db_dir, true) != 0)
+ pg_fatal("could not synchronize directory \"%s\": %m", new_db_dir);
+ if (fsync_parent_path(new_db_dir) != 0)
+ pg_fatal("could not synchronize parent directory of \"%s\": %m", new_db_dir);
+}
+
+/*
+ * do_swap()
+ *
+ * Perform the required steps for --swap for a single database. In short this
+ * moves the old cluster's database directory into the new cluster and then
+ * replaces any files for system catalogs with the ones that were generated
+ * during pg_restore.
+ */
+static void
+do_swap(FileNameMap *maps, int size, char *old_tablespace)
+{
+	char		old_catalog_dir[MAXPGPATH];
+	char		new_db_dir[MAXPGPATH];
+	char		moved_db_dir[MAXPGPATH];
+	Oid			db_oid;
+
+	/*
+	 * swap_catalog_files() looks maps up by relfilenumber with bsearch(), so
+	 * order the array by relfilenumber first.  maps should already be sorted
+	 * by OID, so in general this shouldn't have much work to do.
+	 */
+	qsort(maps, size, sizeof(FileNameMap), FileNameMapCmp);
+
+	/* The database OID is taken from the first entry, read after sorting. */
+	db_oid = maps[0].db_oid;
+
+	/* A specific tablespace was requested: handle only that one. */
+	if (old_tablespace != NULL)
+	{
+		if (prepare_for_swap(old_tablespace, db_oid,
+							 old_catalog_dir, new_db_dir, moved_db_dir))
+			swap_catalog_files(maps, size,
+							   old_catalog_dir, new_db_dir, moved_db_dir);
+		return;
+	}
+
+	/*
+	 * No tablespace was specified, so process the data directory first and
+	 * then every old tablespace known to os_info.
+	 */
+	if (prepare_for_swap(old_cluster.pgdata, db_oid,
+						 old_catalog_dir, new_db_dir, moved_db_dir))
+		swap_catalog_files(maps, size,
+						   old_catalog_dir, new_db_dir, moved_db_dir);
+
+	for (int tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++)
+	{
+		const char *tblspc = os_info.old_tablespaces[tblnum];
+
+		if (prepare_for_swap(tblspc, db_oid,
+							 old_catalog_dir, new_db_dir, moved_db_dir))
+			swap_catalog_files(maps, size,
+							   old_catalog_dir, new_db_dir, moved_db_dir);
+	}
}
/*
@@ -145,6 +510,20 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
vm_must_add_frozenbit = true;
+ /* --swap has its own subroutine */
+ if (user_opts.transfer_mode == TRANSFER_MODE_SWAP)
+ {
+ /*
+ * We don't support --swap to upgrade from versions that require
+ * rewriting the visibility map. We should've failed already if
+ * someone tries to do that.
+ */
+ Assert(!vm_must_add_frozenbit);
+
+ do_swap(maps, size, old_tablespace);
+ return;
+ }
+
for (mapnum = 0; mapnum < size; mapnum++)
{
if (old_tablespace == NULL ||
@@ -259,6 +638,11 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"",
old_file, new_file);
linkFile(old_file, new_file, map->nspname, map->relname);
+ break;
+ case TRANSFER_MODE_SWAP:
+ /* swap mode is handled in its own code path */
+ pg_fatal("should never happen");
+ break;
}
}
}
diff --git a/src/bin/pg_upgrade/t/006_transfer_modes.pl b/src/bin/pg_upgrade/t/006_transfer_modes.pl
index 518e0994145..34fddbcdab5 100644
--- a/src/bin/pg_upgrade/t/006_transfer_modes.pl
+++ b/src/bin/pg_upgrade/t/006_transfer_modes.pl
@@ -16,6 +16,15 @@ sub test_mode
my $old = PostgreSQL::Test::Cluster->new('old', install_path => $ENV{oldinstall});
my $new = PostgreSQL::Test::Cluster->new('new');
+ # --swap can't be used to upgrade from versions older than 10, so just skip
+ # the test if the old cluster version is too old.
+ if ($old->pg_version < 10 && $mode eq "--swap")
+ {
+ $old->clean_node();
+ $new->clean_node();
+ return;
+ }
+
if (defined($ENV{oldinstall}))
{
# Checksums are now enabled by default, but weren't before 18, so pass
@@ -97,5 +106,6 @@ test_mode('--clone');
test_mode('--copy');
test_mode('--copy-file-range');
test_mode('--link');
+test_mode('--swap');
done_testing();
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 1e6250cc190..7b62687a2aa 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -45,9 +45,6 @@
*/
#define MINIMUM_VERSION_FOR_PG_WAL 100000
-#ifdef PG_FLUSH_DATA_WORKS
-static int pre_sync_fname(const char *fname, bool isdir);
-#endif
static void walkdir(const char *path,
int (*action) (const char *fname, bool isdir),
bool process_symlinks,
@@ -352,16 +349,16 @@ walkdir(const char *path,
}
/*
- * Hint to the OS that it should get ready to fsync() this file.
+ * Hint to the OS that it should get ready to fsync() this file, if supported
+ * by the platform.
*
* Ignores errors trying to open unreadable files, and reports other errors
* non-fatally.
*/
-#ifdef PG_FLUSH_DATA_WORKS
-
-static int
+int
pre_sync_fname(const char *fname, bool isdir)
{
+#ifdef PG_FLUSH_DATA_WORKS
int fd;
fd = open(fname, O_RDONLY | PG_BINARY, 0);
@@ -388,11 +385,10 @@ pre_sync_fname(const char *fname, bool isdir)
#endif
(void) close(fd);
+#endif /* PG_FLUSH_DATA_WORKS */
return 0;
}
-#endif /* PG_FLUSH_DATA_WORKS */
-
/*
* fsync_fname -- Try to fsync a file or directory
*
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index 8274bc877ab..9fd88953e43 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -33,6 +33,7 @@ typedef enum DataDirSyncMethod
struct iovec; /* avoid including port/pg_iovec.h here */
#ifdef FRONTEND
+extern int pre_sync_fname(const char *fname, bool isdir);
extern int fsync_fname(const char *fname, bool isdir);
extern void sync_pgdata(const char *pg_data, int serverVersion,
DataDirSyncMethod sync_method, bool sync_data_files);