aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Momjian <bruce@momjian.us>2012-11-14 17:32:04 -0500
committerBruce Momjian <bruce@momjian.us>2012-11-14 17:32:07 -0500
commit29add0de4920e4f448a30bfc35798b939c211d97 (patch)
treeb6bfd2849a44b9822a7f36c84fde931a121833c1
parenta235b85a0ba06666dbbfdb9249a65dbfa9b42ebd (diff)
downloadpostgresql-29add0de4920e4f448a30bfc35798b939c211d97.tar.gz
postgresql-29add0de4920e4f448a30bfc35798b939c211d97.zip
In pg_upgrade, copy fsm, vm, and extent files by checking for file
existence via open(), rather than collecting a directory listing and looking up matching relfilenode files with sequential scans of the array. This speeds up pg_upgrade by 2x for a large number of tables, e.g. 16k. Per observation by Ants Aasma.
-rw-r--r--contrib/pg_upgrade/file.c55
-rw-r--r--contrib/pg_upgrade/pg_upgrade.h2
-rw-r--r--contrib/pg_upgrade/relfilenode.c208
3 files changed, 82 insertions, 183 deletions
diff --git a/contrib/pg_upgrade/file.c b/contrib/pg_upgrade/file.c
index a5d92c62fce..d8cd8f5b045 100644
--- a/contrib/pg_upgrade/file.c
+++ b/contrib/pg_upgrade/file.c
@@ -221,61 +221,6 @@ copy_file(const char *srcfile, const char *dstfile, bool force)
#endif
-/*
- * load_directory()
- *
- * Read all the file names in the specified directory, and return them as
- * an array of "char *" pointers. The array address is returned in
- * *namelist, and the function result is the count of file names.
- *
- * To free the result data, free each (char *) array member, then free the
- * namelist array itself.
- */
-int
-load_directory(const char *dirname, char ***namelist)
-{
- DIR *dirdesc;
- struct dirent *direntry;
- int count = 0;
- int allocsize = 64; /* initial array size */
-
- *namelist = (char **) pg_malloc(allocsize * sizeof(char *));
-
- if ((dirdesc = opendir(dirname)) == NULL)
- pg_log(PG_FATAL, "could not open directory \"%s\": %s\n",
- dirname, getErrorText(errno));
-
- while (errno = 0, (direntry = readdir(dirdesc)) != NULL)
- {
- if (count >= allocsize)
- {
- allocsize *= 2;
- *namelist = (char **)
- pg_realloc(*namelist, allocsize * sizeof(char *));
- }
-
- (*namelist)[count++] = pg_strdup(direntry->d_name);
- }
-
-#ifdef WIN32
- /*
- * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
- * released version
- */
- if (GetLastError() == ERROR_NO_MORE_FILES)
- errno = 0;
-#endif
-
- if (errno)
- pg_log(PG_FATAL, "could not read directory \"%s\": %s\n",
- dirname, getErrorText(errno));
-
- closedir(dirdesc);
-
- return count;
-}
-
-
void
check_hard_link(void)
{
diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h
index 0248d40e852..ace56e59905 100644
--- a/contrib/pg_upgrade/pg_upgrade.h
+++ b/contrib/pg_upgrade/pg_upgrade.h
@@ -7,7 +7,6 @@
#include <unistd.h>
#include <assert.h>
-#include <dirent.h>
#include <sys/stat.h>
#include <sys/time.h>
@@ -366,7 +365,6 @@ const char *setupPageConverter(pageCnvCtx **result);
typedef void *pageCnvCtx;
#endif
-int load_directory(const char *dirname, char ***namelist);
const char *copyAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
const char *dst, bool force);
const char *linkAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
diff --git a/contrib/pg_upgrade/relfilenode.c b/contrib/pg_upgrade/relfilenode.c
index 33a867f0d02..d763ba7d2ba 100644
--- a/contrib/pg_upgrade/relfilenode.c
+++ b/contrib/pg_upgrade/relfilenode.c
@@ -17,9 +17,8 @@
static void transfer_single_new_db(pageCnvCtx *pageConverter,
FileNameMap *maps, int size);
-static void transfer_relfile(pageCnvCtx *pageConverter,
- const char *fromfile, const char *tofile,
- const char *nspname, const char *relname);
+static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
+ const char *suffix);
/*
@@ -131,55 +130,21 @@ static void
transfer_single_new_db(pageCnvCtx *pageConverter,
FileNameMap *maps, int size)
{
- char old_dir[MAXPGPATH];
- char file_pattern[MAXPGPATH];
- char **namelist = NULL;
- int numFiles = 0;
int mapnum;
- int fileno;
- bool vm_crashsafe_change = false;
-
- old_dir[0] = '\0';
-
- /* Do not copy non-crashsafe vm files for binaries that assume crashsafety */
+ bool vm_crashsafe_match = true;
+
+ /*
+ * Do the old and new clusters disagree on the crash safety of the vm
+ * files? If so, do not copy them.
+ */
if (old_cluster.controldata.cat_ver < VISIBILITY_MAP_CRASHSAFE_CAT_VER &&
new_cluster.controldata.cat_ver >= VISIBILITY_MAP_CRASHSAFE_CAT_VER)
- vm_crashsafe_change = true;
+ vm_crashsafe_match = false;
for (mapnum = 0; mapnum < size; mapnum++)
{
- char old_file[MAXPGPATH];
- char new_file[MAXPGPATH];
-
- /* Changed tablespaces? Need a new directory scan? */
- if (strcmp(maps[mapnum].old_dir, old_dir) != 0)
- {
- if (numFiles > 0)
- {
- for (fileno = 0; fileno < numFiles; fileno++)
- pg_free(namelist[fileno]);
- pg_free(namelist);
- }
-
- snprintf(old_dir, sizeof(old_dir), "%s", maps[mapnum].old_dir);
- numFiles = load_directory(old_dir, &namelist);
- }
-
- /* Copying files might take some time, so give feedback. */
-
- snprintf(old_file, sizeof(old_file), "%s/%u", maps[mapnum].old_dir,
- maps[mapnum].old_relfilenode);
- snprintf(new_file, sizeof(new_file), "%s/%u", maps[mapnum].new_dir,
- maps[mapnum].new_relfilenode);
- pg_log(PG_REPORT, OVERWRITE_MESSAGE, old_file);
-
- /*
- * Copy/link the relation's primary file (segment 0 of main fork)
- * to the new cluster
- */
- unlink(new_file);
- transfer_relfile(pageConverter, old_file, new_file,
- maps[mapnum].nspname, maps[mapnum].relname);
+ /* transfer primary file */
+ transfer_relfile(pageConverter, &maps[mapnum], "");
/* fsm/vm files added in PG 8.4 */
if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
@@ -187,67 +152,11 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
/*
* Copy/link any fsm and vm files, if they exist
*/
- snprintf(file_pattern, sizeof(file_pattern), "%u_",
- maps[mapnum].old_relfilenode);
-
- for (fileno = 0; fileno < numFiles; fileno++)
- {
- char *vm_offset = strstr(namelist[fileno], "_vm");
- bool is_vm_file = false;
-
- /* Is a visibility map file? (name ends with _vm) */
- if (vm_offset && strlen(vm_offset) == strlen("_vm"))
- is_vm_file = true;
-
- if (strncmp(namelist[fileno], file_pattern,
- strlen(file_pattern)) == 0 &&
- (!is_vm_file || !vm_crashsafe_change))
- {
- snprintf(old_file, sizeof(old_file), "%s/%s", maps[mapnum].old_dir,
- namelist[fileno]);
- snprintf(new_file, sizeof(new_file), "%s/%u%s", maps[mapnum].new_dir,
- maps[mapnum].new_relfilenode, strchr(namelist[fileno], '_'));
-
- unlink(new_file);
- transfer_relfile(pageConverter, old_file, new_file,
- maps[mapnum].nspname, maps[mapnum].relname);
- }
- }
- }
-
- /*
- * Now copy/link any related segments as well. Remember, PG breaks
- * large files into 1GB segments, the first segment has no extension,
- * subsequent segments are named relfilenode.1, relfilenode.2,
- * relfilenode.3, ... 'fsm' and 'vm' files use underscores so are not
- * copied.
- */
- snprintf(file_pattern, sizeof(file_pattern), "%u.",
- maps[mapnum].old_relfilenode);
-
- for (fileno = 0; fileno < numFiles; fileno++)
- {
- if (strncmp(namelist[fileno], file_pattern,
- strlen(file_pattern)) == 0)
- {
- snprintf(old_file, sizeof(old_file), "%s/%s", maps[mapnum].old_dir,
- namelist[fileno]);
- snprintf(new_file, sizeof(new_file), "%s/%u%s", maps[mapnum].new_dir,
- maps[mapnum].new_relfilenode, strchr(namelist[fileno], '.'));
-
- unlink(new_file);
- transfer_relfile(pageConverter, old_file, new_file,
- maps[mapnum].nspname, maps[mapnum].relname);
- }
+ transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
+ if (vm_crashsafe_match)
+ transfer_relfile(pageConverter, &maps[mapnum], "_vm");
}
}
-
- if (numFiles > 0)
- {
- for (fileno = 0; fileno < numFiles; fileno++)
- pg_free(namelist[fileno]);
- pg_free(namelist);
- }
}
@@ -257,31 +166,78 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
* Copy or link file from old cluster to new one.
*/
static void
-transfer_relfile(pageCnvCtx *pageConverter, const char *old_file,
- const char *new_file, const char *nspname, const char *relname)
+transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
+ const char *type_suffix)
{
const char *msg;
-
- if ((user_opts.transfer_mode == TRANSFER_MODE_LINK) && (pageConverter != NULL))
- pg_log(PG_FATAL, "This upgrade requires page-by-page conversion, "
- "you must use copy mode instead of link mode.\n");
-
- if (user_opts.transfer_mode == TRANSFER_MODE_COPY)
+ char old_file[MAXPGPATH];
+ char new_file[MAXPGPATH];
+ int fd;
+ int segno;
+ char extent_suffix[65];
+
+ /*
+ * Copy/link the file and any related segments. Remember, PG breaks
+ * large files into 1GB segments, the first segment has no extension,
+ * subsequent segments are named relfilenode.1, relfilenode.2,
+ * relfilenode.3, ... The loop below stops at the first extent, fsm,
+ * or vm file that does not exist.
+ */
+ for (segno = 0;; segno++)
{
- pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
+ if (segno == 0)
+ extent_suffix[0] = '\0';
+ else
+ snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno);
+
+ snprintf(old_file, sizeof(old_file), "%s/%u%s%s", map->old_dir,
+ map->old_relfilenode, type_suffix, extent_suffix);
+ snprintf(new_file, sizeof(new_file), "%s/%u%s%s", map->new_dir,
+ map->new_relfilenode, type_suffix, extent_suffix);
+
+ /* Is it an extent, fsm, or vm file? */
+ if (type_suffix[0] != '\0' || segno != 0)
+ {
+ /* Did file open fail? */
+ if ((fd = open(old_file, O_RDONLY)) == -1)
+ {
+ /* File does not exist? That's OK, just return */
+ if (errno == ENOENT)
+ return;
+ else
+ pg_log(PG_FATAL, "non-existant file error while copying relation \"%s.%s\" (\"%s\" to \"%s\")\n",
+ map->nspname, map->relname, old_file, new_file);
+ }
+ close(fd);
+ }
- if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true)) != NULL)
- pg_log(PG_FATAL, "error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
- nspname, relname, old_file, new_file, msg);
- }
- else
- {
- pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n", old_file, new_file);
+ unlink(new_file);
+
+ /* Copying files might take some time, so give feedback. */
+ pg_log(PG_REPORT, OVERWRITE_MESSAGE, old_file);
+
+ if ((user_opts.transfer_mode == TRANSFER_MODE_LINK) && (pageConverter != NULL))
+ pg_log(PG_FATAL, "This upgrade requires page-by-page conversion, "
+ "you must use copy mode instead of link mode.\n");
+
+ if (user_opts.transfer_mode == TRANSFER_MODE_COPY)
+ {
+ pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
+
+ if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true)) != NULL)
+ pg_log(PG_FATAL, "error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
+ map->nspname, map->relname, old_file, new_file, msg);
+ }
+ else
+ {
+ pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n", old_file, new_file);
+
+ if ((msg = linkAndUpdateFile(pageConverter, old_file, new_file)) != NULL)
+ pg_log(PG_FATAL,
+ "error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
+ map->nspname, map->relname, old_file, new_file, msg);
+ }
+ }
- if ((msg = linkAndUpdateFile(pageConverter, old_file, new_file)) != NULL)
- pg_log(PG_FATAL,
- "error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
- nspname, relname, old_file, new_file, msg);
- }
return;
}