author     Heikki Linnakangas <heikki.linnakangas@iki.fi>  2020-11-04 11:21:14 +0200
committer  Heikki Linnakangas <heikki.linnakangas@iki.fi>  2020-11-04 11:21:14 +0200
commit     f81e97d0475cd4bc597adc23b665bd84fbf79a0d (patch)
tree       1d5c24adced36b141110fa0fd29592f317991773
parent     eb00f1d4bf96bdba236bcc089f3ae94db9b7c603 (diff)
pg_rewind: Replace the hybrid list+array data structure with simplehash.
Now that simplehash can be used in frontend code, let's make use of it.

Reviewed-by: Kyotaro Horiguchi, Soumyadeep Chakraborty
Discussion: https://www.postgresql.org/message-id/0c5b3783-af52-3ee5-f8fa-6e794061f70d%40iki.fi
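For anyone who has not used simplehash outside the backend before, here is a minimal, self-contained sketch of the instantiate-then-use pattern this commit relies on. The "demohash" prefix, demo_entry_t element type, and main() are illustrative stand-ins, not part of the patch; the real instantiation added here (prefix "filehash", element type file_entry_t) is in the filemap.c hunk below. The sketch assumes the normal PostgreSQL frontend build environment (postgres_fe.h and src/include on the include path).

/*
 * Sketch only: a frontend simplehash instantiation and its basic calls,
 * mirroring the pattern used by this patch.  Names are illustrative.
 */
#include "postgres_fe.h"

#include "common/hashfn.h"

/* element type; simplehash requires the 'status' member */
typedef struct demo_entry_t
{
	uint32		status;
	const char *path;			/* key: file path */
	int			refcount;		/* example payload */
} demo_entry_t;

static uint32
hash_string_key(const char *s)
{
	return hash_bytes((const unsigned char *) s, strlen(s));
}

/* generate a string-keyed hash table named "demohash" */
#define SH_PREFIX		demohash
#define SH_ELEMENT_TYPE	demo_entry_t
#define SH_KEY_TYPE		const char *
#define SH_KEY			path
#define SH_HASH_KEY(tb, key)	hash_string_key(key)
#define SH_EQUAL(tb, a, b)		(strcmp(a, b) == 0)
#define SH_SCOPE		static inline
#define SH_RAW_ALLOCATOR	pg_malloc0
#define SH_DECLARE
#define SH_DEFINE
#include "lib/simplehash.h"

int
main(void)
{
	demohash_hash *ht = demohash_create(100, NULL);
	demohash_iterator it;
	demo_entry_t *entry;
	bool		found;

	/* insert-or-find by key */
	entry = demohash_insert(ht, "base/1/1234", &found);
	if (!found)
	{
		/* make the entry own its key, as the patch does with pg_strdup */
		entry->path = pg_strdup("base/1/1234");
		entry->refcount = 0;
	}
	entry->refcount++;

	/* plain lookup returns NULL when the key is absent */
	if (demohash_lookup(ht, "no/such/path") == NULL)
		printf("not found\n");

	/* iterate over all entries, in no particular order */
	demohash_start_iterate(ht, &it);
	while ((entry = demohash_iterate(ht, &it)) != NULL)
		printf("%s: %d\n", entry->path, entry->refcount);

	return 0;
}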
-rw-r--r--   src/bin/pg_rewind/copy_fetch.c    |   4
-rw-r--r--   src/bin/pg_rewind/fetch.c         |   2
-rw-r--r--   src/bin/pg_rewind/fetch.h         |   2
-rw-r--r--   src/bin/pg_rewind/filemap.c       | 294
-rw-r--r--   src/bin/pg_rewind/filemap.h       |  67
-rw-r--r--   src/bin/pg_rewind/libpq_fetch.c   |   4
-rw-r--r--   src/bin/pg_rewind/pg_rewind.c     |  14
7 files changed, 176 insertions, 211 deletions
diff --git a/src/bin/pg_rewind/copy_fetch.c b/src/bin/pg_rewind/copy_fetch.c
index e4b8ce6aaf4..1cd4449314d 100644
--- a/src/bin/pg_rewind/copy_fetch.c
+++ b/src/bin/pg_rewind/copy_fetch.c
@@ -207,9 +207,9 @@ copy_executeFileMap(filemap_t *map)
file_entry_t *entry;
int i;
- for (i = 0; i < map->narray; i++)
+ for (i = 0; i < map->nentries; i++)
{
- entry = map->array[i];
+ entry = map->entries[i];
execute_pagemap(&entry->target_pages_to_overwrite, entry->path);
switch (entry->action)
diff --git a/src/bin/pg_rewind/fetch.c b/src/bin/pg_rewind/fetch.c
index f18fe5386ed..f41d0f295ea 100644
--- a/src/bin/pg_rewind/fetch.c
+++ b/src/bin/pg_rewind/fetch.c
@@ -37,7 +37,7 @@ fetchSourceFileList(void)
* Fetch all relation data files that are marked in the given data page map.
*/
void
-executeFileMap(void)
+execute_file_actions(filemap_t *filemap)
{
if (datadir_source)
copy_executeFileMap(filemap);
diff --git a/src/bin/pg_rewind/fetch.h b/src/bin/pg_rewind/fetch.h
index 7cf8b6ea090..b20df8b1537 100644
--- a/src/bin/pg_rewind/fetch.h
+++ b/src/bin/pg_rewind/fetch.h
@@ -25,7 +25,7 @@
*/
extern void fetchSourceFileList(void);
extern char *fetchFile(const char *filename, size_t *filesize);
-extern void executeFileMap(void);
+extern void execute_file_actions(filemap_t *filemap);
/* in libpq_fetch.c */
extern void libpqProcessFileList(void);
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index d756c28ca8a..314b064b223 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -3,6 +3,19 @@
* filemap.c
* A data structure for keeping track of files that have changed.
*
+ * This source file contains the logic to decide what to do with different
+ * kinds of files, and the data structure to support it. Before modifying
+ * anything, pg_rewind collects information about all the files and their
+ * attributes in the target and source data directories. It also scans the
+ * WAL log in the target, and collects information about data blocks that
+ * were changed. All this information is stored in a hash table, using the
+ * file path relative to the root of the data directory as the key.
+ *
+ * After collecting all the information required, the decide_file_actions()
+ * function scans the hash table and decides what action needs to be taken
+ * for each file. Finally, it sorts the array to the final order that the
+ * actions should be executed in.
+ *
* Copyright (c) 2013-2020, PostgreSQL Global Development Group
*
*-------------------------------------------------------------------------
@@ -14,22 +27,41 @@
#include <unistd.h>
#include "catalog/pg_tablespace_d.h"
+#include "common/hashfn.h"
#include "common/string.h"
#include "datapagemap.h"
#include "filemap.h"
#include "pg_rewind.h"
#include "storage/fd.h"
-filemap_t *filemap = NULL;
+/*
+ * Define a hash table which we can use to store information about the files
+ * appearing in source and target systems.
+ */
+static uint32 hash_string_pointer(const char *s);
+#define SH_PREFIX filehash
+#define SH_ELEMENT_TYPE file_entry_t
+#define SH_KEY_TYPE const char *
+#define SH_KEY path
+#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
+#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
+#define SH_SCOPE static inline
+#define SH_RAW_ALLOCATOR pg_malloc0
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+#define FILEHASH_INITIAL_SIZE 1000
+
+static filehash_hash *filehash;
static bool isRelDataFile(const char *path);
static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
BlockNumber segno);
-static int path_cmp(const void *a, const void *b);
-static file_entry_t *get_filemap_entry(const char *path, bool create);
+static file_entry_t *insert_filehash_entry(const char *path);
+static file_entry_t *lookup_filehash_entry(const char *path);
static int final_filemap_cmp(const void *a, const void *b);
-static void filemap_list_to_array(filemap_t *map);
static bool check_file_excluded(const char *path, bool is_source);
/*
@@ -131,54 +163,26 @@ static const struct exclude_list_item excludeFiles[] =
};
/*
- * Create a new file map (stored in the global pointer "filemap").
+ * Initialize the hash table for the file map.
*/
void
-filemap_create(void)
+filehash_init(void)
{
- filemap_t *map;
-
- map = pg_malloc(sizeof(filemap_t));
- map->first = map->last = NULL;
- map->nlist = 0;
- map->array = NULL;
- map->narray = 0;
-
- Assert(filemap == NULL);
- filemap = map;
+ filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
}
-/* Look up or create entry for 'path' */
+/* Look up entry for 'path', creating a new one if it doesn't exist */
static file_entry_t *
-get_filemap_entry(const char *path, bool create)
+insert_filehash_entry(const char *path)
{
- filemap_t *map = filemap;
file_entry_t *entry;
- file_entry_t **e;
- file_entry_t key;
- file_entry_t *key_ptr;
-
- if (map->array)
- {
- key.path = (char *) path;
- key_ptr = &key;
- e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
- path_cmp);
- }
- else
- e = NULL;
+ bool found;
- if (e)
- entry = *e;
- else if (!create)
- entry = NULL;
- else
+ entry = filehash_insert(filehash, path, &found);
+ if (!found)
{
- /* Create a new entry for this file */
- entry = pg_malloc(sizeof(file_entry_t));
entry->path = pg_strdup(path);
entry->isrelfile = isRelDataFile(path);
- entry->action = FILE_ACTION_UNDECIDED;
entry->target_exists = false;
entry->target_type = FILE_TYPE_UNDEFINED;
@@ -192,21 +196,18 @@ get_filemap_entry(const char *path, bool create)
entry->source_size = 0;
entry->source_link_target = NULL;
- entry->next = NULL;
-
- if (map->last)
- {
- map->last->next = entry;
- map->last = entry;
- }
- else
- map->first = map->last = entry;
- map->nlist++;
+ entry->action = FILE_ACTION_UNDECIDED;
}
return entry;
}
+static file_entry_t *
+lookup_filehash_entry(const char *path)
+{
+ return filehash_lookup(filehash, path);
+}
+
/*
* Callback for processing source file list.
*
@@ -220,8 +221,6 @@ process_source_file(const char *path, file_type_t type, size_t size,
{
file_entry_t *entry;
- Assert(filemap->array == NULL);
-
/*
* Pretend that pg_wal is a directory, even if it's really a symlink. We
* don't want to mess with the symlink itself, nor complain if it's a
@@ -238,7 +237,9 @@ process_source_file(const char *path, file_type_t type, size_t size,
pg_fatal("data file \"%s\" in source is not a regular file", path);
/* Remember this source file */
- entry = get_filemap_entry(path, true);
+ entry = insert_filehash_entry(path);
+ if (entry->source_exists)
+ pg_fatal("duplicate source file \"%s\"", path);
entry->source_exists = true;
entry->source_type = type;
entry->source_size = size;
@@ -248,15 +249,12 @@ process_source_file(const char *path, file_type_t type, size_t size,
/*
* Callback for processing target file list.
*
- * All source files must be already processed before calling this. We record
- * the type and size of file, so that decide_file_action() can later decide
- * what to do with it.
+ * Record the type and size of the file, like process_source_file() does.
*/
void
process_target_file(const char *path, file_type_t type, size_t size,
const char *link_target)
{
- filemap_t *map = filemap;
file_entry_t *entry;
/*
@@ -264,21 +262,6 @@ process_target_file(const char *path, file_type_t type, size_t size,
* from the target data folder all paths which have been filtered out from
* the source data folder when processing the source files.
*/
- if (map->array == NULL)
- {
- /* on first call, initialize lookup array */
- if (map->nlist == 0)
- {
- /* should not happen */
- pg_fatal("source file list is empty");
- }
-
- filemap_list_to_array(map);
-
- Assert(map->array != NULL);
-
- qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
- }
/*
* Like in process_source_file, pretend that pg_wal is always a directory.
@@ -287,7 +270,9 @@ process_target_file(const char *path, file_type_t type, size_t size,
type = FILE_TYPE_DIRECTORY;
/* Remember this target file */
- entry = get_filemap_entry(path, true);
+ entry = insert_filehash_entry(path);
+ if (entry->target_exists)
+ pg_fatal("duplicate source file \"%s\"", path);
entry->target_exists = true;
entry->target_type = type;
entry->target_size = size;
@@ -301,7 +286,7 @@ process_target_file(const char *path, file_type_t type, size_t size,
* if so, records it in 'target_pages_to_overwrite' bitmap.
*
* NOTE: All the files on both systems must have already been added to the
- * file map!
+ * hash table!
*/
void
process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode,
@@ -312,47 +297,45 @@ process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode,
BlockNumber blkno_inseg;
int segno;
- Assert(filemap->array);
-
segno = blkno / RELSEG_SIZE;
blkno_inseg = blkno % RELSEG_SIZE;
path = datasegpath(rnode, forknum, segno);
- entry = get_filemap_entry(path, false);
+ entry = lookup_filehash_entry(path);
pfree(path);
+ /*
+ * If the block still exists in both systems, remember it. Otherwise we
+ * can safely ignore it.
+ *
+ * If the block is beyond the EOF in the source system, or the file
+ * doesn't exist in the source at all, we're going to truncate/remove it
+ * away from the target anyway. Likewise, if it doesn't exist in the
+ * target anymore, we will copy it over with the "tail" from the source
+ * system, anyway.
+ *
+ * It is possible to find WAL for a file that doesn't exist on either
+ * system anymore. It means that the relation was dropped later in the
+ * target system, and independently on the source system too, or that it
+ * was created and dropped in the target system and it never existed in
+ * the source. Either way, we can safely ignore it.
+ */
if (entry)
{
- int64 end_offset;
-
Assert(entry->isrelfile);
if (entry->target_type != FILE_TYPE_REGULAR)
pg_fatal("unexpected page modification for non-regular file \"%s\"",
entry->path);
- /*
- * If the block beyond the EOF in the source system, no need to
- * remember it now, because we're going to truncate it away from the
- * target anyway. Also no need to remember the block if it's beyond
- * the current EOF in the target system; we will copy it over with the
- * "tail" from the source system, anyway.
- */
- end_offset = (blkno_inseg + 1) * BLCKSZ;
- if (end_offset <= entry->source_size &&
- end_offset <= entry->target_size)
- datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
- }
- else
- {
- /*
- * If we don't have any record of this file in the file map, it means
- * that it's a relation that doesn't exist in the source system. It
- * could exist in the target system; we haven't moved the target-only
- * entries from the linked list to the array yet! But in any case, if
- * it doesn't exist in the source it will be removed from the target
- * too, and we can safely ignore it.
- */
+ if (entry->target_exists && entry->source_exists)
+ {
+ off_t end_offset;
+
+ end_offset = (blkno_inseg + 1) * BLCKSZ;
+ if (end_offset <= entry->source_size && end_offset <= entry->target_size)
+ datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
+ }
}
}
@@ -423,34 +406,6 @@ check_file_excluded(const char *path, bool is_source)
return false;
}
-/*
- * Convert the linked list of entries in map->first/last to the array,
- * map->array.
- */
-static void
-filemap_list_to_array(filemap_t *map)
-{
- int narray;
- file_entry_t *entry,
- *next;
-
- map->array = (file_entry_t **)
- pg_realloc(map->array,
- (map->nlist + map->narray) * sizeof(file_entry_t *));
-
- narray = map->narray;
- for (entry = map->first; entry != NULL; entry = next)
- {
- map->array[narray++] = entry;
- next = entry->next;
- entry->next = NULL;
- }
- Assert(narray == map->nlist + map->narray);
- map->narray = narray;
- map->nlist = 0;
- map->first = map->last = NULL;
-}
-
static const char *
action_to_str(file_action_t action)
{
@@ -478,32 +433,31 @@ action_to_str(file_action_t action)
* Calculate the totals needed for progress reports.
*/
void
-calculate_totals(void)
+calculate_totals(filemap_t *filemap)
{
file_entry_t *entry;
int i;
- filemap_t *map = filemap;
- map->total_size = 0;
- map->fetch_size = 0;
+ filemap->total_size = 0;
+ filemap->fetch_size = 0;
- for (i = 0; i < map->narray; i++)
+ for (i = 0; i < filemap->nentries; i++)
{
- entry = map->array[i];
+ entry = filemap->entries[i];
if (entry->source_type != FILE_TYPE_REGULAR)
continue;
- map->total_size += entry->source_size;
+ filemap->total_size += entry->source_size;
if (entry->action == FILE_ACTION_COPY)
{
- map->fetch_size += entry->source_size;
+ filemap->fetch_size += entry->source_size;
continue;
}
if (entry->action == FILE_ACTION_COPY_TAIL)
- map->fetch_size += (entry->source_size - entry->target_size);
+ filemap->fetch_size += (entry->source_size - entry->target_size);
if (entry->target_pages_to_overwrite.bitmapsize > 0)
{
@@ -512,7 +466,7 @@ calculate_totals(void)
iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
while (datapagemap_next(iter, &blk))
- map->fetch_size += BLCKSZ;
+ filemap->fetch_size += BLCKSZ;
pg_free(iter);
}
@@ -520,15 +474,14 @@ calculate_totals(void)
}
void
-print_filemap(void)
+print_filemap(filemap_t *filemap)
{
- filemap_t *map = filemap;
file_entry_t *entry;
int i;
- for (i = 0; i < map->narray; i++)
+ for (i = 0; i < filemap->nentries; i++)
{
- entry = map->array[i];
+ entry = filemap->entries[i];
if (entry->action != FILE_ACTION_NONE ||
entry->target_pages_to_overwrite.bitmapsize > 0)
{
@@ -650,15 +603,6 @@ datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
return path;
}
-static int
-path_cmp(const void *a, const void *b)
-{
- file_entry_t *fa = *((file_entry_t **) a);
- file_entry_t *fb = *((file_entry_t **) b);
-
- return strcmp(fa->path, fb->path);
-}
-
/*
* In the final stage, the filemap is sorted so that removals come last.
* From disk space usage point of view, it would be better to do removals
@@ -834,22 +778,52 @@ decide_file_action(file_entry_t *entry)
/*
* Decide what to do with each file.
+ *
+ * Returns a 'filemap' with the entries in the order that their actions
+ * should be executed.
*/
-void
+filemap_t *
decide_file_actions(void)
{
int i;
+ filehash_iterator it;
+ file_entry_t *entry;
+ filemap_t *filemap;
- filemap_list_to_array(filemap);
-
- for (i = 0; i < filemap->narray; i++)
+ filehash_start_iterate(filehash, &it);
+ while ((entry = filehash_iterate(filehash, &it)) != NULL)
{
- file_entry_t *entry = filemap->array[i];
-
entry->action = decide_file_action(entry);
}
- /* Sort the actions to the order that they should be performed */
- qsort(filemap->array, filemap->narray, sizeof(file_entry_t *),
+ /*
+ * Turn the hash table into an array, and sort in the order that the
+ * actions should be performed.
+ */
+ filemap = pg_malloc(offsetof(filemap_t, entries) +
+ filehash->members * sizeof(file_entry_t *));
+ filemap->nentries = filehash->members;
+ filehash_start_iterate(filehash, &it);
+ i = 0;
+ while ((entry = filehash_iterate(filehash, &it)) != NULL)
+ {
+ filemap->entries[i++] = entry;
+ }
+
+ qsort(&filemap->entries, filemap->nentries, sizeof(file_entry_t *),
final_filemap_cmp);
+
+ return filemap;
+}
+
+
+/*
+ * Helper function for filemap hash table.
+ */
+static uint32
+hash_string_pointer(const char *s)
+{
+ unsigned char *ss = (unsigned char *) s;
+
+ return hash_bytes(ss, strlen(s));
}
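The hunk above removes path_cmp() but leaves final_filemap_cmp() untouched, so its body does not appear here; the nearby comments only state the ordering it must produce (actions in execution order, with removals last and a directory's contents removed before the directory itself). The standalone sketch below, using simplified stand-in types rather than the real file_entry_t, illustrates that ordering rule; it is not the actual final_filemap_cmp() code.

/*
 * Sketch: the sort order decide_file_actions() needs from its comparator.
 * Stand-in types only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef enum
{
	ACTION_COPY,				/* lower values are executed first */
	ACTION_REMOVE				/* removals are executed last */
} demo_action_t;

typedef struct
{
	demo_action_t action;
	const char *path;
} demo_entry_t;

static int
demo_final_cmp(const void *a, const void *b)
{
	const demo_entry_t *fa = *(demo_entry_t *const *) a;
	const demo_entry_t *fb = *(demo_entry_t *const *) b;

	/* order by action first, so that removals come last */
	if (fa->action != fb->action)
		return (fa->action > fb->action) ? 1 : -1;

	/* among removals, reverse path order: remove children before parents */
	if (fa->action == ACTION_REMOVE)
		return strcmp(fb->path, fa->path);
	return strcmp(fa->path, fb->path);
}

int
main(void)
{
	demo_entry_t e[] = {
		{ACTION_REMOVE, "base/1"},
		{ACTION_COPY, "global/pg_control"},
		{ACTION_REMOVE, "base/1/12345"},
	};
	demo_entry_t *ptrs[] = {&e[0], &e[1], &e[2]};
	int			i;

	qsort(ptrs, 3, sizeof(demo_entry_t *), demo_final_cmp);

	/* prints the copy first, then base/1/12345, then base/1 */
	for (i = 0; i < 3; i++)
		printf("%d %s\n", ptrs[i]->action, ptrs[i]->path);
	return 0;
}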
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index 3d423558734..6f03447d7eb 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -12,15 +12,6 @@
#include "storage/block.h"
#include "storage/relfilenode.h"
-/*
- * For every file found in the local or remote system, we have a file entry
- * that contains information about the file on both systems. For relation
- * files, there is also a page map that marks pages in the file that were
- * changed in the target after the last common checkpoint. Each entry also
- * contains an 'action' field, which says what we are going to do with the
- * file.
- */
-
/* these enum values are sorted in the order we want actions to be processed */
typedef enum
{
@@ -45,9 +36,21 @@ typedef enum
FILE_TYPE_SYMLINK
} file_type_t;
+/*
+ * For every file found in the local or remote system, we have a file entry
+ * that contains information about the file on both systems. For relation
+ * files, there is also a page map that marks pages in the file that were
+ * changed in the target after the last common checkpoint.
+ *
+ * When gathering information, these are kept in a hash table, private to
+ * filemap.c. decide_file_actions() fills in the 'action' field, sorts all
+ * the entries, and returns them in an array, ready for executing the actions.
+ */
typedef struct file_entry_t
{
- char *path;
+ uint32 status; /* hash status */
+
+ const char *path;
bool isrelfile; /* is it a relation data file? */
/*
@@ -76,44 +79,25 @@ typedef struct file_entry_t
* What will we do to the file?
*/
file_action_t action;
-
- struct file_entry_t *next;
} file_entry_t;
+/*
+ * This contains the final decisions on what to do with each file.
+ * 'entries' array contains an entry for each file, sorted in the order
+ * that their actions should be executed.
+ */
typedef struct filemap_t
{
- /*
- * New entries are accumulated to a linked list, in process_source_file
- * and process_target_file.
- */
- file_entry_t *first;
- file_entry_t *last;
- int nlist; /* number of entries currently in list */
-
- /*
- * After processing all the remote files, the entries in the linked list
- * are moved to this array. After processing local files, too, all the
- * local entries are added to the array by decide_file_actions(), and
- * sorted in the final order. After decide_file_actions(), all the entries
- * are in the array, and the linked list is empty.
- */
- file_entry_t **array;
- int narray; /* current length of array */
-
- /*
- * Summary information.
- */
+ /* Summary information, filled by calculate_totals() */
uint64 total_size; /* total size of the source cluster */
uint64 fetch_size; /* number of bytes that needs to be copied */
-} filemap_t;
-extern filemap_t *filemap;
-
-extern void filemap_create(void);
-extern void calculate_totals(void);
-extern void print_filemap(void);
+ int nentries; /* size of 'entries' array */
+ file_entry_t *entries[FLEXIBLE_ARRAY_MEMBER];
+} filemap_t;
/* Functions for populating the filemap */
+extern void filehash_init(void);
extern void process_source_file(const char *path, file_type_t type,
size_t size, const char *link_target);
extern void process_target_file(const char *path, file_type_t type,
@@ -121,6 +105,9 @@ extern void process_target_file(const char *path, file_type_t type,
extern void process_target_wal_block_change(ForkNumber forknum,
RelFileNode rnode,
BlockNumber blkno);
-extern void decide_file_actions(void);
+
+extern filemap_t *decide_file_actions(void);
+extern void calculate_totals(filemap_t *filemap);
+extern void print_filemap(filemap_t *filemap);
#endif /* FILEMAP_H */
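The new filemap_t ends in a flexible array member, and decide_file_actions() in the filemap.c hunk sizes the single allocation as offsetof(filemap_t, entries) plus one pointer per hash table member. A tiny standalone illustration of that idiom, using plain C99 stand-ins rather than the patch's types:

/*
 * Sketch: the flexible-array-member allocation idiom behind filemap_t.
 * Stand-in types only; PostgreSQL spells the empty brackets
 * FLEXIBLE_ARRAY_MEMBER.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

typedef struct demo_map_t
{
	int			nentries;
	const char *entries[];		/* C99 flexible array member */
} demo_map_t;

int
main(void)
{
	int			n = 3;
	demo_map_t *map;
	int			i;

	/* one allocation covers the fixed header plus the n-element array */
	map = malloc(offsetof(demo_map_t, entries) + n * sizeof(const char *));
	if (map == NULL)
		return 1;
	map->nentries = n;
	map->entries[0] = "COPY";
	map->entries[1] = "COPY_TAIL";
	map->entries[2] = "REMOVE";

	for (i = 0; i < map->nentries; i++)
		printf("%s\n", map->entries[i]);

	free(map);
	return 0;
}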
diff --git a/src/bin/pg_rewind/libpq_fetch.c b/src/bin/pg_rewind/libpq_fetch.c
index 2fc4a784bdb..16d451ae167 100644
--- a/src/bin/pg_rewind/libpq_fetch.c
+++ b/src/bin/pg_rewind/libpq_fetch.c
@@ -460,9 +460,9 @@ libpq_executeFileMap(filemap_t *map)
PQresultErrorMessage(res));
PQclear(res);
- for (i = 0; i < map->narray; i++)
+ for (i = 0; i < map->nentries; i++)
{
- entry = map->array[i];
+ entry = map->entries[i];
/* If this is a relation file, copy the modified blocks */
execute_pagemap(&entry->target_pages_to_overwrite, entry->path);
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 4760090d06e..574d7f7163b 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -129,6 +129,7 @@ main(int argc, char **argv)
TimeLineID endtli;
ControlFileData ControlFile_new;
bool writerecoveryconf = false;
+ filemap_t *filemap;
pg_logging_init(argv[0]);
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
@@ -368,13 +369,16 @@ main(int argc, char **argv)
(uint32) (chkptrec >> 32), (uint32) chkptrec,
chkpttli);
+ /* Initialize the hash table to track the status of each file */
+ filehash_init();
+
/*
* Collect information about all files in the target and source systems.
*/
- filemap_create();
if (showprogress)
pg_log_info("reading source file list");
fetchSourceFileList();
+
if (showprogress)
pg_log_info("reading target file list");
traverse_datadir(datadir_target, &process_target_file);
@@ -395,13 +399,13 @@ main(int argc, char **argv)
* We have collected all information we need from both systems. Decide
* what to do with each file.
*/
- decide_file_actions();
+ filemap = decide_file_actions();
if (showprogress)
- calculate_totals();
+ calculate_totals(filemap);
/* this is too verbose even for verbose mode */
if (debug)
- print_filemap();
+ print_filemap(filemap);
/*
* Ok, we're ready to start copying things over.
@@ -421,7 +425,7 @@ main(int argc, char **argv)
* modified the target directory and there is no turning back!
*/
- executeFileMap();
+ execute_file_actions(filemap);
progress_report(true);