diff options
Diffstat (limited to 'src/bin/pg_rewind/filemap.c')
-rw-r--r-- | src/bin/pg_rewind/filemap.c | 294 |
1 file changed, 134 insertions, 160 deletions
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index d756c28ca8a..314b064b223 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -3,6 +3,19 @@ * filemap.c * A data structure for keeping track of files that have changed. * + * This source file contains the logic to decide what to do with different + * kinds of files, and the data structure to support it. Before modifying + * anything, pg_rewind collects information about all the files and their + * attributes in the target and source data directories. It also scans the + * WAL log in the target, and collects information about data blocks that + * were changed. All this information is stored in a hash table, using the + * file path relative to the root of the data directory as the key. + * + * After collecting all the information required, the decide_file_actions() + * function scans the hash table and decides what action needs to be taken + * for each file. Finally, it sorts the array to the final order that the + * actions should be executed in. + * * Copyright (c) 2013-2020, PostgreSQL Global Development Group * *------------------------------------------------------------------------- @@ -14,22 +27,41 @@ #include <unistd.h> #include "catalog/pg_tablespace_d.h" +#include "common/hashfn.h" #include "common/string.h" #include "datapagemap.h" #include "filemap.h" #include "pg_rewind.h" #include "storage/fd.h" -filemap_t *filemap = NULL; +/* + * Define a hash table which we can use to store information about the files + * appearing in source and target systems. 
+ */ +static uint32 hash_string_pointer(const char *s); +#define SH_PREFIX filehash +#define SH_ELEMENT_TYPE file_entry_t +#define SH_KEY_TYPE const char * +#define SH_KEY path +#define SH_HASH_KEY(tb, key) hash_string_pointer(key) +#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) +#define SH_SCOPE static inline +#define SH_RAW_ALLOCATOR pg_malloc0 +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +#define FILEHASH_INITIAL_SIZE 1000 + +static filehash_hash *filehash; static bool isRelDataFile(const char *path); static char *datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno); -static int path_cmp(const void *a, const void *b); -static file_entry_t *get_filemap_entry(const char *path, bool create); +static file_entry_t *insert_filehash_entry(const char *path); +static file_entry_t *lookup_filehash_entry(const char *path); static int final_filemap_cmp(const void *a, const void *b); -static void filemap_list_to_array(filemap_t *map); static bool check_file_excluded(const char *path, bool is_source); /* @@ -131,54 +163,26 @@ static const struct exclude_list_item excludeFiles[] = }; /* - * Create a new file map (stored in the global pointer "filemap"). + * Initialize the hash table for the file map. 
*/ void -filemap_create(void) +filehash_init(void) { - filemap_t *map; - - map = pg_malloc(sizeof(filemap_t)); - map->first = map->last = NULL; - map->nlist = 0; - map->array = NULL; - map->narray = 0; - - Assert(filemap == NULL); - filemap = map; + filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL); } -/* Look up or create entry for 'path' */ +/* Look up entry for 'path', creating a new one if it doesn't exist */ static file_entry_t * -get_filemap_entry(const char *path, bool create) +insert_filehash_entry(const char *path) { - filemap_t *map = filemap; file_entry_t *entry; - file_entry_t **e; - file_entry_t key; - file_entry_t *key_ptr; - - if (map->array) - { - key.path = (char *) path; - key_ptr = &key; - e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *), - path_cmp); - } - else - e = NULL; + bool found; - if (e) - entry = *e; - else if (!create) - entry = NULL; - else + entry = filehash_insert(filehash, path, &found); + if (!found) { - /* Create a new entry for this file */ - entry = pg_malloc(sizeof(file_entry_t)); entry->path = pg_strdup(path); entry->isrelfile = isRelDataFile(path); - entry->action = FILE_ACTION_UNDECIDED; entry->target_exists = false; entry->target_type = FILE_TYPE_UNDEFINED; @@ -192,21 +196,18 @@ get_filemap_entry(const char *path, bool create) entry->source_size = 0; entry->source_link_target = NULL; - entry->next = NULL; - - if (map->last) - { - map->last->next = entry; - map->last = entry; - } - else - map->first = map->last = entry; - map->nlist++; + entry->action = FILE_ACTION_UNDECIDED; } return entry; } +static file_entry_t * +lookup_filehash_entry(const char *path) +{ + return filehash_lookup(filehash, path); +} + /* * Callback for processing source file list. * @@ -220,8 +221,6 @@ process_source_file(const char *path, file_type_t type, size_t size, { file_entry_t *entry; - Assert(filemap->array == NULL); - /* * Pretend that pg_wal is a directory, even if it's really a symlink. 
We * don't want to mess with the symlink itself, nor complain if it's a @@ -238,7 +237,9 @@ process_source_file(const char *path, file_type_t type, size_t size, pg_fatal("data file \"%s\" in source is not a regular file", path); /* Remember this source file */ - entry = get_filemap_entry(path, true); + entry = insert_filehash_entry(path); + if (entry->source_exists) + pg_fatal("duplicate source file \"%s\"", path); entry->source_exists = true; entry->source_type = type; entry->source_size = size; @@ -248,15 +249,12 @@ process_source_file(const char *path, file_type_t type, size_t size, /* * Callback for processing target file list. * - * All source files must be already processed before calling this. We record - * the type and size of file, so that decide_file_action() can later decide - * what to do with it. + * Record the type and size of the file, like process_source_file() does. */ void process_target_file(const char *path, file_type_t type, size_t size, const char *link_target) { - filemap_t *map = filemap; file_entry_t *entry; /* @@ -264,21 +262,6 @@ process_target_file(const char *path, file_type_t type, size_t size, * from the target data folder all paths which have been filtered out from * the source data folder when processing the source files. */ - if (map->array == NULL) - { - /* on first call, initialize lookup array */ - if (map->nlist == 0) - { - /* should not happen */ - pg_fatal("source file list is empty"); - } - - filemap_list_to_array(map); - - Assert(map->array != NULL); - - qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp); - } /* * Like in process_source_file, pretend that pg_wal is always a directory. 
@@ -287,7 +270,9 @@ process_target_file(const char *path, file_type_t type, size_t size, type = FILE_TYPE_DIRECTORY; /* Remember this target file */ - entry = get_filemap_entry(path, true); + entry = insert_filehash_entry(path); + if (entry->target_exists) + pg_fatal("duplicate target file \"%s\"", path); entry->target_exists = true; entry->target_type = type; entry->target_size = size; @@ -301,7 +286,7 @@ process_target_file(const char *path, file_type_t type, size_t size, * if so, records it in 'target_pages_to_overwrite' bitmap. * * NOTE: All the files on both systems must have already been added to the - * file map! + * hash table! */ void process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode, @@ -312,47 +297,45 @@ process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno_inseg; int segno; - Assert(filemap->array); - segno = blkno / RELSEG_SIZE; blkno_inseg = blkno % RELSEG_SIZE; path = datasegpath(rnode, forknum, segno); - entry = get_filemap_entry(path, false); + entry = lookup_filehash_entry(path); pfree(path); + /* + * If the block still exists in both systems, remember it. Otherwise we + * can safely ignore it. + * + * If the block is beyond the EOF in the source system, or the file + * doesn't exist in the source at all, we're going to truncate/remove it + * away from the target anyway. Likewise, if it doesn't exist in the + * target anymore, we will copy it over with the "tail" from the source + * system, anyway. + * + * It is possible to find WAL for a file that doesn't exist on either + * system anymore. It means that the relation was dropped later in the + * target system, and independently on the source system too, or that it + * was created and dropped in the target system and it never existed in + * the source. Either way, we can safely ignore it. 
+ */ if (entry) { - int64 end_offset; - Assert(entry->isrelfile); if (entry->target_type != FILE_TYPE_REGULAR) pg_fatal("unexpected page modification for non-regular file \"%s\"", entry->path); - /* - * If the block beyond the EOF in the source system, no need to - * remember it now, because we're going to truncate it away from the - * target anyway. Also no need to remember the block if it's beyond - * the current EOF in the target system; we will copy it over with the - * "tail" from the source system, anyway. - */ - end_offset = (blkno_inseg + 1) * BLCKSZ; - if (end_offset <= entry->source_size && - end_offset <= entry->target_size) - datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg); - } - else - { - /* - * If we don't have any record of this file in the file map, it means - * that it's a relation that doesn't exist in the source system. It - * could exist in the target system; we haven't moved the target-only - * entries from the linked list to the array yet! But in any case, if - * it doesn't exist in the source it will be removed from the target - * too, and we can safely ignore it. - */ + if (entry->target_exists && entry->source_exists) + { + off_t end_offset; + + end_offset = (blkno_inseg + 1) * BLCKSZ; + if (end_offset <= entry->source_size && end_offset <= entry->target_size) + datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg); + } } } @@ -423,34 +406,6 @@ check_file_excluded(const char *path, bool is_source) return false; } -/* - * Convert the linked list of entries in map->first/last to the array, - * map->array. 
- */ -static void -filemap_list_to_array(filemap_t *map) -{ - int narray; - file_entry_t *entry, - *next; - - map->array = (file_entry_t **) - pg_realloc(map->array, - (map->nlist + map->narray) * sizeof(file_entry_t *)); - - narray = map->narray; - for (entry = map->first; entry != NULL; entry = next) - { - map->array[narray++] = entry; - next = entry->next; - entry->next = NULL; - } - Assert(narray == map->nlist + map->narray); - map->narray = narray; - map->nlist = 0; - map->first = map->last = NULL; -} - static const char * action_to_str(file_action_t action) { @@ -478,32 +433,31 @@ action_to_str(file_action_t action) * Calculate the totals needed for progress reports. */ void -calculate_totals(void) +calculate_totals(filemap_t *filemap) { file_entry_t *entry; int i; - filemap_t *map = filemap; - map->total_size = 0; - map->fetch_size = 0; + filemap->total_size = 0; + filemap->fetch_size = 0; - for (i = 0; i < map->narray; i++) + for (i = 0; i < filemap->nentries; i++) { - entry = map->array[i]; + entry = filemap->entries[i]; if (entry->source_type != FILE_TYPE_REGULAR) continue; - map->total_size += entry->source_size; + filemap->total_size += entry->source_size; if (entry->action == FILE_ACTION_COPY) { - map->fetch_size += entry->source_size; + filemap->fetch_size += entry->source_size; continue; } if (entry->action == FILE_ACTION_COPY_TAIL) - map->fetch_size += (entry->source_size - entry->target_size); + filemap->fetch_size += (entry->source_size - entry->target_size); if (entry->target_pages_to_overwrite.bitmapsize > 0) { @@ -512,7 +466,7 @@ calculate_totals(void) iter = datapagemap_iterate(&entry->target_pages_to_overwrite); while (datapagemap_next(iter, &blk)) - map->fetch_size += BLCKSZ; + filemap->fetch_size += BLCKSZ; pg_free(iter); } @@ -520,15 +474,14 @@ calculate_totals(void) } void -print_filemap(void) +print_filemap(filemap_t *filemap) { - filemap_t *map = filemap; file_entry_t *entry; int i; - for (i = 0; i < map->narray; i++) + for (i = 0; i < 
filemap->nentries; i++) { - entry = map->array[i]; + entry = filemap->entries[i]; if (entry->action != FILE_ACTION_NONE || entry->target_pages_to_overwrite.bitmapsize > 0) { @@ -650,15 +603,6 @@ datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) return path; } -static int -path_cmp(const void *a, const void *b) -{ - file_entry_t *fa = *((file_entry_t **) a); - file_entry_t *fb = *((file_entry_t **) b); - - return strcmp(fa->path, fb->path); -} - /* * In the final stage, the filemap is sorted so that removals come last. * From disk space usage point of view, it would be better to do removals @@ -834,22 +778,52 @@ decide_file_action(file_entry_t *entry) /* * Decide what to do with each file. + * + * Returns a 'filemap' with the entries in the order that their actions + * should be executed. */ -void +filemap_t * decide_file_actions(void) { int i; + filehash_iterator it; + file_entry_t *entry; + filemap_t *filemap; - filemap_list_to_array(filemap); - - for (i = 0; i < filemap->narray; i++) + filehash_start_iterate(filehash, &it); + while ((entry = filehash_iterate(filehash, &it)) != NULL) { - file_entry_t *entry = filemap->array[i]; - entry->action = decide_file_action(entry); } - /* Sort the actions to the order that they should be performed */ - qsort(filemap->array, filemap->narray, sizeof(file_entry_t *), + /* + * Turn the hash table into an array, and sort in the order that the + * actions should be performed. + */ + filemap = pg_malloc(offsetof(filemap_t, entries) + + filehash->members * sizeof(file_entry_t *)); + filemap->nentries = filehash->members; + filehash_start_iterate(filehash, &it); + i = 0; + while ((entry = filehash_iterate(filehash, &it)) != NULL) + { + filemap->entries[i++] = entry; + } + + qsort(filemap->entries, filemap->nentries, sizeof(file_entry_t *), final_filemap_cmp); + + return filemap; +} + + +/* + * Helper function for filemap hash table. 
+ */ +static uint32 +hash_string_pointer(const char *s) +{ + unsigned char *ss = (unsigned char *) s; + + return hash_bytes(ss, strlen(s)); } |