author | Robert Haas <rhaas@postgresql.org> | 2022-08-10 14:03:23 -0400
---|---|---
committer | Robert Haas <rhaas@postgresql.org> | 2022-08-10 14:03:23 -0400
commit | a8c012869763c711abc9085f54b2a100b60a85fa (patch) |
tree | 1a278296a8f719835afe477ffa43d89c29f0e43b /src/backend/replication/basebackup.c |
parent | 309857f9c1825d0591579579bdde2a8c8bd3e491 (diff) |
Move basebackup code to new directory src/backend/backup
Reviewed by David Steele and Justin Pryzby
Discussion: http://postgr.es/m/CA+TgmoafqboATDSoXHz8VLrSwK_MDhjthK4hEpYjqf9_1Fmczw%40mail.gmail.com
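For code that builds against these files, the practical effect of the move is an include-path change. The following is a minimal sketch, assuming the basebackup headers follow the .c files from src/include/replication into a matching src/include/backup directory; the exact new paths are illustrative and are not part of this diff:

/* before this commit: basebackup headers lived under replication/ */
/* #include "replication/basebackup.h"      */
/* #include "replication/basebackup_sink.h" */

/* after this commit: the same headers are assumed to live under backup/ */
#include "backup/basebackup.h"
#include "backup/basebackup_sink.h"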
Diffstat (limited to 'src/backend/replication/basebackup.c')
-rw-r--r-- | src/backend/replication/basebackup.c | 1829 |
1 file changed, 0 insertions, 1829 deletions
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c deleted file mode 100644 index deeddd09a9c..00000000000 --- a/src/backend/replication/basebackup.c +++ /dev/null @@ -1,1829 +0,0 @@ -/*------------------------------------------------------------------------- - * - * basebackup.c - * code for taking a base backup and streaming it to a standby - * - * Portions Copyright (c) 2010-2022, PostgreSQL Global Development Group - * - * IDENTIFICATION - * src/backend/replication/basebackup.c - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <sys/stat.h> -#include <unistd.h> -#include <time.h> - -#include "access/xlog_internal.h" /* for pg_backup_start/stop */ -#include "common/compression.h" -#include "common/file_perm.h" -#include "commands/defrem.h" -#include "lib/stringinfo.h" -#include "miscadmin.h" -#include "nodes/pg_list.h" -#include "pgstat.h" -#include "pgtar.h" -#include "port.h" -#include "postmaster/syslogger.h" -#include "replication/basebackup.h" -#include "replication/basebackup_sink.h" -#include "replication/basebackup_target.h" -#include "replication/backup_manifest.h" -#include "replication/walsender.h" -#include "replication/walsender_private.h" -#include "storage/bufpage.h" -#include "storage/checksum.h" -#include "storage/dsm_impl.h" -#include "storage/fd.h" -#include "storage/ipc.h" -#include "storage/reinit.h" -#include "utils/builtins.h" -#include "utils/ps_status.h" -#include "utils/relcache.h" -#include "utils/resowner.h" -#include "utils/timestamp.h" - -/* - * How much data do we want to send in one CopyData message? Note that - * this may also result in reading the underlying files in chunks of this - * size. - * - * NB: The buffer size is required to be a multiple of the system block - * size, so use that value instead if it's bigger than our preference. 
- */ -#define SINK_BUFFER_LENGTH Max(32768, BLCKSZ) - -typedef struct -{ - const char *label; - bool progress; - bool fastcheckpoint; - bool nowait; - bool includewal; - uint32 maxrate; - bool sendtblspcmapfile; - bool send_to_client; - bool use_copytblspc; - BaseBackupTargetHandle *target_handle; - backup_manifest_option manifest; - pg_compress_algorithm compression; - pg_compress_specification compression_specification; - pg_checksum_type manifest_checksum_type; -} basebackup_options; - -static int64 sendTablespace(bbsink *sink, char *path, char *oid, bool sizeonly, - struct backup_manifest_info *manifest); -static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly, - List *tablespaces, bool sendtblspclinks, - backup_manifest_info *manifest, const char *spcoid); -static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, - struct stat *statbuf, bool missing_ok, Oid dboid, - backup_manifest_info *manifest, const char *spcoid); -static void sendFileWithContent(bbsink *sink, const char *filename, - const char *content, - backup_manifest_info *manifest); -static int64 _tarWriteHeader(bbsink *sink, const char *filename, - const char *linktarget, struct stat *statbuf, - bool sizeonly); -static void _tarWritePadding(bbsink *sink, int len); -static void convert_link_to_directory(const char *pathbuf, struct stat *statbuf); -static void perform_base_backup(basebackup_options *opt, bbsink *sink); -static void parse_basebackup_options(List *options, basebackup_options *opt); -static int compareWalFileNames(const ListCell *a, const ListCell *b); -static bool is_checksummed_file(const char *fullpath, const char *filename); -static int basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset, - const char *filename, bool partial_read_ok); - -/* Was the backup currently in-progress initiated in recovery mode? */ -static bool backup_started_in_recovery = false; - -/* Total number of checksum failures during base backup. */ -static long long int total_checksum_failures; - -/* Do not verify checksums. */ -static bool noverify_checksums = false; - -/* - * Definition of one element part of an exclusion list, used for paths part - * of checksum validation or base backups. "name" is the name of the file - * or path to check for exclusion. If "match_prefix" is true, any items - * matching the name as prefix are excluded. - */ -struct exclude_list_item -{ - const char *name; - bool match_prefix; -}; - -/* - * The contents of these directories are removed or recreated during server - * start so they are not included in backups. The directories themselves are - * kept and included as empty to preserve access permissions. - * - * Note: this list should be kept in sync with the filter lists in pg_rewind's - * filemap.c. - */ -static const char *const excludeDirContents[] = -{ - /* - * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped - * because extensions like pg_stat_statements store data there. - */ - PG_STAT_TMP_DIR, - - /* - * It is generally not useful to backup the contents of this directory - * even if the intention is to restore to another primary. See backup.sgml - * for a more detailed description. - */ - "pg_replslot", - - /* Contents removed on startup, see dsm_cleanup_for_mmap(). */ - PG_DYNSHMEM_DIR, - - /* Contents removed on startup, see AsyncShmemInit(). */ - "pg_notify", - - /* - * Old contents are loaded for possible debugging but are not required for - * normal operation, see SerialInit(). 
- */ - "pg_serial", - - /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */ - "pg_snapshots", - - /* Contents zeroed on startup, see StartupSUBTRANS(). */ - "pg_subtrans", - - /* end of list */ - NULL -}; - -/* - * List of files excluded from backups. - */ -static const struct exclude_list_item excludeFiles[] = -{ - /* Skip auto conf temporary file. */ - {PG_AUTOCONF_FILENAME ".tmp", false}, - - /* Skip current log file temporary file */ - {LOG_METAINFO_DATAFILE_TMP, false}, - - /* - * Skip relation cache because it is rebuilt on startup. This includes - * temporary files. - */ - {RELCACHE_INIT_FILENAME, true}, - - /* - * backup_label and tablespace_map should not exist in a running cluster - * capable of doing an online backup, but exclude them just in case. - */ - {BACKUP_LABEL_FILE, false}, - {TABLESPACE_MAP, false}, - - /* - * If there's a backup_manifest, it belongs to a backup that was used to - * start this server. It is *not* correct for this backup. Our - * backup_manifest is injected into the backup separately if users want - * it. - */ - {"backup_manifest", false}, - - {"postmaster.pid", false}, - {"postmaster.opts", false}, - - /* end of list */ - {NULL, false} -}; - -/* - * List of files excluded from checksum validation. - * - * Note: this list should be kept in sync with what pg_checksums.c - * includes. - */ -static const struct exclude_list_item noChecksumFiles[] = { - {"pg_control", false}, - {"pg_filenode.map", false}, - {"pg_internal.init", true}, - {"PG_VERSION", false}, -#ifdef EXEC_BACKEND - {"config_exec_params", true}, -#endif - {NULL, false} -}; - -/* - * Actually do a base backup for the specified tablespaces. - * - * This is split out mainly to avoid complaints about "variable might be - * clobbered by longjmp" from stupider versions of gcc. - */ -static void -perform_base_backup(basebackup_options *opt, bbsink *sink) -{ - bbsink_state state; - XLogRecPtr endptr; - TimeLineID endtli; - StringInfo labelfile; - StringInfo tblspc_map_file; - backup_manifest_info manifest; - - /* Initial backup state, insofar as we know it now. */ - state.tablespaces = NIL; - state.tablespace_num = 0; - state.bytes_done = 0; - state.bytes_total = 0; - state.bytes_total_is_valid = false; - - /* we're going to use a BufFile, so we need a ResourceOwner */ - Assert(CurrentResourceOwner == NULL); - CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup"); - - backup_started_in_recovery = RecoveryInProgress(); - - labelfile = makeStringInfo(); - tblspc_map_file = makeStringInfo(); - InitializeBackupManifest(&manifest, opt->manifest, - opt->manifest_checksum_type); - - total_checksum_failures = 0; - - basebackup_progress_wait_checkpoint(); - state.startptr = do_pg_backup_start(opt->label, opt->fastcheckpoint, - &state.starttli, - labelfile, &state.tablespaces, - tblspc_map_file); - - /* - * Once do_pg_backup_start has been called, ensure that any failure causes - * us to abort the backup so we don't "leak" a backup counter. For this - * reason, *all* functionality between do_pg_backup_start() and the end of - * do_pg_backup_stop() should be inside the error cleanup block! 
- */ - - PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false)); - { - ListCell *lc; - tablespaceinfo *ti; - - /* Add a node for the base directory at the end */ - ti = palloc0(sizeof(tablespaceinfo)); - ti->size = -1; - state.tablespaces = lappend(state.tablespaces, ti); - - /* - * Calculate the total backup size by summing up the size of each - * tablespace - */ - if (opt->progress) - { - basebackup_progress_estimate_backup_size(); - - foreach(lc, state.tablespaces) - { - tablespaceinfo *tmp = (tablespaceinfo *) lfirst(lc); - - if (tmp->path == NULL) - tmp->size = sendDir(sink, ".", 1, true, state.tablespaces, - true, NULL, NULL); - else - tmp->size = sendTablespace(sink, tmp->path, tmp->oid, true, - NULL); - state.bytes_total += tmp->size; - } - state.bytes_total_is_valid = true; - } - - /* notify basebackup sink about start of backup */ - bbsink_begin_backup(sink, &state, SINK_BUFFER_LENGTH); - - /* Send off our tablespaces one by one */ - foreach(lc, state.tablespaces) - { - tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc); - - if (ti->path == NULL) - { - struct stat statbuf; - bool sendtblspclinks = true; - - bbsink_begin_archive(sink, "base.tar"); - - /* In the main tar, include the backup_label first... */ - sendFileWithContent(sink, BACKUP_LABEL_FILE, labelfile->data, - &manifest); - - /* Then the tablespace_map file, if required... */ - if (opt->sendtblspcmapfile) - { - sendFileWithContent(sink, TABLESPACE_MAP, tblspc_map_file->data, - &manifest); - sendtblspclinks = false; - } - - /* Then the bulk of the files... */ - sendDir(sink, ".", 1, false, state.tablespaces, - sendtblspclinks, &manifest, NULL); - - /* ... and pg_control after everything else. */ - if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - XLOG_CONTROL_FILE))); - sendFile(sink, XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, - false, InvalidOid, &manifest, NULL); - } - else - { - char *archive_name = psprintf("%s.tar", ti->oid); - - bbsink_begin_archive(sink, archive_name); - - sendTablespace(sink, ti->path, ti->oid, false, &manifest); - } - - /* - * If we're including WAL, and this is the main data directory we - * don't treat this as the end of the tablespace. Instead, we will - * include the xlog files below and stop afterwards. This is safe - * since the main data directory is always sent *last*. - */ - if (opt->includewal && ti->path == NULL) - { - Assert(lnext(state.tablespaces, lc) == NULL); - } - else - { - /* Properly terminate the tarfile. */ - StaticAssertStmt(2 * TAR_BLOCK_SIZE <= BLCKSZ, - "BLCKSZ too small for 2 tar blocks"); - memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE); - bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE); - - /* OK, that's the end of the archive. */ - bbsink_end_archive(sink); - } - } - - basebackup_progress_wait_wal_archive(&state); - endptr = do_pg_backup_stop(labelfile->data, !opt->nowait, &endtli); - } - PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false)); - - - if (opt->includewal) - { - /* - * We've left the last tar file "open", so we can now append the - * required WAL files to it. 
- */ - char pathbuf[MAXPGPATH]; - XLogSegNo segno; - XLogSegNo startsegno; - XLogSegNo endsegno; - struct stat statbuf; - List *historyFileList = NIL; - List *walFileList = NIL; - char firstoff[MAXFNAMELEN]; - char lastoff[MAXFNAMELEN]; - DIR *dir; - struct dirent *de; - ListCell *lc; - TimeLineID tli; - - basebackup_progress_transfer_wal(); - - /* - * I'd rather not worry about timelines here, so scan pg_wal and - * include all WAL files in the range between 'startptr' and 'endptr', - * regardless of the timeline the file is stamped with. If there are - * some spurious WAL files belonging to timelines that don't belong in - * this server's history, they will be included too. Normally there - * shouldn't be such files, but if there are, there's little harm in - * including them. - */ - XLByteToSeg(state.startptr, startsegno, wal_segment_size); - XLogFileName(firstoff, state.starttli, startsegno, wal_segment_size); - XLByteToPrevSeg(endptr, endsegno, wal_segment_size); - XLogFileName(lastoff, endtli, endsegno, wal_segment_size); - - dir = AllocateDir("pg_wal"); - while ((de = ReadDir(dir, "pg_wal")) != NULL) - { - /* Does it look like a WAL segment, and is it in the range? */ - if (IsXLogFileName(de->d_name) && - strcmp(de->d_name + 8, firstoff + 8) >= 0 && - strcmp(de->d_name + 8, lastoff + 8) <= 0) - { - walFileList = lappend(walFileList, pstrdup(de->d_name)); - } - /* Does it look like a timeline history file? */ - else if (IsTLHistoryFileName(de->d_name)) - { - historyFileList = lappend(historyFileList, pstrdup(de->d_name)); - } - } - FreeDir(dir); - - /* - * Before we go any further, check that none of the WAL segments we - * need were removed. - */ - CheckXLogRemoved(startsegno, state.starttli); - - /* - * Sort the WAL filenames. We want to send the files in order from - * oldest to newest, to reduce the chance that a file is recycled - * before we get a chance to send it over. - */ - list_sort(walFileList, compareWalFileNames); - - /* - * There must be at least one xlog file in the pg_wal directory, since - * we are doing backup-including-xlog. - */ - if (walFileList == NIL) - ereport(ERROR, - (errmsg("could not find any WAL files"))); - - /* - * Sanity check: the first and last segment should cover startptr and - * endptr, with no gaps in between. - */ - XLogFromFileName((char *) linitial(walFileList), - &tli, &segno, wal_segment_size); - if (segno != startsegno) - { - char startfname[MAXFNAMELEN]; - - XLogFileName(startfname, state.starttli, startsegno, - wal_segment_size); - ereport(ERROR, - (errmsg("could not find WAL file \"%s\"", startfname))); - } - foreach(lc, walFileList) - { - char *walFileName = (char *) lfirst(lc); - XLogSegNo currsegno = segno; - XLogSegNo nextsegno = segno + 1; - - XLogFromFileName(walFileName, &tli, &segno, wal_segment_size); - if (!(nextsegno == segno || currsegno == segno)) - { - char nextfname[MAXFNAMELEN]; - - XLogFileName(nextfname, tli, nextsegno, wal_segment_size); - ereport(ERROR, - (errmsg("could not find WAL file \"%s\"", nextfname))); - } - } - if (segno != endsegno) - { - char endfname[MAXFNAMELEN]; - - XLogFileName(endfname, endtli, endsegno, wal_segment_size); - ereport(ERROR, - (errmsg("could not find WAL file \"%s\"", endfname))); - } - - /* Ok, we have everything we need. Send the WAL files. 
*/ - foreach(lc, walFileList) - { - char *walFileName = (char *) lfirst(lc); - int fd; - size_t cnt; - pgoff_t len = 0; - - snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName); - XLogFromFileName(walFileName, &tli, &segno, wal_segment_size); - - fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY); - if (fd < 0) - { - int save_errno = errno; - - /* - * Most likely reason for this is that the file was already - * removed by a checkpoint, so check for that to get a better - * error message. - */ - CheckXLogRemoved(segno, tli); - - errno = save_errno; - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", pathbuf))); - } - - if (fstat(fd, &statbuf) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - pathbuf))); - if (statbuf.st_size != wal_segment_size) - { - CheckXLogRemoved(segno, tli); - ereport(ERROR, - (errcode_for_file_access(), - errmsg("unexpected WAL file size \"%s\"", walFileName))); - } - - /* send the WAL file itself */ - _tarWriteHeader(sink, pathbuf, NULL, &statbuf, false); - - while ((cnt = basebackup_read_file(fd, sink->bbs_buffer, - Min(sink->bbs_buffer_length, - wal_segment_size - len), - len, pathbuf, true)) > 0) - { - CheckXLogRemoved(segno, tli); - bbsink_archive_contents(sink, cnt); - - len += cnt; - - if (len == wal_segment_size) - break; - } - - if (len != wal_segment_size) - { - CheckXLogRemoved(segno, tli); - ereport(ERROR, - (errcode_for_file_access(), - errmsg("unexpected WAL file size \"%s\"", walFileName))); - } - - /* - * wal_segment_size is a multiple of TAR_BLOCK_SIZE, so no need - * for padding. - */ - Assert(wal_segment_size % TAR_BLOCK_SIZE == 0); - - CloseTransientFile(fd); - - /* - * Mark file as archived, otherwise files can get archived again - * after promotion of a new node. This is in line with - * walreceiver.c always doing an XLogArchiveForceDone() after a - * complete segment. - */ - StatusFilePath(pathbuf, walFileName, ".done"); - sendFileWithContent(sink, pathbuf, "", &manifest); - } - - /* - * Send timeline history files too. Only the latest timeline history - * file is required for recovery, and even that only if there happens - * to be a timeline switch in the first WAL segment that contains the - * checkpoint record, or if we're taking a base backup from a standby - * server and the target timeline changes while the backup is taken. - * But they are small and highly useful for debugging purposes, so - * better include them all, always. - */ - foreach(lc, historyFileList) - { - char *fname = lfirst(lc); - - snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname); - - if (lstat(pathbuf, &statbuf) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", pathbuf))); - - sendFile(sink, pathbuf, pathbuf, &statbuf, false, InvalidOid, - &manifest, NULL); - - /* unconditionally mark file as archived */ - StatusFilePath(pathbuf, fname, ".done"); - sendFileWithContent(sink, pathbuf, "", &manifest); - } - - /* Properly terminate the tar file. */ - StaticAssertStmt(2 * TAR_BLOCK_SIZE <= BLCKSZ, - "BLCKSZ too small for 2 tar blocks"); - memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE); - bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE); - - /* OK, that's the end of the archive. 
*/ - bbsink_end_archive(sink); - } - - AddWALInfoToBackupManifest(&manifest, state.startptr, state.starttli, - endptr, endtli); - - SendBackupManifest(&manifest, sink); - - bbsink_end_backup(sink, endptr, endtli); - - if (total_checksum_failures) - { - if (total_checksum_failures > 1) - ereport(WARNING, - (errmsg_plural("%lld total checksum verification failure", - "%lld total checksum verification failures", - total_checksum_failures, - total_checksum_failures))); - - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("checksum verification failure during base backup"))); - } - - /* - * Make sure to free the manifest before the resource owners as manifests - * use cryptohash contexts that may depend on resource owners (like - * OpenSSL). - */ - FreeBackupManifest(&manifest); - - /* clean up the resource owner we created */ - WalSndResourceCleanup(true); - - basebackup_progress_done(); -} - -/* - * list_sort comparison function, to compare log/seg portion of WAL segment - * filenames, ignoring the timeline portion. - */ -static int -compareWalFileNames(const ListCell *a, const ListCell *b) -{ - char *fna = (char *) lfirst(a); - char *fnb = (char *) lfirst(b); - - return strcmp(fna + 8, fnb + 8); -} - -/* - * Parse the base backup options passed down by the parser - */ -static void -parse_basebackup_options(List *options, basebackup_options *opt) -{ - ListCell *lopt; - bool o_label = false; - bool o_progress = false; - bool o_checkpoint = false; - bool o_nowait = false; - bool o_wal = false; - bool o_maxrate = false; - bool o_tablespace_map = false; - bool o_noverify_checksums = false; - bool o_manifest = false; - bool o_manifest_checksums = false; - bool o_target = false; - bool o_target_detail = false; - char *target_str = NULL; - char *target_detail_str = NULL; - bool o_compression = false; - bool o_compression_detail = false; - char *compression_detail_str = NULL; - - MemSet(opt, 0, sizeof(*opt)); - opt->manifest = MANIFEST_OPTION_NO; - opt->manifest_checksum_type = CHECKSUM_TYPE_CRC32C; - opt->compression = PG_COMPRESSION_NONE; - opt->compression_specification.algorithm = PG_COMPRESSION_NONE; - - foreach(lopt, options) - { - DefElem *defel = (DefElem *) lfirst(lopt); - - if (strcmp(defel->defname, "label") == 0) - { - if (o_label) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - opt->label = defGetString(defel); - o_label = true; - } - else if (strcmp(defel->defname, "progress") == 0) - { - if (o_progress) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - opt->progress = defGetBoolean(defel); - o_progress = true; - } - else if (strcmp(defel->defname, "checkpoint") == 0) - { - char *optval = defGetString(defel); - - if (o_checkpoint) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - if (pg_strcasecmp(optval, "fast") == 0) - opt->fastcheckpoint = true; - else if (pg_strcasecmp(optval, "spread") == 0) - opt->fastcheckpoint = false; - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unrecognized checkpoint type: \"%s\"", - optval))); - o_checkpoint = true; - } - else if (strcmp(defel->defname, "wait") == 0) - { - if (o_nowait) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - opt->nowait = !defGetBoolean(defel); - o_nowait = true; - } - else if (strcmp(defel->defname, "wal") == 0) - { - if (o_wal) - ereport(ERROR, - 
(errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - opt->includewal = defGetBoolean(defel); - o_wal = true; - } - else if (strcmp(defel->defname, "max_rate") == 0) - { - int64 maxrate; - - if (o_maxrate) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - - maxrate = defGetInt64(defel); - if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)", - (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER))); - - opt->maxrate = (uint32) maxrate; - o_maxrate = true; - } - else if (strcmp(defel->defname, "tablespace_map") == 0) - { - if (o_tablespace_map) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - opt->sendtblspcmapfile = defGetBoolean(defel); - o_tablespace_map = true; - } - else if (strcmp(defel->defname, "verify_checksums") == 0) - { - if (o_noverify_checksums) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - noverify_checksums = !defGetBoolean(defel); - o_noverify_checksums = true; - } - else if (strcmp(defel->defname, "manifest") == 0) - { - char *optval = defGetString(defel); - bool manifest_bool; - - if (o_manifest) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - if (parse_bool(optval, &manifest_bool)) - { - if (manifest_bool) - opt->manifest = MANIFEST_OPTION_YES; - else - opt->manifest = MANIFEST_OPTION_NO; - } - else if (pg_strcasecmp(optval, "force-encode") == 0) - opt->manifest = MANIFEST_OPTION_FORCE_ENCODE; - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unrecognized manifest option: \"%s\"", - optval))); - o_manifest = true; - } - else if (strcmp(defel->defname, "manifest_checksums") == 0) - { - char *optval = defGetString(defel); - - if (o_manifest_checksums) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - if (!pg_checksum_parse_type(optval, - &opt->manifest_checksum_type)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unrecognized checksum algorithm: \"%s\"", - optval))); - o_manifest_checksums = true; - } - else if (strcmp(defel->defname, "target") == 0) - { - if (o_target) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - target_str = defGetString(defel); - o_target = true; - } - else if (strcmp(defel->defname, "target_detail") == 0) - { - char *optval = defGetString(defel); - - if (o_target_detail) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - target_detail_str = optval; - o_target_detail = true; - } - else if (strcmp(defel->defname, "compression") == 0) - { - char *optval = defGetString(defel); - - if (o_compression) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - if (!parse_compress_algorithm(optval, &opt->compression)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unrecognized compression algorithm \"%s\"", - optval))); - o_compression = true; - } - else if (strcmp(defel->defname, "compression_detail") == 0) - { - if (o_compression_detail) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("duplicate option \"%s\"", defel->defname))); - compression_detail_str = 
defGetString(defel); - o_compression_detail = true; - } - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unrecognized base backup option: \"%s\"", - defel->defname))); - } - - if (opt->label == NULL) - opt->label = "base backup"; - if (opt->manifest == MANIFEST_OPTION_NO) - { - if (o_manifest_checksums) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("manifest checksums require a backup manifest"))); - opt->manifest_checksum_type = CHECKSUM_TYPE_NONE; - } - - if (target_str == NULL) - { - if (target_detail_str != NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("target detail cannot be used without target"))); - opt->use_copytblspc = true; - opt->send_to_client = true; - } - else if (strcmp(target_str, "client") == 0) - { - if (target_detail_str != NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("target '%s' does not accept a target detail", - target_str))); - opt->send_to_client = true; - } - else - opt->target_handle = - BaseBackupGetTargetHandle(target_str, target_detail_str); - - if (o_compression_detail && !o_compression) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("compression detail requires compression"))); - - if (o_compression) - { - char *error_detail; - - parse_compress_specification(opt->compression, compression_detail_str, - &opt->compression_specification); - error_detail = - validate_compress_specification(&opt->compression_specification); - if (error_detail != NULL) - ereport(ERROR, - errcode(ERRCODE_SYNTAX_ERROR), - errmsg("invalid compression specification: %s", - error_detail)); - } -} - - -/* - * SendBaseBackup() - send a complete base backup. - * - * The function will put the system into backup mode like pg_backup_start() - * does, so that the backup is consistent even though we read directly from - * the filesystem, bypassing the buffer cache. - */ -void -SendBaseBackup(BaseBackupCmd *cmd) -{ - basebackup_options opt; - bbsink *sink; - SessionBackupState status = get_backup_status(); - - if (status == SESSION_BACKUP_RUNNING) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("a backup is already in progress in this session"))); - - parse_basebackup_options(cmd->options, &opt); - - WalSndSetState(WALSNDSTATE_BACKUP); - - if (update_process_title) - { - char activitymsg[50]; - - snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"", - opt.label); - set_ps_display(activitymsg); - } - - /* - * If the target is specifically 'client' then set up to stream the backup - * to the client; otherwise, it's being sent someplace else and should not - * be sent to the client. BaseBackupGetSink has the job of setting up a - * sink to send the backup data wherever it needs to go. - */ - sink = bbsink_copystream_new(opt.send_to_client); - if (opt.target_handle != NULL) - sink = BaseBackupGetSink(opt.target_handle, sink); - - /* Set up network throttling, if client requested it */ - if (opt.maxrate > 0) - sink = bbsink_throttle_new(sink, opt.maxrate); - - /* Set up server-side compression, if client requested it */ - if (opt.compression == PG_COMPRESSION_GZIP) - sink = bbsink_gzip_new(sink, &opt.compression_specification); - else if (opt.compression == PG_COMPRESSION_LZ4) - sink = bbsink_lz4_new(sink, &opt.compression_specification); - else if (opt.compression == PG_COMPRESSION_ZSTD) - sink = bbsink_zstd_new(sink, &opt.compression_specification); - - /* Set up progress reporting. 
*/ - sink = bbsink_progress_new(sink, opt.progress); - - /* - * Perform the base backup, but make sure we clean up the bbsink even if - * an error occurs. - */ - PG_TRY(); - { - perform_base_backup(&opt, sink); - } - PG_FINALLY(); - { - bbsink_cleanup(sink); - } - PG_END_TRY(); -} - -/* - * Inject a file with given name and content in the output tar stream. - */ -static void -sendFileWithContent(bbsink *sink, const char *filename, const char *content, - backup_manifest_info *manifest) -{ - struct stat statbuf; - int bytes_done = 0, - len; - pg_checksum_context checksum_ctx; - - if (pg_checksum_init(&checksum_ctx, manifest->checksum_type) < 0) - elog(ERROR, "could not initialize checksum of file \"%s\"", - filename); - - len = strlen(content); - - /* - * Construct a stat struct for the backup_label file we're injecting in - * the tar. - */ - /* Windows doesn't have the concept of uid and gid */ -#ifdef WIN32 - statbuf.st_uid = 0; - statbuf.st_gid = 0; -#else - statbuf.st_uid = geteuid(); - statbuf.st_gid = getegid(); -#endif - statbuf.st_mtime = time(NULL); - statbuf.st_mode = pg_file_create_mode; - statbuf.st_size = len; - - _tarWriteHeader(sink, filename, NULL, &statbuf, false); - - if (pg_checksum_update(&checksum_ctx, (uint8 *) content, len) < 0) - elog(ERROR, "could not update checksum of file \"%s\"", - filename); - - while (bytes_done < len) - { - size_t remaining = len - bytes_done; - size_t nbytes = Min(sink->bbs_buffer_length, remaining); - - memcpy(sink->bbs_buffer, content, nbytes); - bbsink_archive_contents(sink, nbytes); - bytes_done += nbytes; - } - - _tarWritePadding(sink, len); - - AddFileToBackupManifest(manifest, NULL, filename, len, - (pg_time_t) statbuf.st_mtime, &checksum_ctx); -} - -/* - * Include the tablespace directory pointed to by 'path' in the output tar - * stream. If 'sizeonly' is true, we just calculate a total length and return - * it, without actually sending anything. - * - * Only used to send auxiliary tablespaces, not PGDATA. - */ -static int64 -sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly, - backup_manifest_info *manifest) -{ - int64 size; - char pathbuf[MAXPGPATH]; - struct stat statbuf; - - /* - * 'path' points to the tablespace location, but we only want to include - * the version directory in it that belongs to us. - */ - snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, - TABLESPACE_VERSION_DIRECTORY); - - /* - * Store a directory entry in the tar file so we get the permissions - * right. - */ - if (lstat(pathbuf, &statbuf) != 0) - { - if (errno != ENOENT) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file or directory \"%s\": %m", - pathbuf))); - - /* If the tablespace went away while scanning, it's no error. */ - return 0; - } - - size = _tarWriteHeader(sink, TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf, - sizeonly); - - /* Send all the files in the tablespace version directory */ - size += sendDir(sink, pathbuf, strlen(path), sizeonly, NIL, true, manifest, - spcoid); - - return size; -} - -/* - * Include all files from the given directory in the output tar stream. If - * 'sizeonly' is true, we just calculate a total length and return it, without - * actually sending anything. - * - * Omit any directory in the tablespaces list, to avoid backing up - * tablespaces twice when they were created inside PGDATA. - * - * If sendtblspclinks is true, we need to include symlink - * information in the tar file. If not, we can skip that - * as it will be sent separately in the tablespace_map file. 
- */ -static int64 -sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly, - List *tablespaces, bool sendtblspclinks, backup_manifest_info *manifest, - const char *spcoid) -{ - DIR *dir; - struct dirent *de; - char pathbuf[MAXPGPATH * 2]; - struct stat statbuf; - int64 size = 0; - const char *lastDir; /* Split last dir from parent path. */ - bool isDbDir = false; /* Does this directory contain relations? */ - - /* - * Determine if the current path is a database directory that can contain - * relations. - * - * Start by finding the location of the delimiter between the parent path - * and the current path. - */ - lastDir = last_dir_separator(path); - - /* Does this path look like a database path (i.e. all digits)? */ - if (lastDir != NULL && - strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1)) - { - /* Part of path that contains the parent directory. */ - int parentPathLen = lastDir - path; - - /* - * Mark path as a database directory if the parent path is either - * $PGDATA/base or a tablespace version path. - */ - if (strncmp(path, "./base", parentPathLen) == 0 || - (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) && - strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1), - TABLESPACE_VERSION_DIRECTORY, - sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0)) - isDbDir = true; - } - - dir = AllocateDir(path); - while ((de = ReadDir(dir, path)) != NULL) - { - int excludeIdx; - bool excludeFound; - ForkNumber relForkNum; /* Type of fork if file is a relation */ - int relnumchars; /* Chars in filename that are the - * relnumber */ - - /* Skip special stuff */ - if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) - continue; - - /* Skip temporary files */ - if (strncmp(de->d_name, - PG_TEMP_FILE_PREFIX, - strlen(PG_TEMP_FILE_PREFIX)) == 0) - continue; - - /* - * Check if the postmaster has signaled us to exit, and abort with an - * error in that case. The error handler further up will call - * do_pg_abort_backup() for us. Also check that if the backup was - * started while still in recovery, the server wasn't promoted. - * do_pg_backup_stop() will check that too, but it's better to stop - * the backup early than continue to the end and fail there. - */ - CHECK_FOR_INTERRUPTS(); - if (RecoveryInProgress() != backup_started_in_recovery) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("the standby was promoted during online backup"), - errhint("This means that the backup being taken is corrupt " - "and should not be used. " - "Try taking another online backup."))); - - /* Scan for files that should be excluded */ - excludeFound = false; - for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++) - { - int cmplen = strlen(excludeFiles[excludeIdx].name); - - if (!excludeFiles[excludeIdx].match_prefix) - cmplen++; - if (strncmp(de->d_name, excludeFiles[excludeIdx].name, cmplen) == 0) - { - elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name); - excludeFound = true; - break; - } - } - - if (excludeFound) - continue; - - /* Exclude all forks for unlogged tables except the init fork */ - if (isDbDir && - parse_filename_for_nontemp_relation(de->d_name, &relnumchars, - &relForkNum)) - { - /* Never exclude init forks */ - if (relForkNum != INIT_FORKNUM) - { - char initForkFile[MAXPGPATH]; - char relNumber[OIDCHARS + 1]; - - /* - * If any other type of fork, check if there is an init fork - * with the same RelFileNumber. If so, the file can be - * excluded. 
- */ - memcpy(relNumber, de->d_name, relnumchars); - relNumber[relnumchars] = '\0'; - snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init", - path, relNumber); - - if (lstat(initForkFile, &statbuf) == 0) - { - elog(DEBUG2, - "unlogged relation file \"%s\" excluded from backup", - de->d_name); - - continue; - } - } - } - - /* Exclude temporary relations */ - if (isDbDir && looks_like_temp_rel_name(de->d_name)) - { - elog(DEBUG2, - "temporary relation file \"%s\" excluded from backup", - de->d_name); - - continue; - } - - snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name); - - /* Skip pg_control here to back up it last */ - if (strcmp(pathbuf, "./global/pg_control") == 0) - continue; - - if (lstat(pathbuf, &statbuf) != 0) - { - if (errno != ENOENT) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file or directory \"%s\": %m", - pathbuf))); - - /* If the file went away while scanning, it's not an error. */ - continue; - } - - /* Scan for directories whose contents should be excluded */ - excludeFound = false; - for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++) - { - if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0) - { - elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name); - convert_link_to_directory(pathbuf, &statbuf); - size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL, - &statbuf, sizeonly); - excludeFound = true; - break; - } - } - - if (excludeFound) - continue; - - /* - * We can skip pg_wal, the WAL segments need to be fetched from the - * WAL archive anyway. But include it as an empty directory anyway, so - * we get permissions right. - */ - if (strcmp(pathbuf, "./pg_wal") == 0) - { - /* If pg_wal is a symlink, write it as a directory anyway */ - convert_link_to_directory(pathbuf, &statbuf); - size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL, - &statbuf, sizeonly); - - /* - * Also send archive_status directory (by hackishly reusing - * statbuf from above ...). - */ - size += _tarWriteHeader(sink, "./pg_wal/archive_status", NULL, - &statbuf, sizeonly); - - continue; /* don't recurse into pg_wal */ - } - - /* Allow symbolic links in pg_tblspc only */ - if (strcmp(path, "./pg_tblspc") == 0 && S_ISLNK(statbuf.st_mode)) - { - char linkpath[MAXPGPATH]; - int rllen; - - rllen = readlink(pathbuf, linkpath, sizeof(linkpath)); - if (rllen < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read symbolic link \"%s\": %m", - pathbuf))); - if (rllen >= sizeof(linkpath)) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("symbolic link \"%s\" target is too long", - pathbuf))); - linkpath[rllen] = '\0'; - - size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, linkpath, - &statbuf, sizeonly); - } - else if (S_ISDIR(statbuf.st_mode)) - { - bool skip_this_dir = false; - ListCell *lc; - - /* - * Store a directory entry in the tar file so we can get the - * permissions right. - */ - size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL, &statbuf, - sizeonly); - - /* - * Call ourselves recursively for a directory, unless it happens - * to be a separate tablespace located within PGDATA. - */ - foreach(lc, tablespaces) - { - tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc); - - /* - * ti->rpath is the tablespace relative path within PGDATA, or - * NULL if the tablespace has been properly located somewhere - * else. - * - * Skip past the leading "./" in pathbuf when comparing. 
- */ - if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0) - { - skip_this_dir = true; - break; - } - } - - /* - * skip sending directories inside pg_tblspc, if not required. - */ - if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks) - skip_this_dir = true; - - if (!skip_this_dir) - size += sendDir(sink, pathbuf, basepathlen, sizeonly, tablespaces, - sendtblspclinks, manifest, spcoid); - } - else if (S_ISREG(statbuf.st_mode)) - { - bool sent = false; - - if (!sizeonly) - sent = sendFile(sink, pathbuf, pathbuf + basepathlen + 1, &statbuf, - true, isDbDir ? atooid(lastDir + 1) : InvalidOid, - manifest, spcoid); - - if (sent || sizeonly) - { - /* Add size. */ - size += statbuf.st_size; - - /* Pad to a multiple of the tar block size. */ - size += tarPaddingBytesRequired(statbuf.st_size); - - /* Size of the header for the file. */ - size += TAR_BLOCK_SIZE; - } - } - else - ereport(WARNING, - (errmsg("skipping special file \"%s\"", pathbuf))); - } - FreeDir(dir); - return size; -} - -/* - * Check if a file should have its checksum validated. - * We validate checksums on files in regular tablespaces - * (including global and default) only, and in those there - * are some files that are explicitly excluded. - */ -static bool -is_checksummed_file(const char *fullpath, const char *filename) -{ - /* Check that the file is in a tablespace */ - if (strncmp(fullpath, "./global/", 9) == 0 || - strncmp(fullpath, "./base/", 7) == 0 || - strncmp(fullpath, "/", 1) == 0) - { - int excludeIdx; - - /* Compare file against noChecksumFiles skip list */ - for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++) - { - int cmplen = strlen(noChecksumFiles[excludeIdx].name); - - if (!noChecksumFiles[excludeIdx].match_prefix) - cmplen++; - if (strncmp(filename, noChecksumFiles[excludeIdx].name, - cmplen) == 0) - return false; - } - - return true; - } - else - return false; -} - -/***** - * Functions for handling tar file format - * - * Copied from pg_dump, but modified to work with libpq for sending - */ - - -/* - * Given the member, write the TAR header & send the file. - * - * If 'missing_ok' is true, will not throw an error if the file is not found. - * - * If dboid is anything other than InvalidOid then any checksum failures - * detected will get reported to the cumulative stats system. - * - * Returns true if the file was successfully sent, false if 'missing_ok', - * and the file did not exist. - */ -static bool -sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, - struct stat *statbuf, bool missing_ok, Oid dboid, - backup_manifest_info *manifest, const char *spcoid) -{ - int fd; - BlockNumber blkno = 0; - bool block_retry = false; - uint16 checksum; - int checksum_failures = 0; - off_t cnt; - int i; - pgoff_t len = 0; - char *page; - PageHeader phdr; - int segmentno = 0; - char *segmentpath; - bool verify_checksum = false; - pg_checksum_context checksum_ctx; - - if (pg_checksum_init(&checksum_ctx, manifest->checksum_type) < 0) - elog(ERROR, "could not initialize checksum of file \"%s\"", - readfilename); - - fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY); - if (fd < 0) - { - if (errno == ENOENT && missing_ok) - return false; - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", readfilename))); - } - - _tarWriteHeader(sink, tarfilename, NULL, statbuf, false); - - if (!noverify_checksums && DataChecksumsEnabled()) - { - char *filename; - - /* - * Get the filename (excluding path). 
As last_dir_separator() - * includes the last directory separator, we chop that off by - * incrementing the pointer. - */ - filename = last_dir_separator(readfilename) + 1; - - if (is_checksummed_file(readfilename, filename)) - { - verify_checksum = true; - - /* - * Cut off at the segment boundary (".") to get the segment number - * in order to mix it into the checksum. - */ - segmentpath = strstr(filename, "."); - if (segmentpath != NULL) - { - segmentno = atoi(segmentpath + 1); - if (segmentno == 0) - ereport(ERROR, - (errmsg("invalid segment number %d in file \"%s\"", - segmentno, filename))); - } - } - } - - /* - * Loop until we read the amount of data the caller told us to expect. The - * file could be longer, if it was extended while we were sending it, but - * for a base backup we can ignore such extended data. It will be restored - * from WAL. - */ - while (len < statbuf->st_size) - { - size_t remaining = statbuf->st_size - len; - - /* Try to read some more data. */ - cnt = basebackup_read_file(fd, sink->bbs_buffer, - Min(sink->bbs_buffer_length, remaining), - len, readfilename, true); - - /* - * If we hit end-of-file, a concurrent truncation must have occurred. - * That's not an error condition, because WAL replay will fix things - * up. - */ - if (cnt == 0) - break; - - /* - * The checksums are verified at block level, so we iterate over the - * buffer in chunks of BLCKSZ, after making sure that - * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of - * BLCKSZ bytes. - */ - Assert((sink->bbs_buffer_length % BLCKSZ) == 0); - - if (verify_checksum && (cnt % BLCKSZ != 0)) - { - ereport(WARNING, - (errmsg("could not verify checksum in file \"%s\", block " - "%u: read buffer size %d and page size %d " - "differ", - readfilename, blkno, (int) cnt, BLCKSZ))); - verify_checksum = false; - } - - if (verify_checksum) - { - for (i = 0; i < cnt / BLCKSZ; i++) - { - page = sink->bbs_buffer + BLCKSZ * i; - - /* - * Only check pages which have not been modified since the - * start of the base backup. Otherwise, they might have been - * written only halfway and the checksum would not be valid. - * However, replaying WAL would reinstate the correct page in - * this case. We also skip completely new pages, since they - * don't have a checksum yet. - */ - if (!PageIsNew(page) && PageGetLSN(page) < sink->bbs_state->startptr) - { - checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); - phdr = (PageHeader) page; - if (phdr->pd_checksum != checksum) - { - /* - * Retry the block on the first failure. It's - * possible that we read the first 4K page of the - * block just before postgres updated the entire block - * so it ends up looking torn to us. We only need to - * retry once because the LSN should be updated to - * something we can ignore on the next pass. If the - * error happens again then it is a true validation - * failure. - */ - if (block_retry == false) - { - int reread_cnt; - - /* Reread the failed block */ - reread_cnt = - basebackup_read_file(fd, - sink->bbs_buffer + BLCKSZ * i, - BLCKSZ, len + BLCKSZ * i, - readfilename, - false); - if (reread_cnt == 0) - { - /* - * If we hit end-of-file, a concurrent - * truncation must have occurred, so break out - * of this loop just as if the initial fread() - * returned 0. We'll drop through to the same - * code that handles that case. (We must fix - * up cnt first, though.) 
- */ - cnt = BLCKSZ * i; - break; - } - - /* Set flag so we know a retry was attempted */ - block_retry = true; - - /* Reset loop to validate the block again */ - i--; - continue; - } - - checksum_failures++; - - if (checksum_failures <= 5) - ereport(WARNING, - (errmsg("checksum verification failed in " - "file \"%s\", block %u: calculated " - "%X but expected %X", - readfilename, blkno, checksum, - phdr->pd_checksum))); - if (checksum_failures == 5) - ereport(WARNING, - (errmsg("further checksum verification " - "failures in file \"%s\" will not " - "be reported", readfilename))); - } - } - block_retry = false; - blkno++; - } - } - - bbsink_archive_contents(sink, cnt); - - /* Also feed it to the checksum machinery. */ - if (pg_checksum_update(&checksum_ctx, - (uint8 *) sink->bbs_buffer, cnt) < 0) - elog(ERROR, "could not update checksum of base backup"); - - len += cnt; - } - - /* If the file was truncated while we were sending it, pad it with zeros */ - while (len < statbuf->st_size) - { - size_t remaining = statbuf->st_size - len; - size_t nbytes = Min(sink->bbs_buffer_length, remaining); - - MemSet(sink->bbs_buffer, 0, nbytes); - if (pg_checksum_update(&checksum_ctx, - (uint8 *) sink->bbs_buffer, - nbytes) < 0) - elog(ERROR, "could not update checksum of base backup"); - bbsink_archive_contents(sink, nbytes); - len += nbytes; - } - - /* - * Pad to a block boundary, per tar format requirements. (This small piece - * of data is probably not worth throttling, and is not checksummed - * because it's not actually part of the file.) - */ - _tarWritePadding(sink, len); - - CloseTransientFile(fd); - - if (checksum_failures > 1) - { - ereport(WARNING, - (errmsg_plural("file \"%s\" has a total of %d checksum verification failure", - "file \"%s\" has a total of %d checksum verification failures", - checksum_failures, - readfilename, checksum_failures))); - - pgstat_report_checksum_failures_in_db(dboid, checksum_failures); - } - - total_checksum_failures += checksum_failures; - - AddFileToBackupManifest(manifest, spcoid, tarfilename, statbuf->st_size, - (pg_time_t) statbuf->st_mtime, &checksum_ctx); - - return true; -} - -static int64 -_tarWriteHeader(bbsink *sink, const char *filename, const char *linktarget, - struct stat *statbuf, bool sizeonly) -{ - enum tarError rc; - - if (!sizeonly) - { - /* - * As of this writing, the smallest supported block size is 1kB, which - * is twice TAR_BLOCK_SIZE. Since the buffer size is required to be a - * multiple of BLCKSZ, it should be safe to assume that the buffer is - * large enough to fit an entire tar block. We double-check by means - * of these assertions. - */ - StaticAssertStmt(TAR_BLOCK_SIZE <= BLCKSZ, - "BLCKSZ too small for tar block"); - Assert(sink->bbs_buffer_length >= TAR_BLOCK_SIZE); - - rc = tarCreateHeader(sink->bbs_buffer, filename, linktarget, - statbuf->st_size, statbuf->st_mode, - statbuf->st_uid, statbuf->st_gid, - statbuf->st_mtime); - - switch (rc) - { - case TAR_OK: - break; - case TAR_NAME_TOO_LONG: - ereport(ERROR, - (errmsg("file name too long for tar format: \"%s\"", - filename))); - break; - case TAR_SYMLINK_TOO_LONG: - ereport(ERROR, - (errmsg("symbolic link target too long for tar format: " - "file name \"%s\", target \"%s\"", - filename, linktarget))); - break; - default: - elog(ERROR, "unrecognized tar error: %d", rc); - } - - bbsink_archive_contents(sink, TAR_BLOCK_SIZE); - } - - return TAR_BLOCK_SIZE; -} - -/* - * Pad with zero bytes out to a multiple of TAR_BLOCK_SIZE. 
- */ -static void -_tarWritePadding(bbsink *sink, int len) -{ - int pad = tarPaddingBytesRequired(len); - - /* - * As in _tarWriteHeader, it should be safe to assume that the buffer is - * large enough that we don't need to do this in multiple chunks. - */ - Assert(sink->bbs_buffer_length >= TAR_BLOCK_SIZE); - Assert(pad <= TAR_BLOCK_SIZE); - - if (pad > 0) - { - MemSet(sink->bbs_buffer, 0, pad); - bbsink_archive_contents(sink, pad); - } -} - -/* - * If the entry in statbuf is a link, then adjust statbuf to make it look like a - * directory, so that it will be written that way. - */ -static void -convert_link_to_directory(const char *pathbuf, struct stat *statbuf) -{ - /* If symlink, write it as a directory anyway */ - if (S_ISLNK(statbuf->st_mode)) - statbuf->st_mode = S_IFDIR | pg_dir_create_mode; -} - -/* - * Read some data from a file, setting a wait event and reporting any error - * encountered. - * - * If partial_read_ok is false, also report an error if the number of bytes - * read is not equal to the number of bytes requested. - * - * Returns the number of bytes read. - */ -static int -basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset, - const char *filename, bool partial_read_ok) -{ - int rc; - - pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ); - rc = pread(fd, buf, nbytes, offset); - pgstat_report_wait_end(); - - if (rc < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", filename))); - if (!partial_read_ok && rc > 0 && rc != nbytes) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": read %d of %zu", - filename, rc, nbytes))); - - return rc; -} |
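The deleted code above pads every tar member out to a multiple of TAR_BLOCK_SIZE (see _tarWritePadding and the tarPaddingBytesRequired calls in sendDir and sendFile). Below is a minimal standalone sketch of that padding rule, using a hypothetical helper rather than PostgreSQL's pgtar.h macro:

#include <stdio.h>

#define TAR_BLOCK_SIZE 512      /* tar format block size, as in the code above */

/*
 * Hypothetical stand-in for tarPaddingBytesRequired(): the number of zero
 * bytes needed to round a member of 'len' bytes up to the next
 * TAR_BLOCK_SIZE boundary.
 */
static long
tar_padding_bytes_required(long len)
{
    return (TAR_BLOCK_SIZE - (len % TAR_BLOCK_SIZE)) % TAR_BLOCK_SIZE;
}

int
main(void)
{
    printf("%ld\n", tar_padding_bytes_required(1000)); /* 24: pads up to 1024 */
    printf("%ld\n", tar_padding_bytes_required(1024)); /* 0: already aligned  */
    return 0;
}

The two zero-filled TAR_BLOCK_SIZE blocks that perform_base_backup() writes when it "properly terminates" each archive are the standard tar end-of-archive marker, which is why the code asserts that the sink buffer can hold at least 2 * TAR_BLOCK_SIZE.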