diff options
-rw-r--r-- | doc/src/sgml/protocol.sgml | 22 | ||||
-rw-r--r-- | doc/src/sgml/ref/pg_basebackup.sgml | 29 | ||||
-rw-r--r-- | src/backend/Makefile | 2 | ||||
-rw-r--r-- | src/backend/replication/Makefile | 1 | ||||
-rw-r--r-- | src/backend/replication/basebackup.c | 54 | ||||
-rw-r--r-- | src/backend/replication/basebackup_gzip.c | 309 | ||||
-rw-r--r-- | src/bin/pg_basebackup/pg_basebackup.c | 136 | ||||
-rw-r--r-- | src/bin/pg_verifybackup/Makefile | 7 | ||||
-rw-r--r-- | src/bin/pg_verifybackup/t/008_untar.pl | 104 | ||||
-rw-r--r-- | src/include/replication/basebackup_sink.h | 1 |
10 files changed, 641 insertions, 24 deletions
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index cd6dca691e2..2d63e0132c9 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2720,6 +2720,28 @@ The commands accepted in replication mode are: </varlistentry> <varlistentry> + <term><literal>COMPRESSION</literal> <replaceable>'method'</replaceable></term> + <listitem> + <para> + Instructs the server to compress the backup using the specified + method. Currently, the only supported method is + <literal>gzip</literal>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>COMPRESSION_LEVEL</literal> <replaceable>level</replaceable></term> + <listitem> + <para> + Specifies the compression level to be used. This should only be + used in conjunction with the <literal>COMPRESSION</literal> option. + The value should be an integer between 1 and 9. + </para> + </listitem> + </varlistentry> + + <varlistentry> <term><literal>MAX_RATE</literal> <replaceable>rate</replaceable></term> <listitem> <para> diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 47d11289bee..1d0df346b97 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -400,21 +400,36 @@ PostgreSQL documentation <term><option>-Z <replaceable class="parameter">level</replaceable></option></term> <term><option>-Z <replaceable class="parameter">method</replaceable></option>[:<replaceable>level</replaceable>]</term> <term><option>--compress=<replaceable class="parameter">level</replaceable></option></term> - <term><option>--compress=<replaceable class="parameter">method</replaceable></option>[:<replaceable>level</replaceable>]</term> + <term><option>--compress=[[{<replaceable class="parameter">client|server</replaceable>-}]<replaceable class="parameter">method</replaceable></option>[:<replaceable>level</replaceable>]</term> <listitem> <para> - Enables compression of tar file output, and specifies the - compression 
level (0 through 9, 0 being no compression and 9 being best - compression). Compression is only available when using the tar - format, and the suffix <filename>.gz</filename> will - automatically be added to all tar filenames. + Requests compression of the backup. If <literal>client</literal> or + <literal>server</literal> is included, it specifies where the + compression is to be performed. Compressing on the server will reduce + transfer bandwidth but will increase server CPU consumption. The + default is <literal>client</literal> except when + <literal>--target</literal> is used. In that case, the backup is not + being sent to the client, so only server compression is sensible. + When <literal>-Xstream</literal>, which is the default, is used, + server-side compression will not be applied to the WAL. To compress + the WAL, use client-side compression, or + specify <literal>-Xfetch</literal>. </para> <para> The compression method can be set to either <literal>gzip</literal> for compression with <application>gzip</application>, or <literal>none</literal> for no compression. A compression level can be optionally specified, by appending the level number after a - colon (<literal>:</literal>). + colon (<literal>:</literal>). If no level is specified, the default + compression level will be used. If only a level is specified without + mentioning an algorithm, <literal>gzip</literal> compression will + be used if the level is greater than 0, and no compression will be + used if the level is 0. + </para> + <para> + When the tar format is used, the suffix <filename>.gz</filename> will + automatically be added to all tar filenames. Compression is not + available in plain format. 
</para> </listitem> </varlistentry> diff --git a/src/backend/Makefile b/src/backend/Makefile index add9560be47..4a02006788a 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -48,7 +48,7 @@ OBJS = \ LIBS := $(filter-out -lpgport -lpgcommon, $(LIBS)) $(LDAP_LIBS_BE) $(ICU_LIBS) # The backend doesn't need everything that's in LIBS, however -LIBS := $(filter-out -lz -lreadline -ledit -ltermcap -lncurses -lcurses, $(LIBS)) +LIBS := $(filter-out -lreadline -ledit -ltermcap -lncurses -lcurses, $(LIBS)) ifeq ($(with_systemd),yes) LIBS += -lsystemd diff --git a/src/backend/replication/Makefile b/src/backend/replication/Makefile index a8f4757f0c0..8ec60ded762 100644 --- a/src/backend/replication/Makefile +++ b/src/backend/replication/Makefile @@ -18,6 +18,7 @@ OBJS = \ backup_manifest.o \ basebackup.o \ basebackup_copy.o \ + basebackup_gzip.o \ basebackup_progress.o \ basebackup_server.o \ basebackup_sink.o \ diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index d32da515355..10ce2406c0f 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -61,6 +61,12 @@ typedef enum BACKUP_TARGET_SERVER } backup_target_type; +typedef enum +{ + BACKUP_COMPRESSION_NONE, + BACKUP_COMPRESSION_GZIP +} basebackup_compression_type; + typedef struct { const char *label; @@ -73,6 +79,8 @@ typedef struct backup_target_type target; char *target_detail; backup_manifest_option manifest; + basebackup_compression_type compression; + int compression_level; pg_checksum_type manifest_checksum_type; } basebackup_options; @@ -707,11 +715,14 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_target = false; bool o_target_detail = false; char *target_str = "compat"; /* placate compiler */ + bool o_compression = false; + bool o_compression_level = false; MemSet(opt, 0, sizeof(*opt)); opt->target = BACKUP_TARGET_COMPAT; opt->manifest = MANIFEST_OPTION_NO; opt->manifest_checksum_type = 
CHECKSUM_TYPE_CRC32C; + opt->compression = BACKUP_COMPRESSION_NONE; foreach(lopt, options) { @@ -881,7 +892,41 @@ parse_basebackup_options(List *options, basebackup_options *opt) opt->target_detail = optval; o_target_detail = true; } + else if (strcmp(defel->defname, "compression") == 0) + { + char *optval = defGetString(defel); + + if (o_compression) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + if (strcmp(optval, "none") == 0) + opt->compression = BACKUP_COMPRESSION_NONE; + else if (strcmp(optval, "gzip") == 0) + opt->compression = BACKUP_COMPRESSION_GZIP; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized compression algorithm: \"%s\"", + optval))); + o_compression = true; + } + else if (strcmp(defel->defname, "compression_level") == 0) + { + if (o_compression_level) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + opt->compression_level = defGetInt32(defel); + o_compression_level = true; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized base backup option: \"%s\"", + defel->defname))); } + if (opt->label == NULL) opt->label = "base backup"; if (opt->manifest == MANIFEST_OPTION_NO) @@ -908,6 +953,11 @@ parse_basebackup_options(List *options, basebackup_options *opt) errmsg("target '%s' does not accept a target detail", target_str))); } + + if (o_compression_level && !o_compression) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("compression level requires compression"))); } @@ -975,6 +1025,10 @@ SendBaseBackup(BaseBackupCmd *cmd) if (opt.maxrate > 0) sink = bbsink_throttle_new(sink, opt.maxrate); + /* Set up server-side compression, if client requested it */ + if (opt.compression == BACKUP_COMPRESSION_GZIP) + sink = bbsink_gzip_new(sink, opt.compression_level); + /* Set up progress reporting. 
*/ sink = bbsink_progress_new(sink, opt.progress); diff --git a/src/backend/replication/basebackup_gzip.c b/src/backend/replication/basebackup_gzip.c new file mode 100644 index 00000000000..1e58382fa08 --- /dev/null +++ b/src/backend/replication/basebackup_gzip.c @@ -0,0 +1,309 @@ +/*------------------------------------------------------------------------- + * + * basebackup_gzip.c + * Basebackup sink implementing gzip compression. + * + * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/replication/basebackup_gzip.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#ifdef HAVE_LIBZ +#include <zlib.h> +#endif + +#include "replication/basebackup_sink.h" + +#ifdef HAVE_LIBZ +typedef struct bbsink_gzip +{ + /* Common information for all types of sink. */ + bbsink base; + + /* Compression level. */ + int compresslevel; + + /* Compressed data stream. */ + z_stream zstream; + + /* Number of bytes staged in output buffer. 
*/ + size_t bytes_written; +} bbsink_gzip; + +static void bbsink_gzip_begin_backup(bbsink *sink); +static void bbsink_gzip_begin_archive(bbsink *sink, const char *archive_name); +static void bbsink_gzip_archive_contents(bbsink *sink, size_t len); +static void bbsink_gzip_manifest_contents(bbsink *sink, size_t len); +static void bbsink_gzip_end_archive(bbsink *sink); +static void *gzip_palloc(void *opaque, unsigned items, unsigned size); +static void gzip_pfree(void *opaque, void *address); + +const bbsink_ops bbsink_gzip_ops = { + .begin_backup = bbsink_gzip_begin_backup, + .begin_archive = bbsink_gzip_begin_archive, + .archive_contents = bbsink_gzip_archive_contents, + .end_archive = bbsink_gzip_end_archive, + .begin_manifest = bbsink_forward_begin_manifest, + .manifest_contents = bbsink_gzip_manifest_contents, + .end_manifest = bbsink_forward_end_manifest, + .end_backup = bbsink_forward_end_backup, + .cleanup = bbsink_forward_cleanup +}; +#endif + +/* + * Create a new basebackup sink that performs gzip compression using the + * designated compression level. + */ +bbsink * +bbsink_gzip_new(bbsink *next, int compresslevel) +{ +#ifndef HAVE_LIBZ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("gzip compression is not supported by this build"))); +#else + bbsink_gzip *sink; + + Assert(next != NULL); + Assert(compresslevel >= 0 && compresslevel <= 9); + + if (compresslevel == 0) + compresslevel = Z_DEFAULT_COMPRESSION; + else if (compresslevel < 0 || compresslevel > 9) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("gzip compression level %d is out of range", + compresslevel))); + + sink = palloc0(sizeof(bbsink_gzip)); + *((const bbsink_ops **) &sink->base.bbs_ops) = &bbsink_gzip_ops; + sink->base.bbs_next = next; + sink->compresslevel = compresslevel; + + return &sink->base; +#endif +} + +#ifdef HAVE_LIBZ + +/* + * Begin backup. 
+ */ +static void +bbsink_gzip_begin_backup(bbsink *sink) +{ + /* + * We need our own buffer, because we're going to pass different data to + * the next sink than what gets passed to us. + */ + sink->bbs_buffer = palloc(sink->bbs_buffer_length); + + /* + * Since deflate() doesn't require the output buffer to be of any + * particular size, we can just make it the same size as the input buffer. + */ + bbsink_begin_backup(sink->bbs_next, sink->bbs_state, + sink->bbs_buffer_length); +} + +/* + * Prepare to compress the next archive. + */ +static void +bbsink_gzip_begin_archive(bbsink *sink, const char *archive_name) +{ + bbsink_gzip *mysink = (bbsink_gzip *) sink; + char *gz_archive_name; + z_stream *zs = &mysink->zstream; + + /* Initialize compressor object. */ + memset(zs, 0, sizeof(z_stream)); + zs->zalloc = gzip_palloc; + zs->zfree = gzip_pfree; + zs->next_out = (uint8 *) sink->bbs_next->bbs_buffer; + zs->avail_out = sink->bbs_next->bbs_buffer_length; + + /* + * We need to use deflateInit2() rather than deflateInit() here so that + * we can request a gzip header rather than a zlib header. Otherwise, we + * want to supply the same values that would have been used by default + * if we had just called deflateInit(). + * + * Per the documentation for deflateInit2, the third argument must be + * Z_DEFLATED; the fourth argument is the number of "window bits", by + * default 15, but adding 16 gets you a gzip header rather than a zlib + * header; the fifth argument controls memory usage, and 8 is the default; + * and likewise Z_DEFAULT_STRATEGY is the default for the sixth argument. + */ + if (deflateInit2(zs, mysink->compresslevel, Z_DEFLATED, 15 + 16, 8, + Z_DEFAULT_STRATEGY) != Z_OK) + ereport(ERROR, + errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not initialize compression library")); + + /* + * Add ".gz" to the archive name. Note that the pg_basebackup -z + * produces archives named ".tar.gz" rather than ".tgz", so we match + * that here. 
+ */ + gz_archive_name = psprintf("%s.gz", archive_name); + Assert(sink->bbs_next != NULL); + bbsink_begin_archive(sink->bbs_next, gz_archive_name); + pfree(gz_archive_name); +} + +/* + * Compress the input data to the output buffer until we run out of input + * data. Each time the output buffer fills up, invoke the archive_contents() + * method for the next sink. + * + * Note that since we're compressing the input, it may very commonly happen + * that we consume all the input data without filling the output buffer. In + * that case, the compressed representation of the current input data won't + * actually be sent to the next bbsink until a later call to this function, + * or perhaps even not until bbsink_gzip_end_archive() is invoked. + */ +static void +bbsink_gzip_archive_contents(bbsink *sink, size_t len) +{ + bbsink_gzip *mysink = (bbsink_gzip *) sink; + z_stream *zs = &mysink->zstream; + + /* Compress data from input buffer. */ + zs->next_in = (uint8 *) mysink->base.bbs_buffer; + zs->avail_in = len; + + while (zs->avail_in > 0) + { + int res; + + /* Write output data into unused portion of output buffer. */ + Assert(mysink->bytes_written < mysink->base.bbs_next->bbs_buffer_length); + zs->next_out = (uint8 *) + mysink->base.bbs_next->bbs_buffer + mysink->bytes_written; + zs->avail_out = + mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written; + + /* + * Try to compress. Note that this will update zs->next_in and + * zs->avail_in according to how much input data was consumed, and + * zs->next_out and zs->avail_out according to how many output bytes + * were produced. + * + * According to the zlib documentation, Z_STREAM_ERROR should only + * occur if we've made a programming error, or if say there's been a + * memory clobber; we use elog() rather than Assert() here out of an + * abundance of caution. 
+ */ + res = deflate(zs, Z_NO_FLUSH); + if (res == Z_STREAM_ERROR) + elog(ERROR, "could not compress data: %s", zs->msg); + + /* Update our notion of how many bytes we've written. */ + mysink->bytes_written = + mysink->base.bbs_next->bbs_buffer_length - zs->avail_out; + + /* + * If the output buffer is full, it's time for the next sink to + * process the contents. + */ + if (mysink->bytes_written >= mysink->base.bbs_next->bbs_buffer_length) + { + bbsink_archive_contents(sink->bbs_next, mysink->bytes_written); + mysink->bytes_written = 0; + } + } +} + +/* + * There might be some data inside zlib's internal buffers; we need to get + * that flushed out and forwarded to the successor sink as archive content. + * + * Then we can end processing for this archive. + */ +static void +bbsink_gzip_end_archive(bbsink *sink) +{ + bbsink_gzip *mysink = (bbsink_gzip *) sink; + z_stream *zs = &mysink->zstream; + + /* There is no more data available. */ + zs->next_in = (uint8 *) mysink->base.bbs_buffer; + zs->avail_in = 0; + + while (1) + { + int res; + + /* Write output data into unused portion of output buffer. */ + Assert(mysink->bytes_written < mysink->base.bbs_next->bbs_buffer_length); + zs->next_out = (uint8 *) + mysink->base.bbs_next->bbs_buffer + mysink->bytes_written; + zs->avail_out = + mysink->base.bbs_next->bbs_buffer_length - mysink->bytes_written; + + /* + * As bbsink_gzip_archive_contents, but pass Z_FINISH since there + * is no more input. + */ + res = deflate(zs, Z_FINISH); + if (res == Z_STREAM_ERROR) + elog(ERROR, "could not compress data: %s", zs->msg); + + /* Update our notion of how many bytes we've written. */ + mysink->bytes_written = + mysink->base.bbs_next->bbs_buffer_length - zs->avail_out; + + /* + * Apparently we had no data in the output buffer and deflate() + * was not able to add any. We must be done. + */ + if (mysink->bytes_written == 0) + break; + + /* Send whatever accumulated output bytes we have. 
*/ + bbsink_archive_contents(sink->bbs_next, mysink->bytes_written); + mysink->bytes_written = 0; + } + + /* Must also pass on the information that this archive has ended. */ + bbsink_forward_end_archive(sink); +} + +/* + * Manifest contents are not compressed, but we do need to copy them into + * the successor sink's buffer, because we have our own. + */ +static void +bbsink_gzip_manifest_contents(bbsink *sink, size_t len) +{ + memcpy(sink->bbs_next->bbs_buffer, sink->bbs_buffer, len); + bbsink_manifest_contents(sink->bbs_next, len); +} + +/* + * Wrapper function to adjust the signature of palloc to match what libz + * expects. + */ +static void * +gzip_palloc(void *opaque, unsigned items, unsigned size) +{ + return palloc(items * size); +} + +/* + * Wrapper function to adjust the signature of pfree to match what libz + * expects. + */ +static void +gzip_pfree(void *opaque, void *address) +{ + pfree(address); +} + +#endif diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 221cc4caf23..72c27c78d05 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -111,6 +111,16 @@ typedef enum STREAM_WAL } IncludeWal; +/* + * Different places to perform compression + */ +typedef enum +{ + COMPRESS_LOCATION_UNSPECIFIED, + COMPRESS_LOCATION_CLIENT, + COMPRESS_LOCATION_SERVER +} CompressionLocation; + /* Global options */ static char *basedir = NULL; static TablespaceList tablespace_dirs = {NULL, NULL}; @@ -124,6 +134,7 @@ static bool estimatesize = true; static int verbose = 0; static int compresslevel = 0; static WalCompressionMethod compressmethod = COMPRESSION_NONE; +static CompressionLocation compressloc = COMPRESS_LOCATION_UNSPECIFIED; static IncludeWal includewal = STREAM_WAL; static bool fastcheckpoint = false; static bool writerecoveryconf = false; @@ -544,6 +555,11 @@ LogStreamerMain(logstreamer_param *param) stream.walmethod = CreateWalDirectoryMethod(param->xlog, COMPRESSION_NONE, 
0, stream.do_sync); + else if (compressloc != COMPRESS_LOCATION_CLIENT) + stream.walmethod = CreateWalTarMethod(param->xlog, + COMPRESSION_NONE, + compresslevel, + stream.do_sync); else stream.walmethod = CreateWalTarMethod(param->xlog, compressmethod, @@ -944,7 +960,7 @@ parse_max_rate(char *src) */ static void parse_compress_options(char *src, WalCompressionMethod *methodres, - int *levelres) + CompressionLocation *locationres, int *levelres) { char *sep; int firstlen; @@ -967,9 +983,25 @@ parse_compress_options(char *src, WalCompressionMethod *methodres, * compression method. */ if (pg_strcasecmp(firstpart, "gzip") == 0) + { + *methodres = COMPRESSION_GZIP; + *locationres = COMPRESS_LOCATION_UNSPECIFIED; + } + else if (pg_strcasecmp(firstpart, "client-gzip") == 0) + { + *methodres = COMPRESSION_GZIP; + *locationres = COMPRESS_LOCATION_CLIENT; + } + else if (pg_strcasecmp(firstpart, "server-gzip") == 0) + { *methodres = COMPRESSION_GZIP; + *locationres = COMPRESS_LOCATION_SERVER; + } else if (pg_strcasecmp(firstpart, "none") == 0) + { *methodres = COMPRESSION_NONE; + *locationres = COMPRESS_LOCATION_UNSPECIFIED; + } else { /* @@ -983,6 +1015,7 @@ parse_compress_options(char *src, WalCompressionMethod *methodres, *methodres = (*levelres > 0) ? COMPRESSION_GZIP : COMPRESSION_NONE; + *locationres = COMPRESS_LOCATION_UNSPECIFIED; free(firstpart); return; @@ -1080,7 +1113,9 @@ CreateBackupStreamer(char *archive_name, char *spclocation, bbstreamer *streamer = NULL; bbstreamer *manifest_inject_streamer = NULL; bool inject_manifest; + bool is_tar; bool must_parse_archive; + int archive_name_len = strlen(archive_name); /* * Normally, we emit the backup manifest as a separate file, but when @@ -1089,13 +1124,32 @@ CreateBackupStreamer(char *archive_name, char *spclocation, */ inject_manifest = (format == 't' && strcmp(basedir, "-") == 0 && manifest); + /* Is this a tar archive? 
*/ + is_tar = (archive_name_len > 4 && + strcmp(archive_name + archive_name_len - 4, ".tar") == 0); + /* * We have to parse the archive if (1) we're suppose to extract it, or if * (2) we need to inject backup_manifest or recovery configuration into it. + * However, we only know how to parse tar archives. */ must_parse_archive = (format == 'p' || inject_manifest || (spclocation == NULL && writerecoveryconf)); + /* At present, we only know how to parse tar archives. */ + if (must_parse_archive && !is_tar) + { + pg_log_error("unable to parse archive: %s", archive_name); + pg_log_info("only tar archives can be parsed"); + if (format == 'p') + pg_log_info("plain format requires pg_basebackup to parse the archive"); + if (inject_manifest) + pg_log_info("using - as the output directory requires pg_basebackup to parse the archive"); + if (writerecoveryconf) + pg_log_info("the -R option requires pg_basebackup to parse the archive"); + exit(1); + } + if (format == 'p') { const char *directory; @@ -1136,7 +1190,8 @@ CreateBackupStreamer(char *archive_name, char *spclocation, archive_file = NULL; } - if (compressmethod == COMPRESSION_NONE) + if (compressmethod == COMPRESSION_NONE || + compressloc != COMPRESS_LOCATION_CLIENT) streamer = bbstreamer_plain_writer_new(archive_filename, archive_file); #ifdef HAVE_LIBZ @@ -1838,6 +1893,31 @@ BaseBackup(void) AppendStringCommandOption(&buf, use_new_option_syntax, "TARGET", "client"); + if (compressloc == COMPRESS_LOCATION_SERVER) + { + char *compressmethodstr = NULL; + + if (!use_new_option_syntax) + { + pg_log_error("server does not support server-side compression"); + exit(1); + } + switch (compressmethod) + { + case COMPRESSION_GZIP: + compressmethodstr = "gzip"; + break; + default: + Assert(false); + break; + } + AppendStringCommandOption(&buf, use_new_option_syntax, + "COMPRESSION", compressmethodstr); + if (compresslevel != 0) + AppendIntegerCommandOption(&buf, use_new_option_syntax, + "COMPRESSION_LEVEL", compresslevel); + } + 
if (verbose) pg_log_info("initiating base backup, waiting for checkpoint to complete"); @@ -2376,10 +2456,11 @@ main(int argc, char **argv) compresslevel = 1; /* will be rejected below */ #endif compressmethod = COMPRESSION_GZIP; + compressloc = COMPRESS_LOCATION_UNSPECIFIED; break; case 'Z': parse_compress_options(optarg, &compressmethod, - &compresslevel); + &compressloc, &compresslevel); break; case 'c': if (pg_strcasecmp(optarg, "fast") == 0) @@ -2506,14 +2587,37 @@ main(int argc, char **argv) } /* - * Compression doesn't make sense unless tar format is in use. + * If we're compressing the backup and the user has not said where to + * perform the compression, do it on the client, unless they specified + * --target, in which case the server is the only choice. */ - if (format == 'p' && compressmethod != COMPRESSION_NONE) + if (compressmethod != COMPRESSION_NONE && + compressloc == COMPRESS_LOCATION_UNSPECIFIED) { if (backup_target == NULL) - pg_log_error("only tar mode backups can be compressed"); + compressloc = COMPRESS_LOCATION_CLIENT; else - pg_log_error("client-side compression is not possible when a backup target is specfied"); + compressloc = COMPRESS_LOCATION_SERVER; + } + + /* + * Can't perform client-side compression if the backup is not being + * sent to the client. + */ + if (backup_target != NULL && compressloc == COMPRESS_LOCATION_CLIENT) + { + pg_log_error("client-side compression is not possible when a backup target is specified"); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + + /* + * Compression doesn't make sense unless tar format is in use. 
+ */ + if (format == 'p' && compressloc == COMPRESS_LOCATION_CLIENT) + { + pg_log_error("only tar mode backups can be compressed"); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); @@ -2626,23 +2730,23 @@ main(int argc, char **argv) } break; case COMPRESSION_GZIP: -#ifdef HAVE_LIBZ - if (compresslevel == 0) - { - pg_log_info("no value specified for compression level, switching to default"); - compresslevel = Z_DEFAULT_COMPRESSION; - } if (compresslevel > 9) { pg_log_error("compression level %d of method %s higher than maximum of 9", compresslevel, "gzip"); exit(1); } + if (compressloc == COMPRESS_LOCATION_CLIENT) + { +#ifdef HAVE_LIBZ + if (compresslevel == 0) + compresslevel = Z_DEFAULT_COMPRESSION; #else - pg_log_error("this build does not support compression with %s", - "gzip"); - exit(1); + pg_log_error("this build does not support compression with %s", + "gzip"); + exit(1); #endif + } break; case COMPRESSION_LZ4: /* option not supported */ diff --git a/src/bin/pg_verifybackup/Makefile b/src/bin/pg_verifybackup/Makefile index c07643b1297..1ae818f9a11 100644 --- a/src/bin/pg_verifybackup/Makefile +++ b/src/bin/pg_verifybackup/Makefile @@ -3,6 +3,13 @@ PGFILEDESC = "pg_verifybackup - verify a backup against using a backup manifest" PGAPPICON = win32 +# make these available to TAP test scripts +export TAR +# Note that GZIP cannot be used directly as this environment variable is +# used by the command "gzip" to pass down options, so stick with a different +# name. +export GZIP_PROGRAM=$(GZIP) + subdir = src/bin/pg_verifybackup top_builddir = ../../.. 
include $(top_builddir)/src/Makefile.global diff --git a/src/bin/pg_verifybackup/t/008_untar.pl b/src/bin/pg_verifybackup/t/008_untar.pl new file mode 100644 index 00000000000..1d74a418865 --- /dev/null +++ b/src/bin/pg_verifybackup/t/008_untar.pl @@ -0,0 +1,104 @@ +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# This test case aims to verify that server-side backups and server-side +# backup compression work properly, and it also aims to verify that +# pg_verifybackup can verify a base backup that didn't start out in plain +# format. + +use strict; +use warnings; +use Config; +use File::Path qw(rmtree); +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 6; + +my $primary = PostgreSQL::Test::Cluster->new('primary'); +$primary->init(allows_streaming => 1); +$primary->start; + +my $have_zlib = check_pg_config("#define HAVE_LIBZ 1"); +my $backup_path = $primary->backup_dir . '/server-backup'; +my $extract_path = $primary->backup_dir . '/extracted-backup'; + +my @test_configuration = ( + { + 'compression_method' => 'none', + 'backup_flags' => [], + 'backup_archive' => 'base.tar', + 'enabled' => 1 + }, + { + 'compression_method' => 'gzip', + 'backup_flags' => ['--compress', 'server-gzip'], + 'backup_archive' => 'base.tar.gz', + 'decompress_program' => $ENV{'GZIP_PROGRAM'}, + 'decompress_flags' => [ '-d' ], + 'enabled' => check_pg_config("#define HAVE_LIBZ 1") + } +); + +for my $tc (@test_configuration) +{ + my $method = $tc->{'compression_method'}; + + SKIP: { + skip "$method compression not supported by this build", 3 + if ! $tc->{'enabled'}; + skip "no decompressor available for $method", 3 + if exists $tc->{'decompress_program'} && + !defined $tc->{'decompress_program'}; + + # Take a server-side backup. 
+ my @backup = ( + 'pg_basebackup', '--no-sync', '-cfast', '--target', + "server:$backup_path", '-Xfetch' + ); + push @backup, @{$tc->{'backup_flags'}}; + $primary->command_ok(\@backup, + "server side backup, compression $method"); + + + # Verify that we got the files we expected. + my $backup_files = join(',', + sort grep { $_ ne '.' && $_ ne '..' } slurp_dir($backup_path)); + my $expected_backup_files = join(',', + sort ('backup_manifest', $tc->{'backup_archive'})); + is($backup_files,$expected_backup_files, + "found expected backup files, compression $method"); + + # Decompress. + if (exists $tc->{'decompress_program'}) + { + my @decompress = ($tc->{'decompress_program'}); + push @decompress, @{$tc->{'decompress_flags'}} + if $tc->{'decompress_flags'}; + push @decompress, $backup_path . '/' . $tc->{'backup_archive'}; + system_or_bail(@decompress); + } + + SKIP: { + my $tar = $ENV{TAR}; + # don't check for a working tar here, to accommodate various odd + # cases such as AIX. If tar doesn't work the init_from_backup below + # will fail. + skip "no tar program available", 1 + if (!defined $tar || $tar eq ''); + + # Untar. + mkdir($extract_path); + system_or_bail($tar, 'xf', $backup_path . '/base.tar', + '-C', $extract_path); + + # Verify. + $primary->command_ok([ 'pg_verifybackup', '-n', + '-m', "$backup_path/backup_manifest", '-e', $extract_path ], + "verify backup, compression $method"); + } + + # Cleanup. + unlink($backup_path . '/backup_manifest'); + unlink($backup_path . '/base.tar'); + rmtree($extract_path); + } +} diff --git a/src/include/replication/basebackup_sink.h b/src/include/replication/basebackup_sink.h index 4acadf406dd..d3276b2487b 100644 --- a/src/include/replication/basebackup_sink.h +++ b/src/include/replication/basebackup_sink.h @@ -284,6 +284,7 @@ extern void bbsink_forward_cleanup(bbsink *sink); /* Constructors for various types of sinks. 
*/ extern bbsink *bbsink_copystream_new(bool send_to_client); extern bbsink *bbsink_copytblspc_new(void); +extern bbsink *bbsink_gzip_new(bbsink *next, int compresslevel); extern bbsink *bbsink_progress_new(bbsink *next, bool estimate_backup_size); extern bbsink *bbsink_server_new(bbsink *next, char *pathname); extern bbsink *bbsink_throttle_new(bbsink *next, uint32 maxrate); |