diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2015-11-21 20:21:32 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2015-11-21 20:21:32 -0500 |
commit | 8f1559aa57b5038057c85be2f9ed00b641d3ac9d (patch) | |
tree | 2ab2a4f114c5e373f4b4dba57479b5820fb252c9 /src/backend/replication/basebackup.c | |
parent | 60ba32cb5128571aec344288cc26986215becd67 (diff) | |
download | postgresql-8f1559aa57b5038057c85be2f9ed00b641d3ac9d.tar.gz postgresql-8f1559aa57b5038057c85be2f9ed00b641d3ac9d.zip |
Adopt the GNU convention for handling tar-archive members exceeding 8GB.

The POSIX standard for tar headers requires archive member sizes to be
printed in octal with at most 11 digits, limiting the representable file
size to 8GB. However, GNU tar and apparently most other modern tars
support a convention in which oversized values can be stored in base-256,
allowing any practical file to be a tar member. Adopt this convention
to remove two limitations:

* pg_dump with -Ft output format failed if the contents of any one table
  exceeded 8GB.
* pg_basebackup failed if the data directory contained any file exceeding
  8GB. (This would be a fatal problem for installations configured with a
  table segment size of 8GB or more, and it has also been seen to fail when
  large core dump files exist in the data directory.)

File sizes under 8GB are still printed in octal, so that no compatibility
issues are created except in cases that would have failed entirely before.
In addition, this patch fixes several bugs in the same area:

* In 9.3 and later, we'd defined tarCreateHeader's file-size argument as
  size_t, which meant that on 32-bit machines it would write a corrupt tar
  header for file sizes between 4GB and 8GB, even though no error was raised.
  This broke both "pg_dump -Ft" and pg_basebackup for such cases.
* pg_restore from a tar archive would fail on tables of size between 4GB
  and 8GB, on machines where either "size_t" or "unsigned long" is 32 bits.
  This happened even with an archive file not affected by the previous bug.
* pg_basebackup would fail if there were files of size between 4GB and 8GB,
  even on 64-bit machines.
* In 9.3 and later, "pg_basebackup -Ft" failed entirely, for any file size,
  on 64-bit big-endian machines.

In view of these potential data-loss bugs, back-patch to all supported
branches, even though removal of the documented 8GB limit might otherwise
be considered a new feature rather than a bug fix.
Diffstat (limited to 'src/backend/replication/basebackup.c')
-rw-r--r-- | src/backend/replication/basebackup.c | 146 |
1 file changed, 5 insertions, 141 deletions
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index a0c8903497d..3ae81790d76 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -33,6 +33,7 @@ #include "utils/elog.h" #include "utils/memutils.h" #include "utils/ps_status.h" +#include "pgtar.h" typedef struct { @@ -846,49 +847,6 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces) /* - * Utility routine to print possibly larger than 32 bit integers in a - * portable fashion. Filled with zeros. - */ -static void -print_val(char *s, uint64 val, unsigned int base, size_t len) -{ - int i; - - for (i = len; i > 0; i--) - { - int digit = val % base; - - s[i - 1] = '0' + digit; - val = val / base; - } -} - -/* - * Maximum file size for a tar member: The limit inherent in the - * format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed - * what we can represent in pgoff_t. - */ -#define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1) - -static int -_tarChecksum(char *header) -{ - int i, - sum; - - /* - * Per POSIX, the checksum is the simple sum of all bytes in the header, - * treating the bytes as unsigned, and treating the checksum field (at - * offset 148) as though it contained 8 spaces. - */ - sum = 8 * ' '; /* presumed value for checksum field */ - for (i = 0; i < 512; i++) - if (i < 148 || i >= 156) - sum += 0xFF & header[i]; - return sum; -} - -/* * Given the member, write the TAR header & send the file. * * If 'missing_ok' is true, will not throw an error if the file is not found. @@ -916,15 +874,6 @@ sendFile(char *readfilename, char *tarfilename, struct stat *statbuf, errmsg("could not open file \"%s\": %m", readfilename))); } - /* - * Some compilers will throw a warning knowing this test can never be true - * because pgoff_t can't exceed the compared maximum on their platform. 
- */ - if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN) - ereport(ERROR, - (errmsg("archive member \"%s\" too large for tar format", - tarfilename))); - _tarWriteHeader(tarfilename, NULL, statbuf); while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) @@ -979,95 +928,10 @@ _tarWriteHeader(const char *filename, const char *linktarget, { char h[512]; - /* - * Note: most of the fields in a tar header are not supposed to be - * null-terminated. We use sprintf, which will write a null after the - * required bytes; that null goes into the first byte of the next field. - * This is okay as long as we fill the fields in order. - */ - memset(h, 0, sizeof(h)); - - /* Name 100 */ - strlcpy(&h[0], filename, 100); - if (linktarget != NULL || S_ISDIR(statbuf->st_mode)) - { - /* - * We only support symbolic links to directories, and this is - * indicated in the tar format by adding a slash at the end of the - * name, the same as for regular directories. - */ - int flen = strlen(filename); - - flen = Min(flen, 99); - h[flen] = '/'; - h[flen + 1] = '\0'; - } - - /* Mode 8 */ - sprintf(&h[100], "%07o ", (int) statbuf->st_mode); - - /* User ID 8 */ - sprintf(&h[108], "%07o ", statbuf->st_uid); - - /* Group 8 */ - sprintf(&h[116], "%07o ", statbuf->st_gid); - - /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */ - if (linktarget != NULL || S_ISDIR(statbuf->st_mode)) - /* Symbolic link or directory has size zero */ - print_val(&h[124], 0, 8, 11); - else - print_val(&h[124], statbuf->st_size, 8, 11); - sprintf(&h[135], " "); - - /* Mod Time 12 */ - sprintf(&h[136], "%011o ", (int) statbuf->st_mtime); - - /* Checksum 8 cannot be calculated until we've filled all other fields */ - - if (linktarget != NULL) - { - /* Type - Symbolic link */ - sprintf(&h[156], "2"); - /* Link Name 100 */ - strlcpy(&h[157], linktarget, 100); - } - else if (S_ISDIR(statbuf->st_mode)) - /* Type - directory */ - sprintf(&h[156], "5"); - else - /* Type - regular file 
*/ - sprintf(&h[156], "0"); - - /* Magic 6 */ - sprintf(&h[257], "ustar"); - - /* Version 2 */ - sprintf(&h[263], "00"); - - /* User 32 */ - /* XXX: Do we need to care about setting correct username? */ - strlcpy(&h[265], "postgres", 32); - - /* Group 32 */ - /* XXX: Do we need to care about setting correct group name? */ - strlcpy(&h[297], "postgres", 32); - - /* Major Dev 8 */ - sprintf(&h[329], "%07o ", 0); - - /* Minor Dev 8 */ - sprintf(&h[337], "%07o ", 0); - - /* Prefix 155 - not used, leave as nulls */ - - /* - * We mustn't overwrite the next field while inserting the checksum. - * Fortunately, the checksum can't exceed 6 octal digits, so we just write - * 6 digits, a space, and a null, which is legal per POSIX. - */ - sprintf(&h[148], "%06o ", _tarChecksum(h)); + tarCreateHeader(h, filename, linktarget, statbuf->st_size, + statbuf->st_mode, statbuf->st_uid, statbuf->st_gid, + statbuf->st_mtime, + false /* write real POSIX header */); - /* Now send the completed header. */ pq_putmessage('d', h, 512); } |