diff options
Diffstat (limited to 'src/bin')
-rw-r--r-- | src/bin/initdb/initdb.c | 58 | ||||
-rw-r--r-- | src/bin/pg_basebackup/pg_basebackup.c | 7 | ||||
-rw-r--r-- | src/bin/pg_basebackup/pg_receivewal.c | 16 | ||||
-rw-r--r-- | src/bin/pg_basebackup/receivelog.c | 36 | ||||
-rw-r--r-- | src/bin/pg_basebackup/streamutil.c | 76 | ||||
-rw-r--r-- | src/bin/pg_basebackup/streamutil.h | 2 | ||||
-rw-r--r-- | src/bin/pg_controldata/pg_controldata.c | 15 | ||||
-rw-r--r-- | src/bin/pg_resetwal/pg_resetwal.c | 55 | ||||
-rw-r--r-- | src/bin/pg_rewind/parsexlog.c | 30 | ||||
-rw-r--r-- | src/bin/pg_rewind/pg_rewind.c | 12 | ||||
-rw-r--r-- | src/bin/pg_rewind/pg_rewind.h | 1 | ||||
-rw-r--r-- | src/bin/pg_test_fsync/pg_test_fsync.c | 7 | ||||
-rw-r--r-- | src/bin/pg_upgrade/test.sh | 4 | ||||
-rw-r--r-- | src/bin/pg_waldump/pg_waldump.c | 246 |
14 files changed, 431 insertions, 134 deletions
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 9d1e5d789f6..1d4a138618b 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -59,6 +59,7 @@ #include "sys/mman.h" #endif +#include "access/xlog_internal.h" #include "catalog/catalog.h" #include "catalog/pg_authid.h" #include "catalog/pg_class.h" @@ -141,6 +142,8 @@ static bool sync_only = false; static bool show_setting = false; static bool data_checksums = false; static char *xlog_dir = NULL; +static char *str_wal_segment_size_mb = NULL; +static int wal_segment_size_mb; /* internal vars */ @@ -1000,6 +1003,23 @@ test_config_settings(void) } /* + * Calculate the default wal_size with a "pretty" unit. + */ +static char * +pretty_wal_size(int segment_count) +{ + int sz = wal_segment_size_mb * segment_count; + char *result = pg_malloc(10); + + if ((sz % 1024) == 0) + snprintf(result, 10, "%dGB", sz / 1024); + else + snprintf(result, 10, "%dMB", sz); + + return result; +} + +/* * set up all the config files */ static void @@ -1043,6 +1063,15 @@ setup_config(void) conflines = replace_token(conflines, "#port = 5432", repltok); #endif + /* set default max_wal_size and min_wal_size */ + snprintf(repltok, sizeof(repltok), "min_wal_size = %s", + pretty_wal_size(DEFAULT_MIN_WAL_SEGS)); + conflines = replace_token(conflines, "#min_wal_size = 80MB", repltok); + + snprintf(repltok, sizeof(repltok), "max_wal_size = %s", + pretty_wal_size(DEFAULT_MAX_WAL_SEGS)); + conflines = replace_token(conflines, "#max_wal_size = 1GB", repltok); + snprintf(repltok, sizeof(repltok), "lc_messages = '%s'", escape_quotes(lc_messages)); conflines = replace_token(conflines, "#lc_messages = 'C'", repltok); @@ -1352,8 +1381,9 @@ bootstrap_template1(void) unsetenv("PGCLIENTENCODING"); snprintf(cmd, sizeof(cmd), - "\"%s\" --boot -x1 %s %s %s", + "\"%s\" --boot -x1 -X %u %s %s %s", backend_exec, + wal_segment_size_mb * (1024 * 1024), data_checksums ? "-k" : "", boot_options, debug ? 
"-d 5" : ""); @@ -2293,6 +2323,7 @@ usage(const char *progname) printf(_(" -U, --username=NAME database superuser name\n")); printf(_(" -W, --pwprompt prompt for a password for the new superuser\n")); printf(_(" -X, --waldir=WALDIR location for the write-ahead log directory\n")); + printf(_(" --wal-segsize=SIZE size of wal segment size\n")); printf(_("\nLess commonly used options:\n")); printf(_(" -d, --debug generate lots of debugging output\n")); printf(_(" -k, --data-checksums use data page checksums\n")); @@ -2983,6 +3014,7 @@ main(int argc, char *argv[]) {"no-sync", no_argument, NULL, 'N'}, {"sync-only", no_argument, NULL, 'S'}, {"waldir", required_argument, NULL, 'X'}, + {"wal-segsize", required_argument, NULL, 12}, {"data-checksums", no_argument, NULL, 'k'}, {NULL, 0, NULL, 0} }; @@ -3116,6 +3148,9 @@ main(int argc, char *argv[]) case 'X': xlog_dir = pg_strdup(optarg); break; + case 12: + str_wal_segment_size_mb = pg_strdup(optarg); + break; default: /* getopt_long already emitted a complaint */ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), @@ -3178,6 +3213,27 @@ main(int argc, char *argv[]) check_need_password(authmethodlocal, authmethodhost); + /* set wal segment size */ + if (str_wal_segment_size_mb == NULL) + wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024); + else + { + char *endptr; + + /* check that the argument is a number */ + wal_segment_size_mb = strtol(str_wal_segment_size_mb, &endptr, 10); + + /* verify that wal segment size is valid */ + if (*endptr != '\0' || + !IsValidWalSegSize(wal_segment_size_mb * 1024 * 1024)) + { + fprintf(stderr, + _("%s: --wal-segsize must be a power of two between 1 and 1024\n"), + progname); + exit(1); + } + } + get_restricted_token(progname); setup_pgdata(); diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 51509d150e5..2d039d5a33a 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -26,6 +26,7 
@@ #include <zlib.h> #endif +#include "access/xlog_internal.h" #include "common/file_utils.h" #include "common/string.h" #include "fe_utils/string_utils.h" @@ -555,7 +556,7 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier) } param->startptr = ((uint64) hi) << 32 | lo; /* Round off to even segment position */ - param->startptr -= param->startptr % XLOG_SEG_SIZE; + param->startptr -= XLogSegmentOffset(param->startptr, WalSegSz); #ifndef WIN32 /* Create our background pipe */ @@ -2397,6 +2398,10 @@ main(int argc, char **argv) exit(1); } + /* determine remote server's xlog segment size */ + if (!RetrieveWalSegSize(conn)) + disconnect_and_exit(1); + /* Create pg_wal symlink, if required */ if (xlog_dir) { diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c index 710a33ab4d2..fbac0df93d8 100644 --- a/src/bin/pg_basebackup/pg_receivewal.c +++ b/src/bin/pg_basebackup/pg_receivewal.c @@ -191,7 +191,7 @@ close_destination_dir(DIR *dest_dir, char *dest_folder) /* * Determine starting location for streaming, based on any existing xlog * segments in the directory. We start at the end of the last one that is - * complete (size matches XLogSegSize), on the timeline with highest ID. + * complete (size matches wal segment size), on the timeline with highest ID. * * If there are no WAL files in the directory, returns InvalidXLogRecPtr. */ @@ -242,7 +242,7 @@ FindStreamingStart(uint32 *tli) /* * Looks like an xlog file. Parse its position. 
*/ - XLogFromFileName(dirent->d_name, &tli, &segno); + XLogFromFileName(dirent->d_name, &tli, &segno, WalSegSz); /* * Check that the segment has the right size, if it's supposed to be @@ -267,7 +267,7 @@ FindStreamingStart(uint32 *tli) disconnect_and_exit(1); } - if (statbuf.st_size != XLOG_SEG_SIZE) + if (statbuf.st_size != WalSegSz) { fprintf(stderr, _("%s: segment file \"%s\" has incorrect size %d, skipping\n"), @@ -308,7 +308,7 @@ FindStreamingStart(uint32 *tli) bytes_out = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; - if (bytes_out != XLOG_SEG_SIZE) + if (bytes_out != WalSegSz) { fprintf(stderr, _("%s: compressed segment file \"%s\" has incorrect uncompressed size %d, skipping\n"), @@ -349,7 +349,7 @@ FindStreamingStart(uint32 *tli) if (!high_ispartial) high_segno++; - XLogSegNoOffsetToRecPtr(high_segno, 0, high_ptr); + XLogSegNoOffsetToRecPtr(high_segno, 0, high_ptr, WalSegSz); *tli = high_tli; return high_ptr; @@ -410,7 +410,7 @@ StreamLog(void) /* * Always start streaming at the beginning of a segment */ - stream.startpos -= stream.startpos % XLOG_SEG_SIZE; + stream.startpos -= XLogSegmentOffset(stream.startpos, WalSegSz); /* * Start the replication @@ -689,6 +689,10 @@ main(int argc, char **argv) if (!RunIdentifySystem(conn, NULL, NULL, NULL, &db_name)) disconnect_and_exit(1); + /* determine remote server's xlog segment size */ + if (!RetrieveWalSegSize(conn)) + disconnect_and_exit(1); + /* * Check that there is a database associated with connection, none should * be defined in this context. 
diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index 888458f4a90..65931f64541 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -95,17 +95,17 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) ssize_t size; XLogSegNo segno; - XLByteToSeg(startpoint, segno); - XLogFileName(current_walfile_name, stream->timeline, segno); + XLByteToSeg(startpoint, segno, WalSegSz); + XLogFileName(current_walfile_name, stream->timeline, segno, WalSegSz); snprintf(fn, sizeof(fn), "%s%s", current_walfile_name, stream->partial_suffix ? stream->partial_suffix : ""); /* * When streaming to files, if an existing file exists we verify that it's - * either empty (just created), or a complete XLogSegSize segment (in - * which case it has been created and padded). Anything else indicates a - * corrupt file. + * either empty (just created), or a complete WalSegSz segment (in which + * case it has been created and padded). Anything else indicates a corrupt + * file. * * When streaming to tar, no file with this name will exist before, so we * never have to verify a size. @@ -120,7 +120,7 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) progname, fn, stream->walmethod->getlasterror()); return false; } - if (size == XLogSegSize) + if (size == WalSegSz) { /* Already padded file. 
Open it for use */ f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, 0); @@ -154,7 +154,7 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) ngettext("%s: write-ahead log file \"%s\" has %d byte, should be 0 or %d\n", "%s: write-ahead log file \"%s\" has %d bytes, should be 0 or %d\n", size), - progname, fn, (int) size, XLogSegSize); + progname, fn, (int) size, WalSegSz); return false; } /* File existed and was empty, so fall through and open */ @@ -162,7 +162,8 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) /* No file existed, so create one */ - f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, XLogSegSize); + f = stream->walmethod->open_for_write(current_walfile_name, + stream->partial_suffix, WalSegSz); if (f == NULL) { fprintf(stderr, @@ -203,7 +204,7 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos) if (stream->partial_suffix) { - if (currpos == XLOG_SEG_SIZE) + if (currpos == WalSegSz) r = stream->walmethod->close(walfile, CLOSE_NORMAL); else { @@ -231,7 +232,7 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos) * new node. This is in line with walreceiver.c always doing a * XLogArchiveForceDone() after a complete segment. */ - if (currpos == XLOG_SEG_SIZE && stream->mark_done) + if (currpos == WalSegSz && stream->mark_done) { /* writes error message if failed */ if (!mark_file_as_archived(stream, current_walfile_name)) @@ -676,7 +677,8 @@ ReceiveXlogStream(PGconn *conn, StreamCtl *stream) * start streaming at the beginning of a segment. 
*/ stream->timeline = newtimeline; - stream->startpos = stream->startpos - (stream->startpos % XLOG_SEG_SIZE); + stream->startpos = stream->startpos - + XLogSegmentOffset(stream->startpos, WalSegSz); continue; } else if (PQresultStatus(res) == PGRES_COMMAND_OK) @@ -1111,7 +1113,7 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, *blockpos = fe_recvint64(&copybuf[1]); /* Extract WAL location for this block */ - xlogoff = *blockpos % XLOG_SEG_SIZE; + xlogoff = XLogSegmentOffset(*blockpos, WalSegSz); /* * Verify that the initial location in the stream matches where we think @@ -1148,11 +1150,11 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, int bytes_to_write; /* - * If crossing a WAL boundary, only write up until we reach - * XLOG_SEG_SIZE. + * If crossing a WAL boundary, only write up until we reach wal + * segment size. */ - if (xlogoff + bytes_left > XLOG_SEG_SIZE) - bytes_to_write = XLOG_SEG_SIZE - xlogoff; + if (xlogoff + bytes_left > WalSegSz) + bytes_to_write = WalSegSz - xlogoff; else bytes_to_write = bytes_left; @@ -1182,7 +1184,7 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, xlogoff += bytes_to_write; /* Did we reach the end of a WAL segment? 
*/ - if (*blockpos % XLOG_SEG_SIZE == 0) + if (XLogSegmentOffset(*blockpos, WalSegSz) == 0) { if (!close_walfile(stream, *blockpos)) /* Error message written in close_walfile() */ diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c index 9d40744a349..df17f60596a 100644 --- a/src/bin/pg_basebackup/streamutil.c +++ b/src/bin/pg_basebackup/streamutil.c @@ -25,12 +25,18 @@ #include "receivelog.h" #include "streamutil.h" +#include "access/xlog_internal.h" #include "pqexpbuffer.h" #include "common/fe_memutils.h" #include "datatype/timestamp.h" #define ERRCODE_DUPLICATE_OBJECT "42710" +uint32 WalSegSz; + +/* SHOW command for replication connection was introduced in version 10 */ +#define MINIMUM_VERSION_FOR_SHOW_CMD 100000 + const char *progname; char *connection_string = NULL; char *dbhost = NULL; @@ -232,6 +238,76 @@ GetConnection(void) } /* + * From version 10, explicitly set wal segment size using SHOW wal_segment_size + * since ControlFile is not accessible here. 
+ */ +bool +RetrieveWalSegSize(PGconn *conn) +{ + PGresult *res; + char xlog_unit[3]; + int xlog_val, + multiplier = 1; + + /* check connection existence */ + Assert(conn != NULL); + + /* for previous versions set the default xlog seg size */ + if (PQserverVersion(conn) < MINIMUM_VERSION_FOR_SHOW_CMD) + { + WalSegSz = DEFAULT_XLOG_SEG_SIZE; + return true; + } + + res = PQexec(conn, "SHOW wal_segment_size"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, _("%s: could not send replication command \"%s\": %s\n"), + progname, "SHOW wal_segment_size", PQerrorMessage(conn)); + + PQclear(res); + return false; + } + if (PQntuples(res) != 1 || PQnfields(res) < 1) + { + fprintf(stderr, + _("%s: could not fetch WAL segment size: got %d rows and %d fields, expected %d rows and %d or more fields\n"), + progname, PQntuples(res), PQnfields(res), 1, 1); + + PQclear(res); + return false; + } + + /* fetch xlog value and unit from the result */ + if (sscanf(PQgetvalue(res, 0, 0), "%d%s", &xlog_val, xlog_unit) != 2) + { + fprintf(stderr, _("%s: WAL segment size could not be parsed\n"), + progname); + return false; + } + + /* set the multiplier based on unit to convert xlog_val to bytes */ + if (strcmp(xlog_unit, "MB") == 0) + multiplier = 1024 * 1024; + else if (strcmp(xlog_unit, "GB") == 0) + multiplier = 1024 * 1024 * 1024; + + /* convert and set WalSegSz */ + WalSegSz = xlog_val * multiplier; + + if (!IsValidWalSegSize(WalSegSz)) + { + fprintf(stderr, + _("%s: WAL segment size must be a power of two between 1MB and 1GB, but the remote server reported a value of %d bytes\n"), + progname, WalSegSz); + return false; + } + + PQclear(res); + return true; +} + +/* * Run IDENTIFY_SYSTEM through a given connection and give back to caller * some result information if requested: * - System identifier diff --git a/src/bin/pg_basebackup/streamutil.h b/src/bin/pg_basebackup/streamutil.h index 6f6878679fc..ec227712d56 100644 --- a/src/bin/pg_basebackup/streamutil.h +++ 
b/src/bin/pg_basebackup/streamutil.h @@ -24,6 +24,7 @@ extern char *dbuser; extern char *dbport; extern char *dbname; extern int dbgetpassword; +extern uint32 WalSegSz; /* Connection kept global so we can disconnect easily */ extern PGconn *conn; @@ -39,6 +40,7 @@ extern bool RunIdentifySystem(PGconn *conn, char **sysid, TimeLineID *starttli, XLogRecPtr *startpos, char **db_name); +extern bool RetrieveWalSegSize(PGconn *conn); extern TimestampTz feGetCurrentTimestamp(void); extern void feTimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs); diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 2ea893179ab..8cc4fb03419 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -99,6 +99,7 @@ main(int argc, char *argv[]) char xlogfilename[MAXFNAMELEN]; int c; int i; + int WalSegSz; set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_controldata")); @@ -164,6 +165,15 @@ main(int argc, char *argv[]) "Either the file is corrupt, or it has a different layout than this program\n" "is expecting. The results below are untrustworthy.\n\n")); + /* set wal segment size */ + WalSegSz = ControlFile->xlog_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + fprintf(stderr, + _("WARNING: WAL segment size specified, %d bytes, is not a power of two between 1MB and 1GB.\n" + "The file is corrupt and the results below are untrustworthy.\n"), + WalSegSz); + /* * This slightly-chintzy coding will work as long as the control file * timestamps are within the range of time_t; that should be the case in @@ -184,8 +194,9 @@ main(int argc, char *argv[]) * Calculate name of the WAL file containing the latest checkpoint's REDO * start point. 
*/ - XLByteToSeg(ControlFile->checkPointCopy.redo, segno); - XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID, segno); + XLByteToSeg(ControlFile->checkPointCopy.redo, segno, WalSegSz); + XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID, + segno, WalSegSz); /* * Format system_identifier and mock_authentication_nonce separately to diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index ac678317795..25d5547b36d 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -70,6 +70,7 @@ static MultiXactId set_mxid = 0; static MultiXactOffset set_mxoff = (MultiXactOffset) -1; static uint32 minXlogTli = 0; static XLogSegNo minXlogSegNo = 0; +static int WalSegSz; static void CheckDataVersion(void); static bool ReadControlFile(void); @@ -94,6 +95,7 @@ main(int argc, char *argv[]) char *endptr; char *endptr2; char *DataDir = NULL; + char *log_fname = NULL; int fd; set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_resetwal")); @@ -265,7 +267,12 @@ main(int argc, char *argv[]) fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } - XLogFromFileName(optarg, &minXlogTli, &minXlogSegNo); + + /* + * XLogFromFileName requires wal segment size which is not yet + * set. Hence wal details are set later on. + */ + log_fname = pg_strdup(optarg); break; default: @@ -350,6 +357,9 @@ main(int argc, char *argv[]) if (!ReadControlFile()) GuessControlValues(); + if (log_fname != NULL) + XLogFromFileName(log_fname, &minXlogTli, &minXlogSegNo, WalSegSz); + /* * Also look at existing segment files to set up newXlogSegNo */ @@ -573,18 +583,27 @@ ReadControlFile(void) offsetof(ControlFileData, crc)); FIN_CRC32C(crc); - if (EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc)) + if (!EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc)) { - /* Valid data... 
*/ - memcpy(&ControlFile, buffer, sizeof(ControlFile)); - return true; + /* We will use the data but treat it as guessed. */ + fprintf(stderr, + _("%s: pg_control exists but has invalid CRC; proceed with caution\n"), + progname); + guessed = true; } - fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"), - progname); - /* We will use the data anyway, but treat it as guessed. */ memcpy(&ControlFile, buffer, sizeof(ControlFile)); - guessed = true; + WalSegSz = ControlFile.xlog_seg_size; + + /* return false if WalSegSz is not valid */ + if (!IsValidWalSegSize(WalSegSz)) + { + fprintf(stderr, + _("%s: pg_control specifies invalid WAL segment size (%d bytes); proceed with caution \n"), + progname, WalSegSz); + guessed = true; + } + return true; } @@ -660,7 +679,7 @@ GuessControlValues(void) ControlFile.blcksz = BLCKSZ; ControlFile.relseg_size = RELSEG_SIZE; ControlFile.xlog_blcksz = XLOG_BLCKSZ; - ControlFile.xlog_seg_size = XLOG_SEG_SIZE; + ControlFile.xlog_seg_size = DEFAULT_XLOG_SEG_SIZE; ControlFile.nameDataLen = NAMEDATALEN; ControlFile.indexMaxKeys = INDEX_MAX_KEYS; ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE; @@ -773,7 +792,8 @@ PrintNewControlValues(void) /* This will be always printed in order to keep format same. */ printf(_("\n\nValues to be changed:\n\n")); - XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo); + XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, + newXlogSegNo, WalSegSz); printf(_("First log segment after reset: %s\n"), fname); if (set_mxid != 0) @@ -850,7 +870,7 @@ RewriteControlFile(void) * newXlogSegNo. 
*/ XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD, - ControlFile.checkPointCopy.redo); + ControlFile.checkPointCopy.redo, WalSegSz); ControlFile.checkPointCopy.time = (pg_time_t) time(NULL); ControlFile.state = DB_SHUTDOWNED; @@ -877,7 +897,7 @@ RewriteControlFile(void) ControlFile.max_locks_per_xact = 64; /* Now we can force the recorded xlog seg size to the right thing. */ - ControlFile.xlog_seg_size = XLogSegSize; + ControlFile.xlog_seg_size = WalSegSz; /* Contents are protected with a CRC */ INIT_CRC32C(ControlFile.crc); @@ -1014,7 +1034,7 @@ FindEndOfXLOG(void) * are in virgin territory. */ xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size; - newXlogSegNo = (xlogbytepos + XLogSegSize - 1) / XLogSegSize; + newXlogSegNo = (xlogbytepos + WalSegSz - 1) / WalSegSz; newXlogSegNo++; } @@ -1151,7 +1171,7 @@ WriteEmptyXLOG(void) page->xlp_pageaddr = ControlFile.checkPointCopy.redo - SizeOfXLogLongPHD; longpage = (XLogLongPageHeader) page; longpage->xlp_sysid = ControlFile.system_identifier; - longpage->xlp_seg_size = XLogSegSize; + longpage->xlp_seg_size = WalSegSz; longpage->xlp_xlog_blcksz = XLOG_BLCKSZ; /* Insert the initial checkpoint record */ @@ -1176,7 +1196,8 @@ WriteEmptyXLOG(void) record->xl_crc = crc; /* Write the first page */ - XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo); + XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, + newXlogSegNo, WalSegSz); unlink(path); @@ -1202,7 +1223,7 @@ WriteEmptyXLOG(void) /* Fill the rest of the file with zeroes */ memset(buffer, 0, XLOG_BLCKSZ); - for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) + for (nbytes = XLOG_BLCKSZ; nbytes < WalSegSz; nbytes += XLOG_BLCKSZ) { errno = 0; if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ) diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 1befdbdeea3..0fc71d2a135 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -69,7 +69,8 @@ 
extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, private.datadir = datadir; private.tliIndex = tliIndex; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead, + &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); @@ -122,7 +123,8 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex) private.datadir = datadir; private.tliIndex = tliIndex; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead, + &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); @@ -170,11 +172,17 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, * header in that case to find the next record. */ if (forkptr % XLOG_BLCKSZ == 0) - forkptr += (forkptr % XLogSegSize == 0) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD; + { + if (XLogSegmentOffset(forkptr, WalSegSz) == 0) + forkptr += SizeOfXLogLongPHD; + else + forkptr += SizeOfXLogShortPHD; + } private.datadir = datadir; private.tliIndex = tliIndex; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead, + &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); @@ -239,21 +247,22 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, XLogRecPtr targetSegEnd; XLogSegNo targetSegNo; - XLByteToSeg(targetPagePtr, targetSegNo); - XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd); - targetPageOff = targetPagePtr % XLogSegSize; + XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz); + XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd, WalSegSz); + targetPageOff = XLogSegmentOffset(targetPagePtr, WalSegSz); /* * See if we need to switch to a new segment because the requested record * is not in the currently open one. 
*/ - if (xlogreadfd >= 0 && !XLByteInSeg(targetPagePtr, xlogreadsegno)) + if (xlogreadfd >= 0 && + !XLByteInSeg(targetPagePtr, xlogreadsegno, WalSegSz)) { close(xlogreadfd); xlogreadfd = -1; } - XLByteToSeg(targetPagePtr, xlogreadsegno); + XLByteToSeg(targetPagePtr, xlogreadsegno, WalSegSz); if (xlogreadfd < 0) { @@ -272,7 +281,8 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, targetHistory[private->tliIndex].begin >= targetSegEnd) private->tliIndex--; - XLogFileName(xlogfname, targetHistory[private->tliIndex].tli, xlogreadsegno); + XLogFileName(xlogfname, targetHistory[private->tliIndex].tli, + xlogreadsegno, WalSegSz); snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s", private->datadir, xlogfname); diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 4bd1a759734..6079156e802 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -44,6 +44,7 @@ static ControlFileData ControlFile_target; static ControlFileData ControlFile_source; const char *progname; +int WalSegSz; /* Configuration options */ char *datadir_target = NULL; @@ -572,8 +573,8 @@ createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpo char buf[1000]; int len; - XLByteToSeg(startpoint, startsegno); - XLogFileName(xlogfilename, starttli, startsegno); + XLByteToSeg(startpoint, startsegno, WalSegSz); + XLogFileName(xlogfilename, starttli, startsegno, WalSegSz); /* * Construct backup label file @@ -631,6 +632,13 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size) memcpy(ControlFile, src, sizeof(ControlFileData)); + /* set and validate WalSegSz */ + WalSegSz = ControlFile->xlog_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + pg_fatal("WAL segment size must be a power of two between 1MB and 1GB, but the control file specifies %d bytes\n", + WalSegSz); + /* Additional checks on control file */ checkControlFile(ControlFile); } diff --git a/src/bin/pg_rewind/pg_rewind.h 
b/src/bin/pg_rewind/pg_rewind.h index 31353dd3548..7bec34ff55d 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -24,6 +24,7 @@ extern char *connstr_source; extern bool debug; extern bool showprogress; extern bool dry_run; +extern int WalSegSz; /* Target history */ extern TimeLineHistoryEntry *targetHistory; diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c index c607b5371c0..e6f7ef85579 100644 --- a/src/bin/pg_test_fsync/pg_test_fsync.c +++ b/src/bin/pg_test_fsync/pg_test_fsync.c @@ -64,7 +64,7 @@ static const char *progname; static int secs_per_test = 5; static int needs_unlink = 0; -static char full_buf[XLOG_SEG_SIZE], +static char full_buf[DEFAULT_XLOG_SEG_SIZE], *buf, *filename = FSYNC_FILENAME; static struct timeval start_t, @@ -209,7 +209,7 @@ prepare_buf(void) int ops; /* write random data into buffer */ - for (ops = 0; ops < XLOG_SEG_SIZE; ops++) + for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++) full_buf[ops] = random(); buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf); @@ -226,7 +226,8 @@ test_open(void) if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1) die("could not open output file"); needs_unlink = 1; - if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE) + if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) != + DEFAULT_XLOG_SEG_SIZE) die("write failed"); /* fsync now so that dirty buffers don't skew later tests */ diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh index f4556341f32..1bacf066aaf 100644 --- a/src/bin/pg_upgrade/test.sh +++ b/src/bin/pg_upgrade/test.sh @@ -20,7 +20,9 @@ unset MAKELEVEL # Run a given "initdb" binary and overlay the regression testing # authentication configuration. standard_initdb() { - "$1" -N + # To increase coverage of non-standard segment size without + # increase test runtime, run these tests with a lower setting. 
+ "$1" -N --wal-segsize 1 if [ -n "$TEMP_CONFIG" -a -r "$TEMP_CONFIG" ] then cat "$TEMP_CONFIG" >> "$PGDATA/postgresql.conf" diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 5aa3233bd3d..53eca4c8e02 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -13,6 +13,7 @@ #include "postgres.h" #include <dirent.h> +#include <sys/stat.h> #include <unistd.h> #include "access/xlogreader.h" @@ -26,6 +27,8 @@ static const char *progname; +static int WalSegSz; + typedef struct XLogDumpPrivate { TimeLineID timeline; @@ -144,77 +147,166 @@ split_path(const char *path, char **dir, char **fname) } /* - * Try to find the file in several places: - * if directory == NULL: - * fname - * XLOGDIR / fname - * $PGDATA / XLOGDIR / fname - * else - * directory / fname - * directory / XLOGDIR / fname + * Open the file in the valid target directory. * * return a read only fd */ static int -fuzzy_open_file(const char *directory, const char *fname) +open_file_in_directory(const char *directory, const char *fname) { int fd = -1; char fpath[MAXPGPATH]; - if (directory == NULL) + Assert(directory != NULL); + + snprintf(fpath, MAXPGPATH, "%s/%s", directory, fname); + fd = open(fpath, O_RDONLY | PG_BINARY, 0); + + if (fd < 0 && errno != ENOENT) + fatal_error("could not open file \"%s\": %s", + fname, strerror(errno)); + return fd; +} + +/* + * Try to find fname in the given directory. Returns true if it is found, + * false otherwise. If fname is NULL, search the complete directory for any + * file with a valid WAL file name. If file is successfully opened, set the + * wal segment size. + */ +static bool +search_directory(char *directory, char *fname) +{ + int fd = -1; + DIR *xldir; + + /* open file if valid filename is provided */ + if (fname != NULL) + fd = open_file_in_directory(directory, fname); + + /* + * A valid file name is not passed, so search the complete directory. 
If + * we find any file whose name is a valid WAL file name then try to open + * it. If we cannot open it, bail out. + */ + else if ((xldir = opendir(directory)) != NULL) + { + struct dirent *xlde; + + while ((xlde = readdir(xldir)) != NULL) + { + if (IsXLogFileName(xlde->d_name)) + { + fd = open_file_in_directory(directory, xlde->d_name); + fname = xlde->d_name; + break; + } + } + + closedir(xldir); + } + + /* set WalSegSz if file is successfully opened */ + if (fd >= 0) + { + char buf[XLOG_BLCKSZ]; + + if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ) + { + XLogLongPageHeader longhdr = (XLogLongPageHeader) buf; + + WalSegSz = longhdr->xlp_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + fatal_error("WAL segment size must be a power of two between 1MB and 1GB, but the WAL file \"%s\" header specifies %d bytes", + fname, WalSegSz); + } + else + { + if (errno != 0) + fatal_error("could not read file \"%s\": %s", + fname, strerror(errno)); + else + fatal_error("not enough data in file \"%s\"", fname); + } + close(fd); + return true; + } + + return false; +} + +/* + * Identify the target directory and set WalSegSz. + * + * Try to find the file in several places: + * if directory != NULL: + * directory / + * directory / XLOGDIR / + * else + * . + * XLOGDIR / + * $PGDATA / XLOGDIR / + * + * Set the valid target directory in private->inpath. 
+ */ +static void +identify_target_directory(XLogDumpPrivate *private, char *directory, + char *fname) +{ + char fpath[MAXPGPATH]; + + if (directory != NULL) + { + if (search_directory(directory, fname)) + { + private->inpath = strdup(directory); + return; + } + + /* directory / XLOGDIR */ + snprintf(fpath, MAXPGPATH, "%s/%s", directory, XLOGDIR); + if (search_directory(fpath, fname)) + { + private->inpath = strdup(fpath); + return; + } + } + else { const char *datadir; - /* fname */ - fd = open(fname, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; - - /* XLOGDIR / fname */ - snprintf(fpath, MAXPGPATH, "%s/%s", - XLOGDIR, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; + /* current directory */ + if (search_directory(".", fname)) + { + private->inpath = strdup("."); + return; + } + /* XLOGDIR */ + if (search_directory(XLOGDIR, fname)) + { + private->inpath = strdup(XLOGDIR); + return; + } datadir = getenv("PGDATA"); - /* $PGDATA / XLOGDIR / fname */ + /* $PGDATA / XLOGDIR */ if (datadir != NULL) { - snprintf(fpath, MAXPGPATH, "%s/%s/%s", - datadir, XLOGDIR, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; + snprintf(fpath, MAXPGPATH, "%s/%s", datadir, XLOGDIR); + if (search_directory(fpath, fname)) + { + private->inpath = strdup(fpath); + return; + } } } + + /* could not locate WAL file */ + if (fname) + fatal_error("could not locate WAL file \"%s\"", fname); else - { - /* directory / fname */ - snprintf(fpath, MAXPGPATH, "%s/%s", - directory, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; - - /* directory / XLOGDIR / fname */ - snprintf(fpath, MAXPGPATH, "%s/%s/%s", - directory, XLOGDIR, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && 
errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; - } - return -1; + fatal_error("could not find any WAL file"); } /* @@ -244,9 +336,9 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, int segbytes; int readbytes; - startoff = recptr % XLogSegSize; + startoff = XLogSegmentOffset(recptr, WalSegSz); - if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo)) + if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, WalSegSz)) { char fname[MAXFNAMELEN]; int tries; @@ -255,9 +347,9 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, if (sendFile >= 0) close(sendFile); - XLByteToSeg(recptr, sendSegNo); + XLByteToSeg(recptr, sendSegNo, WalSegSz); - XLogFileName(fname, timeline_id, sendSegNo); + XLogFileName(fname, timeline_id, sendSegNo, WalSegSz); /* * In follow mode there is a short period of time after the server @@ -267,7 +359,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, */ for (tries = 0; tries < 10; tries++) { - sendFile = fuzzy_open_file(directory, fname); + sendFile = open_file_in_directory(directory, fname); if (sendFile >= 0) break; if (errno == ENOENT) @@ -298,7 +390,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, int err = errno; char fname[MAXPGPATH]; - XLogFileName(fname, timeline_id, sendSegNo); + XLogFileName(fname, timeline_id, sendSegNo, WalSegSz); fatal_error("could not seek in log file %s to offset %u: %s", fname, startoff, strerror(err)); @@ -307,8 +399,8 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, } /* How many bytes are within this segment? 
*/ - if (nbytes > (XLogSegSize - startoff)) - segbytes = XLogSegSize - startoff; + if (nbytes > (WalSegSz - startoff)) + segbytes = WalSegSz - startoff; else segbytes = nbytes; @@ -318,7 +410,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, int err = errno; char fname[MAXPGPATH]; - XLogFileName(fname, timeline_id, sendSegNo); + XLogFileName(fname, timeline_id, sendSegNo, WalSegSz); fatal_error("could not read from log file %s, offset %u, length %d: %s", fname, sendOff, segbytes, strerror(err)); @@ -935,17 +1027,18 @@ main(int argc, char **argv) private.inpath, strerror(errno)); } - fd = fuzzy_open_file(private.inpath, fname); + identify_target_directory(&private, private.inpath, fname); + fd = open_file_in_directory(private.inpath, fname); if (fd < 0) fatal_error("could not open file \"%s\"", fname); close(fd); /* parse position from file */ - XLogFromFileName(fname, &private.timeline, &segno); + XLogFromFileName(fname, &private.timeline, &segno, WalSegSz); if (XLogRecPtrIsInvalid(private.startptr)) - XLogSegNoOffsetToRecPtr(segno, 0, private.startptr); - else if (!XLByteInSeg(private.startptr, segno)) + XLogSegNoOffsetToRecPtr(segno, 0, private.startptr, WalSegSz); + else if (!XLByteInSeg(private.startptr, segno, WalSegSz)) { fprintf(stderr, _("%s: start WAL location %X/%X is not inside file \"%s\"\n"), @@ -958,7 +1051,7 @@ main(int argc, char **argv) /* no second file specified, set end position */ if (!(optind + 1 < argc) && XLogRecPtrIsInvalid(private.endptr)) - XLogSegNoOffsetToRecPtr(segno + 1, 0, private.endptr); + XLogSegNoOffsetToRecPtr(segno + 1, 0, private.endptr, WalSegSz); /* parse ENDSEG if passed */ if (optind + 1 < argc) @@ -968,28 +1061,29 @@ main(int argc, char **argv) /* ignore directory, already have that */ split_path(argv[optind + 1], &directory, &fname); - fd = fuzzy_open_file(private.inpath, fname); + fd = open_file_in_directory(private.inpath, fname); if (fd < 0) fatal_error("could not open file \"%s\"", fname); 
close(fd); /* parse position from file */ - XLogFromFileName(fname, &private.timeline, &endsegno); + XLogFromFileName(fname, &private.timeline, &endsegno, WalSegSz); if (endsegno < segno) fatal_error("ENDSEG %s is before STARTSEG %s", argv[optind + 1], argv[optind]); if (XLogRecPtrIsInvalid(private.endptr)) - XLogSegNoOffsetToRecPtr(endsegno + 1, 0, private.endptr); + XLogSegNoOffsetToRecPtr(endsegno + 1, 0, private.endptr, + WalSegSz); /* set segno to endsegno for check of --end */ segno = endsegno; } - if (!XLByteInSeg(private.endptr, segno) && - private.endptr != (segno + 1) * XLogSegSize) + if (!XLByteInSeg(private.endptr, segno, WalSegSz) && + private.endptr != (segno + 1) * WalSegSz) { fprintf(stderr, _("%s: end WAL location %X/%X is not inside file \"%s\"\n"), @@ -1000,6 +1094,8 @@ main(int argc, char **argv) goto bad_argument; } } + else + identify_target_directory(&private, private.inpath, NULL); /* we don't know what to print */ if (XLogRecPtrIsInvalid(private.startptr)) @@ -1011,7 +1107,8 @@ main(int argc, char **argv) /* done with argument parsing, do the actual work */ /* we have everything we need, start reading */ - xlogreader_state = XLogReaderAllocate(XLogDumpReadPage, &private); + xlogreader_state = XLogReaderAllocate(WalSegSz, XLogDumpReadPage, + &private); if (!xlogreader_state) fatal_error("out of memory"); @@ -1028,7 +1125,8 @@ main(int argc, char **argv) * to the start of a record and also wasn't a pointer to the beginning of * a segment (e.g. we were used in file mode). */ - if (first_record != private.startptr && (private.startptr % XLogSegSize) != 0) + if (first_record != private.startptr && + XLogSegmentOffset(private.startptr, WalSegSz) != 0) printf(ngettext("first record is after %X/%X, at %X/%X, skipping over %u byte\n", "first record is after %X/%X, at %X/%X, skipping over %u bytes\n", (first_record - private.startptr)), |