diff options
Diffstat (limited to 'src')
39 files changed, 774 insertions, 379 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index ae832917ce2..bfd800bc16b 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1299,7 +1299,8 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len) XLogReaderState *xlogreader; char *errormsg; - xlogreader = XLogReaderAllocate(&read_local_xlog_page, NULL); + xlogreader = XLogReaderAllocate(wal_segment_size, &read_local_xlog_page, + NULL); if (!xlogreader) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 96ebf32a58a..051347163b5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -110,6 +110,8 @@ int wal_retrieve_retry_interval = 5000; bool XLOG_DEBUG = false; #endif +int wal_segment_size = DEFAULT_XLOG_SEG_SIZE; + /* * Number of WAL insertion locks to use. A higher value allows more insertions * to happen concurrently, but adds some CPU overhead to flushing the WAL, @@ -731,14 +733,16 @@ static ControlFileData *ControlFile = NULL; (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1)) /* - * These are the number of bytes in a WAL page and segment usable for WAL data. + * These are the number of bytes in a WAL page usable for WAL data. */ #define UsableBytesInPage (XLOG_BLCKSZ - SizeOfXLogShortPHD) -#define UsableBytesInSegment ((XLOG_SEG_SIZE / XLOG_BLCKSZ) * UsableBytesInPage - (SizeOfXLogLongPHD - SizeOfXLogShortPHD)) /* Convert min_wal_size_mb and max wal_size_mb to equivalent segment count */ -#define ConvertToXSegs(x) \ - (x / (XLOG_SEG_SIZE / (1024 * 1024))) +#define ConvertToXSegs(x, segsize) \ + (x / ((segsize) / (1024 * 1024))) + +/* The number of bytes in a WAL segment usable for WAL data. */ +static int UsableBytesInSegment; /* * Private, possibly out-of-date copy of shared LogwrtResult. 
@@ -1137,7 +1141,9 @@ XLogInsertRecord(XLogRecData *rdata, EndPos = StartPos + SizeOfXLogRecord; if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ) { - if (EndPos % XLOG_SEG_SIZE == EndPos % XLOG_BLCKSZ) + uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size); + + if (offset == EndPos % XLOG_BLCKSZ) EndPos += SizeOfXLogLongPHD; else EndPos += SizeOfXLogShortPHD; @@ -1170,7 +1176,7 @@ XLogInsertRecord(XLogRecData *rdata, appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len); if (!debug_reader) - debug_reader = XLogReaderAllocate(NULL, NULL); + debug_reader = XLogReaderAllocate(wal_segment_size, NULL, NULL); if (!debug_reader) { @@ -1296,7 +1302,7 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr) startbytepos = Insert->CurrBytePos; ptr = XLogBytePosToEndRecPtr(startbytepos); - if (ptr % XLOG_SEG_SIZE == 0) + if (XLogSegmentOffset(ptr, wal_segment_size) == 0) { SpinLockRelease(&Insert->insertpos_lck); *EndPos = *StartPos = ptr; @@ -1309,8 +1315,8 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr) *StartPos = XLogBytePosToRecPtr(startbytepos); *EndPos = XLogBytePosToEndRecPtr(endbytepos); - segleft = XLOG_SEG_SIZE - ((*EndPos) % XLOG_SEG_SIZE); - if (segleft != XLOG_SEG_SIZE) + segleft = wal_segment_size - XLogSegmentOffset(*EndPos, wal_segment_size); + if (segleft != wal_segment_size) { /* consume the rest of the segment */ *EndPos += segleft; @@ -1323,7 +1329,7 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr) *PrevPtr = XLogBytePosToRecPtr(prevbytepos); - Assert((*EndPos) % XLOG_SEG_SIZE == 0); + Assert(XLogSegmentOffset(*EndPos, wal_segment_size) == 0); Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos); Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos); Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos); @@ -1501,7 +1507,7 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD; /* 
skip over the page header */ - if (CurrPos % XLogSegSize == 0) + if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0) { CurrPos += SizeOfXLogLongPHD; currpos += SizeOfXLogLongPHD; @@ -1532,16 +1538,16 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, * allocated and zeroed in the WAL buffers so that when the caller (or * someone else) does XLogWrite(), it can really write out all the zeros. */ - if (isLogSwitch && CurrPos % XLOG_SEG_SIZE != 0) + if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0) { /* An xlog-switch record doesn't contain any data besides the header */ Assert(write_len == SizeOfXLogRecord); /* * We do this one page at a time, to make sure we don't deadlock - * against ourselves if wal_buffers < XLOG_SEG_SIZE. + * against ourselves if wal_buffers < wal_segment_size. */ - Assert(EndPos % XLogSegSize == 0); + Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0); /* Use up all the remaining space on the first page */ CurrPos += freespace; @@ -1866,10 +1872,10 @@ GetXLogBuffer(XLogRecPtr ptr) * the page header. 
*/ if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD && - ptr % XLOG_SEG_SIZE > XLOG_BLCKSZ) + XLogSegmentOffset(ptr, wal_segment_size) > XLOG_BLCKSZ) initializedUpto = ptr - SizeOfXLogShortPHD; else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD && - ptr % XLOG_SEG_SIZE < XLOG_BLCKSZ) + XLogSegmentOffset(ptr, wal_segment_size) < XLOG_BLCKSZ) initializedUpto = ptr - SizeOfXLogLongPHD; else initializedUpto = ptr; @@ -1939,7 +1945,7 @@ XLogBytePosToRecPtr(uint64 bytepos) seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD; } - XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, result); + XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, result, wal_segment_size); return result; } @@ -1985,7 +1991,7 @@ XLogBytePosToEndRecPtr(uint64 bytepos) seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD; } - XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, result); + XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, result, wal_segment_size); return result; } @@ -2001,9 +2007,9 @@ XLogRecPtrToBytePos(XLogRecPtr ptr) uint32 offset; uint64 result; - XLByteToSeg(ptr, fullsegs); + XLByteToSeg(ptr, fullsegs, wal_segment_size); - fullpages = (ptr % XLOG_SEG_SIZE) / XLOG_BLCKSZ; + fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ; offset = ptr % XLOG_BLCKSZ; if (fullpages == 0) @@ -2168,12 +2174,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic) /* * If first page of an XLOG segment file, make it a long header. */ - if ((NewPage->xlp_pageaddr % XLogSegSize) == 0) + if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0) { XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage; NewLongPage->xlp_sysid = ControlFile->system_identifier; - NewLongPage->xlp_seg_size = XLogSegSize; + NewLongPage->xlp_seg_size = wal_segment_size; NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ; NewPage->xlp_info |= XLP_LONG_HEADER; } @@ -2220,7 +2226,8 @@ CalculateCheckpointSegments(void) * number of segments consumed between checkpoints. 
*------- */ - target = (double) ConvertToXSegs(max_wal_size_mb) / (2.0 + CheckPointCompletionTarget); + target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) / + (2.0 + CheckPointCompletionTarget); /* round down */ CheckPointSegments = (int) target; @@ -2260,8 +2267,10 @@ XLOGfileslop(XLogRecPtr PriorRedoPtr) * correspond to. Always recycle enough segments to meet the minimum, and * remove enough segments to stay below the maximum. */ - minSegNo = PriorRedoPtr / XLOG_SEG_SIZE + ConvertToXSegs(min_wal_size_mb) - 1; - maxSegNo = PriorRedoPtr / XLOG_SEG_SIZE + ConvertToXSegs(max_wal_size_mb) - 1; + minSegNo = PriorRedoPtr / wal_segment_size + + ConvertToXSegs(min_wal_size_mb, wal_segment_size) - 1; + maxSegNo = PriorRedoPtr / wal_segment_size + + ConvertToXSegs(max_wal_size_mb, wal_segment_size) - 1; /* * Between those limits, recycle enough segments to get us through to the @@ -2290,7 +2299,8 @@ XLOGfileslop(XLogRecPtr PriorRedoPtr) /* add 10% for good measure. */ distance *= 1.10; - recycleSegNo = (XLogSegNo) ceil(((double) PriorRedoPtr + distance) / XLOG_SEG_SIZE); + recycleSegNo = (XLogSegNo) ceil(((double) PriorRedoPtr + distance) / + wal_segment_size); if (recycleSegNo < minSegNo) recycleSegNo = minSegNo; @@ -2314,7 +2324,7 @@ XLogCheckpointNeeded(XLogSegNo new_segno) { XLogSegNo old_segno; - XLByteToSeg(RedoRecPtr, old_segno); + XLByteToSeg(RedoRecPtr, old_segno, wal_segment_size); if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1)) return true; @@ -2392,7 +2402,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) LogwrtResult.Write = EndPtr; ispartialpage = WriteRqst.Write < LogwrtResult.Write; - if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo)) + if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo, + wal_segment_size)) { /* * Switch to new logfile segment. 
We cannot have any pending @@ -2401,7 +2412,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) Assert(npages == 0); if (openLogFile >= 0) XLogFileClose(); - XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo); + XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo, + wal_segment_size); /* create/use new log file */ use_existent = true; @@ -2412,7 +2424,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) /* Make sure we have the current logfile open */ if (openLogFile < 0) { - XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo); + XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo, + wal_segment_size); openLogFile = XLogFileOpen(openLogSegNo); openLogOff = 0; } @@ -2422,7 +2435,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) { /* first of group */ startidx = curridx; - startoffset = (LogwrtResult.Write - XLOG_BLCKSZ) % XLogSegSize; + startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ, + wal_segment_size); } npages++; @@ -2435,7 +2449,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) last_iteration = WriteRqst.Write <= LogwrtResult.Write; finishing_seg = !ispartialpage && - (startoffset + npages * XLOG_BLCKSZ) >= XLogSegSize; + (startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size; if (last_iteration || curridx == XLogCtl->XLogCacheBlck || @@ -2562,11 +2576,13 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) sync_method != SYNC_METHOD_OPEN_DSYNC) { if (openLogFile >= 0 && - !XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo)) + !XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo, + wal_segment_size)) XLogFileClose(); if (openLogFile < 0) { - XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo); + XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo, + wal_segment_size); openLogFile = XLogFileOpen(openLogSegNo); openLogOff = 0; } @@ -2982,7 +2998,8 @@ XLogBackgroundFlush(void) { if (openLogFile >= 0) { - if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo)) + if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo, + wal_segment_size)) { XLogFileClose(); } 
@@ -3161,7 +3178,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) int fd; int nbytes; - XLogFilePath(path, ThisTimeLineID, logsegno); + XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size); /* * Try to use existent file (checkpoint maker may have created it already) @@ -3215,7 +3232,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) */ zbuffer = (char *) MAXALIGN(zbuffer_raw); memset(zbuffer, 0, XLOG_BLCKSZ); - for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) + for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) { errno = 0; pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); @@ -3332,7 +3349,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, /* * Open the source file */ - XLogFilePath(path, srcTLI, srcsegno); + XLogFilePath(path, srcTLI, srcsegno, wal_segment_size); srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0); if (srcfd < 0) ereport(ERROR, @@ -3357,7 +3374,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, /* * Do the data copying. */ - for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(buffer)) + for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer)) { int nread; @@ -3467,7 +3484,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, char path[MAXPGPATH]; struct stat stat_buf; - XLogFilePath(path, ThisTimeLineID, *segno); + XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size); /* * We want to be sure that only one process does this at a time. 
@@ -3493,7 +3510,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, return false; } (*segno)++; - XLogFilePath(path, ThisTimeLineID, *segno); + XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size); } } @@ -3524,7 +3541,7 @@ XLogFileOpen(XLogSegNo segno) char path[MAXPGPATH]; int fd; - XLogFilePath(path, ThisTimeLineID, segno); + XLogFilePath(path, ThisTimeLineID, segno, wal_segment_size); fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method), S_IRUSR | S_IWUSR); @@ -3551,7 +3568,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli, char path[MAXPGPATH]; int fd; - XLogFileName(xlogfname, tli, segno); + XLogFileName(xlogfname, tli, segno, wal_segment_size); switch (source) { @@ -3563,7 +3580,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli, restoredFromArchive = RestoreArchivedFile(path, xlogfname, "RECOVERYXLOG", - XLogSegSize, + wal_segment_size, InRedo); if (!restoredFromArchive) return -1; @@ -3571,7 +3588,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli, case XLOG_FROM_PG_WAL: case XLOG_FROM_STREAM: - XLogFilePath(path, tli, segno); + XLogFilePath(path, tli, segno, wal_segment_size); restoredFromArchive = false; break; @@ -3690,7 +3707,7 @@ XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source) } /* Couldn't find it. 
For simplicity, complain about front timeline */ - XLogFilePath(path, recoveryTargetTLI, segno); + XLogFilePath(path, recoveryTargetTLI, segno, wal_segment_size); errno = ENOENT; ereport(emode, (errcode_for_file_access(), @@ -3741,9 +3758,11 @@ PreallocXlogFiles(XLogRecPtr endptr) XLogSegNo _logSegNo; int lf; bool use_existent; + uint64 offset; - XLByteToPrevSeg(endptr, _logSegNo); - if ((endptr - 1) % XLogSegSize >= (uint32) (0.75 * XLogSegSize)) + XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size); + offset = XLogSegmentOffset(endptr - 1, wal_segment_size); + if (offset >= (uint32) (0.75 * wal_segment_size)) { _logSegNo++; use_existent = true; @@ -3774,7 +3793,7 @@ CheckXLogRemoved(XLogSegNo segno, TimeLineID tli) { char filename[MAXFNAMELEN]; - XLogFileName(filename, tli, segno); + XLogFileName(filename, tli, segno, wal_segment_size); ereport(ERROR, (errcode_for_file_access(), errmsg("requested WAL segment %s has already been removed", @@ -3811,7 +3830,7 @@ UpdateLastRemovedPtr(char *filename) uint32 tli; XLogSegNo segno; - XLogFromFileName(filename, &tli, &segno); + XLogFromFileName(filename, &tli, &segno, wal_segment_size); SpinLockAcquire(&XLogCtl->info_lck); if (segno > XLogCtl->lastRemovedSegNo) @@ -3845,7 +3864,7 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr) * doesn't matter, we ignore that in the comparison. (During recovery, * ThisTimeLineID isn't set, so we can't use that.) 
*/ - XLogFileName(lastoff, 0, segno); + XLogFileName(lastoff, 0, segno, wal_segment_size); elog(DEBUG2, "attempting to remove WAL segments older than log file %s", lastoff); @@ -3906,7 +3925,7 @@ RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI) char switchseg[MAXFNAMELEN]; XLogSegNo endLogSegNo; - XLByteToPrevSeg(switchpoint, endLogSegNo); + XLByteToPrevSeg(switchpoint, endLogSegNo, wal_segment_size); xldir = AllocateDir(XLOGDIR); if (xldir == NULL) @@ -3918,7 +3937,7 @@ RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI) /* * Construct a filename of the last segment to be kept. */ - XLogFileName(switchseg, newTLI, endLogSegNo); + XLogFileName(switchseg, newTLI, endLogSegNo, wal_segment_size); elog(DEBUG2, "attempting to remove WAL segments newer than log file %s", switchseg); @@ -3974,7 +3993,7 @@ RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr) /* * Initialize info about where to try to recycle to. */ - XLByteToSeg(endptr, endlogSegNo); + XLByteToSeg(endptr, endlogSegNo, wal_segment_size); if (PriorRedoPtr == InvalidXLogRecPtr) recycleSegNo = endlogSegNo + 10; else @@ -4192,9 +4211,11 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode, XLogSegNo segno; int32 offset; - XLByteToSeg(xlogreader->latestPagePtr, segno); - offset = xlogreader->latestPagePtr % XLogSegSize; - XLogFileName(fname, xlogreader->readPageTLI, segno); + XLByteToSeg(xlogreader->latestPagePtr, segno, wal_segment_size); + offset = XLogSegmentOffset(xlogreader->latestPagePtr, + wal_segment_size); + XLogFileName(fname, xlogreader->readPageTLI, segno, + wal_segment_size); ereport(emode_for_corrupt_record(emode, RecPtr ? 
RecPtr : EndRecPtr), (errmsg("unexpected timeline ID %u in log segment %s, offset %u", @@ -4399,7 +4420,7 @@ WriteControlFile(void) ControlFile->blcksz = BLCKSZ; ControlFile->relseg_size = RELSEG_SIZE; ControlFile->xlog_blcksz = XLOG_BLCKSZ; - ControlFile->xlog_seg_size = XLOG_SEG_SIZE; + ControlFile->xlog_seg_size = wal_segment_size; ControlFile->nameDataLen = NAMEDATALEN; ControlFile->indexMaxKeys = INDEX_MAX_KEYS; @@ -4467,6 +4488,7 @@ ReadControlFile(void) { pg_crc32c crc; int fd; + static char wal_segsz_str[20]; /* * Read data... @@ -4569,13 +4591,6 @@ ReadControlFile(void) " but the server was compiled with XLOG_BLCKSZ %d.", ControlFile->xlog_blcksz, XLOG_BLCKSZ), errhint("It looks like you need to recompile or initdb."))); - if (ControlFile->xlog_seg_size != XLOG_SEG_SIZE) - ereport(FATAL, - (errmsg("database files are incompatible with server"), - errdetail("The database cluster was initialized with XLOG_SEG_SIZE %d," - " but the server was compiled with XLOG_SEG_SIZE %d.", - ControlFile->xlog_seg_size, XLOG_SEG_SIZE), - errhint("It looks like you need to recompile or initdb."))); if (ControlFile->nameDataLen != NAMEDATALEN) ereport(FATAL, (errmsg("database files are incompatible with server"), @@ -4637,6 +4652,32 @@ ReadControlFile(void) errhint("It looks like you need to recompile or initdb."))); #endif + wal_segment_size = ControlFile->xlog_seg_size; + + if (!IsValidWalSegSize(wal_segment_size)) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("WAL segment size must be a power of two between 1MB and 1GB, but the control file specifies %d bytes", + wal_segment_size))); + + snprintf(wal_segsz_str, sizeof(wal_segsz_str), "%d", wal_segment_size); + SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL, + PGC_S_OVERRIDE); + + /* check and update variables dependent on wal_segment_size */ + if (ConvertToXSegs(min_wal_size_mb, wal_segment_size) < 2) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + 
errmsg("\"min_wal_size\" must be at least twice \"wal_segment_size\"."))); + + if (ConvertToXSegs(max_wal_size_mb, wal_segment_size) < 2) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"max_wal_size\" must be at least twice \"wal_segment_size\"."))); + + UsableBytesInSegment = + (wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) - + (SizeOfXLogLongPHD - SizeOfXLogShortPHD); + + CalculateCheckpointSegments(); + /* Make the initdb settings visible as GUC variables, too */ SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no", PGC_INTERNAL, PGC_S_OVERRIDE); @@ -4757,8 +4798,8 @@ XLOGChooseNumBuffers(void) int xbuffers; xbuffers = NBuffers / 32; - if (xbuffers > XLOG_SEG_SIZE / XLOG_BLCKSZ) - xbuffers = XLOG_SEG_SIZE / XLOG_BLCKSZ; + if (xbuffers > (wal_segment_size / XLOG_BLCKSZ)) + xbuffers = (wal_segment_size / XLOG_BLCKSZ); if (xbuffers < 8) xbuffers = 8; return xbuffers; @@ -5034,7 +5075,7 @@ BootStrapXLOG(void) * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not * used, so that we can use 0/0 to mean "before any valid WAL segment". 
*/ - checkPoint.redo = XLogSegSize + SizeOfXLogLongPHD; + checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD; checkPoint.ThisTimeLineID = ThisTimeLineID; checkPoint.PrevTimeLineID = ThisTimeLineID; checkPoint.fullPageWrites = fullPageWrites; @@ -5065,10 +5106,10 @@ BootStrapXLOG(void) page->xlp_magic = XLOG_PAGE_MAGIC; page->xlp_info = XLP_LONG_HEADER; page->xlp_tli = ThisTimeLineID; - page->xlp_pageaddr = XLogSegSize; + page->xlp_pageaddr = wal_segment_size; longpage = (XLogLongPageHeader) page; longpage->xlp_sysid = sysidentifier; - longpage->xlp_seg_size = XLogSegSize; + longpage->xlp_seg_size = wal_segment_size; longpage->xlp_xlog_blcksz = XLOG_BLCKSZ; /* Insert the initial checkpoint record */ @@ -5550,8 +5591,8 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog) * they are the same, but if the switch happens exactly at a segment * boundary, startLogSegNo will be endLogSegNo + 1. */ - XLByteToPrevSeg(endOfLog, endLogSegNo); - XLByteToSeg(endOfLog, startLogSegNo); + XLByteToPrevSeg(endOfLog, endLogSegNo, wal_segment_size); + XLByteToSeg(endOfLog, startLogSegNo, wal_segment_size); /* * Initialize the starting WAL segment for the new timeline. If the switch @@ -5569,7 +5610,7 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog) * avoid emplacing a bogus file. */ XLogFileCopy(endLogSegNo, endTLI, endLogSegNo, - endOfLog % XLOG_SEG_SIZE); + XLogSegmentOffset(endOfLog, wal_segment_size)); } else { @@ -5593,7 +5634,7 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog) * Let's just make real sure there are not .ready or .done flags posted * for the new segment. 
*/ - XLogFileName(xlogfname, ThisTimeLineID, startLogSegNo); + XLogFileName(xlogfname, ThisTimeLineID, startLogSegNo, wal_segment_size); XLogArchiveCleanup(xlogfname); /* @@ -6390,7 +6431,7 @@ StartupXLOG(void) /* Set up XLOG reader facility */ MemSet(&private, 0, sizeof(XLogPageReadPrivate)); - xlogreader = XLogReaderAllocate(&XLogPageRead, &private); + xlogreader = XLogReaderAllocate(wal_segment_size, &XLogPageRead, &private); if (!xlogreader) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), @@ -7523,7 +7564,7 @@ StartupXLOG(void) XLogRecPtr pageBeginPtr; pageBeginPtr = EndOfLog - (EndOfLog % XLOG_BLCKSZ); - Assert(readOff == pageBeginPtr % XLogSegSize); + Assert(readOff == XLogSegmentOffset(pageBeginPtr, wal_segment_size)); firstIdx = XLogRecPtrToBufIdx(EndOfLog); @@ -7672,13 +7713,14 @@ StartupXLOG(void) * restored from the archive to begin with, it's expected to have a * .done file). */ - if (EndOfLog % XLOG_SEG_SIZE != 0 && XLogArchivingActive()) + if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 && + XLogArchivingActive()) { char origfname[MAXFNAMELEN]; XLogSegNo endLogSegNo; - XLByteToPrevSeg(EndOfLog, endLogSegNo); - XLogFileName(origfname, EndOfLogTLI, endLogSegNo); + XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size); + XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size); if (!XLogArchiveIsReadyOrDone(origfname)) { @@ -7686,7 +7728,7 @@ StartupXLOG(void) char partialfname[MAXFNAMELEN]; char partialpath[MAXPGPATH]; - XLogFilePath(origpath, EndOfLogTLI, endLogSegNo); + XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size); snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname); snprintf(partialpath, MAXPGPATH, "%s.partial", origpath); @@ -8192,6 +8234,9 @@ InitXLOGAccess(void) ThisTimeLineID = XLogCtl->ThisTimeLineID; Assert(ThisTimeLineID != 0 || IsBootstrapProcessingMode()); + /* set wal_segment_size */ + wal_segment_size = ControlFile->xlog_seg_size; + /* Use GetRedoRecPtr to copy the RedoRecPtr safely */ 
(void) GetRedoRecPtr(); /* Also update our copy of doPageWrites. */ @@ -8522,7 +8567,7 @@ UpdateCheckPointDistanceEstimate(uint64 nbytes) * more. * * When checkpoints are triggered by max_wal_size, this should converge to - * CheckpointSegments * XLOG_SEG_SIZE, + * CheckpointSegments * wal_segment_size, * * Note: This doesn't pay any attention to what caused the checkpoint. * Checkpoints triggered manually with CHECKPOINT command, or by e.g. @@ -8721,7 +8766,7 @@ CreateCheckPoint(int flags) freespace = INSERT_FREESPACE(curInsert); if (freespace == 0) { - if (curInsert % XLogSegSize == 0) + if (XLogSegmentOffset(curInsert, wal_segment_size) == 0) curInsert += SizeOfXLogLongPHD; else curInsert += SizeOfXLogShortPHD; @@ -8955,7 +9000,7 @@ CreateCheckPoint(int flags) /* Update the average distance between checkpoints. */ UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr); - XLByteToSeg(PriorRedoPtr, _logSegNo); + XLByteToSeg(PriorRedoPtr, _logSegNo, wal_segment_size); KeepLogSeg(recptr, &_logSegNo); _logSegNo--; RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, recptr); @@ -9283,7 +9328,7 @@ CreateRestartPoint(int flags) /* Update the average distance between checkpoints/restartpoints. 
*/ UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr); - XLByteToSeg(PriorRedoPtr, _logSegNo); + XLByteToSeg(PriorRedoPtr, _logSegNo, wal_segment_size); /* * Get the current end of xlog replayed or received, whichever is @@ -9378,7 +9423,7 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo) XLogSegNo segno; XLogRecPtr keep; - XLByteToSeg(recptr, segno); + XLByteToSeg(recptr, segno, wal_segment_size); keep = XLogGetReplicationSlotMinimumLSN(); /* compute limit for wal_keep_segments first */ @@ -9396,7 +9441,7 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo) { XLogSegNo slotSegNo; - XLByteToSeg(keep, slotSegNo); + XLByteToSeg(keep, slotSegNo, wal_segment_size); if (slotSegNo <= 0) segno = 1; @@ -10179,7 +10224,7 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno) { char *result = palloc(MAXFNAMELEN); - XLogFileName(result, tli, segno); + XLogFileName(result, tli, segno, wal_segment_size); return result; } @@ -10433,8 +10478,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, WALInsertLockRelease(); } while (!gotUniqueStartpoint); - XLByteToSeg(startpoint, _logSegNo); - XLogFileName(xlogfilename, starttli, _logSegNo); + XLByteToSeg(startpoint, _logSegNo, wal_segment_size); + XLogFileName(xlogfilename, starttli, _logSegNo, wal_segment_size); /* * Construct tablespace_map file @@ -10985,8 +11030,8 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) */ RequestXLogSwitch(false); - XLByteToPrevSeg(stoppoint, _logSegNo); - XLogFileName(stopxlogfilename, stoptli, _logSegNo); + XLByteToPrevSeg(stoppoint, _logSegNo, wal_segment_size); + XLogFileName(stopxlogfilename, stoptli, _logSegNo, wal_segment_size); /* Use the log timezone here, not the session timezone */ stamp_time = (pg_time_t) time(NULL); @@ -10997,9 +11042,9 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) /* * Write the backup history file */ - XLByteToSeg(startpoint, _logSegNo); + XLByteToSeg(startpoint, 
_logSegNo, wal_segment_size); BackupHistoryFilePath(histfilepath, stoptli, _logSegNo, - (uint32) (startpoint % XLogSegSize)); + startpoint, wal_segment_size); fp = AllocateFile(histfilepath, "w"); if (!fp) ereport(ERROR, @@ -11053,12 +11098,12 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) ((!backup_started_in_recovery && XLogArchivingActive()) || (backup_started_in_recovery && XLogArchivingAlways()))) { - XLByteToPrevSeg(stoppoint, _logSegNo); - XLogFileName(lastxlogfilename, stoptli, _logSegNo); + XLByteToPrevSeg(stoppoint, _logSegNo, wal_segment_size); + XLogFileName(lastxlogfilename, stoptli, _logSegNo, wal_segment_size); - XLByteToSeg(startpoint, _logSegNo); + XLByteToSeg(startpoint, _logSegNo, wal_segment_size); BackupHistoryFileName(histfilename, stoptli, _logSegNo, - (uint32) (startpoint % XLogSegSize)); + startpoint, wal_segment_size); seconds_before_warning = 60; waits = 0; @@ -11501,14 +11546,15 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, uint32 targetPageOff; XLogSegNo targetSegNo PG_USED_FOR_ASSERTS_ONLY; - XLByteToSeg(targetPagePtr, targetSegNo); - targetPageOff = targetPagePtr % XLogSegSize; + XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size); + targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size); /* * See if we need to switch to a new segment because the requested record * is not in the currently open one. 
*/ - if (readFile >= 0 && !XLByteInSeg(targetPagePtr, readSegNo)) + if (readFile >= 0 && + !XLByteInSeg(targetPagePtr, readSegNo, wal_segment_size)) { /* * Request a restartpoint if we've replayed too much xlog since the @@ -11529,7 +11575,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, readSource = 0; } - XLByteToSeg(targetPagePtr, readSegNo); + XLByteToSeg(targetPagePtr, readSegNo, wal_segment_size); retry: /* See if we need to retrieve more data */ @@ -11569,7 +11615,8 @@ retry: if (((targetPagePtr) / XLOG_BLCKSZ) != (receivedUpto / XLOG_BLCKSZ)) readLen = XLOG_BLCKSZ; else - readLen = receivedUpto % XLogSegSize - targetPageOff; + readLen = XLogSegmentOffset(receivedUpto, wal_segment_size) - + targetPageOff; } else readLen = XLOG_BLCKSZ; @@ -11580,7 +11627,7 @@ retry: { char fname[MAXFNAMELEN]; - XLogFileName(fname, curFileTLI, readSegNo); + XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size); ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen), (errcode_for_file_access(), errmsg("could not seek in log segment %s to offset %u: %m", @@ -11594,7 +11641,7 @@ retry: char fname[MAXFNAMELEN]; pgstat_report_wait_end(); - XLogFileName(fname, curFileTLI, readSegNo); + XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size); ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen), (errcode_for_file_access(), errmsg("could not read from log segment %s, offset %u: %m", diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index 7afb73579b0..c723c931d89 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -134,13 +134,14 @@ RestoreArchivedFile(char *path, const char *xlogfname, if (cleanupEnabled) { GetOldestRestartPoint(&restartRedoPtr, &restartTli); - XLByteToSeg(restartRedoPtr, restartSegNo); - XLogFileName(lastRestartPointFname, restartTli, restartSegNo); + XLByteToSeg(restartRedoPtr, restartSegNo, 
wal_segment_size); + XLogFileName(lastRestartPointFname, restartTli, restartSegNo, + wal_segment_size); /* we shouldn't need anything earlier than last restart point */ Assert(strcmp(lastRestartPointFname, xlogfname) <= 0); } else - XLogFileName(lastRestartPointFname, 0, 0L); + XLogFileName(lastRestartPointFname, 0, 0L, wal_segment_size); /* * construct the command to be executed @@ -347,8 +348,9 @@ ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal) * archive, though there is no requirement to do so. */ GetOldestRestartPoint(&restartRedoPtr, &restartTli); - XLByteToSeg(restartRedoPtr, restartSegNo); - XLogFileName(lastRestartPointFname, restartTli, restartSegNo); + XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size); + XLogFileName(lastRestartPointFname, restartTli, restartSegNo, + wal_segment_size); /* * construct the command to be executed @@ -547,7 +549,7 @@ XLogArchiveNotifySeg(XLogSegNo segno) { char xlog[MAXFNAMELEN]; - XLogFileName(xlog, ThisTimeLineID, segno); + XLogFileName(xlog, ThisTimeLineID, segno, wal_segment_size); XLogArchiveNotify(xlog); } diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index f9b49ba4984..443ccd64112 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -489,8 +489,8 @@ pg_walfile_name_offset(PG_FUNCTION_ARGS) /* * xlogfilename */ - XLByteToPrevSeg(locationpoint, xlogsegno); - XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno); + XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size); + XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno, wal_segment_size); values[0] = CStringGetTextDatum(xlogfilename); isnull[0] = false; @@ -498,7 +498,7 @@ pg_walfile_name_offset(PG_FUNCTION_ARGS) /* * offset */ - xrecoff = locationpoint % XLogSegSize; + xrecoff = XLogSegmentOffset(locationpoint, wal_segment_size); values[1] = UInt32GetDatum(xrecoff); isnull[1] = false; @@ -530,8 +530,8 @@ 
pg_walfile_name(PG_FUNCTION_ARGS) errmsg("recovery is in progress"), errhint("pg_walfile_name() cannot be executed during recovery."))); - XLByteToPrevSeg(locationpoint, xlogsegno); - XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno); + XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size); + XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno, wal_segment_size); PG_RETURN_TEXT_P(cstring_to_text(xlogfilename)); } diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 0781a7b9de9..b1f9b90c50f 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -64,7 +64,8 @@ report_invalid_record(XLogReaderState *state, const char *fmt,...) * Returns NULL if the xlogreader couldn't be allocated. */ XLogReaderState * -XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data) +XLogReaderAllocate(int wal_segment_size, XLogPageReadCB pagereadfunc, + void *private_data) { XLogReaderState *state; @@ -91,6 +92,7 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data) return NULL; } + state->wal_segment_size = wal_segment_size; state->read_page = pagereadfunc; /* system_identifier initialized to zeroes above */ state->private_data = private_data; @@ -466,8 +468,8 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg) (record->xl_info & ~XLR_INFO_MASK) == XLOG_SWITCH) { /* Pretend it extends to end of segment */ - state->EndRecPtr += XLogSegSize - 1; - state->EndRecPtr -= state->EndRecPtr % XLogSegSize; + state->EndRecPtr += state->wal_segment_size - 1; + state->EndRecPtr -= XLogSegmentOffset(state->EndRecPtr, state->wal_segment_size); } if (DecodeXLogRecord(state, record, errormsg)) @@ -509,8 +511,8 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) Assert((pageptr % XLOG_BLCKSZ) == 0); - XLByteToSeg(pageptr, targetSegNo); - targetPageOff = (pageptr % XLogSegSize); + XLByteToSeg(pageptr, targetSegNo, 
state->wal_segment_size); + targetPageOff = XLogSegmentOffset(pageptr, state->wal_segment_size); /* check whether we have all the requested data already */ if (targetSegNo == state->readSegNo && targetPageOff == state->readOff && @@ -719,16 +721,16 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr, Assert((recptr % XLOG_BLCKSZ) == 0); - XLByteToSeg(recptr, segno); - offset = recptr % XLogSegSize; + XLByteToSeg(recptr, segno, state->wal_segment_size); + offset = XLogSegmentOffset(recptr, state->wal_segment_size); - XLogSegNoOffsetToRecPtr(segno, offset, recaddr); + XLogSegNoOffsetToRecPtr(segno, offset, recaddr, state->wal_segment_size); if (hdr->xlp_magic != XLOG_PAGE_MAGIC) { char fname[MAXFNAMELEN]; - XLogFileName(fname, state->readPageTLI, segno); + XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size); report_invalid_record(state, "invalid magic number %04X in log segment %s, offset %u", @@ -742,7 +744,7 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr, { char fname[MAXFNAMELEN]; - XLogFileName(fname, state->readPageTLI, segno); + XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size); report_invalid_record(state, "invalid info bits %04X in log segment %s, offset %u", @@ -775,10 +777,10 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr, fhdrident_str, sysident_str); return false; } - else if (longhdr->xlp_seg_size != XLogSegSize) + else if (longhdr->xlp_seg_size != state->wal_segment_size) { report_invalid_record(state, - "WAL file is from different database system: incorrect XLOG_SEG_SIZE in page header"); + "WAL file is from different database system: incorrect segment size in page header"); return false; } else if (longhdr->xlp_xlog_blcksz != XLOG_BLCKSZ) @@ -792,7 +794,7 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr, { char fname[MAXFNAMELEN]; - XLogFileName(fname, state->readPageTLI, segno); + XLogFileName(fname, state->readPageTLI, segno, 
state->wal_segment_size); /* hmm, first page of file doesn't have a long header? */ report_invalid_record(state, @@ -807,7 +809,7 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr, { char fname[MAXFNAMELEN]; - XLogFileName(fname, state->readPageTLI, segno); + XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size); report_invalid_record(state, "unexpected pageaddr %X/%X in log segment %s, offset %u", @@ -832,7 +834,7 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr, { char fname[MAXFNAMELEN]; - XLogFileName(fname, state->readPageTLI, segno); + XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size); report_invalid_record(state, "out-of-sequence timeline ID %u (after %u) in log segment %s, offset %u", diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index bbae733d658..b11c94c9b68 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -654,7 +654,8 @@ XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum, * frontend). Probably these should be merged at some point. */ static void -XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) +XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr, + Size count) { char *p; XLogRecPtr recptr; @@ -666,6 +667,8 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) static TimeLineID sendTLI = 0; static uint32 sendOff = 0; + Assert(segsize == wal_segment_size); + p = buf; recptr = startptr; nbytes = count; @@ -676,10 +679,10 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) int segbytes; int readbytes; - startoff = recptr % XLogSegSize; + startoff = XLogSegmentOffset(recptr, segsize); /* Do we need to switch to a different xlog segment? 
*/ - if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo) || + if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, segsize) || sendTLI != tli) { char path[MAXPGPATH]; @@ -687,9 +690,9 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) if (sendFile >= 0) close(sendFile); - XLByteToSeg(recptr, sendSegNo); + XLByteToSeg(recptr, sendSegNo, segsize); - XLogFilePath(path, tli, sendSegNo); + XLogFilePath(path, tli, sendSegNo, segsize); sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); @@ -717,7 +720,7 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) { char path[MAXPGPATH]; - XLogFilePath(path, tli, sendSegNo); + XLogFilePath(path, tli, sendSegNo, segsize); ereport(ERROR, (errcode_for_file_access(), @@ -728,8 +731,8 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) } /* How many bytes are within this segment? */ - if (nbytes > (XLogSegSize - startoff)) - segbytes = XLogSegSize - startoff; + if (nbytes > (segsize - startoff)) + segbytes = segsize - startoff; else segbytes = nbytes; @@ -740,7 +743,7 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) { char path[MAXPGPATH]; - XLogFilePath(path, tli, sendSegNo); + XLogFilePath(path, tli, sendSegNo, segsize); ereport(ERROR, (errcode_for_file_access(), @@ -798,7 +801,8 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) void XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wantLength) { - const XLogRecPtr lastReadPage = state->readSegNo * XLogSegSize + state->readOff; + const XLogRecPtr lastReadPage = state->readSegNo * + state->wal_segment_size + state->readOff; Assert(wantPage != InvalidXLogRecPtr && wantPage % XLOG_BLCKSZ == 0); Assert(wantLength <= XLOG_BLCKSZ); @@ -842,7 +846,8 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa if (state->currTLIValidUntil != InvalidXLogRecPtr && state->currTLI != ThisTimeLineID && state->currTLI != 0 && - (wantPage + 
wantLength) / XLogSegSize < state->currTLIValidUntil / XLogSegSize) + ((wantPage + wantLength) / state->wal_segment_size) < + (state->currTLIValidUntil / state->wal_segment_size)) return; /* @@ -864,9 +869,11 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa */ List *timelineHistory = readTimeLineHistory(ThisTimeLineID); - XLogRecPtr endOfSegment = (((wantPage / XLogSegSize) + 1) * XLogSegSize) - 1; + XLogRecPtr endOfSegment = (((wantPage / state->wal_segment_size) + 1) + * state->wal_segment_size) - 1; - Assert(wantPage / XLogSegSize == endOfSegment / XLogSegSize); + Assert(wantPage / state->wal_segment_size == + endOfSegment / state->wal_segment_size); /* * Find the timeline of the last LSN on the segment containing @@ -1014,7 +1021,8 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, * as 'count', read the whole page anyway. It's guaranteed to be * zero-padded up to the page boundary if it's incomplete. */ - XLogRead(cur_page, *pageTLI, targetPagePtr, XLOG_BLCKSZ); + XLogRead(cur_page, state->wal_segment_size, *pageTLI, targetPagePtr, + XLOG_BLCKSZ); /* number of valid bytes in the buffer */ return count; diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 0453fd4ac10..b14e6f79244 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -19,6 +19,7 @@ #include "access/htup_details.h" #include "access/xact.h" +#include "access/xlog_internal.h" #include "bootstrap/bootstrap.h" #include "catalog/index.h" #include "catalog/pg_collation.h" @@ -222,7 +223,7 @@ AuxiliaryProcessMain(int argc, char *argv[]) /* If no -x argument, we are a CheckerProcess */ MyAuxProcType = CheckerProcess; - while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:x:-:")) != -1) + while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:x:X:-:")) != -1) { switch (flag) { @@ -257,6 +258,18 @@ AuxiliaryProcessMain(int argc, char *argv[]) case 'x': MyAuxProcType = atoi(optarg); break; + 
case 'X': + { + int WalSegSz = strtoul(optarg, NULL, 0); + + if (!IsValidWalSegSize(WalSegSz)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("-X requires a power of 2 value between 1MB and 1GB"))); + SetConfigOption("wal_segment_size", optarg, PGC_INTERNAL, + PGC_S_OVERRIDE); + } + break; case 'c': case '-': { diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index e48ebd557ff..7e0af10c4dc 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -624,7 +624,7 @@ CheckArchiveTimeout(void) * If the returned pointer points exactly to a segment boundary, * assume nothing happened. */ - if ((switchpoint % XLogSegSize) != 0) + if (XLogSegmentOffset(switchpoint, wal_segment_size) != 0) elog(DEBUG1, "write-ahead log switch forced (archive_timeout=%d)", XLogArchiveTimeout); } @@ -782,7 +782,8 @@ IsCheckpointOnSchedule(double progress) recptr = GetXLogReplayRecPtr(NULL); else recptr = GetInsertRecPtr(); - elapsed_xlogs = (((double) (recptr - ckpt_start_recptr)) / XLogSegSize) / CheckPointSegments; + elapsed_xlogs = (((double) (recptr - ckpt_start_recptr)) / + wal_segment_size) / CheckPointSegments; if (progress < elapsed_xlogs) { diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 12a16bd773d..c3b9bddc8fe 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -357,10 +357,10 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) * shouldn't be such files, but if there are, there's little harm in * including them. 
*/ - XLByteToSeg(startptr, startsegno); - XLogFileName(firstoff, ThisTimeLineID, startsegno); - XLByteToPrevSeg(endptr, endsegno); - XLogFileName(lastoff, ThisTimeLineID, endsegno); + XLByteToSeg(startptr, startsegno, wal_segment_size); + XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size); + XLByteToPrevSeg(endptr, endsegno, wal_segment_size); + XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size); dir = AllocateDir("pg_wal"); if (!dir) @@ -415,12 +415,13 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) * Sanity check: the first and last segment should cover startptr and * endptr, with no gaps in between. */ - XLogFromFileName(walFiles[0], &tli, &segno); + XLogFromFileName(walFiles[0], &tli, &segno, wal_segment_size); if (segno != startsegno) { char startfname[MAXFNAMELEN]; - XLogFileName(startfname, ThisTimeLineID, startsegno); + XLogFileName(startfname, ThisTimeLineID, startsegno, + wal_segment_size); ereport(ERROR, (errmsg("could not find WAL file \"%s\"", startfname))); } @@ -429,12 +430,13 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) XLogSegNo currsegno = segno; XLogSegNo nextsegno = segno + 1; - XLogFromFileName(walFiles[i], &tli, &segno); + XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size); if (!(nextsegno == segno || currsegno == segno)) { char nextfname[MAXFNAMELEN]; - XLogFileName(nextfname, ThisTimeLineID, nextsegno); + XLogFileName(nextfname, ThisTimeLineID, nextsegno, + wal_segment_size); ereport(ERROR, (errmsg("could not find WAL file \"%s\"", nextfname))); } @@ -443,7 +445,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) { char endfname[MAXFNAMELEN]; - XLogFileName(endfname, ThisTimeLineID, endsegno); + XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size); ereport(ERROR, (errmsg("could not find WAL file \"%s\"", endfname))); } @@ -457,7 +459,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) pgoff_t len = 0; snprintf(pathbuf, 
MAXPGPATH, XLOGDIR "/%s", walFiles[i]); - XLogFromFileName(walFiles[i], &tli, &segno); + XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size); fp = AllocateFile(pathbuf, "rb"); if (fp == NULL) @@ -479,7 +481,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathbuf))); - if (statbuf.st_size != XLogSegSize) + if (statbuf.st_size != wal_segment_size) { CheckXLogRemoved(segno, tli); ereport(ERROR, @@ -490,7 +492,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) /* send the WAL file itself */ _tarWriteHeader(pathbuf, NULL, &statbuf, false); - while ((cnt = fread(buf, 1, Min(sizeof(buf), XLogSegSize - len), fp)) > 0) + while ((cnt = fread(buf, 1, + Min(sizeof(buf), wal_segment_size - len), + fp)) > 0) { CheckXLogRemoved(segno, tli); /* Send the chunk as a CopyData message */ @@ -501,11 +505,11 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) len += cnt; throttle(cnt); - if (len == XLogSegSize) + if (len == wal_segment_size) break; } - if (len != XLogSegSize) + if (len != wal_segment_size) { CheckXLogRemoved(segno, tli); ereport(ERROR, @@ -513,7 +517,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) errmsg("unexpected WAL file size \"%s\"", walFiles[i]))); } - /* XLogSegSize is a multiple of 512, so no need for padding */ + /* wal_segment_size is a multiple of 512, so no need for padding */ FreeFile(fp); diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c index efb9785f25e..bca585fc27c 100644 --- a/src/backend/replication/logical/logical.c +++ b/src/backend/replication/logical/logical.c @@ -163,7 +163,7 @@ StartupDecodingContext(List *output_plugin_options, ctx->slot = slot; - ctx->reader = XLogReaderAllocate(read_page, ctx); + ctx->reader = XLogReaderAllocate(wal_segment_size, read_page, ctx); if (!ctx->reader) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), diff --git 
a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 657bafae579..68766d522d5 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2083,15 +2083,16 @@ ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) * store in segment in which it belongs by start lsn, don't split over * multiple segments tho */ - if (fd == -1 || !XLByteInSeg(change->lsn, curOpenSegNo)) + if (fd == -1 || + !XLByteInSeg(change->lsn, curOpenSegNo, wal_segment_size)) { XLogRecPtr recptr; if (fd != -1) CloseTransientFile(fd); - XLByteToSeg(change->lsn, curOpenSegNo); - XLogSegNoOffsetToRecPtr(curOpenSegNo, 0, recptr); + XLByteToSeg(change->lsn, curOpenSegNo, wal_segment_size); + XLogSegNoOffsetToRecPtr(curOpenSegNo, 0, recptr, wal_segment_size); /* * No need to care about TLIs here, only used during a single run, @@ -2319,7 +2320,7 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn, txn->nentries_mem = 0; Assert(dlist_is_empty(&txn->changes)); - XLByteToSeg(txn->final_lsn, last_segno); + XLByteToSeg(txn->final_lsn, last_segno, wal_segment_size); while (restored < max_changes_in_memory && *segno <= last_segno) { @@ -2334,11 +2335,11 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn, /* first time in */ if (*segno == 0) { - XLByteToSeg(txn->first_lsn, *segno); + XLByteToSeg(txn->first_lsn, *segno, wal_segment_size); } Assert(*segno != 0 || dlist_is_empty(&txn->changes)); - XLogSegNoOffsetToRecPtr(*segno, 0, recptr); + XLogSegNoOffsetToRecPtr(*segno, 0, recptr, wal_segment_size); /* * No need to care about TLIs here, only used during a single run, @@ -2575,8 +2576,8 @@ ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn) Assert(txn->first_lsn != InvalidXLogRecPtr); Assert(txn->final_lsn != InvalidXLogRecPtr); - XLByteToSeg(txn->first_lsn, first); - XLByteToSeg(txn->final_lsn, last); + 
XLByteToSeg(txn->first_lsn, first, wal_segment_size); + XLByteToSeg(txn->final_lsn, last, wal_segment_size); /* iterate over all possible filenames, and delete them */ for (cur = first; cur <= last; cur++) @@ -2584,7 +2585,7 @@ ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn) char path[MAXPGPATH]; XLogRecPtr recptr; - XLogSegNoOffsetToRecPtr(cur, 0, recptr); + XLogSegNoOffsetToRecPtr(cur, 0, recptr, wal_segment_size); sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap", NameStr(MyReplicationSlot->data.name), txn->xid, diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index a8a16f55e98..23de2577eff 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1039,7 +1039,7 @@ ReplicationSlotReserveWal(void) * the new restart_lsn above, so normally we should never need to loop * more than twice. */ - XLByteToSeg(slot->data.restart_lsn, segno); + XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size); if (XLogGetLastRemovedSegno() < segno) break; } diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index ea9d21a46b3..3474514adcc 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -613,7 +613,7 @@ WalReceiverMain(void) * Create .done file forcibly to prevent the streamed segment from * being archived later. 
*/ - XLogFileName(xlogfname, recvFileTLI, recvSegNo); + XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) XLogArchiveForceDone(xlogfname); else @@ -943,7 +943,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) { int segbytes; - if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo)) + if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) { bool use_existent; @@ -972,7 +972,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) * Create .done file forcibly to prevent the streamed segment * from being archived later. */ - XLogFileName(xlogfname, recvFileTLI, recvSegNo); + XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) XLogArchiveForceDone(xlogfname); else @@ -981,7 +981,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) recvFile = -1; /* Create/use new log file */ - XLByteToSeg(recptr, recvSegNo); + XLByteToSeg(recptr, recvSegNo, wal_segment_size); use_existent = true; recvFile = XLogFileInit(recvSegNo, &use_existent, true); recvFileTLI = ThisTimeLineID; @@ -989,10 +989,10 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) } /* Calculate the start offset of the received logs */ - startoff = recptr % XLogSegSize; + startoff = XLogSegmentOffset(recptr, wal_segment_size); - if (startoff + nbytes > XLogSegSize) - segbytes = XLogSegSize - startoff; + if (startoff + nbytes > wal_segment_size) + segbytes = wal_segment_size - startoff; else segbytes = nbytes; diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c index 8ed7254b5c6..78f8693ece7 100644 --- a/src/backend/replication/walreceiverfuncs.c +++ b/src/backend/replication/walreceiverfuncs.c @@ -233,8 +233,8 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo, * being created by XLOG streaming, which might cause trouble later on if * the segment is e.g archived. 
*/ - if (recptr % XLogSegSize != 0) - recptr -= recptr % XLogSegSize; + if (XLogSegmentOffset(recptr, wal_segment_size) != 0) + recptr -= XLogSegmentOffset(recptr, wal_segment_size); SpinLockAcquire(&walrcv->mutex); diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 1fbe8ed71b0..56999e93157 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -2316,9 +2316,9 @@ retry: int segbytes; int readbytes; - startoff = recptr % XLogSegSize; + startoff = XLogSegmentOffset(recptr, wal_segment_size); - if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo)) + if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, wal_segment_size)) { char path[MAXPGPATH]; @@ -2326,7 +2326,7 @@ retry: if (sendFile >= 0) close(sendFile); - XLByteToSeg(recptr, sendSegNo); + XLByteToSeg(recptr, sendSegNo, wal_segment_size); /*------- * When reading from a historic timeline, and there is a timeline @@ -2359,12 +2359,12 @@ retry: { XLogSegNo endSegNo; - XLByteToSeg(sendTimeLineValidUpto, endSegNo); + XLByteToSeg(sendTimeLineValidUpto, endSegNo, wal_segment_size); if (sendSegNo == endSegNo) curFileTimeLine = sendTimeLineNextTLI; } - XLogFilePath(path, curFileTimeLine, sendSegNo); + XLogFilePath(path, curFileTimeLine, sendSegNo, wal_segment_size); sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); if (sendFile < 0) @@ -2401,8 +2401,8 @@ retry: } /* How many bytes are within this segment? */ - if (nbytes > (XLogSegSize - startoff)) - segbytes = XLogSegSize - startoff; + if (nbytes > (wal_segment_size - startoff)) + segbytes = wal_segment_size - startoff; else segbytes = nbytes; @@ -2433,7 +2433,7 @@ retry: * read() succeeds in that case, but the data we tried to read might * already have been overwritten with new WAL records. 
*/ - XLByteToSeg(startptr, segno); + XLByteToSeg(startptr, segno, wal_segment_size); CheckXLogRemoved(segno, ThisTimeLineID); /* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index bc9f09a0868..e1fd446ce51 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -514,7 +514,6 @@ static int block_size; static int segment_size; static int wal_block_size; static bool data_checksums; -static int wal_segment_size; static bool integer_datetimes; static bool assert_enabled; @@ -714,9 +713,6 @@ typedef struct #if XLOG_BLCKSZ < 1024 || XLOG_BLCKSZ > (1024*1024) #error XLOG_BLCKSZ must be between 1KB and 1MB #endif -#if XLOG_SEG_SIZE < (1024*1024) || XLOG_SEG_SIZE > (1024*1024*1024) -#error XLOG_SEG_SIZE must be between 1MB and 1GB -#endif static const char *memory_units_hint = gettext_noop("Valid units for this parameter are \"kB\", \"MB\", \"GB\", and \"TB\"."); @@ -2264,7 +2260,8 @@ static struct config_int ConfigureNamesInt[] = GUC_UNIT_MB }, &min_wal_size_mb, - 5 * (XLOG_SEG_SIZE / (1024 * 1024)), 2, MAX_KILOBYTES, + DEFAULT_MIN_WAL_SEGS * (DEFAULT_XLOG_SEG_SIZE / (1024 * 1024)), + 2, MAX_KILOBYTES, NULL, NULL, NULL }, @@ -2275,7 +2272,8 @@ static struct config_int ConfigureNamesInt[] = GUC_UNIT_MB }, &max_wal_size_mb, - 64 * (XLOG_SEG_SIZE / (1024 * 1024)), 2, MAX_KILOBYTES, + DEFAULT_MAX_WAL_SEGS * (DEFAULT_XLOG_SEG_SIZE / (1024 * 1024)), + 2, MAX_KILOBYTES, NULL, assign_max_wal_size, NULL }, @@ -2637,14 +2635,14 @@ static struct config_int ConfigureNamesInt[] = { {"wal_segment_size", PGC_INTERNAL, PRESET_OPTIONS, - gettext_noop("Shows the number of pages per write ahead log segment."), + gettext_noop("Shows the size of write ahead log segments."), NULL, - GUC_UNIT_XBLOCKS | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE + GUC_UNIT_BYTE | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE }, &wal_segment_size, - (XLOG_SEG_SIZE / XLOG_BLCKSZ), - (XLOG_SEG_SIZE / XLOG_BLCKSZ), - (XLOG_SEG_SIZE / XLOG_BLCKSZ), + DEFAULT_XLOG_SEG_SIZE, + 
WalSegMinSize, + WalSegMaxSize, NULL, NULL, NULL }, diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index 0dbfe7f952d..bc2ca8731d7 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -141,8 +141,9 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) * Calculate name of the WAL file containing the latest checkpoint's REDO * start point. */ - XLByteToSeg(ControlFile->checkPointCopy.redo, segno); - XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID, segno); + XLByteToSeg(ControlFile->checkPointCopy.redo, segno, wal_segment_size); + XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID, + segno, wal_segment_size); /* Populate the values and null arrays */ values[0] = LSNGetDatum(ControlFile->checkPoint); diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 53aa006df53..8ba6b1d08a5 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -234,7 +234,7 @@ #max_wal_senders = 10 # max number of walsender processes # (change requires restart) -#wal_keep_segments = 0 # in logfile segments, 16MB each; 0 disables +#wal_keep_segments = 0 # in logfile segments; 0 disables #wal_sender_timeout = 60s # in milliseconds; 0 disables #max_replication_slots = 10 # max number of replication slots diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 9d1e5d789f6..1d4a138618b 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -59,6 +59,7 @@ #include "sys/mman.h" #endif +#include "access/xlog_internal.h" #include "catalog/catalog.h" #include "catalog/pg_authid.h" #include "catalog/pg_class.h" @@ -141,6 +142,8 @@ static bool sync_only = false; static bool show_setting = false; static bool data_checksums = false; static char *xlog_dir = NULL; +static char *str_wal_segment_size_mb = NULL; +static int wal_segment_size_mb; /* 
internal vars */ @@ -1000,6 +1003,23 @@ test_config_settings(void) } /* + * Calculate the default wal_size with a "pretty" unit. + */ +static char * +pretty_wal_size(int segment_count) +{ + int sz = wal_segment_size_mb * segment_count; + char *result = pg_malloc(10); + + if ((sz % 1024) == 0) + snprintf(result, 10, "%dGB", sz / 1024); + else + snprintf(result, 10, "%dMB", sz); + + return result; +} + +/* * set up all the config files */ static void @@ -1043,6 +1063,15 @@ setup_config(void) conflines = replace_token(conflines, "#port = 5432", repltok); #endif + /* set default max_wal_size and min_wal_size */ + snprintf(repltok, sizeof(repltok), "min_wal_size = %s", + pretty_wal_size(DEFAULT_MIN_WAL_SEGS)); + conflines = replace_token(conflines, "#min_wal_size = 80MB", repltok); + + snprintf(repltok, sizeof(repltok), "max_wal_size = %s", + pretty_wal_size(DEFAULT_MAX_WAL_SEGS)); + conflines = replace_token(conflines, "#max_wal_size = 1GB", repltok); + snprintf(repltok, sizeof(repltok), "lc_messages = '%s'", escape_quotes(lc_messages)); conflines = replace_token(conflines, "#lc_messages = 'C'", repltok); @@ -1352,8 +1381,9 @@ bootstrap_template1(void) unsetenv("PGCLIENTENCODING"); snprintf(cmd, sizeof(cmd), - "\"%s\" --boot -x1 %s %s %s", + "\"%s\" --boot -x1 -X %u %s %s %s", backend_exec, + wal_segment_size_mb * (1024 * 1024), data_checksums ? "-k" : "", boot_options, debug ? 
"-d 5" : ""); @@ -2293,6 +2323,7 @@ usage(const char *progname) printf(_(" -U, --username=NAME database superuser name\n")); printf(_(" -W, --pwprompt prompt for a password for the new superuser\n")); printf(_(" -X, --waldir=WALDIR location for the write-ahead log directory\n")); + printf(_(" --wal-segsize=SIZE size of wal segment size\n")); printf(_("\nLess commonly used options:\n")); printf(_(" -d, --debug generate lots of debugging output\n")); printf(_(" -k, --data-checksums use data page checksums\n")); @@ -2983,6 +3014,7 @@ main(int argc, char *argv[]) {"no-sync", no_argument, NULL, 'N'}, {"sync-only", no_argument, NULL, 'S'}, {"waldir", required_argument, NULL, 'X'}, + {"wal-segsize", required_argument, NULL, 12}, {"data-checksums", no_argument, NULL, 'k'}, {NULL, 0, NULL, 0} }; @@ -3116,6 +3148,9 @@ main(int argc, char *argv[]) case 'X': xlog_dir = pg_strdup(optarg); break; + case 12: + str_wal_segment_size_mb = pg_strdup(optarg); + break; default: /* getopt_long already emitted a complaint */ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), @@ -3178,6 +3213,27 @@ main(int argc, char *argv[]) check_need_password(authmethodlocal, authmethodhost); + /* set wal segment size */ + if (str_wal_segment_size_mb == NULL) + wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024); + else + { + char *endptr; + + /* check that the argument is a number */ + wal_segment_size_mb = strtol(str_wal_segment_size_mb, &endptr, 10); + + /* verify that wal segment size is valid */ + if (*endptr != '\0' || + !IsValidWalSegSize(wal_segment_size_mb * 1024 * 1024)) + { + fprintf(stderr, + _("%s: --wal-segsize must be a power of two between 1 and 1024\n"), + progname); + exit(1); + } + } + get_restricted_token(progname); setup_pgdata(); diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 51509d150e5..2d039d5a33a 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -26,6 +26,7 
@@ #include <zlib.h> #endif +#include "access/xlog_internal.h" #include "common/file_utils.h" #include "common/string.h" #include "fe_utils/string_utils.h" @@ -555,7 +556,7 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier) } param->startptr = ((uint64) hi) << 32 | lo; /* Round off to even segment position */ - param->startptr -= param->startptr % XLOG_SEG_SIZE; + param->startptr -= XLogSegmentOffset(param->startptr, WalSegSz); #ifndef WIN32 /* Create our background pipe */ @@ -2397,6 +2398,10 @@ main(int argc, char **argv) exit(1); } + /* determine remote server's xlog segment size */ + if (!RetrieveWalSegSize(conn)) + disconnect_and_exit(1); + /* Create pg_wal symlink, if required */ if (xlog_dir) { diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c index 710a33ab4d2..fbac0df93d8 100644 --- a/src/bin/pg_basebackup/pg_receivewal.c +++ b/src/bin/pg_basebackup/pg_receivewal.c @@ -191,7 +191,7 @@ close_destination_dir(DIR *dest_dir, char *dest_folder) /* * Determine starting location for streaming, based on any existing xlog * segments in the directory. We start at the end of the last one that is - * complete (size matches XLogSegSize), on the timeline with highest ID. + * complete (size matches wal segment size), on the timeline with highest ID. * * If there are no WAL files in the directory, returns InvalidXLogRecPtr. */ @@ -242,7 +242,7 @@ FindStreamingStart(uint32 *tli) /* * Looks like an xlog file. Parse its position. 
*/ - XLogFromFileName(dirent->d_name, &tli, &segno); + XLogFromFileName(dirent->d_name, &tli, &segno, WalSegSz); /* * Check that the segment has the right size, if it's supposed to be @@ -267,7 +267,7 @@ FindStreamingStart(uint32 *tli) disconnect_and_exit(1); } - if (statbuf.st_size != XLOG_SEG_SIZE) + if (statbuf.st_size != WalSegSz) { fprintf(stderr, _("%s: segment file \"%s\" has incorrect size %d, skipping\n"), @@ -308,7 +308,7 @@ FindStreamingStart(uint32 *tli) bytes_out = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; - if (bytes_out != XLOG_SEG_SIZE) + if (bytes_out != WalSegSz) { fprintf(stderr, _("%s: compressed segment file \"%s\" has incorrect uncompressed size %d, skipping\n"), @@ -349,7 +349,7 @@ FindStreamingStart(uint32 *tli) if (!high_ispartial) high_segno++; - XLogSegNoOffsetToRecPtr(high_segno, 0, high_ptr); + XLogSegNoOffsetToRecPtr(high_segno, 0, high_ptr, WalSegSz); *tli = high_tli; return high_ptr; @@ -410,7 +410,7 @@ StreamLog(void) /* * Always start streaming at the beginning of a segment */ - stream.startpos -= stream.startpos % XLOG_SEG_SIZE; + stream.startpos -= XLogSegmentOffset(stream.startpos, WalSegSz); /* * Start the replication @@ -689,6 +689,10 @@ main(int argc, char **argv) if (!RunIdentifySystem(conn, NULL, NULL, NULL, &db_name)) disconnect_and_exit(1); + /* determine remote server's xlog segment size */ + if (!RetrieveWalSegSize(conn)) + disconnect_and_exit(1); + /* * Check that there is a database associated with connection, none should * be defined in this context. 
diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index 888458f4a90..65931f64541 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -95,17 +95,17 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) ssize_t size; XLogSegNo segno; - XLByteToSeg(startpoint, segno); - XLogFileName(current_walfile_name, stream->timeline, segno); + XLByteToSeg(startpoint, segno, WalSegSz); + XLogFileName(current_walfile_name, stream->timeline, segno, WalSegSz); snprintf(fn, sizeof(fn), "%s%s", current_walfile_name, stream->partial_suffix ? stream->partial_suffix : ""); /* * When streaming to files, if an existing file exists we verify that it's - * either empty (just created), or a complete XLogSegSize segment (in - * which case it has been created and padded). Anything else indicates a - * corrupt file. + * either empty (just created), or a complete WalSegSz segment (in which + * case it has been created and padded). Anything else indicates a corrupt + * file. * * When streaming to tar, no file with this name will exist before, so we * never have to verify a size. @@ -120,7 +120,7 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) progname, fn, stream->walmethod->getlasterror()); return false; } - if (size == XLogSegSize) + if (size == WalSegSz) { /* Already padded file. 
Open it for use */ f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, 0); @@ -154,7 +154,7 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) ngettext("%s: write-ahead log file \"%s\" has %d byte, should be 0 or %d\n", "%s: write-ahead log file \"%s\" has %d bytes, should be 0 or %d\n", size), - progname, fn, (int) size, XLogSegSize); + progname, fn, (int) size, WalSegSz); return false; } /* File existed and was empty, so fall through and open */ @@ -162,7 +162,8 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) /* No file existed, so create one */ - f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, XLogSegSize); + f = stream->walmethod->open_for_write(current_walfile_name, + stream->partial_suffix, WalSegSz); if (f == NULL) { fprintf(stderr, @@ -203,7 +204,7 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos) if (stream->partial_suffix) { - if (currpos == XLOG_SEG_SIZE) + if (currpos == WalSegSz) r = stream->walmethod->close(walfile, CLOSE_NORMAL); else { @@ -231,7 +232,7 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos) * new node. This is in line with walreceiver.c always doing a * XLogArchiveForceDone() after a complete segment. */ - if (currpos == XLOG_SEG_SIZE && stream->mark_done) + if (currpos == WalSegSz && stream->mark_done) { /* writes error message if failed */ if (!mark_file_as_archived(stream, current_walfile_name)) @@ -676,7 +677,8 @@ ReceiveXlogStream(PGconn *conn, StreamCtl *stream) * start streaming at the beginning of a segment. 
*/ stream->timeline = newtimeline; - stream->startpos = stream->startpos - (stream->startpos % XLOG_SEG_SIZE); + stream->startpos = stream->startpos - + XLogSegmentOffset(stream->startpos, WalSegSz); continue; } else if (PQresultStatus(res) == PGRES_COMMAND_OK) @@ -1111,7 +1113,7 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, *blockpos = fe_recvint64(©buf[1]); /* Extract WAL location for this block */ - xlogoff = *blockpos % XLOG_SEG_SIZE; + xlogoff = XLogSegmentOffset(*blockpos, WalSegSz); /* * Verify that the initial location in the stream matches where we think @@ -1148,11 +1150,11 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, int bytes_to_write; /* - * If crossing a WAL boundary, only write up until we reach - * XLOG_SEG_SIZE. + * If crossing a WAL boundary, only write up until we reach wal + * segment size. */ - if (xlogoff + bytes_left > XLOG_SEG_SIZE) - bytes_to_write = XLOG_SEG_SIZE - xlogoff; + if (xlogoff + bytes_left > WalSegSz) + bytes_to_write = WalSegSz - xlogoff; else bytes_to_write = bytes_left; @@ -1182,7 +1184,7 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, xlogoff += bytes_to_write; /* Did we reach the end of a WAL segment? 
*/ - if (*blockpos % XLOG_SEG_SIZE == 0) + if (XLogSegmentOffset(*blockpos, WalSegSz) == 0) { if (!close_walfile(stream, *blockpos)) /* Error message written in close_walfile() */ diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c index 9d40744a349..df17f60596a 100644 --- a/src/bin/pg_basebackup/streamutil.c +++ b/src/bin/pg_basebackup/streamutil.c @@ -25,12 +25,18 @@ #include "receivelog.h" #include "streamutil.h" +#include "access/xlog_internal.h" #include "pqexpbuffer.h" #include "common/fe_memutils.h" #include "datatype/timestamp.h" #define ERRCODE_DUPLICATE_OBJECT "42710" +uint32 WalSegSz; + +/* SHOW command for replication connection was introduced in version 10 */ +#define MINIMUM_VERSION_FOR_SHOW_CMD 100000 + const char *progname; char *connection_string = NULL; char *dbhost = NULL; @@ -232,6 +238,76 @@ GetConnection(void) } /* + * From version 10, explicitly set wal segment size using SHOW wal_segment_size + * since ControlFile is not accessible here. 
+ */ +bool +RetrieveWalSegSize(PGconn *conn) +{ + PGresult *res; + char xlog_unit[3]; + int xlog_val, + multiplier = 1; + + /* check connection existence */ + Assert(conn != NULL); + + /* for previous versions set the default xlog seg size */ + if (PQserverVersion(conn) < MINIMUM_VERSION_FOR_SHOW_CMD) + { + WalSegSz = DEFAULT_XLOG_SEG_SIZE; + return true; + } + + res = PQexec(conn, "SHOW wal_segment_size"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, _("%s: could not send replication command \"%s\": %s\n"), + progname, "SHOW wal_segment_size", PQerrorMessage(conn)); + + PQclear(res); + return false; + } + if (PQntuples(res) != 1 || PQnfields(res) < 1) + { + fprintf(stderr, + _("%s: could not fetch WAL segment size: got %d rows and %d fields, expected %d rows and %d or more fields\n"), + progname, PQntuples(res), PQnfields(res), 1, 1); + + PQclear(res); + return false; + } + + /* fetch xlog value and unit from the result */ + if (sscanf(PQgetvalue(res, 0, 0), "%d%s", &xlog_val, xlog_unit) != 2) + { + fprintf(stderr, _("%s: WAL segment size could not be parsed\n"), + progname); + return false; + } + + /* set the multiplier based on unit to convert xlog_val to bytes */ + if (strcmp(xlog_unit, "MB") == 0) + multiplier = 1024 * 1024; + else if (strcmp(xlog_unit, "GB") == 0) + multiplier = 1024 * 1024 * 1024; + + /* convert and set WalSegSz */ + WalSegSz = xlog_val * multiplier; + + if (!IsValidWalSegSize(WalSegSz)) + { + fprintf(stderr, + _("%s: WAL segment size must be a power of two between 1MB and 1GB, but the remote server reported a value of %d bytes\n"), + progname, WalSegSz); + return false; + } + + PQclear(res); + return true; +} + +/* * Run IDENTIFY_SYSTEM through a given connection and give back to caller * some result information if requested: * - System identifier diff --git a/src/bin/pg_basebackup/streamutil.h b/src/bin/pg_basebackup/streamutil.h index 6f6878679fc..ec227712d56 100644 --- a/src/bin/pg_basebackup/streamutil.h +++ 
b/src/bin/pg_basebackup/streamutil.h @@ -24,6 +24,7 @@ extern char *dbuser; extern char *dbport; extern char *dbname; extern int dbgetpassword; +extern uint32 WalSegSz; /* Connection kept global so we can disconnect easily */ extern PGconn *conn; @@ -39,6 +40,7 @@ extern bool RunIdentifySystem(PGconn *conn, char **sysid, TimeLineID *starttli, XLogRecPtr *startpos, char **db_name); +extern bool RetrieveWalSegSize(PGconn *conn); extern TimestampTz feGetCurrentTimestamp(void); extern void feTimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs); diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 2ea893179ab..8cc4fb03419 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -99,6 +99,7 @@ main(int argc, char *argv[]) char xlogfilename[MAXFNAMELEN]; int c; int i; + int WalSegSz; set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_controldata")); @@ -164,6 +165,15 @@ main(int argc, char *argv[]) "Either the file is corrupt, or it has a different layout than this program\n" "is expecting. The results below are untrustworthy.\n\n")); + /* set wal segment size */ + WalSegSz = ControlFile->xlog_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + fprintf(stderr, + _("WARNING: WAL segment size specified, %d bytes, is not a power of two between 1MB and 1GB.\n" + "The file is corrupt and the results below are untrustworthy.\n"), + WalSegSz); + /* * This slightly-chintzy coding will work as long as the control file * timestamps are within the range of time_t; that should be the case in @@ -184,8 +194,9 @@ main(int argc, char *argv[]) * Calculate name of the WAL file containing the latest checkpoint's REDO * start point. 
*/ - XLByteToSeg(ControlFile->checkPointCopy.redo, segno); - XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID, segno); + XLByteToSeg(ControlFile->checkPointCopy.redo, segno, WalSegSz); + XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID, + segno, WalSegSz); /* * Format system_identifier and mock_authentication_nonce separately to diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index ac678317795..25d5547b36d 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -70,6 +70,7 @@ static MultiXactId set_mxid = 0; static MultiXactOffset set_mxoff = (MultiXactOffset) -1; static uint32 minXlogTli = 0; static XLogSegNo minXlogSegNo = 0; +static int WalSegSz; static void CheckDataVersion(void); static bool ReadControlFile(void); @@ -94,6 +95,7 @@ main(int argc, char *argv[]) char *endptr; char *endptr2; char *DataDir = NULL; + char *log_fname = NULL; int fd; set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_resetwal")); @@ -265,7 +267,12 @@ main(int argc, char *argv[]) fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } - XLogFromFileName(optarg, &minXlogTli, &minXlogSegNo); + + /* + * XLogFromFileName requires wal segment size which is not yet + * set. Hence wal details are set later on. + */ + log_fname = pg_strdup(optarg); break; default: @@ -350,6 +357,9 @@ main(int argc, char *argv[]) if (!ReadControlFile()) GuessControlValues(); + if (log_fname != NULL) + XLogFromFileName(log_fname, &minXlogTli, &minXlogSegNo, WalSegSz); + /* * Also look at existing segment files to set up newXlogSegNo */ @@ -573,18 +583,27 @@ ReadControlFile(void) offsetof(ControlFileData, crc)); FIN_CRC32C(crc); - if (EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc)) + if (!EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc)) { - /* Valid data... 
*/ - memcpy(&ControlFile, buffer, sizeof(ControlFile)); - return true; + /* We will use the data but treat it as guessed. */ + fprintf(stderr, + _("%s: pg_control exists but has invalid CRC; proceed with caution\n"), + progname); + guessed = true; } - fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"), - progname); - /* We will use the data anyway, but treat it as guessed. */ memcpy(&ControlFile, buffer, sizeof(ControlFile)); - guessed = true; + WalSegSz = ControlFile.xlog_seg_size; + + /* return false if WalSegSz is not valid */ + if (!IsValidWalSegSize(WalSegSz)) + { + fprintf(stderr, + _("%s: pg_control specifies invalid WAL segment size (%d bytes); proceed with caution \n"), + progname, WalSegSz); + guessed = true; + } + return true; } @@ -660,7 +679,7 @@ GuessControlValues(void) ControlFile.blcksz = BLCKSZ; ControlFile.relseg_size = RELSEG_SIZE; ControlFile.xlog_blcksz = XLOG_BLCKSZ; - ControlFile.xlog_seg_size = XLOG_SEG_SIZE; + ControlFile.xlog_seg_size = DEFAULT_XLOG_SEG_SIZE; ControlFile.nameDataLen = NAMEDATALEN; ControlFile.indexMaxKeys = INDEX_MAX_KEYS; ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE; @@ -773,7 +792,8 @@ PrintNewControlValues(void) /* This will be always printed in order to keep format same. */ printf(_("\n\nValues to be changed:\n\n")); - XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo); + XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, + newXlogSegNo, WalSegSz); printf(_("First log segment after reset: %s\n"), fname); if (set_mxid != 0) @@ -850,7 +870,7 @@ RewriteControlFile(void) * newXlogSegNo. 
*/ XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD, - ControlFile.checkPointCopy.redo); + ControlFile.checkPointCopy.redo, WalSegSz); ControlFile.checkPointCopy.time = (pg_time_t) time(NULL); ControlFile.state = DB_SHUTDOWNED; @@ -877,7 +897,7 @@ RewriteControlFile(void) ControlFile.max_locks_per_xact = 64; /* Now we can force the recorded xlog seg size to the right thing. */ - ControlFile.xlog_seg_size = XLogSegSize; + ControlFile.xlog_seg_size = WalSegSz; /* Contents are protected with a CRC */ INIT_CRC32C(ControlFile.crc); @@ -1014,7 +1034,7 @@ FindEndOfXLOG(void) * are in virgin territory. */ xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size; - newXlogSegNo = (xlogbytepos + XLogSegSize - 1) / XLogSegSize; + newXlogSegNo = (xlogbytepos + WalSegSz - 1) / WalSegSz; newXlogSegNo++; } @@ -1151,7 +1171,7 @@ WriteEmptyXLOG(void) page->xlp_pageaddr = ControlFile.checkPointCopy.redo - SizeOfXLogLongPHD; longpage = (XLogLongPageHeader) page; longpage->xlp_sysid = ControlFile.system_identifier; - longpage->xlp_seg_size = XLogSegSize; + longpage->xlp_seg_size = WalSegSz; longpage->xlp_xlog_blcksz = XLOG_BLCKSZ; /* Insert the initial checkpoint record */ @@ -1176,7 +1196,8 @@ WriteEmptyXLOG(void) record->xl_crc = crc; /* Write the first page */ - XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo); + XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, + newXlogSegNo, WalSegSz); unlink(path); @@ -1202,7 +1223,7 @@ WriteEmptyXLOG(void) /* Fill the rest of the file with zeroes */ memset(buffer, 0, XLOG_BLCKSZ); - for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) + for (nbytes = XLOG_BLCKSZ; nbytes < WalSegSz; nbytes += XLOG_BLCKSZ) { errno = 0; if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ) diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 1befdbdeea3..0fc71d2a135 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -69,7 +69,8 @@ 
extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, private.datadir = datadir; private.tliIndex = tliIndex; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead, + &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); @@ -122,7 +123,8 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex) private.datadir = datadir; private.tliIndex = tliIndex; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead, + &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); @@ -170,11 +172,17 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, * header in that case to find the next record. */ if (forkptr % XLOG_BLCKSZ == 0) - forkptr += (forkptr % XLogSegSize == 0) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD; + { + if (XLogSegmentOffset(forkptr, WalSegSz) == 0) + forkptr += SizeOfXLogLongPHD; + else + forkptr += SizeOfXLogShortPHD; + } private.datadir = datadir; private.tliIndex = tliIndex; - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead, + &private); if (xlogreader == NULL) pg_fatal("out of memory\n"); @@ -239,21 +247,22 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, XLogRecPtr targetSegEnd; XLogSegNo targetSegNo; - XLByteToSeg(targetPagePtr, targetSegNo); - XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd); - targetPageOff = targetPagePtr % XLogSegSize; + XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz); + XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd, WalSegSz); + targetPageOff = XLogSegmentOffset(targetPagePtr, WalSegSz); /* * See if we need to switch to a new segment because the requested record * is not in the currently open one. 
*/ - if (xlogreadfd >= 0 && !XLByteInSeg(targetPagePtr, xlogreadsegno)) + if (xlogreadfd >= 0 && + !XLByteInSeg(targetPagePtr, xlogreadsegno, WalSegSz)) { close(xlogreadfd); xlogreadfd = -1; } - XLByteToSeg(targetPagePtr, xlogreadsegno); + XLByteToSeg(targetPagePtr, xlogreadsegno, WalSegSz); if (xlogreadfd < 0) { @@ -272,7 +281,8 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, targetHistory[private->tliIndex].begin >= targetSegEnd) private->tliIndex--; - XLogFileName(xlogfname, targetHistory[private->tliIndex].tli, xlogreadsegno); + XLogFileName(xlogfname, targetHistory[private->tliIndex].tli, + xlogreadsegno, WalSegSz); snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s", private->datadir, xlogfname); diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 4bd1a759734..6079156e802 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -44,6 +44,7 @@ static ControlFileData ControlFile_target; static ControlFileData ControlFile_source; const char *progname; +int WalSegSz; /* Configuration options */ char *datadir_target = NULL; @@ -572,8 +573,8 @@ createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpo char buf[1000]; int len; - XLByteToSeg(startpoint, startsegno); - XLogFileName(xlogfilename, starttli, startsegno); + XLByteToSeg(startpoint, startsegno, WalSegSz); + XLogFileName(xlogfilename, starttli, startsegno, WalSegSz); /* * Construct backup label file @@ -631,6 +632,13 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size) memcpy(ControlFile, src, sizeof(ControlFileData)); + /* set and validate WalSegSz */ + WalSegSz = ControlFile->xlog_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + pg_fatal("WAL segment size must be a power of two between 1MB and 1GB, but the control file specifies %d bytes\n", + WalSegSz); + /* Additional checks on control file */ checkControlFile(ControlFile); } diff --git a/src/bin/pg_rewind/pg_rewind.h 
b/src/bin/pg_rewind/pg_rewind.h index 31353dd3548..7bec34ff55d 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -24,6 +24,7 @@ extern char *connstr_source; extern bool debug; extern bool showprogress; extern bool dry_run; +extern int WalSegSz; /* Target history */ extern TimeLineHistoryEntry *targetHistory; diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c index c607b5371c0..e6f7ef85579 100644 --- a/src/bin/pg_test_fsync/pg_test_fsync.c +++ b/src/bin/pg_test_fsync/pg_test_fsync.c @@ -64,7 +64,7 @@ static const char *progname; static int secs_per_test = 5; static int needs_unlink = 0; -static char full_buf[XLOG_SEG_SIZE], +static char full_buf[DEFAULT_XLOG_SEG_SIZE], *buf, *filename = FSYNC_FILENAME; static struct timeval start_t, @@ -209,7 +209,7 @@ prepare_buf(void) int ops; /* write random data into buffer */ - for (ops = 0; ops < XLOG_SEG_SIZE; ops++) + for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++) full_buf[ops] = random(); buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf); @@ -226,7 +226,8 @@ test_open(void) if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1) die("could not open output file"); needs_unlink = 1; - if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE) + if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) != + DEFAULT_XLOG_SEG_SIZE) die("write failed"); /* fsync now so that dirty buffers don't skew later tests */ diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh index f4556341f32..1bacf066aaf 100644 --- a/src/bin/pg_upgrade/test.sh +++ b/src/bin/pg_upgrade/test.sh @@ -20,7 +20,9 @@ unset MAKELEVEL # Run a given "initdb" binary and overlay the regression testing # authentication configuration. standard_initdb() { - "$1" -N + # To increase coverage of non-standard segment size without + # increase test runtime, run these tests with a lower setting. 
+ "$1" -N --wal-segsize 1 if [ -n "$TEMP_CONFIG" -a -r "$TEMP_CONFIG" ] then cat "$TEMP_CONFIG" >> "$PGDATA/postgresql.conf" diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 5aa3233bd3d..53eca4c8e02 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -13,6 +13,7 @@ #include "postgres.h" #include <dirent.h> +#include <sys/stat.h> #include <unistd.h> #include "access/xlogreader.h" @@ -26,6 +27,8 @@ static const char *progname; +static int WalSegSz; + typedef struct XLogDumpPrivate { TimeLineID timeline; @@ -144,77 +147,166 @@ split_path(const char *path, char **dir, char **fname) } /* - * Try to find the file in several places: - * if directory == NULL: - * fname - * XLOGDIR / fname - * $PGDATA / XLOGDIR / fname - * else - * directory / fname - * directory / XLOGDIR / fname + * Open the file in the valid target directory. * * return a read only fd */ static int -fuzzy_open_file(const char *directory, const char *fname) +open_file_in_directory(const char *directory, const char *fname) { int fd = -1; char fpath[MAXPGPATH]; - if (directory == NULL) + Assert(directory != NULL); + + snprintf(fpath, MAXPGPATH, "%s/%s", directory, fname); + fd = open(fpath, O_RDONLY | PG_BINARY, 0); + + if (fd < 0 && errno != ENOENT) + fatal_error("could not open file \"%s\": %s", + fname, strerror(errno)); + return fd; +} + +/* + * Try to find fname in the given directory. Returns true if it is found, + * false otherwise. If fname is NULL, search the complete directory for any + * file with a valid WAL file name. If file is successfully opened, set the + * wal segment size. + */ +static bool +search_directory(char *directory, char *fname) +{ + int fd = -1; + DIR *xldir; + + /* open file if valid filename is provided */ + if (fname != NULL) + fd = open_file_in_directory(directory, fname); + + /* + * A valid file name is not passed, so search the complete directory. 
If + * we find any file whose name is a valid WAL file name then try to open + * it. If we cannot open it, bail out. + */ + else if ((xldir = opendir(directory)) != NULL) + { + struct dirent *xlde; + + while ((xlde = readdir(xldir)) != NULL) + { + if (IsXLogFileName(xlde->d_name)) + { + fd = open_file_in_directory(directory, xlde->d_name); + fname = xlde->d_name; + break; + } + } + + closedir(xldir); + } + + /* set WalSegSz if file is successfully opened */ + if (fd >= 0) + { + char buf[XLOG_BLCKSZ]; + + if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ) + { + XLogLongPageHeader longhdr = (XLogLongPageHeader) buf; + + WalSegSz = longhdr->xlp_seg_size; + + if (!IsValidWalSegSize(WalSegSz)) + fatal_error("WAL segment size must be a power of two between 1MB and 1GB, but the WAL file \"%s\" header specifies %d bytes", + fname, WalSegSz); + } + else + { + if (errno != 0) + fatal_error("could not read file \"%s\": %s", + fname, strerror(errno)); + else + fatal_error("not enough data in file \"%s\"", fname); + } + close(fd); + return true; + } + + return false; +} + +/* + * Identify the target directory and set WalSegSz. + * + * Try to find the file in several places: + * if directory != NULL: + * directory / + * directory / XLOGDIR / + * else + * . + * XLOGDIR / + * $PGDATA / XLOGDIR / + * + * Set the valid target directory in private->inpath. 
+ */ +static void +identify_target_directory(XLogDumpPrivate *private, char *directory, + char *fname) +{ + char fpath[MAXPGPATH]; + + if (directory != NULL) + { + if (search_directory(directory, fname)) + { + private->inpath = strdup(directory); + return; + } + + /* directory / XLOGDIR */ + snprintf(fpath, MAXPGPATH, "%s/%s", directory, XLOGDIR); + if (search_directory(fpath, fname)) + { + private->inpath = strdup(fpath); + return; + } + } + else { const char *datadir; - /* fname */ - fd = open(fname, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; - - /* XLOGDIR / fname */ - snprintf(fpath, MAXPGPATH, "%s/%s", - XLOGDIR, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; + /* current directory */ + if (search_directory(".", fname)) + { + private->inpath = strdup("."); + return; + } + /* XLOGDIR */ + if (search_directory(XLOGDIR, fname)) + { + private->inpath = strdup(XLOGDIR); + return; + } datadir = getenv("PGDATA"); - /* $PGDATA / XLOGDIR / fname */ + /* $PGDATA / XLOGDIR */ if (datadir != NULL) { - snprintf(fpath, MAXPGPATH, "%s/%s/%s", - datadir, XLOGDIR, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; + snprintf(fpath, MAXPGPATH, "%s/%s", datadir, XLOGDIR); + if (search_directory(fpath, fname)) + { + private->inpath = strdup(fpath); + return; + } } } + + /* could not locate WAL file */ + if (fname) + fatal_error("could not locate WAL file \"%s\"", fname); else - { - /* directory / fname */ - snprintf(fpath, MAXPGPATH, "%s/%s", - directory, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; - - /* directory / XLOGDIR / fname */ - snprintf(fpath, MAXPGPATH, "%s/%s/%s", - directory, XLOGDIR, fname); - fd = open(fpath, O_RDONLY | PG_BINARY, 0); - if (fd < 0 && 
errno != ENOENT) - return -1; - else if (fd >= 0) - return fd; - } - return -1; + fatal_error("could not find any WAL file"); } /* @@ -244,9 +336,9 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, int segbytes; int readbytes; - startoff = recptr % XLogSegSize; + startoff = XLogSegmentOffset(recptr, WalSegSz); - if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo)) + if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, WalSegSz)) { char fname[MAXFNAMELEN]; int tries; @@ -255,9 +347,9 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, if (sendFile >= 0) close(sendFile); - XLByteToSeg(recptr, sendSegNo); + XLByteToSeg(recptr, sendSegNo, WalSegSz); - XLogFileName(fname, timeline_id, sendSegNo); + XLogFileName(fname, timeline_id, sendSegNo, WalSegSz); /* * In follow mode there is a short period of time after the server @@ -267,7 +359,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, */ for (tries = 0; tries < 10; tries++) { - sendFile = fuzzy_open_file(directory, fname); + sendFile = open_file_in_directory(directory, fname); if (sendFile >= 0) break; if (errno == ENOENT) @@ -298,7 +390,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, int err = errno; char fname[MAXPGPATH]; - XLogFileName(fname, timeline_id, sendSegNo); + XLogFileName(fname, timeline_id, sendSegNo, WalSegSz); fatal_error("could not seek in log file %s to offset %u: %s", fname, startoff, strerror(err)); @@ -307,8 +399,8 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, } /* How many bytes are within this segment? 
*/ - if (nbytes > (XLogSegSize - startoff)) - segbytes = XLogSegSize - startoff; + if (nbytes > (WalSegSz - startoff)) + segbytes = WalSegSz - startoff; else segbytes = nbytes; @@ -318,7 +410,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id, int err = errno; char fname[MAXPGPATH]; - XLogFileName(fname, timeline_id, sendSegNo); + XLogFileName(fname, timeline_id, sendSegNo, WalSegSz); fatal_error("could not read from log file %s, offset %u, length %d: %s", fname, sendOff, segbytes, strerror(err)); @@ -935,17 +1027,18 @@ main(int argc, char **argv) private.inpath, strerror(errno)); } - fd = fuzzy_open_file(private.inpath, fname); + identify_target_directory(&private, private.inpath, fname); + fd = open_file_in_directory(private.inpath, fname); if (fd < 0) fatal_error("could not open file \"%s\"", fname); close(fd); /* parse position from file */ - XLogFromFileName(fname, &private.timeline, &segno); + XLogFromFileName(fname, &private.timeline, &segno, WalSegSz); if (XLogRecPtrIsInvalid(private.startptr)) - XLogSegNoOffsetToRecPtr(segno, 0, private.startptr); - else if (!XLByteInSeg(private.startptr, segno)) + XLogSegNoOffsetToRecPtr(segno, 0, private.startptr, WalSegSz); + else if (!XLByteInSeg(private.startptr, segno, WalSegSz)) { fprintf(stderr, _("%s: start WAL location %X/%X is not inside file \"%s\"\n"), @@ -958,7 +1051,7 @@ main(int argc, char **argv) /* no second file specified, set end position */ if (!(optind + 1 < argc) && XLogRecPtrIsInvalid(private.endptr)) - XLogSegNoOffsetToRecPtr(segno + 1, 0, private.endptr); + XLogSegNoOffsetToRecPtr(segno + 1, 0, private.endptr, WalSegSz); /* parse ENDSEG if passed */ if (optind + 1 < argc) @@ -968,28 +1061,29 @@ main(int argc, char **argv) /* ignore directory, already have that */ split_path(argv[optind + 1], &directory, &fname); - fd = fuzzy_open_file(private.inpath, fname); + fd = open_file_in_directory(private.inpath, fname); if (fd < 0) fatal_error("could not open file \"%s\"", fname); 
close(fd); /* parse position from file */ - XLogFromFileName(fname, &private.timeline, &endsegno); + XLogFromFileName(fname, &private.timeline, &endsegno, WalSegSz); if (endsegno < segno) fatal_error("ENDSEG %s is before STARTSEG %s", argv[optind + 1], argv[optind]); if (XLogRecPtrIsInvalid(private.endptr)) - XLogSegNoOffsetToRecPtr(endsegno + 1, 0, private.endptr); + XLogSegNoOffsetToRecPtr(endsegno + 1, 0, private.endptr, + WalSegSz); /* set segno to endsegno for check of --end */ segno = endsegno; } - if (!XLByteInSeg(private.endptr, segno) && - private.endptr != (segno + 1) * XLogSegSize) + if (!XLByteInSeg(private.endptr, segno, WalSegSz) && + private.endptr != (segno + 1) * WalSegSz) { fprintf(stderr, _("%s: end WAL location %X/%X is not inside file \"%s\"\n"), @@ -1000,6 +1094,8 @@ main(int argc, char **argv) goto bad_argument; } } + else + identify_target_directory(&private, private.inpath, NULL); /* we don't know what to print */ if (XLogRecPtrIsInvalid(private.startptr)) @@ -1011,7 +1107,8 @@ main(int argc, char **argv) /* done with argument parsing, do the actual work */ /* we have everything we need, start reading */ - xlogreader_state = XLogReaderAllocate(XLogDumpReadPage, &private); + xlogreader_state = XLogReaderAllocate(WalSegSz, XLogDumpReadPage, + &private); if (!xlogreader_state) fatal_error("out of memory"); @@ -1028,7 +1125,8 @@ main(int argc, char **argv) * to the start of a record and also wasn't a pointer to the beginning of * a segment (e.g. we were used in file mode). 
*/ - if (first_record != private.startptr && (private.startptr % XLogSegSize) != 0) + if (first_record != private.startptr && + XLogSegmentOffset(private.startptr, WalSegSz) != 0) printf(ngettext("first record is after %X/%X, at %X/%X, skipping over %u byte\n", "first record is after %X/%X, at %X/%X, skipping over %u bytes\n", (first_record - private.startptr)), diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 7213af0e813..0f2b8bd53fb 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -94,6 +94,7 @@ extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd; extern bool reachedConsistency; /* these variables are GUC parameters related to XLOG */ +extern int wal_segment_size; extern int min_wal_size_mb; extern int max_wal_size_mb; extern int wal_keep_segments; diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 7453dcbd0eb..22a8e63658e 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -85,15 +85,27 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; #define XLogPageHeaderSize(hdr) \ (((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD) -/* - * The XLOG is split into WAL segments (physical files) of the size indicated - * by XLOG_SEG_SIZE. 
- */ -#define XLogSegSize ((uint32) XLOG_SEG_SIZE) -#define XLogSegmentsPerXLogId (UINT64CONST(0x100000000) / XLOG_SEG_SIZE) +/* wal_segment_size can range from 1MB to 1GB */ +#define WalSegMinSize 1024 * 1024 +#define WalSegMaxSize 1024 * 1024 * 1024 +/* default number of min and max wal segments */ +#define DEFAULT_MIN_WAL_SEGS 5 +#define DEFAULT_MAX_WAL_SEGS 64 + +/* check that the given size is a valid wal_segment_size */ +#define IsPowerOf2(x) (x > 0 && ((x) & ((x)-1)) == 0) +#define IsValidWalSegSize(size) \ + (IsPowerOf2(size) && \ + ((size) >= WalSegMinSize && (size) <= WalSegMaxSize)) + +#define XLogSegmentsPerXLogId(wal_segsz_bytes) \ + (UINT64CONST(0x100000000) / (wal_segsz_bytes)) + +#define XLogSegNoOffsetToRecPtr(segno, offset, dest, wal_segsz_bytes) \ + (dest) = (segno) * (wal_segsz_bytes) + (offset) -#define XLogSegNoOffsetToRecPtr(segno, offset, dest) \ - (dest) = (segno) * XLOG_SEG_SIZE + (offset) +#define XLogSegmentOffset(xlogptr, wal_segsz_bytes) \ + ((xlogptr) & ((wal_segsz_bytes) - 1)) /* * Compute a segment number from an XLogRecPtr. @@ -103,11 +115,11 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; * for deciding which segment to write given a pointer to a record end, * for example. */ -#define XLByteToSeg(xlrp, logSegNo) \ - logSegNo = (xlrp) / XLogSegSize +#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes) \ + logSegNo = (xlrp) / (wal_segsz_bytes) -#define XLByteToPrevSeg(xlrp, logSegNo) \ - logSegNo = ((xlrp) - 1) / XLogSegSize +#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes) \ + logSegNo = ((xlrp) - 1) / (wal_segsz_bytes) /* * Is an XLogRecPtr within a particular XLOG segment? @@ -115,11 +127,11 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; * For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg, * a boundary byte is taken to be in the previous segment. 
*/ -#define XLByteInSeg(xlrp, logSegNo) \ - (((xlrp) / XLogSegSize) == (logSegNo)) +#define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes) \ + (((xlrp) / (wal_segsz_bytes)) == (logSegNo)) -#define XLByteInPrevSeg(xlrp, logSegNo) \ - ((((xlrp) - 1) / XLogSegSize) == (logSegNo)) +#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes) \ + ((((xlrp) - 1) / (wal_segsz_bytes)) == (logSegNo)) /* Check if an XLogRecPtr value is in a plausible range */ #define XRecOffIsValid(xlrp) \ @@ -140,10 +152,10 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; /* Length of XLog file name */ #define XLOG_FNAME_LEN 24 -#define XLogFileName(fname, tli, logSegNo) \ +#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes) \ snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, \ - (uint32) ((logSegNo) / XLogSegmentsPerXLogId), \ - (uint32) ((logSegNo) % XLogSegmentsPerXLogId)) + (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes))) #define XLogFileNameById(fname, tli, log, seg) \ snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, log, seg) @@ -162,18 +174,18 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ strcmp((fname) + XLOG_FNAME_LEN, ".partial") == 0) -#define XLogFromFileName(fname, tli, logSegNo) \ +#define XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \ do { \ uint32 log; \ uint32 seg; \ sscanf(fname, "%08X%08X%08X", tli, &log, &seg); \ - *logSegNo = (uint64) log * XLogSegmentsPerXLogId + seg; \ + *logSegNo = (uint64) log * XLogSegmentsPerXLogId(wal_segsz_bytes) + seg; \ } while (0) -#define XLogFilePath(path, tli, logSegNo) \ - snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X", tli, \ - (uint32) ((logSegNo) / XLogSegmentsPerXLogId), \ - (uint32) ((logSegNo) % XLogSegmentsPerXLogId)) +#define XLogFilePath(path, tli, logSegNo, wal_segsz_bytes) \ + snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X", tli, \ + (uint32) ((logSegNo) / 
XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes))) #define TLHistoryFileName(fname, tli) \ snprintf(fname, MAXFNAMELEN, "%08X.history", tli) @@ -189,20 +201,22 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; #define StatusFilePath(path, xlog, suffix) \ snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix) -#define BackupHistoryFileName(fname, tli, logSegNo, offset) \ +#define BackupHistoryFileName(fname, tli, logSegNo, startpoint, wal_segsz_bytes) \ snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, \ - (uint32) ((logSegNo) / XLogSegmentsPerXLogId), \ - (uint32) ((logSegNo) % XLogSegmentsPerXLogId), offset) + (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) (XLogSegmentOffset(startpoint, wal_segsz_bytes))) #define IsBackupHistoryFileName(fname) \ (strlen(fname) > XLOG_FNAME_LEN && \ strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ strcmp((fname) + strlen(fname) - strlen(".backup"), ".backup") == 0) -#define BackupHistoryFilePath(path, tli, logSegNo, offset) \ +#define BackupHistoryFilePath(path, tli, logSegNo, startpoint, wal_segsz_bytes) \ snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X.%08X.backup", tli, \ - (uint32) ((logSegNo) / XLogSegmentsPerXLogId), \ - (uint32) ((logSegNo) % XLogSegmentsPerXLogId), offset) + (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes)), \ + (uint32) (XLogSegmentOffset((startpoint), wal_segsz_bytes))) /* * Information logged when we detect a change in one of the parameters diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 76715983349..3a9ebd43543 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -74,6 +74,11 @@ struct XLogReaderState */ /* + * Segment size of the to-be-parsed data 
(mandatory). + */ + int wal_segment_size; + + /* * Data input callback (mandatory). * * This callback shall read at least reqLen valid bytes of the xlog page @@ -189,7 +194,8 @@ struct XLogReaderState }; /* Get a new XLogReader */ -extern XLogReaderState *XLogReaderAllocate(XLogPageReadCB pagereadfunc, +extern XLogReaderState *XLogReaderAllocate(int wal_segment_size, + XLogPageReadCB pagereadfunc, void *private_data); /* Free an XLogReader */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 1ec03caf5fb..3fed3b64318 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -21,7 +21,7 @@ /* Version identifier for this pg_control format */ -#define PG_CONTROL_VERSION 1002 +#define PG_CONTROL_VERSION 1003 /* Nonce key length, see below */ #define MOCK_AUTH_NONCE_LEN 32 diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 579d195663c..85deb29d831 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -895,11 +895,6 @@ */ #undef XLOG_BLCKSZ -/* XLOG_SEG_SIZE is the size of a single WAL file. This must be a power of 2 - and larger than XLOG_BLCKSZ (preferably, a great deal larger than - XLOG_BLCKSZ). Changing XLOG_SEG_SIZE requires an initdb. */ -#undef XLOG_SEG_SIZE - /* Number of bits in a file offset, on hosts where this is settable. */ diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index f3b35297d18..9615a389af2 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -14,6 +14,12 @@ */ /* + * This is the default value for wal_segment_size to be used at initdb when run + * without the --wal-segsize option. Must be a valid segment size. + */ +#define DEFAULT_XLOG_SEG_SIZE (16*1024*1024) + +/* * Maximum length for identifiers (e.g. table names, column names, * function names). Names actually are limited to one less byte than this, * because the length must include a trailing zero byte. 
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index 19a95ddc0e7..5d5f716b6f0 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -179,8 +179,6 @@ s{PG_VERSION_STR "[^"]+"}{PG_VERSION_STR "PostgreSQL $self->{strver}$extraver, c 1024, "\n"; print $o "#define XLOG_BLCKSZ ", 1024 * $self->{options}->{wal_blocksize}, "\n"; - print $o "#define XLOG_SEG_SIZE (", $self->{options}->{wal_segsize}, - " * 1024 * 1024)\n"; if ($self->{options}->{float4byval}) { |