aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/Makefile4
-rw-r--r--contrib/README10
-rw-r--r--contrib/pg_resetxlog/Makefile37
-rw-r--r--contrib/pg_resetxlog/README.pg_resetxlog39
-rw-r--r--contrib/pg_resetxlog/pg_resetxlog.c991
5 files changed, 1079 insertions, 2 deletions
diff --git a/contrib/Makefile b/contrib/Makefile
index 05aa567a176..819e5046ad8 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -1,4 +1,4 @@
-# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.17 2001/03/13 19:28:02 petere Exp $
+# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.18 2001/03/14 00:57:43 tgl Exp $
subdir = contrib
top_builddir = ..
@@ -18,8 +18,10 @@ WANTED_DIRS = \
miscutil \
noupdate \
oid2name \
+ pg_controldata \
pg_dumplo \
pg_logger \
+ pg_resetxlog \
pgbench \
pgcrypto \
rserv \
diff --git a/contrib/README b/contrib/README
index 79d95b2e3e1..c0470a8b0a4 100644
--- a/contrib/README
+++ b/contrib/README
@@ -52,6 +52,10 @@ intarray -
by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov
<oleg@sai.msu.su>.
+ipc_check -
+ Simple test script to help in configuring IPC.
+ FreeBSD only, for now.
+
isbn_issn -
PostgreSQL type extensions for ISBN (books) and ISSN (serials)
by Garrett A. Wollman <wollman@khavrinen.lcs.mit.edu>
@@ -86,7 +90,7 @@ oid2name -
by B Palmer <bpalmer@crimelabs.net>
pg_controldata -
- Dump internal database site structures
+ Dump contents of pg_control (database master file)
by Oliver Elphick <olly@lfix.co.uk>
pg_dumplo -
@@ -97,6 +101,10 @@ pg_logger -
Stdin-to-syslog gateway for PostgreSQL
by Nathan Myers <ncm@nospam.cantrip.org>
+pg_resetxlog -
+ Reset the WAL log (pg_xlog) to recover from crash or format change
+ by Tom Lane <tgl@sss.pgh.pa.us>
+
pgbench -
TPC-B like benchmarking tool
by Tatsuo Ishii <t-ishii@sra.co.jp>
diff --git a/contrib/pg_resetxlog/Makefile b/contrib/pg_resetxlog/Makefile
new file mode 100644
index 00000000000..2169d7a81e1
--- /dev/null
+++ b/contrib/pg_resetxlog/Makefile
@@ -0,0 +1,37 @@
+#
+# $Header: /cvsroot/pgsql/contrib/pg_resetxlog/Attic/Makefile,v 1.1 2001/03/14 00:57:43 tgl Exp $
+#
+# Builds, installs and cleans the pg_resetxlog contrib utility.
+# Tool and directory variables used below (CC, LN_S, INSTALL_PROGRAM,
+# INSTALL_DATA, mkinstalldirs, bindir, docdir, X, ...) are not set in this
+# file; they are expected to come from the included Makefile.global.
+#
+
+subdir = contrib/pg_resetxlog
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+# pg_crc.o is compiled from a local link to the backend's pg_crc.c
+# (see the pg_crc.c rule below).
+OBJS = pg_resetxlog.o pg_crc.o
+
+all: pg_resetxlog
+
+pg_resetxlog: $(OBJS)
+ $(CC) $(CFLAGS) $(OBJS) $(LDFLAGS) $(LIBS) -o $@
+
+# Link the backend's CRC source into this directory; any stale link or
+# copy is removed first so that $(LN_S) cannot fail on an existing file.
+pg_crc.c: $(top_builddir)/src/backend/utils/hash/pg_crc.c
+ rm -f $@ && $(LN_S) $< .
+
+# Install the program into bindir and its README into docdir/contrib.
+install: all installdirs
+ $(INSTALL_PROGRAM) pg_resetxlog$(X) $(bindir)
+ $(INSTALL_DATA) README.pg_resetxlog $(docdir)/contrib
+
+installdirs:
+ $(mkinstalldirs) $(bindir) $(docdir)/contrib
+
+uninstall:
+ rm -f $(bindir)/pg_resetxlog$(X) $(docdir)/contrib/README.pg_resetxlog
+
+# The linked pg_crc.c is a build product here, so it is removed as well.
+clean distclean maintainer-clean:
+ rm -f pg_resetxlog$(X) $(OBJS) pg_crc.c
+
+# Regenerate the "depend" file of header dependencies for all C files.
+depend dep:
+ $(CC) -MM -MG $(CFLAGS) *.c > depend
+
+# Pull in the generated dependencies only if "depend" exists.
+ifeq (depend,$(wildcard depend))
+include depend
+endif
diff --git a/contrib/pg_resetxlog/README.pg_resetxlog b/contrib/pg_resetxlog/README.pg_resetxlog
new file mode 100644
index 00000000000..f9521ee0d63
--- /dev/null
+++ b/contrib/pg_resetxlog/README.pg_resetxlog
@@ -0,0 +1,39 @@
+pg_resetxlog is a program to clear the WAL transaction log (stored in
+$PGDATA/pg_xlog/), replacing whatever had been in it with just a dummy
+shutdown-checkpoint record. It also regenerates the pg_control file
+if necessary.
+
+THIS PROGRAM WILL DESTROY VALUABLE LOG DATA!!! Don't run it unless you
+really need it!!!
+
+pg_resetxlog is primarily intended for disaster recovery --- that is,
+if your pg_control and/or xlog are hosed badly enough that Postgres refuses
+to start up, this program will get you past that problem and let you get to
+your data files. But realize that without the xlog, your data files may be
+corrupt due to partially-applied transactions, incomplete index-file
+updates, etc. You should dump your data, check it for accuracy, then initdb
+and reload.
+
+A secondary purpose is to cope with xlog format changes without requiring
+initdb. To use pg_resetxlog for this purpose, just be sure that you have
+cleanly shut down your old postmaster (if you're not sure, run the contrib
+module pg_controldata and check that the DB state is SHUTDOWN).
+Then run pg_resetxlog, and finally install and start the new version of
+the database software.
+
+To run the program, make sure your postmaster is not running, then
+(as the Postgres admin user) do
+
+ pg_resetxlog $PGDATA
+
+As a safety measure, the target data directory must be specified on the
+command line; it cannot be defaulted.
+
+If pg_resetxlog complains that it can't reconstruct valid data for pg_control,
+you can force it to invent plausible data values with
+
+ pg_resetxlog -f $PGDATA
+
+If this turns out to be necessary then you *definitely* should plan on
+immediate dump, initdb, reload --- any modifications you do to the database
+after "pg_resetxlog -f" would be likely to corrupt things even worse.
diff --git a/contrib/pg_resetxlog/pg_resetxlog.c b/contrib/pg_resetxlog/pg_resetxlog.c
new file mode 100644
index 00000000000..0e6f747e8ea
--- /dev/null
+++ b/contrib/pg_resetxlog/pg_resetxlog.c
@@ -0,0 +1,991 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_resetxlog.c
+ * A utility to "zero out" the xlog when it's corrupt beyond recovery.
+ * Can also rebuild pg_control if needed.
+ *
+ * The theory of operation is fairly simple:
+ * 1. Read the existing pg_control (which will include the last
+ * checkpoint record). If it is an old format then update to
+ * current format.
+ * 2. If pg_control is corrupt, attempt to intuit reasonable values,
+ * by scanning the old xlog if necessary.
+ * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
+ * record at the start of xlog.
+ * 4. Flush the existing xlog files and write a new segment 0 with
+ * just a checkpoint record in it.
+ * This is all pretty straightforward except for the intuition part of
+ * step 2 ...
+ *
+ *
+ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $Header: /cvsroot/pgsql/contrib/pg_resetxlog/Attic/pg_resetxlog.c,v 1.1 2001/03/14 00:57:43 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#ifdef USE_LOCALE
+#include <locale.h>
+#endif
+
+#include "access/xlog.h"
+#include "catalog/catversion.h"
+#include "catalog/pg_control.h"
+
+
+/******************** stuff copied from xlog.c ********************/
+
+/*
+ * Increment an xlogid/segment pair.
+ *
+ * Advances logSeg by one; once logSeg has reached XLogSegsPerFile-1 it
+ * wraps to 0 and logId is incremented instead.  Both arguments are
+ * modified in place, so they must be lvalues.
+ */
+#define NextLogSeg(logId, logSeg) \
+ do { \
+ if ((logSeg) >= XLogSegsPerFile-1) \
+ { \
+ (logId)++; \
+ (logSeg) = 0; \
+ } \
+ else \
+ (logSeg)++; \
+ } while (0)
+
+/*
+ * Compute ID and segment from an XLogRecPtr.
+ *
+ * For XLByteToSeg, do the computation at face value. For XLByteToPrevSeg,
+ * a boundary byte is taken to be in the previous segment. This is suitable
+ * for deciding which segment to write given a pointer to a record end,
+ * for example.
+ *
+ * Both macros assign to logId and logSeg, which must therefore be lvalues.
+ * NOTE(review): XLByteToPrevSeg computes xrecoff - 1, so it presumably
+ * must not be applied to a pointer whose xrecoff is 0 --- confirm against
+ * the type of xrecoff.
+ */
+#define XLByteToSeg(xlrp, logId, logSeg) \
+ ( logId = (xlrp).xlogid, \
+ logSeg = (xlrp).xrecoff / XLogSegSize \
+ )
+#define XLByteToPrevSeg(xlrp, logId, logSeg) \
+ ( logId = (xlrp).xlogid, \
+ logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
+ )
+
+/*
+ * Is an XLogRecPtr within a particular XLOG segment?
+ *
+ * For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg,
+ * a boundary byte is taken to be in the previous segment.
+ */
+#define XLByteInSeg(xlrp, logId, logSeg) \
+ ((xlrp).xlogid == (logId) && \
+ (xlrp).xrecoff / XLogSegSize == (logSeg))
+
+#define XLByteInPrevSeg(xlrp, logId, logSeg) \
+ ((xlrp).xlogid == (logId) && \
+ ((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))
+
+
+/*
+ * Build the path name of an XLOG segment file into "path", which must have
+ * room for MAXPGPATH bytes.  The name is the log id and segment number,
+ * each printed as 8 upper-case hex digits, inside the directory named by
+ * the file-level variable XLogDir (which must be set before use).
+ */
+#define XLogFileName(path, log, seg) \
+ snprintf(path, MAXPGPATH, "%s%c%08X%08X", \
+ XLogDir, SEP_CHAR, log, seg)
+
+/*
+ * _INTL_MAXLOGRECSZ: max space needed for a record including header and
+ * any backup-block data.
+ */
+#define _INTL_MAXLOGRECSZ (SizeOfXLogRecord + MAXLOGRECSZ + \
+ XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+
+/******************** end of stuff copied from xlog.c ********************/
+
+
+/*
+ * Program-wide state.  DataDir, XLogDir and ControlFilePath are filled in
+ * by main() from the command-line data directory before anything else
+ * runs.  ControlFile holds the pg_control image being read, guessed and
+ * finally rewritten; "guessed" is set by any routine that had to invent or
+ * distrust part of that image.
+ */
+static char *DataDir; /* locations of important stuff */
+static char XLogDir[MAXPGPATH];
+static char ControlFilePath[MAXPGPATH];
+
+static ControlFileData ControlFile; /* pg_control values */
+static bool guessed = false; /* T if we had to guess at any values */
+
+
+static bool CheckControlVersion0(char *buffer, int len);
+
+
+/*
+ * Open the existing XLOG segment file for the given log id and segment.
+ *
+ * Returns the file descriptor, or a negative value (with errno set by
+ * open()) if the file could not be opened.  The file is never created.
+ */
+static int
+XLogFileOpen(uint32 log, uint32 seg)
+{
+	char segpath[MAXPGPATH];
+
+	XLogFileName(segpath, log, seg);
+
+	return open(segpath, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
+}
+
+
+/*
+ * Try to read the existing pg_control file.
+ *
+ * Returns true if ControlFile was filled in, either from a current-format
+ * file (a CRC mismatch is tolerated, but then "guessed" is set) or from a
+ * 7.1beta file converted by CheckControlVersion0().  Returns false if the
+ * contents are unusable; ControlFile is left untouched in that case.
+ *
+ * Exits the program if the file cannot be opened or read at all, or if
+ * memory for the read buffer cannot be obtained.
+ *
+ * This routine is also responsible for updating old pg_control versions
+ * to the current format.
+ */
+static bool
+ReadControlFile(void)
+{
+	int fd;
+	int len;
+	int save_errno;
+	bool result;
+	char *buffer;
+	crc64 crc;
+
+	/* pg_control is binary data, so open it the way the other files are */
+	if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY)) < 0)
+	{
+		/*
+		 * If pg_control is not there at all, or we can't read it,
+		 * the odds are we've been handed a bad DataDir path, so give up.
+		 * User can do "touch pg_control" to force us to proceed.
+		 *
+		 * Save errno first: perror() is allowed to change it, and we
+		 * still need the open() failure code afterwards.
+		 */
+		save_errno = errno;
+		perror("Failed to open $PGDATA/global/pg_control for reading");
+		if (save_errno == ENOENT)
+			fprintf(stderr, "If you're sure the PGDATA path is correct, do\n"
+					" touch %s\n"
+					"and try again.\n", ControlFilePath);
+		exit(1);
+	}
+
+	/* Use malloc to ensure we have a maxaligned buffer */
+	buffer = (char *) malloc(BLCKSZ);
+	if (buffer == NULL)
+	{
+		fprintf(stderr, "out of memory\n");
+		exit(1);
+	}
+
+	len = read(fd, buffer, BLCKSZ);
+	if (len < 0)
+	{
+		perror("Failed to read $PGDATA/global/pg_control");
+		exit(1);
+	}
+	close(fd);
+
+	if (len >= sizeof(ControlFileData) &&
+		((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
+	{
+		/* Seems to be current version --- check the CRC. */
+		INIT_CRC64(crc);
+		COMP_CRC64(crc,
+				   buffer + sizeof(crc64),
+				   sizeof(ControlFileData) - sizeof(crc64));
+		FIN_CRC64(crc);
+
+		if (!EQ_CRC64(crc, ((ControlFileData *) buffer)->crc))
+		{
+			fprintf(stderr, "pg_control exists but has invalid CRC; proceed with caution.\n");
+			/* We will use the data anyway, but treat it as guessed. */
+			guessed = true;
+		}
+
+		memcpy(&ControlFile, buffer, sizeof(ControlFile));
+		result = true;
+	}
+	else if (CheckControlVersion0(buffer, len))
+	{
+		/* It was a 7.1beta pg_control; ControlFile has been converted. */
+		result = true;
+	}
+	else
+	{
+		/* Looks like it's a mess. */
+		fprintf(stderr, "pg_control exists but is broken or unknown version; ignoring it.\n");
+		result = false;
+	}
+
+	/* Everything of interest has been copied out of the raw image. */
+	free(buffer);
+	return result;
+}
+
+
+/******************* routines for old XLOG format *******************/
+
+
+/*
+ * This format was in use in 7.1 beta releases through 7.1beta5. The
+ * pg_control layout was different, and so were the XLOG page headers.
+ * The XLOG record header format was physically the same as 7.1 release,
+ * but interpretation of the xl_len field was not.
+ */
+
+/*
+ * Old-style "64-bit" CRC: really a pair of independent 32-bit CRC
+ * accumulators (see COMP_CRC64V0 below for how input bytes are split
+ * between them).
+ */
+typedef struct crc64V0
+{
+ uint32 crc1;
+ uint32 crc2;
+} crc64V0;
+
+/*
+ * Byte-indexed lookup table (256 entries) used by COMP_CRC64V0.
+ * NOTE(review): the values look like the usual reflected CRC-32 table
+ * (entry 128 is 0xedb88320) --- confirm before relying on that.
+ */
+static uint32 crc_tableV0[] = {
+0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
+0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
+0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+/*
+ * Version-0 CRC operations on a crc64V0 lvalue.
+ *
+ * INIT_CRC64V0 sets both halves to all-ones; FIN_CRC64V0 inverts both
+ * halves.  COMP_CRC64V0 feeds "len" bytes starting at "data" into the
+ * CRC, alternating bytes between the halves: bytes at even offsets update
+ * crc1 and bytes at odd offsets update crc2, with a trailing odd byte
+ * going to crc1.  Because of that alternation, the result of several
+ * COMP_CRC64V0 calls depends on how the data is split between the calls.
+ *
+ * COMP_CRC64V0 expands to a bare brace block rather than an expression,
+ * so it can only be used as a statement.
+ */
+#define INIT_CRC64V0(crc) ((crc).crc1 = 0xffffffff, (crc).crc2 = 0xffffffff)
+#define FIN_CRC64V0(crc) ((crc).crc1 ^= 0xffffffff, (crc).crc2 ^= 0xffffffff)
+#define COMP_CRC64V0(crc, data, len) \
+{\
+ uint32 __c1 = (crc).crc1;\
+ uint32 __c2 = (crc).crc2;\
+ char *__data = (char *) (data);\
+ uint32 __len = (len);\
+\
+ while (__len >= 2)\
+ {\
+ __c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
+ __c2 = crc_tableV0[(__c2 ^ *__data++) & 0xff] ^ (__c2 >> 8);\
+ __len -= 2;\
+ }\
+ if (__len > 0)\
+ __c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
+ (crc).crc1 = __c1;\
+ (crc).crc2 = __c2;\
+}
+
+/* True if both halves of two crc64V0 values are equal. */
+#define EQ_CRC64V0(c1,c2) ((c1).crc1 == (c2).crc1 && (c1).crc2 == (c2).crc2)
+
+
+/* Size of each locale-name field in the version-0 pg_control layout. */
+#define LOCALE_NAME_BUFLEN_V0 128
+
+/*
+ * Version-0 pg_control layout.  CheckControlVersion0() overlays this
+ * struct directly on the bytes read from the file, and its CRC covers
+ * everything after the leading crc field, so neither the order nor the
+ * size of the fields may be changed.
+ */
+typedef struct ControlFileDataV0
+{
+ crc64V0 crc;
+ uint32 logId; /* current log file id */
+ uint32 logSeg; /* current log file segment (1-based) */
+ XLogRecPtr checkPoint; /* last check point record ptr */
+ time_t time; /* time stamp of last modification */
+ DBState state; /* see enum above */
+ uint32 blcksz; /* block size for this DB */
+ uint32 relseg_size; /* blocks per segment of large relation */
+ uint32 catalog_version_no; /* internal version number */
+ char lc_collate[LOCALE_NAME_BUFLEN_V0];
+ char lc_ctype[LOCALE_NAME_BUFLEN_V0];
+ char archdir[MAXPGPATH]; /* where to move offline log files */
+} ControlFileDataV0;
+
+/*
+ * Version-0 checkpoint record body.  CheckControlVersion0() casts the data
+ * area of the old checkpoint record to this struct and copies
+ * ThisStartUpID, nextXid and nextOid out of it.
+ */
+typedef struct CheckPointV0
+{
+ XLogRecPtr redo; /* next RecPtr available when we */
+ /* began to create CheckPoint */
+ /* (i.e. REDO start point) */
+ XLogRecPtr undo; /* first record of oldest in-progress */
+ /* transaction when we started */
+ /* (i.e. UNDO end point) */
+ StartUpID ThisStartUpID;
+ TransactionId nextXid;
+ Oid nextOid;
+ bool Shutdown;
+} CheckPointV0;
+
+/*
+ * Version-0 XLOG record header.  The record's data follows the header;
+ * xl_crc is checked by RecordIsValidV0().
+ */
+typedef struct XLogRecordV0
+{
+ crc64V0 xl_crc;
+ XLogRecPtr xl_prev; /* ptr to previous record in log */
+ XLogRecPtr xl_xact_prev; /* ptr to previous record of this xact */
+ TransactionId xl_xid; /* xact id */
+ uint16 xl_len; /* total len of record *data* */
+ uint8 xl_info;
+ RmgrId xl_rmid; /* resource manager inserted this record */
+} XLogRecordV0;
+
+#define SizeOfXLogRecordV0 DOUBLEALIGN(sizeof(XLogRecordV0))
+
+/*
+ * Header placed at the start of a page's data when a record continues
+ * from the previous page; ReadRecordV0() expects it immediately after the
+ * page header.
+ */
+typedef struct XLogContRecordV0
+{
+ uint16 xl_len; /* len of data left */
+} XLogContRecordV0;
+
+#define SizeOfXLogContRecordV0 DOUBLEALIGN(sizeof(XLogContRecordV0))
+
+/* Magic number expected in xlp_magic of every version-0 XLOG page. */
+#define XLOG_PAGE_MAGIC_V0 0x17345168
+
+/* Version-0 XLOG page header, validated by ValidXLOGHeaderV0(). */
+typedef struct XLogPageHeaderDataV0
+{
+ uint32 xlp_magic;
+ uint16 xlp_info;
+} XLogPageHeaderDataV0;
+
+#define SizeOfXLogPHDV0 DOUBLEALIGN(sizeof(XLogPageHeaderDataV0))
+
+typedef XLogPageHeaderDataV0 *XLogPageHeaderV0;
+
+
+/* Forward declarations for the version-0 reading routines below. */
+static bool RecordIsValidV0(XLogRecordV0 *record);
+static XLogRecordV0 *ReadRecordV0(XLogRecPtr *RecPtr, char *buffer);
+static bool ValidXLOGHeaderV0(XLogPageHeaderV0 hdr);
+
+
+/*
+ * Try to interpret pg_control contents as "version 0" format.
+ *
+ * buffer holds the raw bytes read from pg_control and len is how many of
+ * them are valid.  Returns false, leaving ControlFile untouched, if the
+ * data is too short or fails the version-0 CRC check.  Otherwise fills
+ * ControlFile with the converted values and returns true; "guessed" is
+ * set if the old checkpoint record could not be read and the checkpoint
+ * fields had to be invented.
+ *
+ * Exits the program if the workspace for reading the old checkpoint
+ * record cannot be allocated.
+ */
+static bool
+CheckControlVersion0(char *buffer, int len)
+{
+	crc64V0 crc;
+	ControlFileDataV0 *oldfile;
+	XLogRecordV0 *record;
+	CheckPointV0 *oldchkpt;
+	char *recbuf;
+
+	if (len < sizeof(ControlFileDataV0))
+		return false;
+	/* Check CRC the version-0 way. */
+	INIT_CRC64V0(crc);
+	COMP_CRC64V0(crc,
+				 buffer + sizeof(crc64V0),
+				 sizeof(ControlFileDataV0) - sizeof(crc64V0));
+	FIN_CRC64V0(crc);
+
+	if (!EQ_CRC64V0(crc, ((ControlFileDataV0 *) buffer)->crc))
+		return false;
+
+	/* Valid data, convert useful fields to new-style pg_control format */
+	oldfile = (ControlFileDataV0 *) buffer;
+
+	memset(&ControlFile, 0, sizeof(ControlFile));
+
+	ControlFile.pg_control_version = PG_CONTROL_VERSION;
+	ControlFile.catalog_version_no = oldfile->catalog_version_no;
+
+	ControlFile.state = oldfile->state;
+
+	ControlFile.blcksz = oldfile->blcksz;
+	ControlFile.relseg_size = oldfile->relseg_size;
+
+	/*
+	 * The locale names come straight from disk, so do not trust them to
+	 * be null-terminated: copy with a bound that fits both the old and
+	 * the new field, and always terminate the result.
+	 */
+	StrNCpy(ControlFile.lc_collate, oldfile->lc_collate,
+			Min(LOCALE_NAME_BUFLEN, LOCALE_NAME_BUFLEN_V0));
+	StrNCpy(ControlFile.lc_ctype, oldfile->lc_ctype,
+			Min(LOCALE_NAME_BUFLEN, LOCALE_NAME_BUFLEN_V0));
+
+	/*
+	 * Since this format did not include a copy of the latest checkpoint
+	 * record, we have to go rooting in the old XLOG to get that.
+	 * ReadRecordV0 copies into the workspace unconditionally, so it must
+	 * be a real allocation.
+	 */
+	recbuf = (char *) malloc(_INTL_MAXLOGRECSZ);
+	if (recbuf == NULL)
+	{
+		fprintf(stderr, "out of memory\n");
+		exit(1);
+	}
+
+	record = ReadRecordV0(&oldfile->checkPoint, recbuf);
+	if (record == NULL)
+	{
+		/*
+		 * We have to guess at the checkpoint contents.
+		 */
+		guessed = true;
+		ControlFile.checkPointCopy.ThisStartUpID = 0;
+		ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
+		ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
+	}
+	else
+	{
+		oldchkpt = (CheckPointV0 *) XLogRecGetData(record);
+
+		ControlFile.checkPointCopy.ThisStartUpID = oldchkpt->ThisStartUpID;
+		ControlFile.checkPointCopy.nextXid = oldchkpt->nextXid;
+		ControlFile.checkPointCopy.nextOid = oldchkpt->nextOid;
+	}
+
+	/* On success the record lives in recbuf; we are done with it now. */
+	free(recbuf);
+
+	return true;
+}
+
+/*
+ * Verify the CRC of an XLOG V0 record.
+ *
+ * Nothing in the record beyond its length field should be trusted until
+ * this check has passed.  The whole record (header plus xl_len bytes of
+ * data) must already be in memory at *record.
+ *
+ * Returns true if the stored CRC matches the one computed here.
+ */
+static bool
+RecordIsValidV0(XLogRecordV0 *record)
+{
+	uint32 datalen = record->xl_len;
+	crc64V0 computed;
+
+	/*
+	 * NB: records carrying backup blocks are not handled correctly here.
+	 * That is acceptable for now because only checkpoint records are ever
+	 * passed in.
+	 */
+	INIT_CRC64V0(computed);
+	/* data area first, then the header minus its own CRC field */
+	COMP_CRC64V0(computed, XLogRecGetData(record), datalen);
+	COMP_CRC64V0(computed, (char*) record + sizeof(crc64V0),
+				 SizeOfXLogRecordV0 - sizeof(crc64V0));
+	FIN_CRC64V0(computed);
+
+	if (EQ_CRC64V0(record->xl_crc, computed))
+		return true;
+
+	return false;
+}
+
+/*
+ * Attempt to read an XLOG V0 record at recptr.
+ *
+ * If no valid record is available, returns NULL.
+ *
+ * buffer is a workspace at least _INTL_MAXLOGRECSZ bytes long. It is needed
+ * to reassemble a record that crosses block boundaries. Note that on
+ * successful return, the returned record pointer always points at buffer.
+ *
+ * The open segment file, its position and the current page are kept in
+ * static variables between calls.  Note that an already-open file is
+ * reused without checking that it is the segment RecPtr refers to; every
+ * failure path closes the file, so that only matters after a successful
+ * call.
+ */
+static XLogRecordV0 *
+ReadRecordV0(XLogRecPtr *RecPtr, char *buffer)
+{
+ static int readFile = -1;
+ static uint32 readId = 0;
+ static uint32 readSeg = 0;
+ static uint32 readOff = 0;
+ static char *readBuf = NULL;
+
+ XLogRecordV0 *record;
+ uint32 len,
+ total_len;
+ uint32 targetPageOff;
+
+ /* The page buffer is allocated once and kept for later calls. */
+ if (readBuf == NULL)
+ readBuf = (char *) malloc(BLCKSZ);
+
+ XLByteToSeg(*RecPtr, readId, readSeg);
+ if (readFile < 0)
+ {
+ readFile = XLogFileOpen(readId, readSeg);
+ if (readFile < 0)
+ goto next_record_is_invalid;
+ readOff = (uint32) (-1); /* force read to occur below */
+ }
+
+ /* Offset, within the segment, of the page containing the record start */
+ targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / BLCKSZ) * BLCKSZ;
+ if (readOff != targetPageOff)
+ {
+ readOff = targetPageOff;
+ if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
+ goto next_record_is_invalid;
+ if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
+ goto next_record_is_invalid;
+ if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
+ goto next_record_is_invalid;
+ }
+ /*
+ * If the page begins with the continuation of an earlier record, the
+ * position right after the page header cannot be the start of a record.
+ */
+ if ((((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
+ RecPtr->xrecoff % BLCKSZ == SizeOfXLogPHDV0)
+ goto next_record_is_invalid;
+ record = (XLogRecordV0 *) ((char *) readBuf + RecPtr->xrecoff % BLCKSZ);
+
+ /* A zero data length is treated as "no record here". */
+ if (record->xl_len == 0)
+ goto next_record_is_invalid;
+ /*
+ * Compute total length of record including any appended backup blocks.
+ */
+ total_len = SizeOfXLogRecordV0 + record->xl_len;
+ /*
+ * Make sure it will fit in buffer (currently, it is mechanically
+ * impossible for this test to fail, but it seems like a good idea
+ * anyway).
+ */
+ if (total_len > _INTL_MAXLOGRECSZ)
+ goto next_record_is_invalid;
+ /* Bytes available from the record start to the end of this page */
+ len = BLCKSZ - RecPtr->xrecoff % BLCKSZ;
+ if (total_len > len)
+ {
+ /* Need to reassemble record */
+ XLogContRecordV0 *contrecord;
+ uint32 gotlen = len;
+
+ /* Copy the part on this page, then append from following pages. */
+ memcpy(buffer, record, len);
+ record = (XLogRecordV0 *) buffer;
+ buffer += len;
+ for (;;)
+ {
+ /*
+ * Move on to the next page.  No lseek is done: the file position
+ * is already there after the previous read, or at the start of a
+ * freshly opened segment.
+ */
+ readOff += BLCKSZ;
+ if (readOff >= XLogSegSize)
+ {
+ close(readFile);
+ readFile = -1;
+ NextLogSeg(readId, readSeg);
+ readFile = XLogFileOpen(readId, readSeg);
+ if (readFile < 0)
+ goto next_record_is_invalid;
+ readOff = 0;
+ }
+ if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
+ goto next_record_is_invalid;
+ if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
+ goto next_record_is_invalid;
+ if (!(((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
+ goto next_record_is_invalid;
+ contrecord = (XLogContRecordV0 *) ((char *) readBuf + SizeOfXLogPHDV0);
+ /*
+ * The continuation header says how much of the record is still to
+ * come; that plus what we already have must equal the total.
+ */
+ if (contrecord->xl_len == 0 ||
+ total_len != (contrecord->xl_len + gotlen))
+ goto next_record_is_invalid;
+ /* Most record data one continuation page can hold */
+ len = BLCKSZ - SizeOfXLogPHDV0 - SizeOfXLogContRecordV0;
+ if (contrecord->xl_len > len)
+ {
+ /* Record continues past this page too: take it all and loop. */
+ memcpy(buffer, (char *)contrecord + SizeOfXLogContRecordV0, len);
+ gotlen += len;
+ buffer += len;
+ continue;
+ }
+ /* Final piece of the record */
+ memcpy(buffer, (char *) contrecord + SizeOfXLogContRecordV0,
+ contrecord->xl_len);
+ break;
+ }
+ if (!RecordIsValidV0(record))
+ goto next_record_is_invalid;
+ return record;
+ }
+
+ /* Record does not cross a page boundary */
+ if (!RecordIsValidV0(record))
+ goto next_record_is_invalid;
+ memcpy(buffer, record, total_len);
+ return (XLogRecordV0 *) buffer;
+
+ /*
+ * Common failure exit: drop the open file so the next call starts from
+ * scratch.  readFile may already be -1 here (open failed), in which
+ * case the close() simply fails and its result is ignored.
+ */
+next_record_is_invalid:;
+ close(readFile);
+ readFile = -1;
+ return NULL;
+}
+
+/*
+ * Sanity-check the header of an XLOG V0 page that was just read in.
+ *
+ * The page is accepted only if it carries the V0 magic number and has no
+ * flag bits set outside XLP_ALL_FLAGS.  This exists purely to keep
+ * ReadRecordV0 free of duplicated checks.
+ */
+static bool
+ValidXLOGHeaderV0(XLogPageHeaderV0 hdr)
+{
+	bool magic_ok = (hdr->xlp_magic == XLOG_PAGE_MAGIC_V0);
+	bool flags_ok = ((hdr->xlp_info & ~XLP_ALL_FLAGS) == 0);
+
+	return magic_ok && flags_ok;
+}
+
+/******************* end of routines for old XLOG format *******************/
+
+
+/*
+ * Fill ControlFile with default values when the old pg_control could not
+ * be used at all.  Always marks the result as guessed.
+ *
+ * With USE_LOCALE, the locale names are taken from the current
+ * environment and the program exits if either setting is invalid;
+ * otherwise both are set to "C".
+ */
+static void
+GuessControlValues(void)
+{
+	CheckPoint *ckpt = &ControlFile.checkPointCopy;
+#ifdef USE_LOCALE
+	char *localeptr;
+#endif
+
+	/* Start from an all-zeroes image and remember that nothing is real. */
+	memset(&ControlFile, 0, sizeof(ControlFile));
+	guessed = true;
+
+	ControlFile.pg_control_version = PG_CONTROL_VERSION;
+	ControlFile.catalog_version_no = CATALOG_VERSION_NO;
+
+	/* Checkpoint located just past the first page header of the log */
+	ckpt->redo.xlogid = 0;
+	ckpt->redo.xrecoff = SizeOfXLogPHD;
+	ckpt->undo = ckpt->redo;
+	ckpt->ThisStartUpID = 0;
+	ckpt->nextXid = (TransactionId) 514; /* XXX */
+	ckpt->nextOid = BootstrapObjectIdData;
+	ckpt->time = time(NULL);
+
+	ControlFile.state = DB_SHUTDOWNED;
+	ControlFile.time = time(NULL);
+	ControlFile.logId = 0;
+	ControlFile.logSeg = 1;
+	ControlFile.checkPoint = ckpt->redo;
+
+	ControlFile.blcksz = BLCKSZ;
+	ControlFile.relseg_size = RELSEG_SIZE;
+
+#ifndef USE_LOCALE
+	strcpy(ControlFile.lc_collate, "C");
+	strcpy(ControlFile.lc_ctype, "C");
+#else
+	localeptr = setlocale(LC_COLLATE, "");
+	if (localeptr == NULL)
+	{
+		fprintf(stderr, "Invalid LC_COLLATE setting\n");
+		exit(1);
+	}
+	StrNCpy(ControlFile.lc_collate, localeptr, LOCALE_NAME_BUFLEN);
+
+	localeptr = setlocale(LC_CTYPE, "");
+	if (localeptr == NULL)
+	{
+		fprintf(stderr, "Invalid LC_CTYPE setting\n");
+		exit(1);
+	}
+	StrNCpy(ControlFile.lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
+#endif
+
+	/*
+	 * XXX eventually, should try to grovel through old XLOG to develop
+	 * more accurate values for startupid, nextXID, and nextOID.
+	 */
+}
+
+
+/*
+ * Show the pg_control values we are working with on stdout.
+ *
+ * NB: only fields that RewriteControlFile() will NOT overwrite belong in
+ * this display.
+ */
+static void
+PrintControlValues(void)
+{
+	printf("Guessed-at pg_control values:\n\n");
+	printf("pg_control version number: %u\n",
+		   ControlFile.pg_control_version);
+	printf("Catalog version number: %u\n",
+		   ControlFile.catalog_version_no);
+	printf("Latest checkpoint's StartUpID: %u\n",
+		   ControlFile.checkPointCopy.ThisStartUpID);
+	printf("Latest checkpoint's NextXID: %u\n",
+		   ControlFile.checkPointCopy.nextXid);
+	printf("Latest checkpoint's NextOID: %u\n",
+		   ControlFile.checkPointCopy.nextOid);
+	printf("Database block size: %u\n",
+		   ControlFile.blcksz);
+	printf("Blocks per segment of large relation: %u\n",
+		   ControlFile.relseg_size);
+	printf("LC_COLLATE: %s\n",
+		   ControlFile.lc_collate);
+	printf("LC_CTYPE: %s\n",
+		   ControlFile.lc_ctype);
+}
+
+
+/*
+ * Write out the new pg_control file.
+ *
+ * The fields describing XLOG position and database state are first forced
+ * to the values for an empty log in a cleanly shut down database, then the
+ * CRC is recomputed and the old file is replaced by a freshly created one.
+ * Exits the program on any failure to create, write or fsync the file.
+ */
+static void
+RewriteControlFile(void)
+{
+	CheckPoint *ckpt = &ControlFile.checkPointCopy;
+	char page[BLCKSZ]; /* need not be aligned */
+	int ctlfd;
+
+	/*
+	 * Adjust fields as needed to force an empty XLOG.
+	 */
+	ckpt->redo.xlogid = 0;
+	ckpt->redo.xrecoff = SizeOfXLogPHD;
+	ckpt->undo = ckpt->redo;
+	ckpt->time = time(NULL);
+
+	ControlFile.state = DB_SHUTDOWNED;
+	ControlFile.time = time(NULL);
+	ControlFile.logId = 0;
+	ControlFile.logSeg = 1;
+	ControlFile.checkPoint = ckpt->redo;
+	ControlFile.prevCheckPoint.xlogid = 0;
+	ControlFile.prevCheckPoint.xrecoff = 0;
+
+	/* The CRC covers everything after the crc field itself. */
+	INIT_CRC64(ControlFile.crc);
+	COMP_CRC64(ControlFile.crc,
+			   (char*) &ControlFile + sizeof(crc64),
+			   sizeof(ControlFileData) - sizeof(crc64));
+	FIN_CRC64(ControlFile.crc);
+
+	/*
+	 * The file is written as a full BLCKSZ block, with zeroes after the
+	 * actual ControlFileData.  That makes a premature-EOF failure less
+	 * likely when pg_control is read back; a bad file will still be
+	 * rejected on its contents, but hopefully with a more specific
+	 * complaint than "couldn't read pg_control".
+	 */
+	if (sizeof(ControlFileData) > BLCKSZ)
+	{
+		fprintf(stderr, "sizeof(ControlFileData) is too large ... fix xlog.c\n");
+		exit(1);
+	}
+
+	memset(page, 0, BLCKSZ);
+	memcpy(page, &ControlFile, sizeof(ControlFileData));
+
+	/* Remove the old file so that the exclusive create below can succeed */
+	unlink(ControlFilePath);
+
+	ctlfd = open(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR);
+	if (ctlfd < 0)
+	{
+		perror("RewriteControlFile failed to create pg_control file");
+		exit(1);
+	}
+
+	if (write(ctlfd, page, BLCKSZ) != BLCKSZ)
+	{
+		perror("RewriteControlFile failed to write pg_control file");
+		exit(1);
+	}
+
+	if (fsync(ctlfd) != 0)
+	{
+		perror("fsync");
+		exit(1);
+	}
+
+	close(ctlfd);
+}
+
+
+/*
+ * Remove existing XLOG files
+ *
+ * Deletes every entry of the XLOG directory whose name consists of exactly
+ * 16 upper-case hex digits (the form produced by XLogFileName); anything
+ * else in the directory is left alone.  Exits the program if the directory
+ * cannot be opened or read, or if a segment file cannot be removed.
+ */
+static void
+KillExistingXLOG(void)
+{
+	DIR *xldir;
+	struct dirent *xlde;
+	char path[MAXPGPATH];
+
+	xldir = opendir(XLogDir);
+	if (xldir == NULL)
+	{
+		perror("KillExistingXLOG: cannot open $PGDATA/pg_xlog directory");
+		exit(1);
+	}
+
+	/*
+	 * readdir() returns NULL both at end of directory and on error; the
+	 * only way to tell them apart is to clear errno before each call.
+	 */
+	errno = 0;
+	while ((xlde = readdir(xldir)) != NULL)
+	{
+		if (strlen(xlde->d_name) == 16 &&
+			strspn(xlde->d_name, "0123456789ABCDEF") == 16)
+		{
+			/*
+			 * XLogDir may itself be nearly MAXPGPATH long, so make sure
+			 * the full name fits rather than overrunning path[] or
+			 * unlinking a truncated name.
+			 */
+			if (strlen(XLogDir) + 1 + 16 >= MAXPGPATH)
+			{
+				fprintf(stderr, "KillExistingXLOG: path too long: %s%c%s\n",
+						XLogDir, SEP_CHAR, xlde->d_name);
+				exit(1);
+			}
+			snprintf(path, MAXPGPATH, "%s%c%s",
+					 XLogDir, SEP_CHAR, xlde->d_name);
+			if (unlink(path) < 0)
+			{
+				perror(path);
+				exit(1);
+			}
+		}
+		errno = 0;
+	}
+	if (errno)
+	{
+		perror("KillExistingXLOG: cannot read $PGDATA/pg_xlog directory");
+		exit(1);
+	}
+	closedir(xldir);
+}
+
+
+/*
+ * Write an empty XLOG file, containing only the checkpoint record
+ * already set up in ControlFile.
+ *
+ * Segment 0 of log 0 is (re)created with one page holding a shutdown
+ * checkpoint record right after the page header, followed by zero-filled
+ * pages up to XLogSegSize.  Exits the program if memory cannot be
+ * obtained or if the file cannot be created, written or fsync'd.
+ */
+static void
+WriteEmptyXLOG(void)
+{
+	char *buffer;
+	XLogPageHeader page;
+	XLogRecord *record;
+	crc64 crc;
+	char path[MAXPGPATH];
+	int fd;
+	int nbytes;
+
+	/* Use malloc() to ensure buffer is MAXALIGNED */
+	buffer = (char *) malloc(BLCKSZ);
+	if (buffer == NULL)
+	{
+		fprintf(stderr, "out of memory\n");
+		exit(1);
+	}
+	page = (XLogPageHeader) buffer;
+
+	/* Set up the first page with initial record */
+	memset(buffer, 0, BLCKSZ);
+	page->xlp_magic = XLOG_PAGE_MAGIC;
+	page->xlp_info = 0;
+	page->xlp_sui = ControlFile.checkPointCopy.ThisStartUpID;
+	record = (XLogRecord *) ((char *) page + SizeOfXLogPHD);
+	record->xl_prev.xlogid = 0;
+	record->xl_prev.xrecoff = 0;
+	record->xl_xact_prev = record->xl_prev;
+	record->xl_xid = InvalidTransactionId;
+	record->xl_len = sizeof(CheckPoint);
+	record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
+	record->xl_rmid = RM_XLOG_ID;
+	memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
+		   sizeof(CheckPoint));
+
+	/* CRC covers the record data, then the header minus its crc field */
+	INIT_CRC64(crc);
+	COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
+	COMP_CRC64(crc, (char*) record + sizeof(crc64),
+			   SizeOfXLogRecord - sizeof(crc64));
+	FIN_CRC64(crc);
+	record->xl_crc = crc;
+
+	/* Write the first page */
+	XLogFileName(path, 0, 0);
+
+	/* Remove any leftover file so the exclusive create cannot fail on it */
+	unlink(path);
+
+	fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+			  S_IRUSR | S_IWUSR);
+	if (fd < 0)
+	{
+		perror(path);
+		exit(1);
+	}
+
+	if (write(fd, buffer, BLCKSZ) != BLCKSZ)
+	{
+		perror("WriteEmptyXLOG: failed to write xlog file");
+		exit(1);
+	}
+
+	/* Fill the rest of the file with zeroes */
+	memset(buffer, 0, BLCKSZ);
+	for (nbytes = BLCKSZ; nbytes < XLogSegSize; nbytes += BLCKSZ)
+	{
+		if (write(fd, buffer, BLCKSZ) != BLCKSZ)
+		{
+			perror("WriteEmptyXLOG: failed to write xlog file");
+			exit(1);
+		}
+	}
+
+	if (fsync(fd) != 0)
+	{
+		perror("fsync");
+		exit(1);
+	}
+
+	close(fd);
+	free(buffer);
+}
+
+
+/*
+ * Print the command-line synopsis on stderr and exit with status 1.
+ */
+static void
+usage(void)
+{
+	fputs("Usage: pg_resetxlog [-f] [-n] PGDataDirectory\n\n", stderr);
+	fputs(" -f\tforce update to be done\n", stderr);
+	fputs(" -n\tno update, just show extracted pg_control values (for testing)\n", stderr);
+	exit(1);
+}
+
+
+/*
+ * pg_resetxlog [-f] [-n] PGDataDirectory
+ *
+ * Parses the switches, refuses to run if a postmaster lock file exists,
+ * loads (or guesses) the pg_control values, and then either just reports
+ * them or rewrites pg_control and replaces the XLOG with an empty one.
+ * Returns 0 only after a completed reset; every other path exits with 1.
+ */
+int
+main(int argc, char ** argv)
+{
+	bool force = false;
+	bool noupdate = false;
+	int argn = 1;
+	int lockfd;
+	char lockpath[MAXPGPATH];
+
+	/* Consume leading switches; the first non-switch word ends the scan. */
+	while (argn < argc && argv[argn][0] == '-')
+	{
+		const char *sw = argv[argn];
+
+		if (strcmp(sw, "-f") == 0)
+			force = true;
+		else if (strcmp(sw, "-n") == 0)
+			noupdate = true;
+		else
+			usage();
+		argn++;
+	}
+
+	/* Exactly one non-switch argument must remain: the data directory. */
+	if (argc - argn != 1)
+		usage();
+
+	DataDir = argv[argn];
+
+	snprintf(XLogDir, MAXPGPATH, "%s%cpg_xlog", DataDir, SEP_CHAR);
+
+	snprintf(ControlFilePath, MAXPGPATH, "%s%cglobal%cpg_control",
+			 DataDir, SEP_CHAR, SEP_CHAR);
+
+	/*
+	 * A postmaster lock file means we might be about to trample on a live
+	 * installation, so its mere existence is enough to make us stop.
+	 */
+	snprintf(lockpath, MAXPGPATH, "%s%cpostmaster.pid", DataDir, SEP_CHAR);
+
+	lockfd = open(lockpath, O_RDONLY);
+	if (lockfd >= 0)
+	{
+		fprintf(stderr, "Lock file '%s' exists --- is a postmaster running?\n"
+				"If not, delete the lock file and try again.\n",
+				lockpath);
+		exit(1);
+	}
+	if (errno != ENOENT)
+	{
+		/* Failing for any reason other than "no such file" is suspicious. */
+		perror("Failed to open $PGDATA/postmaster.pid for reading");
+		exit(1);
+	}
+
+	/*
+	 * Load pg_control, falling back on defaults if it is unusable.
+	 */
+	if (!ReadControlFile())
+		GuessControlValues();
+
+	/*
+	 * With -n, or when something was guessed and -f was not given, only
+	 * display the values and stop.
+	 */
+	if (noupdate || (guessed && !force))
+	{
+		PrintControlValues();
+		if (!noupdate)
+			printf("\nIf these values seem acceptable, use -f to force reset.\n");
+		exit(1);
+	}
+
+	/*
+	 * A pg_control that does not say "cleanly shut down" also needs -f.
+	 */
+	if (!force && ControlFile.state != DB_SHUTDOWNED)
+	{
+		printf("The database was not shut down cleanly.\n"
+			   "Resetting the xlog may cause data to be lost!\n"
+			   "If you want to proceed anyway, use -f to force reset.\n");
+		exit(1);
+	}
+
+	/*
+	 * All checks passed: rewrite pg_control, then replace the XLOG.
+	 */
+	RewriteControlFile();
+	KillExistingXLOG();
+	WriteEmptyXLOG();
+
+	printf("XLOG reset.\n");
+	return 0;
+}