aboutsummaryrefslogtreecommitdiff
path: root/src/include/access
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/access')
-rw-r--r--src/include/access/brin_xlog.h63
-rw-r--r--src/include/access/clog.h6
-rw-r--r--src/include/access/gin.h6
-rw-r--r--src/include/access/gin_private.h66
-rw-r--r--src/include/access/gist_private.h31
-rw-r--r--src/include/access/hash.h6
-rw-r--r--src/include/access/heapam_xlog.h139
-rw-r--r--src/include/access/htup_details.h1
-rw-r--r--src/include/access/itup.h1
-rw-r--r--src/include/access/multixact.h6
-rw-r--r--src/include/access/nbtree.h94
-rw-r--r--src/include/access/spgist.h6
-rw-r--r--src/include/access/spgist_private.h144
-rw-r--r--src/include/access/xact.h6
-rw-r--r--src/include/access/xlog.h10
-rw-r--r--src/include/access/xlog_internal.h19
-rw-r--r--src/include/access/xloginsert.h70
-rw-r--r--src/include/access/xlogreader.h77
-rw-r--r--src/include/access/xlogrecord.h160
-rw-r--r--src/include/access/xlogutils.h21
20 files changed, 534 insertions, 398 deletions
diff --git a/src/include/access/brin_xlog.h b/src/include/access/brin_xlog.h
index d748db4d0c6..6dc9eb3eca8 100644
--- a/src/include/access/brin_xlog.h
+++ b/src/include/access/brin_xlog.h
@@ -14,7 +14,7 @@
#ifndef BRIN_XLOG_H
#define BRIN_XLOG_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/bufpage.h"
#include "storage/itemptr.h"
@@ -42,59 +42,82 @@
*/
#define XLOG_BRIN_INIT_PAGE 0x80
-/* This is what we need to know about a BRIN index create */
+/*
+ * This is what we need to know about a BRIN index create.
+ *
+ * Backup block 0: metapage
+ */
typedef struct xl_brin_createidx
{
BlockNumber pagesPerRange;
- RelFileNode node;
uint16 version;
} xl_brin_createidx;
#define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16))
/*
* This is what we need to know about a BRIN tuple insert
+ *
+ * Backup block 0: main page, block data is the new BrinTuple.
+ * Backup block 1: revmap page
*/
typedef struct xl_brin_insert
{
- RelFileNode node;
BlockNumber heapBlk;
/* extra information needed to update the revmap */
- BlockNumber revmapBlk;
BlockNumber pagesPerRange;
- uint16 tuplen;
- ItemPointerData tid;
- /* tuple data follows at end of struct */
+ /* offset number in the main page to insert the tuple to. */
+ OffsetNumber offnum;
} xl_brin_insert;
-#define SizeOfBrinInsert (offsetof(xl_brin_insert, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinInsert (offsetof(xl_brin_insert, offnum) + sizeof(OffsetNumber))
/*
- * A cross-page update is the same as an insert, but also store the old tid.
+ * A cross-page update is the same as an insert, but also stores information
+ * about the old tuple.
+ *
+ * Like in xlog_brin_update:
+ * Backup block 0: new page, block data includes the new BrinTuple.
+ * Backup block 1: revmap page
+ *
+ * And in addition:
+ * Backup block 2: old page
*/
typedef struct xl_brin_update
{
- ItemPointerData oldtid;
+ /* offset number of old tuple on old page */
+ OffsetNumber oldOffnum;
+
xl_brin_insert insert;
} xl_brin_update;
#define SizeOfBrinUpdate (offsetof(xl_brin_update, insert) + SizeOfBrinInsert)
-/* This is what we need to know about a BRIN tuple samepage update */
+/*
+ * This is what we need to know about a BRIN tuple samepage update
+ *
+ * Backup block 0: updated page, with new BrinTuple as block data
+ */
typedef struct xl_brin_samepage_update
{
- RelFileNode node;
- ItemPointerData tid;
- /* tuple data follows at end of struct */
+ OffsetNumber offnum;
} xl_brin_samepage_update;
-#define SizeOfBrinSamepageUpdate (offsetof(xl_brin_samepage_update, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinSamepageUpdate (sizeof(OffsetNumber))
-/* This is what we need to know about a revmap extension */
+/*
+ * This is what we need to know about a revmap extension
+ *
+ * Backup block 0: metapage
+ * Backup block 1: new revmap page
+ */
typedef struct xl_brin_revmap_extend
{
- RelFileNode node;
+ /*
+ * XXX: This is actually redundant - the block number is stored as part of
+ * backup block 1.
+ */
BlockNumber targetBlk;
} xl_brin_revmap_extend;
@@ -102,8 +125,8 @@ typedef struct xl_brin_revmap_extend
sizeof(BlockNumber))
-extern void brin_desc(StringInfo buf, XLogRecord *record);
-extern void brin_redo(XLogRecPtr lsn, XLogRecord *record);
+extern void brin_redo(XLogReaderState *record);
+extern void brin_desc(StringInfo buf, XLogReaderState *record);
extern const char *brin_identify(uint8 info);
#endif /* BRIN_XLOG_H */
diff --git a/src/include/access/clog.h b/src/include/access/clog.h
index 04ac4ba3119..fe5e4c634d1 100644
--- a/src/include/access/clog.h
+++ b/src/include/access/clog.h
@@ -11,7 +11,7 @@
#ifndef CLOG_H
#define CLOG_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
/*
@@ -48,8 +48,8 @@ extern void TruncateCLOG(TransactionId oldestXact);
#define CLOG_ZEROPAGE 0x00
#define CLOG_TRUNCATE 0x10
-extern void clog_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void clog_desc(StringInfo buf, XLogRecord *record);
+extern void clog_redo(XLogReaderState *record);
+extern void clog_desc(StringInfo buf, XLogReaderState *record);
extern const char *clog_identify(uint8 info);
#endif /* CLOG_H */
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 433e56f20df..fe5f77b1736 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -10,7 +10,7 @@
#ifndef GIN_H
#define GIN_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/block.h"
#include "utils/relcache.h"
@@ -74,8 +74,8 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
extern void ginUpdateStats(Relation index, const GinStatsData *stats);
/* ginxlog.c */
-extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void gin_desc(StringInfo buf, XLogRecord *record);
+extern void gin_redo(XLogReaderState *record);
+extern void gin_desc(StringInfo buf, XLogReaderState *record);
extern const char *gin_identify(uint8 info);
extern void gin_xlog_startup(void);
extern void gin_xlog_cleanup(void);
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 333316d78e2..3d46f20bb83 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -13,7 +13,6 @@
#include "access/genam.h"
#include "access/gin.h"
#include "access/itup.h"
-#include "access/xloginsert.h"
#include "fmgr.h"
#include "storage/bufmgr.h"
#include "utils/rbtree.h"
@@ -397,22 +396,22 @@ typedef struct
typedef struct ginxlogCreatePostingTree
{
- RelFileNode node;
- BlockNumber blkno;
uint32 size;
/* A compressed posting list follows */
} ginxlogCreatePostingTree;
-#define XLOG_GIN_INSERT 0x20
-
/*
* The format of the insertion record varies depending on the page type.
* ginxlogInsert is the common part between all variants.
+ *
+ * Backup Blk 0: target page
+ * Backup Blk 1: left child, if this insertion finishes an incomplete split
*/
+
+#define XLOG_GIN_INSERT 0x20
+
typedef struct
{
- RelFileNode node;
- BlockNumber blkno;
uint16 flags; /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
/*
@@ -477,14 +476,17 @@ typedef struct
PostingItem newitem;
} ginxlogInsertDataInternal;
-
+/*
+ * Backup Blk 0: new left page (= original page, if not root split)
+ * Backup Blk 1: new right page
+ * Backup Blk 2: original page / new root page, if root split
+ * Backup Blk 3: left child, if this insertion completes an earlier split
+ */
#define XLOG_GIN_SPLIT 0x30
typedef struct ginxlogSplit
{
RelFileNode node;
- BlockNumber lblkno;
- BlockNumber rblkno;
BlockNumber rrlink; /* right link, or root's blocknumber if root
* split */
BlockNumber leftChildBlkno; /* valid on a non-leaf split */
@@ -538,15 +540,6 @@ typedef struct
*/
#define XLOG_GIN_VACUUM_PAGE 0x40
-typedef struct ginxlogVacuumPage
-{
- RelFileNode node;
- BlockNumber blkno;
- uint16 hole_offset; /* number of bytes before "hole" */
- uint16 hole_length; /* number of bytes in "hole" */
- /* entire page contents (minus the hole) follow at end of record */
-} ginxlogVacuumPage;
-
/*
* Vacuuming posting tree leaf page is WAL-logged like recompression caused
* by insertion.
@@ -555,26 +548,28 @@ typedef struct ginxlogVacuumPage
typedef struct ginxlogVacuumDataLeafPage
{
- RelFileNode node;
- BlockNumber blkno;
-
ginxlogRecompressDataLeaf data;
} ginxlogVacuumDataLeafPage;
+/*
+ * Backup Blk 0: deleted page
+ * Backup Blk 1: parent
+ * Backup Blk 2: left sibling
+ */
#define XLOG_GIN_DELETE_PAGE 0x50
typedef struct ginxlogDeletePage
{
- RelFileNode node;
- BlockNumber blkno;
- BlockNumber parentBlkno;
OffsetNumber parentOffset;
- BlockNumber leftBlkno;
BlockNumber rightLink;
} ginxlogDeletePage;
#define XLOG_GIN_UPDATE_META_PAGE 0x60
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1: tail page
+ */
typedef struct ginxlogUpdateMeta
{
RelFileNode node;
@@ -591,22 +586,29 @@ typedef struct ginxlogUpdateMeta
typedef struct ginxlogInsertListPage
{
- RelFileNode node;
- BlockNumber blkno;
BlockNumber rightlink;
int32 ntuples;
/* array of inserted tuples follows */
} ginxlogInsertListPage;
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1 to (ndeleted + 1): deleted pages
+ */
+
#define XLOG_GIN_DELETE_LISTPAGE 0x80
-#define GIN_NDELETE_AT_ONCE 16
+/*
+ * The WAL record for deleting list pages must contain a block reference to
+ * all the deleted pages, so the number of pages that can be deleted in one
+ * record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
+ * metapage.)
+ */
+#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
typedef struct ginxlogDeleteListPages
{
- RelFileNode node;
GinMetaPageData metadata;
int32 ndeleted;
- BlockNumber toDelete[GIN_NDELETE_AT_ONCE];
} ginxlogDeleteListPages;
@@ -673,7 +675,7 @@ typedef struct GinBtreeData
/* insert methods */
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
- GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, XLogRecData **, Page *, Page *);
+ GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, Page *, Page *);
void *(*prepareDownlink) (GinBtree, Buffer);
void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 21daf3b2b6a..2cbc918ad1a 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -16,7 +16,7 @@
#include "access/gist.h"
#include "access/itup.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "fmgr.h"
#include "storage/bufmgr.h"
#include "storage/buffile.h"
@@ -185,34 +185,33 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
#define XLOG_GIST_CREATE_INDEX 0x50
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
+/*
+ * Backup Blk 0: updated page.
+ * Backup Blk 1: If this operation completes a page split, by inserting a
+ * downlink for the split page, the left half of the split
+ */
typedef struct gistxlogPageUpdate
{
- RelFileNode node;
- BlockNumber blkno;
-
- /*
- * If this operation completes a page split, by inserting a downlink for
- * the split page, leftchild points to the left half of the split.
- */
- BlockNumber leftchild;
-
/* number of deleted offsets */
uint16 ntodelete;
+ uint16 ntoinsert;
/*
- * follow: 1. todelete OffsetNumbers 2. tuples to insert
+ * In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert
*/
} gistxlogPageUpdate;
+/*
+ * Backup Blk 0: If this operation completes a page split, by inserting a
+ * downlink for the split page, the left half of the split
+ * Backup Blk 1 - npage: split pages (1 is the original page)
+ */
typedef struct gistxlogPageSplit
{
- RelFileNode node;
- BlockNumber origblkno; /* splitted page */
BlockNumber origrlink; /* rightlink of the page before split */
GistNSN orignsn; /* NSN of the page before split */
bool origleaf; /* was splitted page a leaf page? */
- BlockNumber leftchild; /* like in gistxlogPageUpdate */
uint16 npage; /* # of pages in the split */
bool markfollowright; /* set F_FOLLOW_RIGHT flags */
@@ -451,8 +450,8 @@ extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
/* gistxlog.c */
-extern void gist_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void gist_desc(StringInfo buf, XLogRecord *record);
+extern void gist_redo(XLogReaderState *record);
+extern void gist_desc(StringInfo buf, XLogReaderState *record);
extern const char *gist_identify(uint8 info);
extern void gist_xlog_startup(void);
extern void gist_xlog_cleanup(void);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index c175a5c1822..afd06ff7def 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -20,7 +20,7 @@
#include "access/genam.h"
#include "access/itup.h"
#include "access/sdir.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
@@ -356,8 +356,8 @@ extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value);
/* hash.c */
-extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void hash_desc(StringInfo buf, XLogRecord *record);
+extern void hash_redo(XLogReaderState *record);
+extern void hash_desc(StringInfo buf, XLogReaderState *record);
extern const char *hash_identify(uint8 info);
#endif /* HASH_H */
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 1d64264b010..853e2dd491f 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -15,7 +15,7 @@
#define HEAPAM_XLOG_H
#include "access/htup.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
@@ -78,27 +78,11 @@
#define XLOG_HEAP_CONTAINS_OLD \
(XLOG_HEAP_CONTAINS_OLD_TUPLE | XLOG_HEAP_CONTAINS_OLD_KEY)
-/*
- * All what we need to find changed tuple
- *
- * NB: on most machines, sizeof(xl_heaptid) will include some trailing pad
- * bytes for alignment. We don't want to store the pad space in the XLOG,
- * so use SizeOfHeapTid for space calculations. Similar comments apply for
- * the other xl_FOO structs.
- */
-typedef struct xl_heaptid
-{
- RelFileNode node;
- ItemPointerData tid; /* changed tuple id */
-} xl_heaptid;
-
-#define SizeOfHeapTid (offsetof(xl_heaptid, tid) + SizeOfIptrData)
-
/* This is what we need to know about delete */
typedef struct xl_heap_delete
{
- xl_heaptid target; /* deleted tuple id */
TransactionId xmax; /* xmax of the deleted tuple */
+ OffsetNumber offnum; /* deleted tuple's offset */
uint8 infobits_set; /* infomask bits */
uint8 flags;
} xl_heap_delete;
@@ -122,45 +106,33 @@ typedef struct xl_heap_header
#define SizeOfHeapHeader (offsetof(xl_heap_header, t_hoff) + sizeof(uint8))
-/*
- * Variant of xl_heap_header that contains the length of the tuple, which is
- * useful if the length of the tuple cannot be computed using the overall
- * record length. E.g. because there are several tuples inside a single
- * record.
- */
-typedef struct xl_heap_header_len
-{
- uint16 t_len;
- xl_heap_header header;
-} xl_heap_header_len;
-
-#define SizeOfHeapHeaderLen (offsetof(xl_heap_header_len, header) + SizeOfHeapHeader)
-
/* This is what we need to know about insert */
typedef struct xl_heap_insert
{
- xl_heaptid target; /* inserted tuple id */
+ OffsetNumber offnum; /* inserted tuple's offset */
uint8 flags;
- /* xl_heap_header & TUPLE DATA FOLLOWS AT END OF STRUCT */
+
+ /* xl_heap_header & TUPLE DATA in backup block 0 */
} xl_heap_insert;
#define SizeOfHeapInsert (offsetof(xl_heap_insert, flags) + sizeof(uint8))
/*
- * This is what we need to know about a multi-insert. The record consists of
- * xl_heap_multi_insert header, followed by a xl_multi_insert_tuple and tuple
- * data for each tuple. 'offsets' array is omitted if the whole page is
- * reinitialized (XLOG_HEAP_INIT_PAGE)
+ * This is what we need to know about a multi-insert.
+ *
+ * The main data of the record consists of this xl_heap_multi_insert header.
+ * 'offsets' array is omitted if the whole page is reinitialized
+ * (XLOG_HEAP_INIT_PAGE).
+ *
+ * In block 0's data portion, there is an xl_multi_insert_tuple struct,
+ * followed by the tuple data for each tuple. There is padding to align
+ * each xl_multi_insert struct.
*/
typedef struct xl_heap_multi_insert
{
- RelFileNode node;
- BlockNumber blkno;
uint8 flags;
uint16 ntuples;
OffsetNumber offsets[1];
-
- /* TUPLE DATA (xl_multi_insert_tuples) FOLLOW AT END OF STRUCT */
} xl_heap_multi_insert;
#define SizeOfHeapMultiInsert offsetof(xl_heap_multi_insert, offsets)
@@ -176,34 +148,39 @@ typedef struct xl_multi_insert_tuple
#define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))
-/* This is what we need to know about update|hot_update */
+/*
+ * This is what we need to know about update|hot_update
+ *
+ * Backup blk 0: new page
+ *
+ * If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are set,
+ * the prefix and/or suffix come first, as one or two uint16s.
+ *
+ * After that, xl_heap_header and new tuple data follow. The new tuple
+ * data doesn't include the prefix and suffix, which are copied from the
+ * old tuple on replay.
+ *
+ * If HEAP_CONTAINS_NEW_TUPLE_DATA flag is given, the tuple data is
+ * included even if a full-page image was taken.
+ *
+ * Backup blk 1: old page, if different. (no data, just a reference to the blk)
+ */
typedef struct xl_heap_update
{
- xl_heaptid target; /* deleted tuple id */
TransactionId old_xmax; /* xmax of the old tuple */
- TransactionId new_xmax; /* xmax of the new tuple */
- ItemPointerData newtid; /* new inserted tuple id */
+ OffsetNumber old_offnum; /* old tuple's offset */
uint8 old_infobits_set; /* infomask bits to set on old tuple */
uint8 flags;
+ TransactionId new_xmax; /* xmax of the new tuple */
+ OffsetNumber new_offnum; /* new tuple's offset */
/*
- * If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are
- * set, the prefix and/or suffix come next, as one or two uint16s.
- *
- * After that, xl_heap_header_len and new tuple data follow. The new
- * tuple data and length don't include the prefix and suffix, which are
- * copied from the old tuple on replay. The new tuple data is omitted if
- * a full-page image of the page was taken (unless the
- * XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set, in which case it's included
- * anyway).
- *
* If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are
- * set, another xl_heap_header_len struct and tuple data for the old tuple
- * follows.
+ * set, a xl_heap_header struct and tuple data for the old tuple follows.
*/
} xl_heap_update;
-#define SizeOfHeapUpdate (offsetof(xl_heap_update, flags) + sizeof(uint8))
+#define SizeOfHeapUpdate (offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber))
/*
* This is what we need to know about vacuum page cleanup/redirect
@@ -218,12 +195,10 @@ typedef struct xl_heap_update
*/
typedef struct xl_heap_clean
{
- RelFileNode node;
- BlockNumber block;
TransactionId latestRemovedXid;
uint16 nredirected;
uint16 ndead;
- /* OFFSET NUMBERS FOLLOW */
+ /* OFFSET NUMBERS are in the block reference 0 */
} xl_heap_clean;
#define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
@@ -251,8 +226,8 @@ typedef struct xl_heap_cleanup_info
/* This is what we need to know about lock */
typedef struct xl_heap_lock
{
- xl_heaptid target; /* locked tuple id */
TransactionId locking_xid; /* might be a MultiXactId not xid */
+ OffsetNumber offnum; /* locked tuple's offset on page */
int8 infobits_set; /* infomask and infomask2 bits to set */
} xl_heap_lock;
@@ -261,8 +236,8 @@ typedef struct xl_heap_lock
/* This is what we need to know about locking an updated version of a row */
typedef struct xl_heap_lock_updated
{
- xl_heaptid target;
TransactionId xmax;
+ OffsetNumber offnum;
uint8 infobits_set;
} xl_heap_lock_updated;
@@ -271,11 +246,11 @@ typedef struct xl_heap_lock_updated
/* This is what we need to know about in-place update */
typedef struct xl_heap_inplace
{
- xl_heaptid target; /* updated tuple id */
+ OffsetNumber offnum; /* updated tuple's offset on page */
/* TUPLE DATA FOLLOWS AT END OF STRUCT */
} xl_heap_inplace;
-#define SizeOfHeapInplace (offsetof(xl_heap_inplace, target) + SizeOfHeapTid)
+#define SizeOfHeapInplace (offsetof(xl_heap_inplace, offnum) + sizeof(OffsetNumber))
/*
* This struct represents a 'freeze plan', which is what we need to know about
@@ -296,23 +271,26 @@ typedef struct xl_heap_freeze_tuple
/*
* This is what we need to know about a block being frozen during vacuum
+ *
+ * Backup block 0's data contains an array of xl_heap_freeze_tuple structs,
+ * one for each tuple.
*/
typedef struct xl_heap_freeze_page
{
- RelFileNode node;
- BlockNumber block;
TransactionId cutoff_xid;
uint16 ntuples;
- xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER];
} xl_heap_freeze_page;
-#define SizeOfHeapFreezePage offsetof(xl_heap_freeze_page, tuples)
+#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, ntuples) + sizeof(uint16))
-/* This is what we need to know about setting a visibility map bit */
+/*
+ * This is what we need to know about setting a visibility map bit
+ *
+ * Backup blk 0: visibility map buffer
+ * Backup blk 1: heap buffer
+ */
typedef struct xl_heap_visible
{
- RelFileNode node;
- BlockNumber block;
TransactionId cutoff_xid;
} xl_heap_visible;
@@ -338,10 +316,11 @@ typedef struct xl_heap_new_cid
/*
* Store the relfilenode/ctid pair to facilitate lookups.
*/
- xl_heaptid target;
+ RelFileNode target_node;
+ ItemPointerData target_tid;
} xl_heap_new_cid;
-#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target) + SizeOfHeapTid)
+#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target_tid) + sizeof(ItemPointerData))
/* logical rewrite xlog record header */
typedef struct xl_heap_rewrite_mapping
@@ -357,13 +336,13 @@ typedef struct xl_heap_rewrite_mapping
extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
TransactionId *latestRemovedXid);
-extern void heap_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void heap_desc(StringInfo buf, XLogRecord *record);
+extern void heap_redo(XLogReaderState *record);
+extern void heap_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap_identify(uint8 info);
-extern void heap2_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void heap2_desc(StringInfo buf, XLogRecord *record);
+extern void heap2_redo(XLogReaderState *record);
+extern void heap2_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap2_identify(uint8 info);
-extern void heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r);
+extern void heap_xlog_logical_rewrite(XLogReaderState *r);
extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode,
TransactionId latestRemovedXid);
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index 294d21bd180..300c2a52f02 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -498,6 +498,7 @@ do { \
* you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page.
*/
#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)))
+#define MinHeapTupleSize MAXALIGN(offsetof(HeapTupleHeaderData, t_bits))
/*
* MaxHeapTuplesPerPage is an upper bound on the number of tuples that can
diff --git a/src/include/access/itup.h b/src/include/access/itup.h
index de17936b106..e4dc51e8720 100644
--- a/src/include/access/itup.h
+++ b/src/include/access/itup.h
@@ -133,6 +133,7 @@ typedef IndexAttributeBitMapData *IndexAttributeBitMap;
* IndexTupleData struct. We arrive at the divisor because each tuple
* must be maxaligned, and it must have an associated item pointer.
*/
+#define MinIndexTupleSize MAXALIGN(sizeof(IndexTupleData) + 1)
#define MaxIndexTuplesPerPage \
((int) ((BLCKSZ - SizeOfPageHeaderData) / \
(MAXALIGN(sizeof(IndexTupleData) + 1) + sizeof(ItemIdData))))
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 43d737505d2..ac58a3766d5 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -11,7 +11,7 @@
#ifndef MULTIXACT_H
#define MULTIXACT_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
@@ -135,8 +135,8 @@ extern void multixact_twophase_postcommit(TransactionId xid, uint16 info,
extern void multixact_twophase_postabort(TransactionId xid, uint16 info,
void *recdata, uint32 len);
-extern void multixact_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void multixact_desc(StringInfo buf, XLogRecord *record);
+extern void multixact_redo(XLogReaderState *record);
+extern void multixact_desc(StringInfo buf, XLogReaderState *record);
extern const char *multixact_identify(uint8 info);
extern char *mxid_to_string(MultiXactId multi, int nmembers,
MultiXactMember *members);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 6ecd2ced62d..d3d258bcc9f 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -17,7 +17,7 @@
#include "access/genam.h"
#include "access/itup.h"
#include "access/sdir.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "catalog/pg_index.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
@@ -228,15 +228,6 @@ typedef struct BTMetaPageData
* FSM */
/*
- * All that we need to find changed index tuple
- */
-typedef struct xl_btreetid
-{
- RelFileNode node;
- ItemPointerData tid; /* changed tuple id */
-} xl_btreetid;
-
-/*
* All that we need to regenerate the meta-data page
*/
typedef struct xl_btree_metadata
@@ -252,16 +243,17 @@ typedef struct xl_btree_metadata
*
* This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
* Note that INSERT_META implies it's not a leaf page.
+ *
+ * Backup Blk 0: original page (data contains the inserted tuple)
+ * Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
+ * Backup Blk 2: xl_btree_metadata, if INSERT_META
*/
typedef struct xl_btree_insert
{
- xl_btreetid target; /* inserted tuple id */
- /* BlockNumber finishes_split field FOLLOWS IF NOT XLOG_BTREE_INSERT_LEAF */
- /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */
- /* INDEX TUPLE FOLLOWS AT END OF STRUCT */
+ OffsetNumber offnum;
} xl_btree_insert;
-#define SizeOfBtreeInsert (offsetof(xl_btreetid, tid) + SizeOfIptrData)
+#define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
/*
* On insert with split, we save all the items going into the right sibling
@@ -278,45 +270,41 @@ typedef struct xl_btree_insert
* the root page, and thus that a newroot record rather than an insert or
* split record should follow. Note that a split record never carries a
* metapage update --- we'll do that in the parent-level update.
+ *
+ * Backup Blk 0: original page / new left page
+ *
+ * The left page's data portion contains the new item, if it's the _L variant.
+ * (In the _R variants, the new item is one of the right page's tuples.)
+ * If level > 0, an IndexTuple representing the HIKEY of the left page
+ * follows. We don't need this on leaf pages, because it's the same as the
+ * leftmost key in the new right page.
+ *
+ * Backup Blk 1: new right page
+ *
+ * The right page's data portion contains the right page's tuples in the
+ * form used by _bt_restore_page.
+ *
+ * Backup Blk 2: next block (orig page's rightlink), if any
+ * Backup Blk 3: child's left sibling, if non-leaf split
*/
typedef struct xl_btree_split
{
- RelFileNode node;
- BlockNumber leftsib; /* orig page / new left page */
- BlockNumber rightsib; /* new right page */
- BlockNumber rnext; /* next block (orig page's rightlink) */
uint32 level; /* tree level of page being split */
OffsetNumber firstright; /* first item moved to right page */
-
- /*
- * In the _L variants, next are OffsetNumber newitemoff and the new item.
- * (In the _R variants, the new item is one of the right page's tuples.)
- * The new item, but not newitemoff, is suppressed if XLogInsert chooses
- * to store the left page's whole page image.
- *
- * If level > 0, an IndexTuple representing the HIKEY of the left page
- * follows. We don't need this on leaf pages, because it's the same as
- * the leftmost key in the new right page. Also, it's suppressed if
- * XLogInsert chooses to store the left page's whole page image.
- *
- * If level > 0, BlockNumber of the page whose incomplete-split flag this
- * insertion clears. (not aligned)
- *
- * Last are the right page's tuples in the form used by _bt_restore_page.
- */
+ OffsetNumber newitemoff; /* new item's offset (if placed on left page) */
} xl_btree_split;
-#define SizeOfBtreeSplit (offsetof(xl_btree_split, firstright) + sizeof(OffsetNumber))
+#define SizeOfBtreeSplit (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
/*
* This is what we need to know about delete of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a
* single index page when *not* executed by VACUUM.
+ *
+ * Backup Blk 0: index page
*/
typedef struct xl_btree_delete
{
- RelFileNode node; /* RelFileNode of the index */
- BlockNumber block;
RelFileNode hnode; /* RelFileNode of the heap the index currently
* points at */
int nitems;
@@ -361,8 +349,6 @@ typedef struct xl_btree_reuse_page
*/
typedef struct xl_btree_vacuum
{
- RelFileNode node;
- BlockNumber block;
BlockNumber lastBlockVacuumed;
/* TARGET OFFSET NUMBERS FOLLOW */
@@ -376,10 +362,13 @@ typedef struct xl_btree_vacuum
* remove this tuple's downlink and the *following* tuple's key). Note that
* the leaf page is empty, so we don't need to store its content --- it is
* just reinitialized during recovery using the rest of the fields.
+ *
+ * Backup Blk 0: leaf block
+ * Backup Blk 1: top parent
*/
typedef struct xl_btree_mark_page_halfdead
{
- xl_btreetid target; /* deleted tuple id in parent page */
+ OffsetNumber poffset; /* deleted tuple id in parent page */
/* information needed to recreate the leaf page: */
BlockNumber leafblk; /* leaf block ultimately being deleted */
@@ -394,11 +383,15 @@ typedef struct xl_btree_mark_page_halfdead
* This is what we need to know about deletion of a btree page. Note we do
* not store any content for the deleted page --- it is just rewritten as empty
* during recovery, apart from resetting the btpo.xact.
+ *
+ * Backup Blk 0: target block being deleted
+ * Backup Blk 1: target block's left sibling, if any
+ * Backup Blk 2: target block's right sibling
+ * Backup Blk 3: leaf block (if different from target)
+ * Backup Blk 4: metapage (if rightsib becomes new fast root)
*/
typedef struct xl_btree_unlink_page
{
- RelFileNode node;
- BlockNumber deadblk; /* target block being deleted */
BlockNumber leftsib; /* target block's left sibling, if any */
BlockNumber rightsib; /* target block's right sibling */
@@ -406,7 +399,6 @@ typedef struct xl_btree_unlink_page
* Information needed to recreate the leaf page, when target is an
* internal page.
*/
- BlockNumber leafblk;
BlockNumber leafleftsib;
BlockNumber leafrightsib;
BlockNumber topparent; /* next child down in the branch */
@@ -423,13 +415,15 @@ typedef struct xl_btree_unlink_page
*
* Note that although this implies rewriting the metadata page, we don't need
* an xl_btree_metadata record --- the rootblk and level are sufficient.
+ *
+ * Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
+ * Backup Blk 1: left child (if splitting an old root)
+ * Backup Blk 2: metapage
*/
typedef struct xl_btree_newroot
{
- RelFileNode node;
- BlockNumber rootblk; /* location of new root */
+ BlockNumber rootblk; /* location of new root (redundant with blk 0) */
uint32 level; /* its tree level */
- /* 0 or 2 INDEX TUPLES FOLLOW AT END OF STRUCT */
} xl_btree_newroot;
#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32))
@@ -726,8 +720,8 @@ extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
/*
* prototypes for functions in nbtxlog.c
*/
-extern void btree_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void btree_desc(StringInfo buf, XLogRecord *record);
+extern void btree_redo(XLogReaderState *record);
+extern void btree_desc(StringInfo buf, XLogReaderState *record);
extern const char *btree_identify(uint8 info);
#endif /* NBTREE_H */
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index ccf1ed77869..3aa96bde86f 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -15,7 +15,7 @@
#define SPGIST_H
#include "access/skey.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
@@ -197,8 +197,8 @@ extern Datum spgbulkdelete(PG_FUNCTION_ARGS);
extern Datum spgvacuumcleanup(PG_FUNCTION_ARGS);
/* spgxlog.c */
-extern void spg_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void spg_desc(StringInfo buf, XLogRecord *record);
+extern void spg_redo(XLogReaderState *record);
+extern void spg_desc(StringInfo buf, XLogReaderState *record);
extern const char *spg_identify(uint8 info);
extern void spg_xlog_startup(void);
extern void spg_xlog_cleanup(void);
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h
index 3330644651c..4b6fdee8017 100644
--- a/src/include/access/spgist_private.h
+++ b/src/include/access/spgist_private.h
@@ -18,7 +18,6 @@
#include "access/spgist.h"
#include "nodes/tidbitmap.h"
#include "storage/buf.h"
-#include "storage/relfilenode.h"
#include "utils/relcache.h"
@@ -351,35 +350,8 @@ typedef SpGistDeadTupleData *SpGistDeadTuple;
/*
* XLOG stuff
- *
- * ACCEPT_RDATA_* can only use fixed-length rdata arrays, because of lengthof
*/
-#define ACCEPT_RDATA_DATA(p, s, i) \
- do { \
- Assert((i) < lengthof(rdata)); \
- rdata[i].data = (char *) (p); \
- rdata[i].len = (s); \
- rdata[i].buffer = InvalidBuffer; \
- rdata[i].buffer_std = true; \
- rdata[i].next = NULL; \
- if ((i) > 0) \
- rdata[(i) - 1].next = rdata + (i); \
- } while(0)
-
-#define ACCEPT_RDATA_BUFFER(b, i) \
- do { \
- Assert((i) < lengthof(rdata)); \
- rdata[i].data = NULL; \
- rdata[i].len = 0; \
- rdata[i].buffer = (b); \
- rdata[i].buffer_std = true; \
- rdata[i].next = NULL; \
- if ((i) > 0) \
- rdata[(i) - 1].next = rdata + (i); \
- } while(0)
-
-
/* XLOG record types for SPGiST */
#define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10
@@ -408,36 +380,36 @@ typedef struct spgxlogState
(d).isBuild = (s)->isBuild; \
} while(0)
-
+/*
+ * Backup Blk 0: destination page for leaf tuple
+ * Backup Blk 1: parent page (if any)
+ */
typedef struct spgxlogAddLeaf
{
- RelFileNode node;
-
- BlockNumber blknoLeaf; /* destination page for leaf tuple */
bool newPage; /* init dest page? */
bool storesNulls; /* page is in the nulls tree? */
OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */
OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */
- BlockNumber blknoParent; /* where the parent downlink is, if any */
- OffsetNumber offnumParent;
+ OffsetNumber offnumParent; /* where the parent downlink is, if any */
uint16 nodeI;
/* new leaf tuple follows (unaligned!) */
} spgxlogAddLeaf;
+/*
+ * Backup Blk 0: source leaf page
+ * Backup Blk 1: destination leaf page
+ * Backup Blk 2: parent page
+ */
typedef struct spgxlogMoveLeafs
{
- RelFileNode node;
-
- BlockNumber blknoSrc; /* source leaf page */
- BlockNumber blknoDst; /* destination leaf page */
uint16 nMoves; /* number of tuples moved from source page */
bool newPage; /* init dest page? */
bool replaceDead; /* are we replacing a DEAD source tuple? */
bool storesNulls; /* pages are in the nulls tree? */
- BlockNumber blknoParent; /* where the parent downlink is */
+ /* where the parent downlink is */
OffsetNumber offnumParent;
uint16 nodeI;
@@ -452,11 +424,6 @@ typedef struct spgxlogMoveLeafs
* Note: if replaceDead is true then there is only one inserted tuple
* number and only one leaf tuple in the data, because we are not copying
* the dead tuple from the source
- *
- * Buffer references in the rdata array are:
- * Src page
- * Dest page
- * Parent page
*----------
*/
OffsetNumber offsets[1];
@@ -464,21 +431,43 @@ typedef struct spgxlogMoveLeafs
#define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets)
+/*
+ * Backup Blk 0: original page
+ * Backup Blk 1: where new tuple goes, if not same place
+ * Backup Blk 2: where parent downlink is, if updated and different from
+ * the old and new
+ */
typedef struct spgxlogAddNode
{
- RelFileNode node;
-
- BlockNumber blkno; /* block number of original inner tuple */
- OffsetNumber offnum; /* offset of original inner tuple */
-
- BlockNumber blknoParent; /* where parent downlink is, if updated */
- OffsetNumber offnumParent;
- uint16 nodeI;
+ /*
+ * Offset of the original inner tuple, in the original page (on backup
+ * block 0).
+ */
+ OffsetNumber offnum;
- BlockNumber blknoNew; /* where new tuple goes, if not same place */
+ /*
+ * Offset of the new tuple, on the new page (on backup block 1). Invalid,
+ * if we overwrote the old tuple in the original page).
+ */
OffsetNumber offnumNew;
bool newPage; /* init new page? */
+ /*----
+ * Where is the parent downlink? parentBlk indicates which page it's on,
+ * and offnumParent is the offset within the page. The possible values for
+ * parentBlk are:
+ *
+ * 0: parent == original page
+ * 1: parent == new page
+ * 2: parent == different page (blk ref 2)
+ * -1: parent not updated
+ *----
+ */
+ char parentBlk;
+ OffsetNumber offnumParent; /* offset within the parent page */
+
+ uint16 nodeI;
+
spgxlogState stateSrc;
/*
@@ -486,41 +475,51 @@ typedef struct spgxlogAddNode
*/
} spgxlogAddNode;
+/*
+ * Backup Blk 0: where the prefix tuple goes
+ * Backup Blk 1: where the postfix tuple goes (if different page)
+ */
typedef struct spgxlogSplitTuple
{
- RelFileNode node;
-
- BlockNumber blknoPrefix; /* where the prefix tuple goes */
+ /* where the prefix tuple goes */
OffsetNumber offnumPrefix;
- BlockNumber blknoPostfix; /* where the postfix tuple goes */
+ /* where the postfix tuple goes */
OffsetNumber offnumPostfix;
bool newPage; /* need to init that page? */
+ bool postfixBlkSame; /* was postfix tuple put on same page as
+ * prefix? */
/*
- * new prefix inner tuple follows, then new postfix inner tuple
- * (both are unaligned!)
+ * new prefix inner tuple follows, then new postfix inner tuple (both are
+ * unaligned!)
*/
} spgxlogSplitTuple;
+/*
+ * Buffer references in the rdata array are:
+ * Backup Blk 0: Src page (only if not root)
+ * Backup Blk 1: Dest page (if used)
+ * Backup Blk 2: Inner page
+ * Backup Blk 3: Parent page (if any, and different from Inner)
+ */
typedef struct spgxlogPickSplit
{
- RelFileNode node;
+ bool isRootSplit;
- BlockNumber blknoSrc; /* original leaf page */
- BlockNumber blknoDest; /* other leaf page, if any */
uint16 nDelete; /* n to delete from Src */
uint16 nInsert; /* n to insert on Src and/or Dest */
bool initSrc; /* re-init the Src page? */
bool initDest; /* re-init the Dest page? */
- BlockNumber blknoInner; /* where to put new inner tuple */
+ /* where to put new inner tuple */
OffsetNumber offnumInner;
bool initInner; /* re-init the Inner page? */
bool storesNulls; /* pages are in the nulls tree? */
- BlockNumber blknoParent; /* where the parent downlink is, if any */
+ /* where the parent downlink is, if any */
+ bool innerIsParent; /* is parent the same as inner page? */
OffsetNumber offnumParent;
uint16 nodeI;
@@ -533,24 +532,15 @@ typedef struct spgxlogPickSplit
* array of page selector bytes for inserted tuples, length nInsert
* new inner tuple (unaligned!)
* list of leaf tuples, length nInsert (unaligned!)
- *
- * Buffer references in the rdata array are:
- * Src page (only if not root and not being init'd)
- * Dest page (if used and not being init'd)
- * Inner page (only if not being init'd)
- * Parent page (if any; could be same as Inner)
*----------
*/
- OffsetNumber offsets[1];
+ OffsetNumber offsets[1];
} spgxlogPickSplit;
#define SizeOfSpgxlogPickSplit offsetof(spgxlogPickSplit, offsets)
typedef struct spgxlogVacuumLeaf
{
- RelFileNode node;
-
- BlockNumber blkno; /* block number to clean */
uint16 nDead; /* number of tuples to become DEAD */
uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */
uint16 nMove; /* number of tuples to move */
@@ -576,9 +566,6 @@ typedef struct spgxlogVacuumLeaf
typedef struct spgxlogVacuumRoot
{
/* vacuum a root page when it is also a leaf */
- RelFileNode node;
-
- BlockNumber blkno; /* block number to clean */
uint16 nDelete; /* number of tuples to delete */
spgxlogState stateSrc;
@@ -591,9 +578,6 @@ typedef struct spgxlogVacuumRoot
typedef struct spgxlogVacuumRedirect
{
- RelFileNode node;
-
- BlockNumber blkno; /* block number to clean */
uint16 nToPlaceholder; /* number of redirects to make placeholders */
OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */
TransactionId newestRedirectXid; /* newest XID of removed redirects */
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 11a51b26859..b018aa4f5d8 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -14,7 +14,7 @@
#ifndef XACT_H
#define XACT_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
#include "storage/relfilenode.h"
@@ -256,8 +256,8 @@ extern void UnregisterSubXactCallback(SubXactCallback callback, void *arg);
extern int xactGetCommittedChildren(TransactionId **ptr);
-extern void xact_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void xact_desc(StringInfo buf, XLogRecord *record);
+extern void xact_redo(XLogReaderState *record);
+extern void xact_desc(StringInfo buf, XLogReaderState *record);
extern const char *xact_identify(uint8 info);
#endif /* XACT_H */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 6f8b5f46e10..d06fbc0ec1e 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -14,7 +14,7 @@
#include "access/rmgr.h"
#include "access/xlogdefs.h"
#include "access/xloginsert.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
@@ -186,7 +186,9 @@ typedef struct CheckpointStatsData
extern CheckpointStatsData CheckpointStats;
-extern XLogRecPtr XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn);
+struct XLogRecData;
+
+extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn);
extern void XLogFlush(XLogRecPtr RecPtr);
extern bool XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
@@ -198,8 +200,8 @@ extern XLogSegNo XLogGetLastRemovedSegno(void);
extern void XLogSetAsyncXactLSN(XLogRecPtr record);
extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn);
-extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void xlog_desc(StringInfo buf, XLogRecord *record);
+extern void xlog_redo(XLogReaderState *record);
+extern void xlog_desc(StringInfo buf, XLogReaderState *record);
extern const char *xlog_identify(uint8 info);
extern void issue_xlog_fsync(int fd, XLogSegNo segno);
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 19b2ef8d90d..423ef4d7fa0 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -20,7 +20,7 @@
#define XLOG_INTERNAL_H
#include "access/xlogdefs.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
#include "pgtime.h"
@@ -31,7 +31,7 @@
/*
* Each page of XLOG file has a header like this:
*/
-#define XLOG_PAGE_MAGIC 0xD080 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD081 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{
@@ -204,6 +204,17 @@ typedef struct xl_end_of_recovery
} xl_end_of_recovery;
/*
+ * The functions in xloginsert.c construct a chain of XLogRecData structs
+ * to represent the final WAL record.
+ */
+typedef struct XLogRecData
+{
+ struct XLogRecData *next; /* next struct in chain, or NULL */
+ char *data; /* start of rmgr data to include */
+ uint32 len; /* length of rmgr data to include */
+} XLogRecData;
+
+/*
* Method table for resource managers.
*
* This struct must be kept in sync with the PG_RMGR definition in
@@ -219,8 +230,8 @@ typedef struct xl_end_of_recovery
typedef struct RmgrData
{
const char *rm_name;
- void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr);
- void (*rm_desc) (StringInfo buf, XLogRecord *rptr);
+ void (*rm_redo) (XLogReaderState *record);
+ void (*rm_desc) (StringInfo buf, XLogReaderState *record);
const char *(*rm_identify) (uint8 info);
void (*rm_startup) (void);
void (*rm_cleanup) (void);
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index 30c2e84cbc9..e5ab71e2305 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -18,49 +18,43 @@
#include "storage/relfilenode.h"
/*
- * The rmgr data to be written by XLogInsert() is defined by a chain of
- * one or more XLogRecData structs. (Multiple structs would be used when
- * parts of the source data aren't physically adjacent in memory, or when
- * multiple associated buffers need to be specified.)
- *
- * If buffer is valid then XLOG will check if buffer must be backed up
- * (ie, whether this is first change of that page since last checkpoint).
- * If so, the whole page contents are attached to the XLOG record, and XLOG
- * sets XLR_BKP_BLOCK(N) bit in xl_info. Note that the buffer must be pinned
- * and exclusive-locked by the caller, so that it won't change under us.
- * NB: when the buffer is backed up, we DO NOT insert the data pointed to by
- * this XLogRecData struct into the XLOG record, since we assume it's present
- * in the buffer. Therefore, rmgr redo routines MUST pay attention to
- * XLR_BKP_BLOCK(N) to know what is actually stored in the XLOG record.
- * The N'th XLR_BKP_BLOCK bit corresponds to the N'th distinct buffer
- * value (ignoring InvalidBuffer) appearing in the rdata chain.
- *
- * When buffer is valid, caller must set buffer_std to indicate whether the
- * page uses standard pd_lower/pd_upper header fields. If this is true, then
- * XLOG is allowed to omit the free space between pd_lower and pd_upper from
- * the backed-up page image. Note that even when buffer_std is false, the
- * page MUST have an LSN field as its first eight bytes!
- *
- * Note: data can be NULL to indicate no rmgr data associated with this chain
- * entry. This can be sensible (ie, not a wasted entry) if buffer is valid.
- * The implication is that the buffer has been changed by the operation being
- * logged, and so may need to be backed up, but the change can be redone using
- * only information already present elsewhere in the XLOG entry.
+ * The minimum size of the WAL construction working area. If you need to
+ * register more than XLR_NORMAL_MAX_BLOCK_ID block references or have more
+ * than XLR_NORMAL_RDATAS data chunks in a single WAL record, you must call
+ * XLogEnsureRecordSpace() first to allocate more working memory.
*/
-typedef struct XLogRecData
-{
- char *data; /* start of rmgr data to include */
- uint32 len; /* length of rmgr data to include */
- Buffer buffer; /* buffer associated with data, if any */
- bool buffer_std; /* buffer has standard pd_lower/pd_upper */
- struct XLogRecData *next; /* next struct in chain, or NULL */
-} XLogRecData;
+#define XLR_NORMAL_MAX_BLOCK_ID 4
+#define XLR_NORMAL_RDATAS 20
+
+/* flags for XLogRegisterBuffer */
+#define REGBUF_FORCE_IMAGE 0x01 /* force a full-page image */
+#define REGBUF_NO_IMAGE 0x02 /* don't take a full-page image */
+#define REGBUF_WILL_INIT (0x04 | 0x02) /* page will be re-initialized at
+ * replay (implies NO_IMAGE) */
+#define REGBUF_STANDARD 0x08 /* page follows "standard" page layout,
+ * (data between pd_lower and pd_upper
+ * will be skipped) */
+#define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image
+ * is taken */
+
+/* prototypes for public functions in xloginsert.c: */
+extern void XLogBeginInsert(void);
+extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
+extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
+extern void XLogRegisterData(char *data, int len);
+extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
+extern void XLogRegisterBlock(uint8 block_id, RelFileNode *rnode,
+ ForkNumber forknum, BlockNumber blknum, char *page,
+ uint8 flags);
+extern void XLogRegisterBufData(uint8 block_id, char *data, int len);
+extern void XLogResetInsertion(void);
+extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
-extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
BlockNumber blk, char *page, bool page_std);
extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
-extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
+
+extern void InitXLogInsert(void);
#endif /* XLOGINSERT_H */
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index ea873a2d9c7..eb6cc8996a5 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -14,12 +14,18 @@
*
* The basic idea is to allocate an XLogReaderState via
* XLogReaderAllocate(), and call XLogReadRecord() until it returns NULL.
+ *
+ * After reading a record with XLogReadRecord(), it's decomposed into
+ * the per-block and main data parts, and the parts can be accessed
+ * with the XLogRec* macros and functions. You can also decode a
+ * record that's already constructed in memory, without reading from
+ * disk, by calling the DecodeXLogRecord() function.
*-------------------------------------------------------------------------
*/
#ifndef XLOGREADER_H
#define XLOGREADER_H
-#include "access/xlog_internal.h"
+#include "access/xlogrecord.h"
typedef struct XLogReaderState XLogReaderState;
@@ -31,6 +37,32 @@ typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
char *readBuf,
TimeLineID *pageTLI);
+typedef struct
+{
+ /* Is this block ref in use? */
+ bool in_use;
+
+ /* Identify the block this refers to */
+ RelFileNode rnode;
+ ForkNumber forknum;
+ BlockNumber blkno;
+
+ /* copy of the fork_flags field from the XLogRecordBlockHeader */
+ uint8 flags;
+
+ /* Information on full-page image, if any */
+ bool has_image;
+ char *bkp_image;
+ uint16 hole_offset;
+ uint16 hole_length;
+
+ /* Buffer holding the rmgr-specific data associated with this block */
+ bool has_data;
+ char *data;
+ uint16 data_len;
+ uint16 data_bufsz;
+} DecodedBkpBlock;
+
struct XLogReaderState
{
/* ----------------------------------------
@@ -79,6 +111,25 @@ struct XLogReaderState
XLogRecPtr ReadRecPtr; /* start of last record read */
XLogRecPtr EndRecPtr; /* end+1 of last record read */
+
+ /* ----------------------------------------
+ * Decoded representation of current record
+ *
+ * Use XLogRecGet* functions to investigate the record; these fields
+ * should not be accessed directly.
+ * ----------------------------------------
+ */
+ XLogRecord *decoded_record; /* currently decoded record */
+
+ char *main_data; /* record's main data portion */
+ uint32 main_data_len; /* main data portion's length */
+ uint32 main_data_bufsz; /* allocated size of the buffer */
+
+ /* information about blocks referenced by the record. */
+ DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID + 1];
+
+ int max_block_id; /* highest block_id in use (-1 if none) */
+
/* ----------------------------------------
* private/internal state
* ----------------------------------------
@@ -123,4 +174,28 @@ extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
#endif /* FRONTEND */
+/* Functions for decoding an XLogRecord */
+
+extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record,
+ char **errmsg);
+
+#define XLogRecGetTotalLen(decoder) ((decoder)->decoded_record->xl_tot_len)
+#define XLogRecGetPrev(decoder) ((decoder)->decoded_record->xl_prev)
+#define XLogRecGetInfo(decoder) ((decoder)->decoded_record->xl_info)
+#define XLogRecGetRmid(decoder) ((decoder)->decoded_record->xl_rmid)
+#define XLogRecGetXid(decoder) ((decoder)->decoded_record->xl_xid)
+#define XLogRecGetData(decoder) ((decoder)->main_data)
+#define XLogRecGetDataLen(decoder) ((decoder)->main_data_len)
+#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->max_block_id >= 0)
+#define XLogRecHasBlockRef(decoder, block_id) \
+ ((decoder)->blocks[block_id].in_use)
+#define XLogRecHasBlockImage(decoder, block_id) \
+ ((decoder)->blocks[block_id].has_image)
+
+extern bool RestoreBlockImage(XLogReaderState *recoder, uint8 block_id, char *dst);
+extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
+extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
+ RelFileNode *rnode, ForkNumber *forknum,
+ BlockNumber *blknum);
+
#endif /* XLOGREADER_H */
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index ab0fb1c5004..11ddfac9c7f 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -20,81 +20,161 @@
/*
* The overall layout of an XLOG record is:
* Fixed-size header (XLogRecord struct)
- * rmgr-specific data
- * BkpBlock
- * backup block data
- * BkpBlock
- * backup block data
+ * XLogRecordBlockHeader struct
+ * XLogRecordBlockHeader struct
* ...
+ * XLogRecordDataHeader[Short|Long] struct
+ * block data
+ * block data
+ * ...
+ * main data
*
- * where there can be zero to four backup blocks (as signaled by xl_info flag
- * bits). XLogRecord structs always start on MAXALIGN boundaries in the WAL
- * files, and we round up SizeOfXLogRecord so that the rmgr data is also
- * guaranteed to begin on a MAXALIGN boundary. However, no padding is added
- * to align BkpBlock structs or backup block data.
+ * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of
+ * rmgr-specific data not associated with a block. XLogRecord structs
+ * always start on MAXALIGN boundaries in the WAL files, but the rest of
+ * the fields are not aligned.
*
- * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
- * and also not any backup blocks. xl_tot_len counts everything. Neither
- * length field is rounded up to an alignment boundary.
+ * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
+ * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
+ * used to distinguish between block references, and the main data structs.
*/
typedef struct XLogRecord
{
uint32 xl_tot_len; /* total len of entire record */
TransactionId xl_xid; /* xact id */
- uint32 xl_len; /* total len of rmgr data */
+ XLogRecPtr xl_prev; /* ptr to previous record in log */
uint8 xl_info; /* flag bits, see below */
RmgrId xl_rmid; /* resource manager for this record */
/* 2 bytes of padding here, initialize to zero */
- XLogRecPtr xl_prev; /* ptr to previous record in log */
pg_crc32 xl_crc; /* CRC for this record */
- /* If MAXALIGN==8, there are 4 wasted bytes here */
-
- /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
+ /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
} XLogRecord;
-#define SizeOfXLogRecord MAXALIGN(sizeof(XLogRecord))
-
-#define XLogRecGetData(record) ((char*) (record) + SizeOfXLogRecord)
+#define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32))
/*
- * XLOG uses only low 4 bits of xl_info. High 4 bits may be used by rmgr.
+ * The high 4 bits in xl_info may be used freely by rmgr. The
+ * XLR_SPECIAL_REL_UPDATE bit can be passed by XLogInsert caller. The rest
+ * are set internally by XLogInsert.
*/
#define XLR_INFO_MASK 0x0F
+#define XLR_RMGR_INFO_MASK 0xF0
/*
- * If we backed up any disk blocks with the XLOG record, we use flag bits in
- * xl_info to signal it. We support backup of up to 4 disk blocks per XLOG
- * record.
+ * If a WAL record modifies any relation files, in ways not covered by the
+ * usual block references, this flag is set. This is not used for anything
+ * by PostgreSQL itself, but it allows external tools that read WAL and keep
+ * track of modified blocks to recognize such special record types.
+ */
+#define XLR_SPECIAL_REL_UPDATE 0x01
+
+/*
+ * Header info for block data appended to an XLOG record.
+ *
+ * Note that we don't attempt to align the XLogRecordBlockHeader struct!
+ * So, the struct must be copied to aligned local storage before use.
+ * 'data_length' is the length of the payload data associated with this,
+ * and includes the possible full-page image, and rmgr-specific data. It
+ * does not include the XLogRecordBlockHeader struct itself.
*/
-#define XLR_BKP_BLOCK_MASK 0x0F /* all info bits used for bkp blocks */
-#define XLR_MAX_BKP_BLOCKS 4
-#define XLR_BKP_BLOCK(iblk) (0x08 >> (iblk)) /* iblk in 0..3 */
+typedef struct XLogRecordBlockHeader
+{
+ uint8 id; /* block reference ID */
+ uint8 fork_flags; /* fork within the relation, and flags */
+ uint16 data_length; /* number of payload bytes (not including page
+ * image) */
+
+ /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */
+ /* If !BKPBLOCK_SAME_REL is not set, a RelFileNode follows */
+ /* BlockNumber follows */
+} XLogRecordBlockHeader;
+
+#define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16))
/*
- * Header info for a backup block appended to an XLOG record.
+ * Additional header information when a full-page image is included
+ * (i.e. when BKPBLOCK_HAS_IMAGE is set).
*
* As a trivial form of data compression, the XLOG code is aware that
* PG data pages usually contain an unused "hole" in the middle, which
* contains only zero bytes. If hole_length > 0 then we have removed
* such a "hole" from the stored data (and it's not counted in the
* XLOG record's CRC, either). Hence, the amount of block data actually
- * present following the BkpBlock struct is BLCKSZ - hole_length bytes.
- *
- * Note that we don't attempt to align either the BkpBlock struct or the
- * block's data. So, the struct must be copied to aligned local storage
- * before use.
+ * present is BLCKSZ - hole_length bytes.
*/
-typedef struct BkpBlock
+typedef struct XLogRecordBlockImageHeader
{
- RelFileNode node; /* relation containing block */
- ForkNumber fork; /* fork within the relation */
- BlockNumber block; /* block number */
uint16 hole_offset; /* number of bytes before "hole" */
uint16 hole_length; /* number of bytes in "hole" */
+} XLogRecordBlockImageHeader;
+
+#define SizeOfXLogRecordBlockImageHeader sizeof(XLogRecordBlockImageHeader)
+
+/*
+ * Maximum size of the header for a block reference. This is used to size a
+ * temporary buffer for constructing the header.
+ */
+#define MaxSizeOfXLogRecordBlockHeader \
+ (SizeOfXLogRecordBlockHeader + \
+ SizeOfXLogRecordBlockImageHeader + \
+ sizeof(RelFileNode) + \
+ sizeof(BlockNumber))
+
+/*
+ * The fork number fits in the lower 4 bits in the fork_flags field. The upper
+ * bits are used for flags.
+ */
+#define BKPBLOCK_FORK_MASK 0x0F
+#define BKPBLOCK_FLAG_MASK 0xF0
+#define BKPBLOCK_HAS_IMAGE 0x10 /* block data is an XLogRecordBlockImage */
+#define BKPBLOCK_HAS_DATA 0x20
+#define BKPBLOCK_WILL_INIT 0x40 /* redo will re-init the page */
+#define BKPBLOCK_SAME_REL 0x80 /* RelFileNode omitted, same as previous */
+
+/*
+ * XLogRecordDataHeaderShort/Long are used for the "main data" portion of
+ * the record. If the length of the data is less than 256 bytes, the short
+ * form is used, with a single byte to hold the length. Otherwise the long
+ * form is used.
+ *
+ * (These structs are currently not used in the code, they are here just for
+ * documentation purposes).
+ */
+typedef struct XLogRecordDataHeaderShort
+{
+ uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */
+ uint8 data_length; /* number of payload bytes */
+} XLogRecordDataHeaderShort;
+
+#define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2)
+
+typedef struct XLogRecordDataHeaderLong
+{
+ uint8 id; /* XLR_BLOCK_ID_DATA_LONG */
+ /* followed by uint32 data_length, unaligned */
+} XLogRecordDataHeaderLong;
+
+#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
+
+/*
+ * Block IDs used to distinguish different kinds of record fragments. Block
+ * references are numbered from 0 to XLR_MAX_BLOCK_ID. A rmgr is free to use
+ * any ID number in that range (although you should stick to small numbers,
+ * because the WAL machinery is optimized for that case). A couple of ID
+ * numbers are reserved to denote the "main" data portion of the record.
+ *
+ * The maximum is currently set at 32, quite arbitrarily. Most records only
+ * need a handful of block references, but there are a few exceptions that
+ * need more.
+ */
+#define XLR_MAX_BLOCK_ID 32
+
+#define XLR_BLOCK_ID_DATA_SHORT 255
+#define XLR_BLOCK_ID_DATA_LONG 254
+
+#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
- /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
-} BkpBlock;
#endif /* XLOGRECORD_H */
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index 8d906967232..68f72cfac6d 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -11,7 +11,7 @@
#ifndef XLOG_UTILS_H
#define XLOG_UTILS_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "storage/bufmgr.h"
@@ -33,26 +33,17 @@ typedef enum
* replayed) */
} XLogRedoAction;
-extern XLogRedoAction XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record,
- int block_index, RelFileNode rnode, BlockNumber blkno,
- Buffer *buf);
-extern XLogRedoAction XLogReadBufferForRedoExtended(XLogRecPtr lsn,
- XLogRecord *record, int block_index,
- RelFileNode rnode, ForkNumber forkno,
- BlockNumber blkno,
+extern XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record,
+ uint8 buffer_id, Buffer *buf);
+extern Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id);
+extern XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record,
+ uint8 buffer_id,
ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf);
-extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, ReadBufferMode mode);
-extern Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
- int block_index,
- bool get_cleanup_lock, bool keep_buffer);
-extern Buffer RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb,
- char *blk, bool get_cleanup_lock, bool keep_buffer);
-
extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
extern void FreeFakeRelcacheEntry(Relation fakerel);