aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/brin/brin.c11
-rw-r--r--src/backend/access/brin/brin_pageops.c97
-rw-r--r--src/backend/access/brin/brin_revmap.c23
-rw-r--r--src/backend/access/brin/brin_xlog.c111
-rw-r--r--src/backend/access/gin/ginbtree.c111
-rw-r--r--src/backend/access/gin/gindatapage.c162
-rw-r--r--src/backend/access/gin/ginentrypage.c64
-rw-r--r--src/backend/access/gin/ginfast.c92
-rw-r--r--src/backend/access/gin/gininsert.c10
-rw-r--r--src/backend/access/gin/ginutil.c10
-rw-r--r--src/backend/access/gin/ginvacuum.c114
-rw-r--r--src/backend/access/gin/ginxlog.c341
-rw-r--r--src/backend/access/gist/gist.c9
-rw-r--r--src/backend/access/gist/gistbuild.c9
-rw-r--r--src/backend/access/gist/gistxlog.c269
-rw-r--r--src/backend/access/hash/hash.c2
-rw-r--r--src/backend/access/heap/heapam.c987
-rw-r--r--src/backend/access/heap/rewriteheap.c19
-rw-r--r--src/backend/access/nbtree/nbtinsert.c207
-rw-r--r--src/backend/access/nbtree/nbtpage.c175
-rw-r--r--src/backend/access/nbtree/nbtxlog.c353
-rw-r--r--src/backend/access/rmgrdesc/brindesc.c41
-rw-r--r--src/backend/access/rmgrdesc/clogdesc.c4
-rw-r--r--src/backend/access/rmgrdesc/dbasedesc.c4
-rw-r--r--src/backend/access/rmgrdesc/gindesc.c40
-rw-r--r--src/backend/access/rmgrdesc/gistdesc.c23
-rw-r--r--src/backend/access/rmgrdesc/hashdesc.c2
-rw-r--r--src/backend/access/rmgrdesc/heapdesc.c72
-rw-r--r--src/backend/access/rmgrdesc/mxactdesc.c4
-rw-r--r--src/backend/access/rmgrdesc/nbtdesc.c52
-rw-r--r--src/backend/access/rmgrdesc/relmapdesc.c4
-rw-r--r--src/backend/access/rmgrdesc/seqdesc.c4
-rw-r--r--src/backend/access/rmgrdesc/smgrdesc.c4
-rw-r--r--src/backend/access/rmgrdesc/spgdesc.c74
-rw-r--r--src/backend/access/rmgrdesc/standbydesc.c4
-rw-r--r--src/backend/access/rmgrdesc/tblspcdesc.c4
-rw-r--r--src/backend/access/rmgrdesc/xactdesc.c4
-rw-r--r--src/backend/access/rmgrdesc/xlogdesc.c10
-rw-r--r--src/backend/access/spgist/spgdoinsert.c243
-rw-r--r--src/backend/access/spgist/spginsert.c17
-rw-r--r--src/backend/access/spgist/spgvacuum.c72
-rw-r--r--src/backend/access/spgist/spgxlog.c335
-rw-r--r--src/backend/access/transam/README249
-rw-r--r--src/backend/access/transam/clog.c25
-rw-r--r--src/backend/access/transam/multixact.c33
-rw-r--r--src/backend/access/transam/twophase.c105
-rw-r--r--src/backend/access/transam/xact.c129
-rw-r--r--src/backend/access/transam/xlog.c348
-rw-r--r--src/backend/access/transam/xloginsert.c972
-rw-r--r--src/backend/access/transam/xlogreader.c486
-rw-r--r--src/backend/access/transam/xlogutils.c237
-rw-r--r--src/backend/catalog/storage.c31
-rw-r--r--src/backend/commands/dbcommands.c46
-rw-r--r--src/backend/commands/sequence.c77
-rw-r--r--src/backend/commands/tablespace.c31
-rw-r--r--src/backend/replication/logical/decode.c158
-rw-r--r--src/backend/replication/logical/logical.c7
-rw-r--r--src/backend/replication/logical/logicalfuncs.c4
-rw-r--r--src/backend/replication/logical/reorderbuffer.c1
-rw-r--r--src/backend/replication/logical/snapbuild.c2
-rw-r--r--src/backend/replication/walsender.c2
-rw-r--r--src/backend/storage/ipc/standby.c41
-rw-r--r--src/backend/utils/cache/relmapper.c20
-rw-r--r--src/bin/pg_resetxlog/pg_resetxlog.c14
-rw-r--r--src/include/access/brin_xlog.h63
-rw-r--r--src/include/access/clog.h6
-rw-r--r--src/include/access/gin.h6
-rw-r--r--src/include/access/gin_private.h66
-rw-r--r--src/include/access/gist_private.h31
-rw-r--r--src/include/access/hash.h6
-rw-r--r--src/include/access/heapam_xlog.h139
-rw-r--r--src/include/access/htup_details.h1
-rw-r--r--src/include/access/itup.h1
-rw-r--r--src/include/access/multixact.h6
-rw-r--r--src/include/access/nbtree.h94
-rw-r--r--src/include/access/spgist.h6
-rw-r--r--src/include/access/spgist_private.h144
-rw-r--r--src/include/access/xact.h6
-rw-r--r--src/include/access/xlog.h10
-rw-r--r--src/include/access/xlog_internal.h19
-rw-r--r--src/include/access/xloginsert.h70
-rw-r--r--src/include/access/xlogreader.h77
-rw-r--r--src/include/access/xlogrecord.h160
-rw-r--r--src/include/access/xlogutils.h21
-rw-r--r--src/include/catalog/storage_xlog.h6
-rw-r--r--src/include/commands/dbcommands.h6
-rw-r--r--src/include/commands/sequence.h6
-rw-r--r--src/include/commands/tablespace.h6
-rw-r--r--src/include/replication/decode.h2
-rw-r--r--src/include/storage/standby.h6
-rw-r--r--src/include/utils/relmapper.h6
91 files changed, 3866 insertions, 4315 deletions
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index bd35cf6696a..cb645e3d459 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -666,19 +666,16 @@ brinbuild(PG_FUNCTION_ARGS)
{
xl_brin_createidx xlrec;
XLogRecPtr recptr;
- XLogRecData rdata;
Page page;
- xlrec.node = index->rd_node;
xlrec.version = BRIN_CURRENT_VERSION;
xlrec.pagesPerRange = BrinGetPagesPerRange(index);
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = SizeOfBrinCreateIdx;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
+ XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX, &rdata);
+ recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
page = BufferGetPage(meta);
PageSetLSN(page, recptr);
diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c
index 50f1dec1631..0b6fbeb603c 100644
--- a/src/backend/access/brin/brin_pageops.c
+++ b/src/backend/access/brin/brin_pageops.c
@@ -140,27 +140,19 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
/* XLOG stuff */
if (RelationNeedsWAL(idxrel))
{
- BlockNumber blk = BufferGetBlockNumber(oldbuf);
xl_brin_samepage_update xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE;
- xlrec.node = idxrel->rd_node;
- ItemPointerSetBlockNumber(&xlrec.tid, blk);
- ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBrinSamepageUpdate;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ xlrec.offnum = oldoff;
- rdata[1].data = (char *) newtup;
- rdata[1].len = newsz;
- rdata[1].buffer = oldbuf;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
- recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+ XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) newtup, newsz);
+
+ recptr = XLogInsert(RM_BRIN_ID, info);
PageSetLSN(oldpage, recptr);
}
@@ -211,43 +203,30 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
{
xl_brin_update xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[4];
uint8 info;
info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
- xlrec.insert.node = idxrel->rd_node;
- ItemPointerSet(&xlrec.insert.tid, BufferGetBlockNumber(newbuf), newoff);
+ xlrec.insert.offnum = newoff;
xlrec.insert.heapBlk = heapBlk;
- xlrec.insert.tuplen = newsz;
- xlrec.insert.revmapBlk = BufferGetBlockNumber(revmapbuf);
xlrec.insert.pagesPerRange = pagesPerRange;
- ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff);
+ xlrec.oldOffnum = oldoff;
+
+ XLogBeginInsert();
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBrinUpdate;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ /* new page */
+ XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
- rdata[1].data = (char *) newtup;
- rdata[1].len = newsz;
- rdata[1].buffer = extended ? InvalidBuffer : newbuf;
- rdata[1].buffer_std = true;
- rdata[1].next = &(rdata[2]);
+ XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+ XLogRegisterBufData(0, (char *) newtup, newsz);
- rdata[2].data = (char *) NULL;
- rdata[2].len = 0;
- rdata[2].buffer = revmapbuf;
- rdata[2].buffer_std = true;
- rdata[2].next = &(rdata[3]);
+ /* revmap page */
+ XLogRegisterBuffer(1, revmapbuf, REGBUF_STANDARD);
- rdata[3].data = (char *) NULL;
- rdata[3].len = 0;
- rdata[3].buffer = oldbuf;
- rdata[3].buffer_std = true;
- rdata[3].next = NULL;
+ /* old page */
+ XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
- recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+ recptr = XLogInsert(RM_BRIN_ID, info);
PageSetLSN(oldpage, recptr);
PageSetLSN(newpage, recptr);
@@ -354,36 +333,22 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
{
xl_brin_insert xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[3];
uint8 info;
info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
- xlrec.node = idxrel->rd_node;
xlrec.heapBlk = heapBlk;
xlrec.pagesPerRange = pagesPerRange;
- xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf);
- xlrec.tuplen = itemsz;
- ItemPointerSet(&xlrec.tid, blk, off);
-
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBrinInsert;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].buffer_std = false;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = (char *) tup;
- rdata[1].len = itemsz;
- rdata[1].buffer = extended ? InvalidBuffer : *buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = &(rdata[2]);
-
- rdata[2].data = (char *) NULL;
- rdata[2].len = 0;
- rdata[2].buffer = revmapbuf;
- rdata[2].buffer_std = false;
- rdata[2].next = NULL;
-
- recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+ xlrec.offnum = off;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
+
+ XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+ XLogRegisterBufData(0, (char *) tup, itemsz);
+
+ XLogRegisterBuffer(1, revmapbuf, 0);
+
+ recptr = XLogInsert(RM_BRIN_ID, info);
PageSetLSN(page, recptr);
PageSetLSN(BufferGetPage(revmapbuf), recptr);
diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c
index 272c74e6b6e..adc7d0b8473 100644
--- a/src/backend/access/brin/brin_revmap.c
+++ b/src/backend/access/brin/brin_revmap.c
@@ -477,23 +477,16 @@ revmap_physical_extend(BrinRevmap *revmap)
{
xl_brin_revmap_extend xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
- xlrec.node = revmap->rm_irel->rd_node;
xlrec.targetBlk = mapBlk;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBrinRevmapExtend;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].buffer_std = false;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = (char *) NULL;
- rdata[1].len = 0;
- rdata[1].buffer = revmap->rm_metaBuf;
- rdata[1].buffer_std = false;
- rdata[1].next = NULL;
-
- recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND, rdata);
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
+ XLogRegisterBuffer(0, revmap->rm_metaBuf, 0);
+
+ XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);
+
+ recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
PageSetLSN(metapage, recptr);
PageSetLSN(page, recptr);
}
diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c
index 29370689a70..e6a17509753 100644
--- a/src/backend/access/brin/brin_xlog.c
+++ b/src/backend/access/brin/brin_xlog.c
@@ -20,17 +20,15 @@
* xlog replay routines
*/
static void
-brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_createidx(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_brin_createidx *xlrec = (xl_brin_createidx *) XLogRecGetData(record);
Buffer buf;
Page page;
- /* Backup blocks are not used in create_index records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
/* create the index' metapage */
- buf = XLogReadBuffer(xlrec->node, BRIN_METAPAGE_BLKNO, true);
+ buf = XLogInitBufferForRedo(record, 0);
Assert(BufferIsValid(buf));
page = (Page) BufferGetPage(buf);
brin_metapage_init(page, xlrec->pagesPerRange, xlrec->version);
@@ -44,51 +42,47 @@ brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
* revmap.
*/
static void
-brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
- xl_brin_insert *xlrec, BrinTuple *tuple)
+brin_xlog_insert_update(XLogReaderState *record,
+ xl_brin_insert *xlrec)
{
- BlockNumber blkno;
+ XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
Page page;
XLogRedoAction action;
- blkno = ItemPointerGetBlockNumber(&xlrec->tid);
-
/*
* If we inserted the first and only tuple on the page, re-initialize the
* page from scratch.
*/
- if (record->xl_info & XLOG_BRIN_INIT_PAGE)
+ if (XLogRecGetInfo(record) & XLOG_BRIN_INIT_PAGE)
{
- /*
- * No full-page image here. Don't try to read it, because there
- * might be one for the revmap buffer, below.
- */
- buffer = XLogReadBuffer(xlrec->node, blkno, true);
+ buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
brin_page_init(page, BRIN_PAGETYPE_REGULAR);
action = BLK_NEEDS_REDO;
}
else
{
- action = XLogReadBufferForRedo(lsn, record, 0,
- xlrec->node, blkno, &buffer);
+ action = XLogReadBufferForRedo(record, 0, &buffer);
}
/* insert the index item into the page */
if (action == BLK_NEEDS_REDO)
{
OffsetNumber offnum;
+ BrinTuple *tuple;
+ Size tuplen;
+
+ tuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
Assert(tuple->bt_blkno == xlrec->heapBlk);
page = (Page) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "brin_xlog_insert_update: invalid max offset number");
- offnum = PageAddItem(page, (Item) tuple, xlrec->tuplen, offnum, true,
- false);
+ offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "brin_xlog_insert_update: failed to add tuple");
@@ -99,16 +93,17 @@ brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
UnlockReleaseBuffer(buffer);
/* update the revmap */
- action = XLogReadBufferForRedo(lsn, record,
- record->xl_info & XLOG_BRIN_INIT_PAGE ? 0 : 1,
- xlrec->node,
- xlrec->revmapBlk, &buffer);
+ action = XLogReadBufferForRedo(record, 1, &buffer);
if (action == BLK_NEEDS_REDO)
{
+ ItemPointerData tid;
+ BlockNumber blkno = BufferGetBlockNumber(buffer);
+
+ ItemPointerSet(&tid, blkno, xlrec->offnum);
page = (Page) BufferGetPage(buffer);
brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk,
- xlrec->tid);
+ tid);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
@@ -122,34 +117,26 @@ brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
* replay a BRIN index insertion
*/
static void
-brin_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_insert(XLogReaderState *record)
{
xl_brin_insert *xlrec = (xl_brin_insert *) XLogRecGetData(record);
- BrinTuple *newtup;
- newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinInsert);
-
- brin_xlog_insert_update(lsn, record, xlrec, newtup);
+ brin_xlog_insert_update(record, xlrec);
}
/*
* replay a BRIN index update
*/
static void
-brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_update(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record);
- BlockNumber blkno;
Buffer buffer;
- BrinTuple *newtup;
XLogRedoAction action;
- newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinUpdate);
-
/* First remove the old tuple */
- blkno = ItemPointerGetBlockNumber(&(xlrec->oldtid));
- action = XLogReadBufferForRedo(lsn, record, 2, xlrec->insert.node,
- blkno, &buffer);
+ action = XLogReadBufferForRedo(record, 2, &buffer);
if (action == BLK_NEEDS_REDO)
{
Page page;
@@ -157,7 +144,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
page = (Page) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->oldtid));
+ offnum = xlrec->oldOffnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "brin_xlog_update: invalid max offset number");
@@ -168,7 +155,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
}
/* Then insert the new tuple and update revmap, like in an insertion. */
- brin_xlog_insert_update(lsn, record, &xlrec->insert, newtup);
+ brin_xlog_insert_update(record, &xlrec->insert);
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
@@ -178,30 +165,27 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
* Update a tuple on a single page.
*/
static void
-brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_samepage_update(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_brin_samepage_update *xlrec;
- BlockNumber blkno;
Buffer buffer;
XLogRedoAction action;
xlrec = (xl_brin_samepage_update *) XLogRecGetData(record);
- blkno = ItemPointerGetBlockNumber(&(xlrec->tid));
- action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node, blkno,
- &buffer);
+ action = XLogReadBufferForRedo(record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
- int tuplen;
+ Size tuplen;
BrinTuple *mmtuple;
Page page;
OffsetNumber offnum;
- tuplen = record->xl_len - SizeOfBrinSamepageUpdate;
- mmtuple = (BrinTuple *) ((char *) xlrec + SizeOfBrinSamepageUpdate);
+ mmtuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
page = (Page) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "brin_xlog_samepage_update: invalid max offset number");
@@ -223,18 +207,23 @@ brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
* Replay a revmap page extension
*/
static void
-brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_revmap_extend(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_brin_revmap_extend *xlrec;
Buffer metabuf;
Buffer buf;
Page page;
+ BlockNumber targetBlk;
XLogRedoAction action;
xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record);
+
+ XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk);
+ Assert(xlrec->targetBlk == targetBlk);
+
/* Update the metapage */
- action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node,
- BRIN_METAPAGE_BLKNO, &metabuf);
+ action = XLogReadBufferForRedo(record, 0, &metabuf);
if (action == BLK_NEEDS_REDO)
{
Page metapg;
@@ -255,7 +244,7 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
* image here.
*/
- buf = XLogReadBuffer(xlrec->node, xlrec->targetBlk, true);
+ buf = XLogInitBufferForRedo(record, 1);
page = (Page) BufferGetPage(buf);
brin_page_init(page, BRIN_PAGETYPE_REVMAP);
@@ -268,26 +257,26 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
}
void
-brin_redo(XLogRecPtr lsn, XLogRecord *record)
+brin_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info & XLOG_BRIN_OPMASK)
{
case XLOG_BRIN_CREATE_INDEX:
- brin_xlog_createidx(lsn, record);
+ brin_xlog_createidx(record);
break;
case XLOG_BRIN_INSERT:
- brin_xlog_insert(lsn, record);
+ brin_xlog_insert(record);
break;
case XLOG_BRIN_UPDATE:
- brin_xlog_update(lsn, record);
+ brin_xlog_update(record);
break;
case XLOG_BRIN_SAMEPAGE_UPDATE:
- brin_xlog_samepage_update(lsn, record);
+ brin_xlog_samepage_update(record);
break;
case XLOG_BRIN_REVMAP_EXTEND:
- brin_xlog_revmap_extend(lsn, record);
+ brin_xlog_revmap_extend(record);
break;
default:
elog(PANIC, "brin_redo: unknown op code %u", info);
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 5365477000a..99f40a871f0 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -326,7 +326,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
Buffer childbuf, GinStatsData *buildStats)
{
Page page = BufferGetPage(stack->buffer);
- XLogRecData *payloadrdata;
GinPlaceToPageRC rc;
uint16 xlflags = 0;
Page childpage = NULL;
@@ -351,12 +350,36 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/*
* Try to put the incoming tuple on the page. placeToPage will decide if
* the page needs to be split.
+ *
+ * WAL-logging this operation is a bit funny:
+ *
+ * We're responsible for calling XLogBeginInsert() and XLogInsert().
+ * XLogBeginInsert() must be called before placeToPage, because
+ * placeToPage can register some data to the WAL record.
+ *
+ * If placeToPage returns INSERTED, placeToPage has already called
+ * START_CRIT_SECTION(), and we're responsible for calling
+ * END_CRIT_SECTION. When it returns INSERTED, it is also responsible for
+ * registering any data required to replay the operation with
+ * XLogRegisterBufData(0, ...). It may only add data to block index 0; the
+ * main data of the WAL record is reserved for this function.
+ *
+ * If placeToPage returns SPLIT, we're wholly responsible for WAL logging.
+ * Splits happen infrequently, so we just make a full-page image of all
+ * the pages involved.
*/
+
+ if (RelationNeedsWAL(btree->index))
+ XLogBeginInsert();
+
rc = btree->placeToPage(btree, stack->buffer, stack,
insertdata, updateblkno,
- &payloadrdata, &newlpage, &newrpage);
+ &newlpage, &newrpage);
if (rc == UNMODIFIED)
+ {
+ XLogResetInsertion();
return true;
+ }
else if (rc == INSERTED)
{
/* placeToPage did START_CRIT_SECTION() */
@@ -372,17 +395,18 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
if (RelationNeedsWAL(btree->index))
{
XLogRecPtr recptr;
- XLogRecData rdata[3];
ginxlogInsert xlrec;
BlockIdData childblknos[2];
- xlrec.node = btree->index->rd_node;
- xlrec.blkno = BufferGetBlockNumber(stack->buffer);
+ /*
+ * placeToPage already registered stack->buffer as block 0.
+ */
xlrec.flags = xlflags;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(ginxlogInsert);
+ if (childbuf != InvalidBuffer)
+ XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
+
+ XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
/*
* Log information about child if this was an insertion of a
@@ -390,26 +414,13 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
*/
if (childbuf != InvalidBuffer)
{
- rdata[0].next = &rdata[1];
-
BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) childblknos;
- rdata[1].len = sizeof(BlockIdData) * 2;
- rdata[1].next = &rdata[2];
-
- rdata[2].buffer = childbuf;
- rdata[2].buffer_std = false;
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].next = payloadrdata;
+ XLogRegisterData((char *) childblknos,
+ sizeof(BlockIdData) * 2);
}
- else
- rdata[0].next = payloadrdata;
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
PageSetLSN(page, recptr);
if (childbuf != InvalidBuffer)
PageSetLSN(childpage, recptr);
@@ -421,10 +432,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
else if (rc == SPLIT)
{
- /* Didn't fit, have to split */
+ /* Didn't fit, had to split */
Buffer rbuffer;
BlockNumber savedRightLink;
- XLogRecData rdata[2];
ginxlogSplit data;
Buffer lbuffer = InvalidBuffer;
Page newrootpg = NULL;
@@ -448,7 +458,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
*/
data.node = btree->index->rd_node;
- data.rblkno = BufferGetBlockNumber(rbuffer);
data.flags = xlflags;
if (childbuf != InvalidBuffer)
{
@@ -462,23 +471,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
else
data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogSplit);
-
- if (childbuf != InvalidBuffer)
- {
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = childbuf;
- rdata[1].buffer_std = false;
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].next = payloadrdata;
- }
- else
- rdata[0].next = payloadrdata;
-
if (stack->parent == NULL)
{
/*
@@ -496,12 +488,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
buildStats->nEntryPages++;
}
- /*
- * root never has a right-link, so we borrow the rrlink field to
- * store the root block number.
- */
- data.rrlink = BufferGetBlockNumber(stack->buffer);
- data.lblkno = BufferGetBlockNumber(lbuffer);
+ data.rrlink = InvalidBlockNumber;
data.flags |= GIN_SPLIT_ROOT;
GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
@@ -524,7 +511,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
{
/* split non-root page */
data.rrlink = savedRightLink;
- data.lblkno = BufferGetBlockNumber(stack->buffer);
GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
@@ -572,7 +558,28 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
+ /*
+ * We just take full page images of all the split pages. Splits
+ * are uncommon enough that it's not worth complicating the code
+ * to be more efficient.
+ */
+ if (stack->parent == NULL)
+ {
+ XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ }
+ else
+ {
+ XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ }
+ if (BufferIsValid(childbuf))
+ XLogRegisterBuffer(3, childbuf, 0);
+
+ XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
PageSetLSN(BufferGetPage(stack->buffer), recptr);
PageSetLSN(BufferGetPage(rbuffer), recptr);
if (stack->parent == NULL)
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 97cd706c08e..012225eaa35 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -98,20 +98,19 @@ static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage);
+ Page *newlpage, Page *newrpage);
static disassembledLeaf *disassembleLeaf(Page page);
static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
int nNewItems);
-static XLogRecData *constructLeafRecompressWALData(Buffer buf,
- disassembledLeaf *leaf);
+static void registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf);
static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
static void dataPlaceToPageLeafSplit(Buffer buf,
disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage);
+ Page lpage, Page rpage);
/*
* Read TIDs from leaf data page to single uncompressed array. The TIDs are
@@ -428,8 +427,7 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
*/
static GinPlaceToPageRC
dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertdata, XLogRecData **prdata,
- Page *newlpage, Page *newrpage)
+ void *insertdata, Page *newlpage, Page *newrpage)
{
GinBtreeDataLeafInsertData *items = insertdata;
ItemPointer newItems = &items->items[items->curitem];
@@ -602,9 +600,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
*/
MemoryContextSwitchTo(oldCxt);
if (RelationNeedsWAL(btree->index))
- *prdata = constructLeafRecompressWALData(buf, leaf);
- else
- *prdata = NULL;
+ registerLeafRecompressWALData(buf, leaf);
START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buf, leaf);
@@ -685,7 +681,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
*newrpage = MemoryContextAlloc(oldCxt, BLCKSZ);
dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound,
- prdata, *newlpage, *newrpage);
+ *newlpage, *newrpage);
Assert(GinPageRightMost(page) ||
ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
@@ -791,7 +787,6 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
*/
if (removedsomething)
{
- XLogRecData *payloadrdata = NULL;
bool modified;
/*
@@ -818,7 +813,10 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
}
if (RelationNeedsWAL(indexrel))
- payloadrdata = constructLeafRecompressWALData(buffer, leaf);
+ {
+ XLogBeginInsert();
+ registerLeafRecompressWALData(buffer, leaf);
+ }
START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buffer, leaf);
@@ -827,18 +825,8 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
if (RelationNeedsWAL(indexrel))
{
XLogRecPtr recptr;
- XLogRecData rdata;
- ginxlogVacuumDataLeafPage xlrec;
- xlrec.node = indexrel->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
-
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = offsetof(ginxlogVacuumDataLeafPage, data);
- rdata.next = payloadrdata;
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, &rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
PageSetLSN(page, recptr);
}
@@ -850,13 +838,12 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
* Construct a ginxlogRecompressDataLeaf record representing the changes
* in *leaf.
*/
-static XLogRecData *
-constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
+static void
+registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
{
int nmodified = 0;
char *walbufbegin;
char *walbufend;
- XLogRecData *rdata;
dlist_iter iter;
int segno;
ginxlogRecompressDataLeaf *recompress_xlog;
@@ -871,12 +858,11 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
nmodified++;
}
- walbufbegin = palloc(
- sizeof(ginxlogRecompressDataLeaf) +
- BLCKSZ + /* max size needed to hold the segment
- * data */
- nmodified * 2 + /* (segno + action) per action */
- sizeof(XLogRecData));
+ walbufbegin =
+ palloc(sizeof(ginxlogRecompressDataLeaf) +
+ BLCKSZ + /* max size needed to hold the segment data */
+ nmodified * 2 /* (segno + action) per action */
+ );
walbufend = walbufbegin;
recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
@@ -944,14 +930,10 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
segno++;
}
- rdata = (XLogRecData *) MAXALIGN(walbufend);
- rdata->buffer = buf;
- rdata->buffer_std = TRUE;
- rdata->data = walbufbegin;
- rdata->len = walbufend - walbufbegin;
- rdata->next = NULL;
- return rdata;
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, walbufbegin, walbufend - walbufbegin);
+
}
/*
@@ -1024,7 +1006,7 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
static void
dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage)
+ Page lpage, Page rpage)
{
char *ptr;
int segsize;
@@ -1034,10 +1016,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
dlist_node *firstright;
leafSegmentInfo *seginfo;
- /* these must be static so they can be returned to caller */
- static ginxlogSplitDataLeaf split_xlog;
- static XLogRecData rdata[3];
-
/* Initialize temporary pages to hold the new left and right pages */
GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
@@ -1092,29 +1070,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
Assert(rsize == leaf->rsize);
GinDataPageSetDataSize(rpage, rsize);
*GinDataPageGetRightBound(rpage) = rbound;
-
- /* Create WAL record */
- split_xlog.lsize = lsize;
- split_xlog.rsize = rsize;
- split_xlog.lrightbound = lbound;
- split_xlog.rrightbound = rbound;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &split_xlog;
- rdata[0].len = sizeof(ginxlogSplitDataLeaf);
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) GinDataLeafPageGetPostingList(lpage);
- rdata[1].len = lsize;
- rdata[1].next = &rdata[2];
-
- rdata[2].buffer = InvalidBuffer;
- rdata[2].data = (char *) GinDataLeafPageGetPostingList(rpage);
- rdata[2].len = rsize;
- rdata[2].next = NULL;
-
- *prdata = rdata;
}
/*
@@ -1124,29 +1079,30 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
*
* In addition to inserting the given item, the downlink of the existing item
* at 'off' is updated to point to 'updateblkno'.
+ *
+ * On INSERTED, registers the buffer as buffer ID 0, with data.
+ * On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
*/
static GinPlaceToPageRC
dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage)
{
Page page = BufferGetPage(buf);
OffsetNumber off = stack->off;
PostingItem *pitem;
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata;
+ /* this must be static: it is registered as WAL data and must outlive this call */
static ginxlogInsertDataInternal data;
/* split if we have to */
if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
{
dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
return SPLIT;
}
- *prdata = &rdata;
Assert(GinPageIsData(page));
START_CRIT_SECTION();
@@ -1159,14 +1115,15 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- data.offset = off;
- data.newitem = *pitem;
+ if (RelationNeedsWAL(btree->index))
+ {
+ data.offset = off;
+ data.newitem = *pitem;
- rdata.buffer = buf;
- rdata.buffer_std = TRUE;
- rdata.data = (char *) &data;
- rdata.len = sizeof(ginxlogInsertDataInternal);
- rdata.next = NULL;
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) &data,
+ sizeof(ginxlogInsertDataInternal));
+ }
return INSERTED;
}
@@ -1178,7 +1135,6 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
static GinPlaceToPageRC
dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata,
Page *newlpage, Page *newrpage)
{
Page page = BufferGetPage(buf);
@@ -1187,11 +1143,11 @@ dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
if (GinPageIsLeaf(page))
return dataPlaceToPageLeaf(btree, buf, stack, insertdata,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
else
return dataPlaceToPageInternal(btree, buf, stack,
insertdata, updateblkno,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
}
/*
@@ -1202,7 +1158,7 @@ static void
dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage)
{
Page oldpage = BufferGetPage(origbuf);
OffsetNumber off = stack->off;
@@ -1215,19 +1171,13 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
Page lpage;
Page rpage;
OffsetNumber separator;
-
- /* these must be static so they can be returned to caller */
- static ginxlogSplitDataInternal data;
- static XLogRecData rdata[4];
- static PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
+ PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
lpage = PageGetTempPage(oldpage);
rpage = PageGetTempPage(oldpage);
GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
- *prdata = rdata;
-
/*
* First construct a new list of PostingItems, which includes all the old
* items, and the new item.
@@ -1277,20 +1227,6 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
/* set up right bound for right page */
*GinDataPageGetRightBound(rpage) = oldbound;
- data.separator = separator;
- data.nitem = nitems;
- data.rightbound = oldbound;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogSplitDataInternal);
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) allitems;
- rdata[1].len = nitems * sizeof(PostingItem);
- rdata[1].next = NULL;
-
*newlpage = lpage;
*newrpage = rpage;
}
@@ -1797,24 +1733,18 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
- XLogRecData rdata[2];
ginxlogCreatePostingTree data;
- data.node = index->rd_node;
- data.blkno = blkno;
data.size = rootsize;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogCreatePostingTree);
- rdata[0].next = &rdata[1];
+ XLogBeginInsert();
+ XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) GinDataLeafPageGetPostingList(page);
- rdata[1].len = rootsize;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
+ rootsize);
+ XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
PageSetLSN(page, recptr);
}
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 84dc1e228c1..2dae7b95499 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -22,7 +22,7 @@
static void entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertPayload,
- BlockNumber updateblkno, XLogRecData **prdata,
+ BlockNumber updateblkno,
Page *newlpage, Page *newrpage);
/*
@@ -515,33 +515,33 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off,
* On insertion to an internal node, in addition to inserting the given item,
* the downlink of the existing item at 'off' is updated to point to
* 'updateblkno'.
+ *
+ * On INSERTED, registers the buffer as buffer ID 0, with data.
+ * On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
*/
static GinPlaceToPageRC
entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertPayload, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage)
{
GinBtreeEntryInsertData *insertData = insertPayload;
Page page = BufferGetPage(buf);
OffsetNumber off = stack->off;
OffsetNumber placed;
- int cnt = 0;
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata[3];
+ /* this must be static: it is registered as WAL data and must outlive this call. */
static ginxlogInsertEntry data;
/* quick exit if it doesn't fit */
if (!entryIsEnoughSpace(btree, buf, off, insertData))
{
entrySplitPage(btree, buf, stack, insertPayload, updateblkno,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
return SPLIT;
}
START_CRIT_SECTION();
- *prdata = rdata;
entryPreparePage(btree, page, off, insertData, updateblkno);
placed = PageAddItem(page,
@@ -552,21 +552,17 @@ entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- data.isDelete = insertData->isDelete;
- data.offset = off;
-
- rdata[cnt].buffer = buf;
- rdata[cnt].buffer_std = true;
- rdata[cnt].data = (char *) &data;
- rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple);
- rdata[cnt].next = &rdata[cnt + 1];
- cnt++;
-
- rdata[cnt].buffer = buf;
- rdata[cnt].buffer_std = true;
- rdata[cnt].data = (char *) insertData->entry;
- rdata[cnt].len = IndexTupleSize(insertData->entry);
- rdata[cnt].next = NULL;
+ if (RelationNeedsWAL(btree->index))
+ {
+ data.isDelete = insertData->isDelete;
+ data.offset = off;
+
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) &data,
+ offsetof(ginxlogInsertEntry, tuple));
+ XLogRegisterBufData(0, (char *) insertData->entry,
+ IndexTupleSize(insertData->entry));
+ }
return INSERTED;
}
@@ -581,7 +577,7 @@ static void
entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertPayload,
- BlockNumber updateblkno, XLogRecData **prdata,
+ BlockNumber updateblkno,
Page *newlpage, Page *newrpage)
{
GinBtreeEntryInsertData *insertData = insertPayload;
@@ -590,7 +586,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
maxoff,
separator = InvalidOffsetNumber;
Size totalsize = 0;
- Size tupstoresize;
Size lsize = 0,
size;
char *ptr;
@@ -599,13 +594,8 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
Size pageSize = PageGetPageSize(lpage);
+ char tupstore[2 * BLCKSZ];
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata[2];
- static ginxlogSplitEntry data;
- static char tupstore[2 * BLCKSZ];
-
- *prdata = rdata;
entryPreparePage(btree, lpage, off, insertData, updateblkno);
/*
@@ -638,7 +628,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
ptr += size;
totalsize += size + sizeof(ItemIdData);
}
- tupstoresize = ptr - tupstore;
/*
* Initialize the left and right pages, and copy all the tuples back to
@@ -673,19 +662,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
ptr += MAXALIGN(IndexTupleSize(itup));
}
- data.separator = separator;
- data.nitem = maxoff;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogSplitEntry);
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = tupstore;
- rdata[1].len = tupstoresize;
- rdata[1].next = NULL;
-
*newlpage = lpage;
*newrpage = rpage;
}
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index 25746995b5e..fd81d675570 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -108,26 +108,19 @@ writeListPage(Relation index, Buffer buffer,
if (RelationNeedsWAL(index))
{
- XLogRecData rdata[2];
ginxlogInsertListPage data;
XLogRecPtr recptr;
- data.node = index->rd_node;
- data.blkno = BufferGetBlockNumber(buffer);
data.rightlink = rightlink;
data.ntuples = ntuples;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogInsertListPage);
- rdata[0].next = rdata + 1;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = workspace;
- rdata[1].len = size;
- rdata[1].next = NULL;
+ XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
+ XLogRegisterBufData(0, workspace, size);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
PageSetLSN(page, recptr);
}
@@ -224,26 +217,23 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
Buffer metabuffer;
Page metapage;
GinMetaPageData *metadata = NULL;
- XLogRecData rdata[2];
Buffer buffer = InvalidBuffer;
Page page = NULL;
ginxlogUpdateMeta data;
bool separateList = false;
bool needCleanup = false;
int cleanupSize;
+ bool needWal;
if (collector->ntuples == 0)
return;
+ needWal = RelationNeedsWAL(index);
+
data.node = index->rd_node;
data.ntuples = 0;
data.newRightlink = data.prevTail = InvalidBlockNumber;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogUpdateMeta);
- rdata[0].next = NULL;
-
metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuffer);
@@ -283,6 +273,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
memset(&sublist, 0, sizeof(GinMetaPageData));
makeSublist(index, collector->tuples, collector->ntuples, &sublist);
+ if (needWal)
+ XLogBeginInsert();
+
/*
* metapage was unlocked, see above
*/
@@ -315,14 +308,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
LockBuffer(buffer, GIN_EXCLUSIVE);
page = BufferGetPage(buffer);
- rdata[0].next = rdata + 1;
-
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].next = NULL;
-
Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
START_CRIT_SECTION();
@@ -336,6 +321,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
metadata->nPendingPages += sublist.nPendingPages;
metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
+
+ if (needWal)
+ XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
}
}
else
@@ -348,6 +336,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
int i,
tupsize;
char *ptr;
+ char *collectordata;
buffer = ReadBuffer(index, metadata->tail);
LockBuffer(buffer, GIN_EXCLUSIVE);
@@ -356,16 +345,13 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
off = (PageIsEmpty(page)) ? FirstOffsetNumber :
OffsetNumberNext(PageGetMaxOffsetNumber(page));
- rdata[0].next = rdata + 1;
-
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- ptr = rdata[1].data = (char *) palloc(collector->sumsize);
- rdata[1].len = collector->sumsize;
- rdata[1].next = NULL;
+ collectordata = ptr = (char *) palloc(collector->sumsize);
data.ntuples = collector->ntuples;
+ if (needWal)
+ XLogBeginInsert();
+
START_CRIT_SECTION();
/*
@@ -390,7 +376,12 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
off++;
}
- Assert((ptr - rdata[1].data) <= collector->sumsize);
+ Assert((ptr - collectordata) <= collector->sumsize);
+ if (needWal)
+ {
+ XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
+ XLogRegisterBufData(1, collectordata, collector->sumsize);
+ }
metadata->tailFreeSize = PageGetExactFreeSpace(page);
@@ -402,13 +393,16 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
*/
MarkBufferDirty(metabuffer);
- if (RelationNeedsWAL(index))
+ if (needWal)
{
XLogRecPtr recptr;
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
+ XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+ XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
PageSetLSN(metapage, recptr);
if (buffer != InvalidBuffer)
@@ -526,20 +520,11 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
int i;
int64 nDeletedHeapTuples = 0;
ginxlogDeleteListPages data;
- XLogRecData rdata[1];
Buffer buffers[GIN_NDELETE_AT_ONCE];
- data.node = index->rd_node;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogDeleteListPages);
- rdata[0].next = NULL;
-
data.ndeleted = 0;
while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead)
{
- data.toDelete[data.ndeleted] = blknoToDelete;
buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete);
LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE);
page = BufferGetPage(buffers[data.ndeleted]);
@@ -562,6 +547,13 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
if (stats)
stats->pages_deleted += data.ndeleted;
+ /*
+ * This operation touches an unusually large number of pages, so
+ * prepare the XLogInsert machinery for that before entering the
+ * critical section.
+ */
+ XLogEnsureRecordSpace(data.ndeleted, 0);
+
START_CRIT_SECTION();
metadata->head = blknoToDelete;
@@ -592,9 +584,17 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
{
XLogRecPtr recptr;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+ for (i = 0; i < data.ndeleted; i++)
+ XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT);
+
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
+ XLogRegisterData((char *) &data,
+ sizeof(ginxlogDeleteListPages));
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE);
PageSetLSN(metapage, recptr);
for (i = 0; i < data.ndeleted; i++)
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 370884ed17f..c1ad0fd8c4d 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -347,15 +347,13 @@ ginbuild(PG_FUNCTION_ARGS)
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
- XLogRecData rdata;
Page page;
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &(index->rd_node);
- rdata.len = sizeof(RelFileNode);
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
page = BufferGetPage(RootBuffer);
PageSetLSN(page, recptr);
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index d0458cfd0cf..f593a7224f2 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -605,19 +605,17 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
{
XLogRecPtr recptr;
ginxlogUpdateMeta data;
- XLogRecData rdata;
data.node = index->rd_node;
data.ntuples = 0;
data.newRightlink = data.prevTail = InvalidBlockNumber;
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &data;
- rdata.len = sizeof(ginxlogUpdateMeta);
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
+ XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
PageSetLSN(metapage, recptr);
}
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 3a61321a835..6f32600ed79 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -89,10 +89,6 @@ xlogVacuumPage(Relation index, Buffer buffer)
{
Page page = BufferGetPage(buffer);
XLogRecPtr recptr;
- XLogRecData rdata[3];
- ginxlogVacuumPage xlrec;
- uint16 lower;
- uint16 upper;
/* This is only used for entry tree leaf pages. */
Assert(!GinPageIsData(page));
@@ -101,57 +97,14 @@ xlogVacuumPage(Relation index, Buffer buffer)
if (!RelationNeedsWAL(index))
return;
- xlrec.node = index->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
-
- /* Assume we can omit data between pd_lower and pd_upper */
- lower = ((PageHeader) page)->pd_lower;
- upper = ((PageHeader) page)->pd_upper;
-
- Assert(lower < BLCKSZ);
- Assert(upper < BLCKSZ);
-
- if (lower >= SizeOfPageHeaderData &&
- upper > lower &&
- upper <= BLCKSZ)
- {
- xlrec.hole_offset = lower;
- xlrec.hole_length = upper - lower;
- }
- else
- {
- /* No "hole" to compress out */
- xlrec.hole_offset = 0;
- xlrec.hole_length = 0;
- }
-
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(ginxlogVacuumPage);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &rdata[1];
-
- if (xlrec.hole_length == 0)
- {
- rdata[1].data = (char *) page;
- rdata[1].len = BLCKSZ;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
- }
- else
- {
- /* must skip the hole */
- rdata[1].data = (char *) page;
- rdata[1].len = xlrec.hole_offset;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &rdata[2];
-
- rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
- rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
- rdata[2].buffer = InvalidBuffer;
- rdata[2].next = NULL;
- }
+ /*
+ * Always create a full image, we don't track the changes on the page at
+ * any more fine-grained level. This could obviously be improved...
+ */
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
PageSetLSN(page, recptr);
}
@@ -292,48 +245,27 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
if (RelationNeedsWAL(gvs->index))
{
XLogRecPtr recptr;
- XLogRecData rdata[4];
ginxlogDeletePage data;
- data.node = gvs->index->rd_node;
- data.blkno = deleteBlkno;
- data.parentBlkno = parentBlkno;
+ /*
+ * We can't pass REGBUF_STANDARD for the deleted page, because we
+ * didn't set pd_lower on pre-9.4 versions. The page might've been
+ * binary-upgraded from an older version, and hence not have pd_lower
+ * set correctly. Ditto for the left page, but removing the item from
+ * the parent updated its pd_lower, so we know that's OK at this
+ * point.
+ */
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, dBuffer, 0);
+ XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(2, lBuffer, 0);
+
data.parentOffset = myoff;
- data.leftBlkno = leftBlkno;
data.rightLink = GinPageGetOpaque(page)->rightlink;
- /*
- * We can't pass buffer_std = TRUE, because we didn't set pd_lower on
- * pre-9.4 versions. The page might've been binary-upgraded from an
- * older version, and hence not have pd_lower set correctly. Ditto for
- * the left page, but removing the item from the parent updated its
- * pd_lower, so we know that's OK at this point.
- */
- rdata[0].buffer = dBuffer;
- rdata[0].buffer_std = FALSE;
- rdata[0].data = NULL;
- rdata[0].len = 0;
- rdata[0].next = rdata + 1;
-
- rdata[1].buffer = pBuffer;
- rdata[1].buffer_std = TRUE;
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].next = rdata + 2;
-
- rdata[2].buffer = lBuffer;
- rdata[2].buffer_std = FALSE;
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].next = rdata + 3;
-
- rdata[3].buffer = InvalidBuffer;
- rdata[3].buffer_std = FALSE;
- rdata[3].len = sizeof(ginxlogDeletePage);
- rdata[3].data = (char *) &data;
- rdata[3].next = NULL;
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata);
+ XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
PageSetLSN(page, recptr);
PageSetLSN(parentPage, recptr);
PageSetLSN(BufferGetPage(lBuffer), recptr);
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index d0553bb8f72..6c0042bd795 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -20,18 +20,15 @@
static MemoryContext opCtx; /* working memory for operations */
static void
-ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
- int block_index,
- RelFileNode node, BlockNumber blkno)
+ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
{
+ XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
Page page;
- if (XLogReadBufferForRedo(lsn, record, block_index, node, blkno, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
-
GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
PageSetLSN(page, lsn);
@@ -42,18 +39,15 @@ ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
}
static void
-ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
+ginRedoCreateIndex(XLogReaderState *record)
{
- RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+ XLogRecPtr lsn = record->EndRecPtr;
Buffer RootBuffer,
MetaBuffer;
Page page;
- /* Backup blocks are not used in create_index records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
- Assert(BufferIsValid(MetaBuffer));
+ MetaBuffer = XLogInitBufferForRedo(record, 0);
+ Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
page = (Page) BufferGetPage(MetaBuffer);
GinInitMetabuffer(MetaBuffer);
@@ -61,8 +55,8 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn);
MarkBufferDirty(MetaBuffer);
- RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
- Assert(BufferIsValid(RootBuffer));
+ RootBuffer = XLogInitBufferForRedo(record, 1);
+ Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
page = (Page) BufferGetPage(RootBuffer);
GinInitBuffer(RootBuffer, GIN_LEAF);
@@ -75,18 +69,15 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
}
static void
-ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
+ginRedoCreatePTree(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record);
char *ptr;
Buffer buffer;
Page page;
- /* Backup blocks are not used in create_ptree records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- buffer = XLogReadBuffer(data->node, data->blkno, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED);
@@ -328,35 +319,40 @@ ginRedoInsertData(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdat
}
static void
-ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
+ginRedoInsert(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
Buffer buffer;
- char *payload;
+#ifdef NOT_USED
BlockNumber leftChildBlkno = InvalidBlockNumber;
+#endif
BlockNumber rightChildBlkno = InvalidBlockNumber;
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
- payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
-
/*
* First clear incomplete-split flag on child page if this finishes a
* split.
*/
if (!isLeaf)
{
+ char *payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
+
+#ifdef NOT_USED
leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
+#endif
payload += sizeof(BlockIdData);
rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
payload += sizeof(BlockIdData);
- ginRedoClearIncompleteSplit(lsn, record, 0, data->node, leftChildBlkno);
+ ginRedoClearIncompleteSplit(record, 1);
}
- if (XLogReadBufferForRedo(lsn, record, isLeaf ? 0 : 1, data->node,
- data->blkno, &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
Page page = BufferGetPage(buffer);
+ Size len;
+ char *payload = XLogRecGetBlockData(record, 0, &len);
/* How to insert the payload is tree-type specific */
if (data->flags & GIN_INSERT_ISDATA)
@@ -378,161 +374,33 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
}
static void
-ginRedoSplitEntry(Page lpage, Page rpage, void *rdata)
-{
- ginxlogSplitEntry *data = (ginxlogSplitEntry *) rdata;
- IndexTuple itup = (IndexTuple) ((char *) rdata + sizeof(ginxlogSplitEntry));
- OffsetNumber i;
-
- for (i = 0; i < data->separator; i++)
- {
- if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
- elog(ERROR, "failed to add item to gin index page");
- itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
- }
-
- for (i = data->separator; i < data->nitem; i++)
- {
- if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
- elog(ERROR, "failed to add item to gin index page");
- itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
- }
-}
-
-static void
-ginRedoSplitData(Page lpage, Page rpage, void *rdata)
-{
- bool isleaf = GinPageIsLeaf(lpage);
-
- if (isleaf)
- {
- ginxlogSplitDataLeaf *data = (ginxlogSplitDataLeaf *) rdata;
- Pointer lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf);
- Pointer rptr = lptr + data->lsize;
-
- Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize);
- Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize);
-
- memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize);
- memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize);
-
- GinDataPageSetDataSize(lpage, data->lsize);
- GinDataPageSetDataSize(rpage, data->rsize);
- *GinDataPageGetRightBound(lpage) = data->lrightbound;
- *GinDataPageGetRightBound(rpage) = data->rrightbound;
- }
- else
- {
- ginxlogSplitDataInternal *data = (ginxlogSplitDataInternal *) rdata;
- PostingItem *items = (PostingItem *) ((char *) rdata + sizeof(ginxlogSplitDataInternal));
- OffsetNumber i;
- OffsetNumber maxoff;
-
- for (i = 0; i < data->separator; i++)
- GinDataPageAddPostingItem(lpage, &items[i], InvalidOffsetNumber);
- for (i = data->separator; i < data->nitem; i++)
- GinDataPageAddPostingItem(rpage, &items[i], InvalidOffsetNumber);
-
- /* set up right key */
- maxoff = GinPageGetOpaque(lpage)->maxoff;
- *GinDataPageGetRightBound(lpage) = GinDataPageGetPostingItem(lpage, maxoff)->key;
- *GinDataPageGetRightBound(rpage) = data->rightbound;
- }
-}
-
-static void
-ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
+ginRedoSplit(XLogReaderState *record)
{
ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
Buffer lbuffer,
- rbuffer;
- Page lpage,
- rpage;
- uint32 flags;
- uint32 lflags,
- rflags;
- char *payload;
+ rbuffer,
+ rootbuf;
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
- bool isData = (data->flags & GIN_INSERT_ISDATA) != 0;
bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
- payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
-
/*
* First clear incomplete-split flag on child page if this finishes a
* split
*/
if (!isLeaf)
- ginRedoClearIncompleteSplit(lsn, record, 0, data->node, data->leftChildBlkno);
-
- flags = 0;
- if (isLeaf)
- flags |= GIN_LEAF;
- if (isData)
- flags |= GIN_DATA;
- if (isLeaf && isData)
- flags |= GIN_COMPRESSED;
-
- lflags = rflags = flags;
- if (!isRoot)
- lflags |= GIN_INCOMPLETE_SPLIT;
-
- lbuffer = XLogReadBuffer(data->node, data->lblkno, true);
- Assert(BufferIsValid(lbuffer));
- lpage = (Page) BufferGetPage(lbuffer);
- GinInitBuffer(lbuffer, lflags);
-
- rbuffer = XLogReadBuffer(data->node, data->rblkno, true);
- Assert(BufferIsValid(rbuffer));
- rpage = (Page) BufferGetPage(rbuffer);
- GinInitBuffer(rbuffer, rflags);
-
- GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
- GinPageGetOpaque(rpage)->rightlink = isRoot ? InvalidBlockNumber : data->rrlink;
-
- /* Do the tree-type specific portion to restore the page contents */
- if (isData)
- ginRedoSplitData(lpage, rpage, payload);
- else
- ginRedoSplitEntry(lpage, rpage, payload);
+ ginRedoClearIncompleteSplit(record, 3);
- PageSetLSN(rpage, lsn);
- MarkBufferDirty(rbuffer);
+ if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED)
+ elog(ERROR, "GIN split record did not contain a full-page image of left page");
- PageSetLSN(lpage, lsn);
- MarkBufferDirty(lbuffer);
+ if (XLogReadBufferForRedo(record, 1, &rbuffer) != BLK_RESTORED)
+ elog(ERROR, "GIN split record did not contain a full-page image of right page");
if (isRoot)
{
- BlockNumber rootBlkno = data->rrlink;
- Buffer rootBuf = XLogReadBuffer(data->node, rootBlkno, true);
- Page rootPage = BufferGetPage(rootBuf);
-
- GinInitBuffer(rootBuf, flags & ~GIN_LEAF & ~GIN_COMPRESSED);
-
- if (isData)
- {
- Assert(rootBlkno != GIN_ROOT_BLKNO);
- ginDataFillRoot(NULL, BufferGetPage(rootBuf),
- BufferGetBlockNumber(lbuffer),
- BufferGetPage(lbuffer),
- BufferGetBlockNumber(rbuffer),
- BufferGetPage(rbuffer));
- }
- else
- {
- Assert(rootBlkno == GIN_ROOT_BLKNO);
- ginEntryFillRoot(NULL, BufferGetPage(rootBuf),
- BufferGetBlockNumber(lbuffer),
- BufferGetPage(lbuffer),
- BufferGetBlockNumber(rbuffer),
- BufferGetPage(rbuffer));
- }
-
- PageSetLSN(rootPage, lsn);
-
- MarkBufferDirty(rootBuf);
- UnlockReleaseBuffer(rootBuf);
+ if (XLogReadBufferForRedo(record, 2, &rootbuf) != BLK_RESTORED)
+ elog(ERROR, "GIN split record did not contain a full-page image of root page");
+ UnlockReleaseBuffer(rootbuf);
}
UnlockReleaseBuffer(rbuffer);
@@ -544,54 +412,30 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
* a XLOG_FPI record.
*/
static void
-ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoVacuumPage(XLogReaderState *record)
{
- ginxlogVacuumPage *xlrec = (ginxlogVacuumPage *) XLogRecGetData(record);
- char *blk = ((char *) xlrec) + sizeof(ginxlogVacuumPage);
Buffer buffer;
- Page page;
-
- Assert(xlrec->hole_offset < BLCKSZ);
- Assert(xlrec->hole_length < BLCKSZ);
-
- /* Backup blocks are not used, we'll re-initialize the page always. */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
- buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, true);
- if (!BufferIsValid(buffer))
- return;
- page = (Page) BufferGetPage(buffer);
-
- if (xlrec->hole_length == 0)
+ if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
{
- memcpy((char *) page, blk, BLCKSZ);
+ elog(ERROR, "replay of gin entry tree page vacuum did not restore the page");
}
- else
- {
- memcpy((char *) page, blk, xlrec->hole_offset);
- /* must zero-fill the hole */
- MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length);
- memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length),
- blk + xlrec->hole_offset,
- BLCKSZ - (xlrec->hole_offset + xlrec->hole_length));
- }
-
- PageSetLSN(page, lsn);
-
- MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
static void
-ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoVacuumDataLeafPage(XLogReaderState *record)
{
- ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record);
+ XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
Page page = BufferGetPage(buffer);
+ Size len;
+ ginxlogVacuumDataLeafPage *xlrec;
+
+ xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len);
Assert(GinPageIsLeaf(page));
Assert(GinPageIsData(page));
@@ -605,30 +449,27 @@ ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
}
static void
-ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoDeletePage(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record);
Buffer dbuffer;
Buffer pbuffer;
Buffer lbuffer;
Page page;
- if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->blkno, &dbuffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(dbuffer);
-
Assert(GinPageIsData(page));
GinPageGetOpaque(page)->flags = GIN_DELETED;
PageSetLSN(page, lsn);
MarkBufferDirty(dbuffer);
}
- if (XLogReadBufferForRedo(lsn, record, 1, data->node, data->parentBlkno,
- &pbuffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(pbuffer);
-
Assert(GinPageIsData(page));
Assert(!GinPageIsLeaf(page));
GinPageDeletePostingItem(page, data->parentOffset);
@@ -636,11 +477,9 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
MarkBufferDirty(pbuffer);
}
- if (XLogReadBufferForRedo(lsn, record, 2, data->node, data->leftBlkno,
- &lbuffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(lbuffer);
-
Assert(GinPageIsData(page));
GinPageGetOpaque(page)->rightlink = data->rightLink;
PageSetLSN(page, lsn);
@@ -656,8 +495,9 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
}
static void
-ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoUpdateMetapage(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record);
Buffer metabuffer;
Page metapage;
@@ -668,9 +508,8 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
* image, so restore the metapage unconditionally without looking at the
* LSN, to avoid torn page hazards.
*/
- metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
- if (!BufferIsValid(metabuffer))
- return; /* assume index was deleted, nothing to do */
+ metabuffer = XLogInitBufferForRedo(record, 0);
+ Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuffer);
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
@@ -682,17 +521,18 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
/*
* insert into tail page
*/
- if (XLogReadBufferForRedo(lsn, record, 0, data->node,
- data->metadata.tail, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
{
Page page = BufferGetPage(buffer);
OffsetNumber off;
int i;
Size tupsize;
+ char *payload;
IndexTuple tuples;
+ Size totaltupsize;
- tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+ payload = XLogRecGetBlockData(record, 1, &totaltupsize);
+ tuples = (IndexTuple) payload;
if (PageIsEmpty(page))
off = FirstOffsetNumber;
@@ -711,6 +551,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
off++;
}
+ Assert(payload + totaltupsize == (char *) tuples);
/*
* Increase counter of heap tuples
@@ -728,8 +569,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
/*
* New tail
*/
- if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->prevTail,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
{
Page page = BufferGetPage(buffer);
@@ -746,8 +586,9 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
}
static void
-ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoInsertListPage(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record);
Buffer buffer;
Page page;
@@ -755,15 +596,12 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
off = FirstOffsetNumber;
int i,
tupsize;
- IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
-
- /*
- * Backup blocks are not used, we always re-initialize the page.
- */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ char *payload;
+ IndexTuple tuples;
+ Size totaltupsize;
- buffer = XLogReadBuffer(data->node, data->blkno, true);
- Assert(BufferIsValid(buffer));
+ /* We always re-initialize the page. */
+ buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_LIST);
@@ -779,6 +617,9 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
GinPageGetOpaque(page)->maxoff = 0;
}
+ payload = XLogRecGetBlockData(record, 0, &totaltupsize);
+
+ tuples = (IndexTuple) payload;
for (i = 0; i < data->ntuples; i++)
{
tupsize = IndexTupleSize(tuples);
@@ -791,6 +632,7 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
tuples = (IndexTuple) (((char *) tuples) + tupsize);
off++;
}
+ Assert((char *) tuples == payload + totaltupsize);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -799,21 +641,20 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
}
static void
-ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
+ginRedoDeleteListPages(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record);
Buffer metabuffer;
Page metapage;
int i;
- /* Backup blocks are not used in delete_listpage records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
- if (!BufferIsValid(metabuffer))
- return; /* assume index was deleted, nothing to do */
+ metabuffer = XLogInitBufferForRedo(record, 0);
+ Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuffer);
+ GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
+
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
PageSetLSN(metapage, lsn);
MarkBufferDirty(metabuffer);
@@ -838,7 +679,7 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
Buffer buffer;
Page page;
- buffer = XLogReadBuffer(data->node, data->toDelete[i], true);
+ buffer = XLogInitBufferForRedo(record, i + 1);
page = BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_DELETED);
@@ -851,9 +692,9 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
}
void
-gin_redo(XLogRecPtr lsn, XLogRecord *record)
+gin_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
MemoryContext oldCtx;
/*
@@ -866,34 +707,34 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
switch (info)
{
case XLOG_GIN_CREATE_INDEX:
- ginRedoCreateIndex(lsn, record);
+ ginRedoCreateIndex(record);
break;
case XLOG_GIN_CREATE_PTREE:
- ginRedoCreatePTree(lsn, record);
+ ginRedoCreatePTree(record);
break;
case XLOG_GIN_INSERT:
- ginRedoInsert(lsn, record);
+ ginRedoInsert(record);
break;
case XLOG_GIN_SPLIT:
- ginRedoSplit(lsn, record);
+ ginRedoSplit(record);
break;
case XLOG_GIN_VACUUM_PAGE:
- ginRedoVacuumPage(lsn, record);
+ ginRedoVacuumPage(record);
break;
case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
- ginRedoVacuumDataLeafPage(lsn, record);
+ ginRedoVacuumDataLeafPage(record);
break;
case XLOG_GIN_DELETE_PAGE:
- ginRedoDeletePage(lsn, record);
+ ginRedoDeletePage(record);
break;
case XLOG_GIN_UPDATE_META_PAGE:
- ginRedoUpdateMetapage(lsn, record);
+ ginRedoUpdateMetapage(record);
break;
case XLOG_GIN_INSERT_LISTPAGE:
- ginRedoInsertListPage(lsn, record);
+ ginRedoInsertListPage(record);
break;
case XLOG_GIN_DELETE_LISTPAGE:
- ginRedoDeleteListPages(lsn, record);
+ ginRedoDeleteListPages(record);
break;
default:
elog(PANIC, "gin_redo: unknown op code %u", info);
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 644b882b7d4..2141045f994 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -16,6 +16,7 @@
#include "access/genam.h"
#include "access/gist_private.h"
+#include "access/xloginsert.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
#include "miscadmin.h"
@@ -394,6 +395,14 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
GistPageSetNSN(ptr->page, oldnsn);
}
+ /*
+ * gistXLogSplit() needs to WAL log a lot of pages; prepare WAL
+ * insertion for that. NB: The number of pages and data segments
+ * specified here must match the calculations in gistXLogSplit()!
+ */
+ if (RelationNeedsWAL(rel))
+ XLogEnsureRecordSpace(npage, 1 + npage * 2);
+
START_CRIT_SECTION();
/*
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 2143096c66b..5acc986585a 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -183,14 +183,11 @@ gistbuild(PG_FUNCTION_ARGS)
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
- XLogRecData rdata;
- rdata.data = (char *) &(index->rd_node);
- rdata.len = sizeof(RelFileNode);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
PageSetLSN(page, recptr);
}
else
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 2999d211916..0a4f04810f2 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -18,18 +18,6 @@
#include "access/xlogutils.h"
#include "utils/memutils.h"
-typedef struct
-{
- gistxlogPage *header;
- IndexTuple *itup;
-} NewPage;
-
-typedef struct
-{
- gistxlogPageSplit *data;
- NewPage *page;
-} PageSplitRecord;
-
static MemoryContext opCtx; /* working memory for operations */
/*
@@ -44,9 +32,9 @@ static MemoryContext opCtx; /* working memory for operations */
* action.)
*/
static void
-gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
- RelFileNode node, BlockNumber childblkno)
+gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
{
+ XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
Page page;
XLogRedoAction action;
@@ -55,8 +43,7 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
* Note that we still update the page even if it was restored from a full
* page image, because the updated NSN is not included in the image.
*/
- action = XLogReadBufferForRedo(lsn, record, block_index, node, childblkno,
- &buffer);
+ action = XLogReadBufferForRedo(record, block_id, &buffer);
if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
{
page = BufferGetPage(buffer);
@@ -75,20 +62,23 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
* redo any page update (except page split)
*/
static void
-gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
+gistRedoPageUpdateRecord(XLogReaderState *record)
{
- char *begin = XLogRecGetData(record);
- gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) begin;
+ XLogRecPtr lsn = record->EndRecPtr;
+ gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record);
Buffer buffer;
Page page;
- char *data;
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
- page = (Page) BufferGetPage(buffer);
+ char *begin;
+ char *data;
+ Size datalen;
+ int ninserted = 0;
- data = begin + sizeof(gistxlogPageUpdate);
+ data = begin = XLogRecGetBlockData(record, 0, &datalen);
+
+ page = (Page) BufferGetPage(buffer);
/* Delete old tuples */
if (xldata->ntodelete > 0)
@@ -105,12 +95,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
}
/* add tuples */
- if (data - begin < record->xl_len)
+ if (data - begin < datalen)
{
OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
OffsetNumberNext(PageGetMaxOffsetNumber(page));
- while (data - begin < record->xl_len)
+ while (data - begin < datalen)
{
IndexTuple itup = (IndexTuple) data;
Size sz = IndexTupleSize(itup);
@@ -123,9 +113,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
elog(ERROR, "failed to add item to GiST index page, size %d bytes",
(int) sz);
off++;
+ ninserted++;
}
}
+ Assert(ninserted == xldata->ntoinsert);
+
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
@@ -137,58 +130,51 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
* that even if the target page no longer exists, we still attempt to
* replay the change on the child page.
*/
- if (BlockNumberIsValid(xldata->leftchild))
- gistRedoClearFollowRight(lsn, record, 1,
- xldata->node, xldata->leftchild);
+ if (XLogRecHasBlockRef(record, 1))
+ gistRedoClearFollowRight(record, 1);
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
}
-static void
-decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record)
+/*
+ * Returns a palloc'd array of pointers to the index tuples in 'begin'; sets *n to their count.
+ */
+static IndexTuple *
+decodePageSplitRecord(char *begin, int len, int *n)
{
- char *begin = XLogRecGetData(record),
- *ptr;
- int j,
- i = 0;
+ char *ptr;
+ int i = 0;
+ IndexTuple *tuples;
+
+ /* extract the number of tuples */
+ memcpy(n, begin, sizeof(int));
+ ptr = begin + sizeof(int);
- decoded->data = (gistxlogPageSplit *) begin;
- decoded->page = (NewPage *) palloc(sizeof(NewPage) * decoded->data->npage);
+ tuples = palloc(*n * sizeof(IndexTuple));
- ptr = begin + sizeof(gistxlogPageSplit);
- for (i = 0; i < decoded->data->npage; i++)
+ for (i = 0; i < *n; i++)
{
- Assert(ptr - begin < record->xl_len);
- decoded->page[i].header = (gistxlogPage *) ptr;
- ptr += sizeof(gistxlogPage);
-
- decoded->page[i].itup = (IndexTuple *)
- palloc(sizeof(IndexTuple) * decoded->page[i].header->num);
- j = 0;
- while (j < decoded->page[i].header->num)
- {
- Assert(ptr - begin < record->xl_len);
- decoded->page[i].itup[j] = (IndexTuple) ptr;
- ptr += IndexTupleSize((IndexTuple) ptr);
- j++;
- }
+ Assert(ptr - begin < len);
+ tuples[i] = (IndexTuple) ptr;
+ ptr += IndexTupleSize((IndexTuple) ptr);
}
+ Assert(ptr - begin == len);
+
+ return tuples;
}
static void
-gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
+gistRedoPageSplitRecord(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
- PageSplitRecord xlrec;
Buffer firstbuffer = InvalidBuffer;
Buffer buffer;
Page page;
int i;
bool isrootsplit = false;
- decodePageSplitRecord(&xlrec, record);
-
/*
* We must hold lock on the first-listed page throughout the action,
* including while updating the left child page (if any). We can unlock
@@ -198,32 +184,39 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
*/
/* loop around all pages */
- for (i = 0; i < xlrec.data->npage; i++)
+ for (i = 0; i < xldata->npage; i++)
{
- NewPage *newpage = xlrec.page + i;
int flags;
-
- if (newpage->header->blkno == GIST_ROOT_BLKNO)
+ char *data;
+ Size datalen;
+ int num;
+ BlockNumber blkno;
+ IndexTuple *tuples;
+
+ XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno);
+ if (blkno == GIST_ROOT_BLKNO)
{
Assert(i == 0);
isrootsplit = true;
}
- buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, i + 1);
page = (Page) BufferGetPage(buffer);
+ data = XLogRecGetBlockData(record, i + 1, &datalen);
+
+ tuples = decodePageSplitRecord(data, datalen, &num);
/* ok, clear buffer */
- if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO)
+ if (xldata->origleaf && blkno != GIST_ROOT_BLKNO)
flags = F_LEAF;
else
flags = 0;
GISTInitBuffer(buffer, flags);
/* and fill it */
- gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber);
+ gistfillbuffer(page, tuples, num, FirstOffsetNumber);
- if (newpage->header->blkno == GIST_ROOT_BLKNO)
+ if (blkno == GIST_ROOT_BLKNO)
{
GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
GistPageSetNSN(page, xldata->orignsn);
@@ -231,12 +224,17 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
}
else
{
- if (i < xlrec.data->npage - 1)
- GistPageGetOpaque(page)->rightlink = xlrec.page[i + 1].header->blkno;
+ if (i < xldata->npage - 1)
+ {
+ BlockNumber nextblkno;
+
+ XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno);
+ GistPageGetOpaque(page)->rightlink = nextblkno;
+ }
else
GistPageGetOpaque(page)->rightlink = xldata->origrlink;
GistPageSetNSN(page, xldata->orignsn);
- if (i < xlrec.data->npage - 1 && !isrootsplit &&
+ if (i < xldata->npage - 1 && !isrootsplit &&
xldata->markfollowright)
GistMarkFollowRight(page);
else
@@ -253,26 +251,22 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
}
/* Fix follow-right data on left child page, if any */
- if (BlockNumberIsValid(xldata->leftchild))
- gistRedoClearFollowRight(lsn, record, 0,
- xldata->node, xldata->leftchild);
+ if (XLogRecHasBlockRef(record, 0))
+ gistRedoClearFollowRight(record, 0);
/* Finally, release lock on the first page */
UnlockReleaseBuffer(firstbuffer);
}
static void
-gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
+gistRedoCreateIndex(XLogReaderState *record)
{
- RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+ XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
Page page;
- /* Backup blocks are not used in create_index records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 0);
+ Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
page = (Page) BufferGetPage(buffer);
GISTInitBuffer(buffer, F_LEAF);
@@ -284,9 +278,9 @@ gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
}
void
-gist_redo(XLogRecPtr lsn, XLogRecord *record)
+gist_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
MemoryContext oldCxt;
/*
@@ -299,13 +293,13 @@ gist_redo(XLogRecPtr lsn, XLogRecord *record)
switch (info)
{
case XLOG_GIST_PAGE_UPDATE:
- gistRedoPageUpdateRecord(lsn, record);
+ gistRedoPageUpdateRecord(record);
break;
case XLOG_GIST_PAGE_SPLIT:
- gistRedoPageSplitRecord(lsn, record);
+ gistRedoPageSplitRecord(record);
break;
case XLOG_GIST_CREATE_INDEX:
- gistRedoCreateIndex(lsn, record);
+ gistRedoCreateIndex(record);
break;
default:
elog(PANIC, "gist_redo: unknown op code %u", info);
@@ -336,70 +330,49 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
BlockNumber origrlink, GistNSN orignsn,
Buffer leftchildbuf, bool markfollowright)
{
- XLogRecData rdata[GIST_MAX_SPLIT_PAGES * 2 + 2];
gistxlogPageSplit xlrec;
SplitedPageLayout *ptr;
- int npage = 0,
- cur;
+ int npage = 0;
XLogRecPtr recptr;
+ int i;
for (ptr = dist; ptr; ptr = ptr->next)
npage++;
- /*
- * the caller should've checked this already, but doesn't hurt to check
- * again.
- */
- if (npage > GIST_MAX_SPLIT_PAGES)
- elog(ERROR, "GiST page split into too many halves");
-
- xlrec.node = node;
- xlrec.origblkno = blkno;
xlrec.origrlink = origrlink;
xlrec.orignsn = orignsn;
xlrec.origleaf = page_is_leaf;
xlrec.npage = (uint16) npage;
- xlrec.leftchild =
- BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
xlrec.markfollowright = markfollowright;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(gistxlogPageSplit);
- rdata[0].buffer = InvalidBuffer;
-
- cur = 1;
+ XLogBeginInsert();
/*
* Include a full page image of the child buf. (only necessary if a
* checkpoint happened since the child page was split)
*/
if (BufferIsValid(leftchildbuf))
- {
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].data = NULL;
- rdata[cur].len = 0;
- rdata[cur].buffer = leftchildbuf;
- rdata[cur].buffer_std = true;
- cur++;
- }
+ XLogRegisterBuffer(0, leftchildbuf, REGBUF_STANDARD);
+ /*
+ * NOTE: We register a lot of data. The caller must've called
+ * XLogEnsureRecordSpace() to prepare for that. We cannot do it here,
+ * because we're already in a critical section. If you change the number
+ * of buffer or data registrations here, make sure you modify the
+ * XLogEnsureRecordSpace() calls accordingly!
+ */
+ XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageSplit));
+
+ i = 1;
for (ptr = dist; ptr; ptr = ptr->next)
{
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].buffer = InvalidBuffer;
- rdata[cur].data = (char *) &(ptr->block);
- rdata[cur].len = sizeof(gistxlogPage);
- cur++;
-
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].buffer = InvalidBuffer;
- rdata[cur].data = (char *) (ptr->list);
- rdata[cur].len = ptr->lenlist;
- cur++;
+ XLogRegisterBuffer(i, ptr->buffer, REGBUF_WILL_INIT);
+ XLogRegisterBufData(i, (char *) &(ptr->block.num), sizeof(int));
+ XLogRegisterBufData(i, (char *) ptr->list, ptr->lenlist);
+ i++;
}
- rdata[cur - 1].next = NULL;
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT);
return recptr;
}
@@ -413,9 +386,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
*
* Note that both the todelete array and the tuples are marked as belonging
* to the target buffer; they need not be stored in XLOG if XLogInsert decides
- * to log the whole buffer contents instead. Also, we take care that there's
- * at least one rdata item referencing the buffer, even when ntodelete and
- * ituplen are both zero; this ensures that XLogInsert knows about the buffer.
+ * to log the whole buffer contents instead.
*/
XLogRecPtr
gistXLogUpdate(RelFileNode node, Buffer buffer,
@@ -423,57 +394,31 @@ gistXLogUpdate(RelFileNode node, Buffer buffer,
IndexTuple *itup, int ituplen,
Buffer leftchildbuf)
{
- XLogRecData rdata[MaxIndexTuplesPerPage + 3];
gistxlogPageUpdate xlrec;
- int cur,
- i;
+ int i;
XLogRecPtr recptr;
- xlrec.node = node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
xlrec.ntodelete = ntodelete;
- xlrec.leftchild =
- BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
-
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(gistxlogPageUpdate);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ xlrec.ntoinsert = ituplen;
- rdata[1].data = (char *) todelete;
- rdata[1].len = sizeof(OffsetNumber) * ntodelete;
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageUpdate));
- cur = 2;
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) todelete, sizeof(OffsetNumber) * ntodelete);
/* new tuples */
for (i = 0; i < ituplen; i++)
- {
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].data = (char *) (itup[i]);
- rdata[cur].len = IndexTupleSize(itup[i]);
- rdata[cur].buffer = buffer;
- rdata[cur].buffer_std = true;
- cur++;
- }
+ XLogRegisterBufData(0, (char *) (itup[i]), IndexTupleSize(itup[i]));
/*
* Include a full page image of the child buf. (only necessary if a
* checkpoint happened since the child page was split)
*/
if (BufferIsValid(leftchildbuf))
- {
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].data = NULL;
- rdata[cur].len = 0;
- rdata[cur].buffer = leftchildbuf;
- rdata[cur].buffer_std = true;
- cur++;
- }
- rdata[cur - 1].next = NULL;
+ XLogRegisterBuffer(1, leftchildbuf, REGBUF_STANDARD);
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE);
return recptr;
}
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 925a58f4f64..673459fd6c1 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -700,7 +700,7 @@ hashvacuumcleanup(PG_FUNCTION_ARGS)
void
-hash_redo(XLogRecPtr lsn, XLogRecord *record)
+hash_redo(XLogReaderState *record)
{
elog(PANIC, "hash_redo: unimplemented");
}
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 1763b70631d..c6e1eb79b2c 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2132,84 +2132,64 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
xl_heap_insert xlrec;
xl_heap_header xlhdr;
XLogRecPtr recptr;
- XLogRecData rdata[4];
Page page = BufferGetPage(buffer);
uint8 info = XLOG_HEAP_INSERT;
- bool need_tuple_data;
+ int bufflags = 0;
/*
- * For logical decoding, we need the tuple even if we're doing a full
- * page write, so make sure to log it separately. (XXX We could
- * alternatively store a pointer into the FPW).
- *
- * Also, if this is a catalog, we need to transmit combocids to
- * properly decode, so log that as well.
+ * If this is a catalog, we need to transmit combocids to properly
+ * decode, so log that as well.
*/
- need_tuple_data = RelationIsLogicallyLogged(relation);
if (RelationIsAccessibleInLogicalDecoding(relation))
log_heap_new_cid(relation, heaptup);
- xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = heaptup->t_self;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapInsert;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
- xlhdr.t_infomask = heaptup->t_data->t_infomask;
- xlhdr.t_hoff = heaptup->t_data->t_hoff;
-
/*
- * note we mark rdata[1] as belonging to buffer; if XLogInsert decides
- * to write the whole page to the xlog, we don't need to store
- * xl_heap_header in the xlog.
+ * If this is the single and first tuple on page, we can reinit the
+ * page instead of restoring the whole thing. Set the INIT_PAGE flag,
+ * and register the buffer with REGBUF_WILL_INIT.
*/
- rdata[1].data = (char *) &xlhdr;
- rdata[1].len = SizeOfHeapHeader;
- rdata[1].buffer = need_tuple_data ? InvalidBuffer : buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = &(rdata[2]);
+ if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
+ PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
+ {
+ info |= XLOG_HEAP_INIT_PAGE;
+ bufflags |= REGBUF_WILL_INIT;
+ }
- /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
- rdata[2].data = (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits);
- rdata[2].len = heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits);
- rdata[2].buffer = need_tuple_data ? InvalidBuffer : buffer;
- rdata[2].buffer_std = true;
- rdata[2].next = NULL;
+ xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
+ xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
+ Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
/*
- * Make a separate rdata entry for the tuple's buffer if we're doing
- * logical decoding, so that an eventual FPW doesn't remove the
- * tuple's data.
+ * For logical decoding, we need the tuple even if we're doing a full
+ * page write, so make sure it's included even if we take a full-page
+ * image. (XXX We could alternatively store a pointer into the FPW).
*/
- if (need_tuple_data)
+ if (RelationIsLogicallyLogged(relation))
{
- rdata[2].next = &(rdata[3]);
-
- rdata[3].data = NULL;
- rdata[3].len = 0;
- rdata[3].buffer = buffer;
- rdata[3].buffer_std = true;
- rdata[3].next = NULL;
-
xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+ bufflags |= REGBUF_KEEP_DATA;
}
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
+
+ xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
+ xlhdr.t_infomask = heaptup->t_data->t_infomask;
+ xlhdr.t_hoff = heaptup->t_data->t_hoff;
+
/*
- * If this is the single and first tuple on page, we can reinit the
- * page instead of restoring the whole thing. Set flag, and hide
- * buffer references from XLogInsert.
+ * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
+ * write the whole page to the xlog, we don't need to store
+ * xl_heap_header in the xlog.
*/
- if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
- PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
- {
- info |= XLOG_HEAP_INIT_PAGE;
- rdata[1].buffer = rdata[2].buffer = rdata[3].buffer = InvalidBuffer;
- }
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+ XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
+ /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
+ XLogRegisterBufData(0,
+ (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+ heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits));
- recptr = XLogInsert(RM_HEAP_ID, info, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, info);
PageSetLSN(page, recptr);
}
@@ -2397,6 +2377,13 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
break;
RelationPutHeapTuple(relation, buffer, heaptup);
+
+ /*
+ * We don't use heap_multi_insert for catalog tuples yet, but
+ * better be prepared...
+ */
+ if (needwal && need_cids)
+ log_heap_new_cid(relation, heaptup);
}
if (PageIsAllVisible(page))
@@ -2419,12 +2406,12 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
{
XLogRecPtr recptr;
xl_heap_multi_insert *xlrec;
- XLogRecData rdata[3];
uint8 info = XLOG_HEAP2_MULTI_INSERT;
char *tupledata;
int totaldatalen;
char *scratchptr = scratch;
bool init;
+ int bufflags = 0;
/*
* If the page was previously empty, we can reinit the page
@@ -2450,8 +2437,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
tupledata = scratchptr;
xlrec->flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
- xlrec->node = relation->rd_node;
- xlrec->blkno = BufferGetBlockNumber(buffer);
xlrec->ntuples = nthispage;
/*
@@ -2481,64 +2466,40 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
datalen);
tuphdr->datalen = datalen;
scratchptr += datalen;
-
- /*
- * We don't use heap_multi_insert for catalog tuples yet, but
- * better be prepared...
- */
- if (need_cids)
- log_heap_new_cid(relation, heaptup);
}
totaldatalen = scratchptr - tupledata;
Assert((scratchptr - scratch) < BLCKSZ);
- rdata[0].data = (char *) xlrec;
- rdata[0].len = tupledata - scratch;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &rdata[1];
-
- rdata[1].data = tupledata;
- rdata[1].len = totaldatalen;
- rdata[1].buffer = need_tuple_data ? InvalidBuffer : buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
-
- /*
- * Make a separate rdata entry for the tuple's buffer if we're
- * doing logical decoding, so that an eventual FPW doesn't remove
- * the tuple's data.
- */
if (need_tuple_data)
- {
- rdata[1].next = &(rdata[2]);
-
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].buffer = buffer;
- rdata[2].buffer_std = true;
- rdata[2].next = NULL;
xlrec->flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
- }
/*
- * If we're going to reinitialize the whole page using the WAL
- * record, hide buffer reference from XLogInsert.
+ * Signal that this is the last xl_heap_multi_insert record
+ * emitted by this call to heap_multi_insert(). Needed for logical
+ * decoding so it knows when to cleanup temporary data.
*/
+ if (ndone + nthispage == ntuples)
+ xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT;
+
if (init)
{
- rdata[1].buffer = rdata[2].buffer = InvalidBuffer;
info |= XLOG_HEAP_INIT_PAGE;
+ bufflags |= REGBUF_WILL_INIT;
}
/*
- * Signal that this is the last xl_heap_multi_insert record
- * emitted by this call to heap_multi_insert(). Needed for logical
- * decoding so it knows when to cleanup temporary data.
+ * If we're doing logical decoding, include the new tuple data
+ * even if we take a full-page image of the page.
*/
- if (ndone + nthispage == ntuples)
- xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT;
+ if (need_tuple_data)
+ bufflags |= REGBUF_KEEP_DATA;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) xlrec, tupledata - scratch);
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
- recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
+ XLogRegisterBufData(0, tupledata, totaldatalen);
+ recptr = XLogInsert(RM_HEAP2_ID, info);
PageSetLSN(page, recptr);
}
@@ -2909,7 +2870,6 @@ l1:
{
xl_heap_delete xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[4];
/* For logical decode we need combocids to properly decode the catalog */
if (RelationIsAccessibleInLogicalDecoding(relation))
@@ -2918,19 +2878,21 @@ l1:
xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = tp.t_self;
+ xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
xlrec.xmax = new_xmax;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapDelete;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ if (old_key_tuple != NULL)
+ {
+ if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+ xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+ else
+ xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+ }
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
/*
* Log replica identity of the deleted tuple if there is one
@@ -2943,27 +2905,14 @@ l1:
xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
- rdata[1].next = &(rdata[2]);
- rdata[2].data = (char *) &xlhdr;
- rdata[2].len = SizeOfHeapHeader;
- rdata[2].buffer = InvalidBuffer;
- rdata[2].next = NULL;
-
- rdata[2].next = &(rdata[3]);
- rdata[3].data = (char *) old_key_tuple->t_data
- + offsetof(HeapTupleHeaderData, t_bits);
- rdata[3].len = old_key_tuple->t_len
- - offsetof(HeapTupleHeaderData, t_bits);
- rdata[3].buffer = InvalidBuffer;
- rdata[3].next = NULL;
-
- if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
- else
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+ XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
+ XLogRegisterData((char *) old_key_tuple->t_data
+ + offsetof(HeapTupleHeaderData, t_bits),
+ old_key_tuple->t_len
+ - offsetof(HeapTupleHeaderData, t_bits));
}
- recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
PageSetLSN(page, recptr);
}
@@ -4735,25 +4684,17 @@ failed:
{
xl_heap_lock xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = tuple->t_self;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
+
+ xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
xlrec.locking_xid = xid;
xlrec.infobits_set = compute_infobits(new_infomask,
tuple->t_data->t_infomask2);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapLock;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = *buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
- recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
PageSetLSN(page, recptr);
}
@@ -5342,26 +5283,18 @@ l4:
{
xl_heap_lock_updated xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
Page page = BufferGetPage(buf);
- xlrec.target.node = rel->rd_node;
- xlrec.target.tid = mytup.t_self;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+
+ xlrec.offnum = ItemPointerGetOffsetNumber(&mytup.t_self);
xlrec.xmax = new_xmax;
xlrec.infobits_set = compute_infobits(new_infomask, new_infomask2);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapLockUpdated;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogRegisterData((char *) &xlrec, SizeOfHeapLockUpdated);
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = buf;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
-
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED);
PageSetLSN(page, recptr);
}
@@ -5489,23 +5422,16 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
{
xl_heap_inplace xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = tuple->t_self;
+ xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapInplace;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
- rdata[1].data = (char *) htup + htup->t_hoff;
- rdata[1].len = newlen;
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
- recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
PageSetLSN(page, recptr);
}
@@ -6507,17 +6433,14 @@ log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
{
xl_heap_cleanup_info xlrec;
XLogRecPtr recptr;
- XLogRecData rdata;
xlrec.node = rnode;
xlrec.latestRemovedXid = latestRemovedXid;
- rdata.data = (char *) &xlrec;
- rdata.len = SizeOfHeapCleanupInfo;
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapCleanupInfo);
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO, &rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO);
return recptr;
}
@@ -6542,23 +6465,19 @@ log_heap_clean(Relation reln, Buffer buffer,
TransactionId latestRemovedXid)
{
xl_heap_clean xlrec;
- uint8 info;
XLogRecPtr recptr;
- XLogRecData rdata[4];
/* Caller should not call me on a non-WAL-logged relation */
Assert(RelationNeedsWAL(reln));
- xlrec.node = reln->rd_node;
- xlrec.block = BufferGetBlockNumber(buffer);
xlrec.latestRemovedXid = latestRemovedXid;
xlrec.nredirected = nredirected;
xlrec.ndead = ndead;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapClean;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapClean);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
/*
* The OffsetNumber arrays are not actually in the buffer, but we pretend
@@ -6569,49 +6488,18 @@ log_heap_clean(Relation reln, Buffer buffer,
* even if no item pointers changed state.
*/
if (nredirected > 0)
- {
- rdata[1].data = (char *) redirected;
- rdata[1].len = nredirected * sizeof(OffsetNumber) * 2;
- }
- else
- {
- rdata[1].data = NULL;
- rdata[1].len = 0;
- }
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = &(rdata[2]);
+ XLogRegisterBufData(0, (char *) redirected,
+ nredirected * sizeof(OffsetNumber) * 2);
if (ndead > 0)
- {
- rdata[2].data = (char *) nowdead;
- rdata[2].len = ndead * sizeof(OffsetNumber);
- }
- else
- {
- rdata[2].data = NULL;
- rdata[2].len = 0;
- }
- rdata[2].buffer = buffer;
- rdata[2].buffer_std = true;
- rdata[2].next = &(rdata[3]);
+ XLogRegisterBufData(0, (char *) nowdead,
+ ndead * sizeof(OffsetNumber));
if (nunused > 0)
- {
- rdata[3].data = (char *) nowunused;
- rdata[3].len = nunused * sizeof(OffsetNumber);
- }
- else
- {
- rdata[3].data = NULL;
- rdata[3].len = 0;
- }
- rdata[3].buffer = buffer;
- rdata[3].buffer_std = true;
- rdata[3].next = NULL;
+ XLogRegisterBufData(0, (char *) nowunused,
+ nunused * sizeof(OffsetNumber));
- info = XLOG_HEAP2_CLEAN;
- recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEAN);
return recptr;
}
@@ -6626,35 +6514,28 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
{
xl_heap_freeze_page xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
/* Caller should not call me on a non-WAL-logged relation */
Assert(RelationNeedsWAL(reln));
/* nor when there are no tuples to freeze */
Assert(ntuples > 0);
- xlrec.node = reln->rd_node;
- xlrec.block = BufferGetBlockNumber(buffer);
xlrec.cutoff_xid = cutoff_xid;
xlrec.ntuples = ntuples;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapFreezePage;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapFreezePage);
/*
* The freeze plan array is not actually in the buffer, but pretend that
* it is. When XLogInsert stores the whole buffer, the freeze plan need
* not be stored too.
*/
- rdata[1].data = (char *) tuples;
- rdata[1].len = ntuples * sizeof(xl_heap_freeze_tuple);
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) tuples,
+ ntuples * sizeof(xl_heap_freeze_tuple));
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE);
return recptr;
}
@@ -6665,8 +6546,8 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
* corresponding visibility map block. Both should have already been modified
* and dirtied.
*
- * If checksums are enabled, we also add the heap_buffer to the chain to
- * protect it from being torn.
+ * If checksums are enabled, we also generate a full-page image of
+ * heap_buffer, if necessary.
*/
XLogRecPtr
log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
@@ -6674,38 +6555,23 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
{
xl_heap_visible xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[3];
+ uint8 flags;
Assert(BufferIsValid(heap_buffer));
Assert(BufferIsValid(vm_buffer));
- xlrec.node = rnode;
- xlrec.block = BufferGetBlockNumber(heap_buffer);
xlrec.cutoff_xid = cutoff_xid;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapVisible;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogRegisterBuffer(0, vm_buffer, 0);
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = vm_buffer;
- rdata[1].buffer_std = false;
- rdata[1].next = NULL;
+ flags = REGBUF_STANDARD;
+ if (!XLogHintBitIsNeeded())
+ flags |= REGBUF_NO_IMAGE;
+ XLogRegisterBuffer(1, heap_buffer, flags);
- if (XLogHintBitIsNeeded())
- {
- rdata[1].next = &(rdata[2]);
-
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].buffer = heap_buffer;
- rdata[2].buffer_std = true;
- rdata[2].next = NULL;
- }
-
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE);
return recptr;
}
@@ -6721,22 +6587,23 @@ log_heap_update(Relation reln, Buffer oldbuf,
bool all_visible_cleared, bool new_all_visible_cleared)
{
xl_heap_update xlrec;
- xl_heap_header_len xlhdr;
- xl_heap_header_len xlhdr_idx;
+ xl_heap_header xlhdr;
+ xl_heap_header xlhdr_idx;
uint8 info;
uint16 prefix_suffix[2];
uint16 prefixlen = 0,
suffixlen = 0;
XLogRecPtr recptr;
- XLogRecData rdata[9];
Page page = BufferGetPage(newbuf);
bool need_tuple_data = RelationIsLogicallyLogged(reln);
- int nr;
- Buffer newbufref;
+ bool init;
+ int bufflags;
/* Caller should not call me on a non-WAL-logged relation */
Assert(RelationNeedsWAL(reln));
+ XLogBeginInsert();
+
if (HeapTupleIsHeapOnly(newtup))
info = XLOG_HEAP_HOT_UPDATE;
else
@@ -6794,103 +6661,97 @@ log_heap_update(Relation reln, Buffer oldbuf,
suffixlen = 0;
}
- xlrec.target.node = reln->rd_node;
- xlrec.target.tid = oldtup->t_self;
- xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
- xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
- oldtup->t_data->t_infomask2);
- xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+ /* Prepare main WAL data chain */
xlrec.flags = 0;
if (all_visible_cleared)
xlrec.flags |= XLOG_HEAP_ALL_VISIBLE_CLEARED;
- xlrec.newtid = newtup->t_self;
if (new_all_visible_cleared)
xlrec.flags |= XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED;
if (prefixlen > 0)
xlrec.flags |= XLOG_HEAP_PREFIX_FROM_OLD;
if (suffixlen > 0)
xlrec.flags |= XLOG_HEAP_SUFFIX_FROM_OLD;
+ if (need_tuple_data)
+ {
+ xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+ if (old_key_tuple)
+ {
+ if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+ xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+ else
+ xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+ }
+ }
/* If new tuple is the single and first tuple on page... */
if (ItemPointerGetOffsetNumber(&(newtup->t_self)) == FirstOffsetNumber &&
PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
{
info |= XLOG_HEAP_INIT_PAGE;
- newbufref = InvalidBuffer;
+ init = true;
}
else
- newbufref = newbuf;
+ init = false;
- rdata[0].data = NULL;
- rdata[0].len = 0;
- rdata[0].buffer = oldbuf;
- rdata[0].buffer_std = true;
- rdata[0].next = &(rdata[1]);
+ /* Prepare WAL data for the old page */
+ xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self);
+ xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+ xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2);
+
+ /* Prepare WAL data for the new page */
+ xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self);
+ xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+
+ bufflags = REGBUF_STANDARD;
+ if (init)
+ bufflags |= REGBUF_WILL_INIT;
+ if (need_tuple_data)
+ bufflags |= REGBUF_KEEP_DATA;
- rdata[1].data = (char *) &xlrec;
- rdata[1].len = SizeOfHeapUpdate;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &(rdata[2]);
+ XLogRegisterBuffer(0, newbuf, bufflags);
+ if (oldbuf != newbuf)
+ XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD);
- /* prefix and/or suffix length fields */
+ XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate);
+
+ /*
+ * Prepare WAL data for the new tuple.
+ */
if (prefixlen > 0 || suffixlen > 0)
{
if (prefixlen > 0 && suffixlen > 0)
{
prefix_suffix[0] = prefixlen;
prefix_suffix[1] = suffixlen;
- rdata[2].data = (char *) &prefix_suffix;
- rdata[2].len = 2 * sizeof(uint16);
+ XLogRegisterBufData(0, (char *) &prefix_suffix, sizeof(uint16) * 2);
}
else if (prefixlen > 0)
{
- rdata[2].data = (char *) &prefixlen;
- rdata[2].len = sizeof(uint16);
+ XLogRegisterBufData(0, (char *) &prefixlen, sizeof(uint16));
}
else
{
- rdata[2].data = (char *) &suffixlen;
- rdata[2].len = sizeof(uint16);
+ XLogRegisterBufData(0, (char *) &suffixlen, sizeof(uint16));
}
- rdata[2].buffer = newbufref;
- rdata[2].buffer_std = true;
- rdata[2].next = &(rdata[3]);
- nr = 3;
}
- else
- nr = 2;
-
- xlhdr.header.t_infomask2 = newtup->t_data->t_infomask2;
- xlhdr.header.t_infomask = newtup->t_data->t_infomask;
- xlhdr.header.t_hoff = newtup->t_data->t_hoff;
- Assert(offsetof(HeapTupleHeaderData, t_bits) +prefixlen + suffixlen <= newtup->t_len);
- xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -prefixlen - suffixlen;
- /*
- * As with insert records, we need not store this rdata segment if we
- * decide to store the whole buffer instead, unless we're doing logical
- * decoding.
- */
- rdata[nr].data = (char *) &xlhdr;
- rdata[nr].len = SizeOfHeapHeaderLen;
- rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = &(rdata[nr + 1]);
- nr++;
+ xlhdr.t_infomask2 = newtup->t_data->t_infomask2;
+ xlhdr.t_infomask = newtup->t_data->t_infomask;
+ xlhdr.t_hoff = newtup->t_data->t_hoff;
+ Assert(offsetof(HeapTupleHeaderData, t_bits) + prefixlen + suffixlen <= newtup->t_len);
/*
* PG73FORMAT: write bitmap [+ padding] [+ oid] + data
*
* The 'data' doesn't include the common prefix or suffix.
*/
+ XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
if (prefixlen == 0)
{
- rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -suffixlen;
- rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = NULL;
- nr++;
+ XLogRegisterBufData(0,
+ ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits),
+ newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -suffixlen);
}
else
{
@@ -6901,75 +6762,33 @@ log_heap_update(Relation reln, Buffer oldbuf,
/* bitmap [+ padding] [+ oid] */
if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) >0)
{
- rdata[nr - 1].next = &(rdata[nr]);
- rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].len = newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = NULL;
- nr++;
+ XLogRegisterBufData(0,
+ ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits),
+ newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits));
}
/* data after common prefix */
- rdata[nr - 1].next = &(rdata[nr]);
- rdata[nr].data = ((char *) newtup->t_data) + newtup->t_data->t_hoff + prefixlen;
- rdata[nr].len = newtup->t_len - newtup->t_data->t_hoff - prefixlen - suffixlen;
- rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = NULL;
- nr++;
+ XLogRegisterBufData(0,
+ ((char *) newtup->t_data) + newtup->t_data->t_hoff + prefixlen,
+ newtup->t_len - newtup->t_data->t_hoff - prefixlen - suffixlen);
}
- /*
- * Separate storage for the FPW buffer reference of the new page in the
- * wal_level >= logical case.
- */
- if (need_tuple_data)
+ /* We need to log a tuple identity */
+ if (need_tuple_data && old_key_tuple)
{
- rdata[nr - 1].next = &(rdata[nr]);
-
- rdata[nr].data = NULL,
- rdata[nr].len = 0;
- rdata[nr].buffer = newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = NULL;
- nr++;
-
- xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+ /* don't really need this, but it's more comfy to decode */
+ xlhdr_idx.t_infomask2 = old_key_tuple->t_data->t_infomask2;
+ xlhdr_idx.t_infomask = old_key_tuple->t_data->t_infomask;
+ xlhdr_idx.t_hoff = old_key_tuple->t_data->t_hoff;
- /* We need to log a tuple identity */
- if (old_key_tuple)
- {
- /* don't really need this, but its more comfy to decode */
- xlhdr_idx.header.t_infomask2 = old_key_tuple->t_data->t_infomask2;
- xlhdr_idx.header.t_infomask = old_key_tuple->t_data->t_infomask;
- xlhdr_idx.header.t_hoff = old_key_tuple->t_data->t_hoff;
- xlhdr_idx.t_len = old_key_tuple->t_len;
-
- rdata[nr - 1].next = &(rdata[nr]);
- rdata[nr].data = (char *) &xlhdr_idx;
- rdata[nr].len = SizeOfHeapHeaderLen;
- rdata[nr].buffer = InvalidBuffer;
- rdata[nr].next = &(rdata[nr + 1]);
- nr++;
-
- /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
- rdata[nr].data = (char *) old_key_tuple->t_data
- + offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].len = old_key_tuple->t_len
- - offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].buffer = InvalidBuffer;
- rdata[nr].next = NULL;
- nr++;
+ XLogRegisterData((char *) &xlhdr_idx, SizeOfHeapHeader);
- if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
- else
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
- }
+ /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
+ XLogRegisterData((char *) old_key_tuple->t_data + offsetof(HeapTupleHeaderData, t_bits),
+ old_key_tuple->t_len - offsetof(HeapTupleHeaderData, t_bits));
}
- recptr = XLogInsert(RM_HEAP_ID, info, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, info);
return recptr;
}
@@ -6986,15 +6805,14 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
xl_heap_new_cid xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[1];
HeapTupleHeader hdr = tup->t_data;
Assert(ItemPointerIsValid(&tup->t_self));
Assert(tup->t_tableOid != InvalidOid);
xlrec.top_xid = GetTopTransactionId();
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = tup->t_self;
+ xlrec.target_node = relation->rd_node;
+ xlrec.target_tid = tup->t_self;
/*
* If the tuple got inserted & deleted in the same TX we definitely have a
@@ -7035,12 +6853,15 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
xlrec.combocid = InvalidCommandId;
}
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapNewCid;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ /*
+ * Note that we don't need to register the buffer here, because this
+ * operation does not modify the page. The insert/update/delete that
+ * called us certainly did, but that's WAL-logged separately.
+ */
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapNewCid);
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID);
return recptr;
}
@@ -7165,7 +6986,7 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool *
* Handles CLEANUP_INFO
*/
static void
-heap_xlog_cleanup_info(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_cleanup_info(XLogReaderState *record)
{
xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) XLogRecGetData(record);
@@ -7179,15 +7000,16 @@ heap_xlog_cleanup_info(XLogRecPtr lsn, XLogRecord *record)
*/
/* Backup blocks are not used in cleanup_info records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
}
/*
* Handles HEAP2_CLEAN record type
*/
static void
-heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_clean(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record);
Buffer buffer;
Size freespace = 0;
@@ -7195,8 +7017,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
BlockNumber blkno;
XLogRedoAction action;
- rnode = xlrec->node;
- blkno = xlrec->block;
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
/*
* We're about to remove tuples. In Hot Standby mode, ensure that there's
@@ -7213,9 +7034,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
* If we have a full-page image, restore it (using a cleanup lock) and
* we're done.
*/
- action = XLogReadBufferForRedoExtended(lsn, record, 0,
- rnode, MAIN_FORKNUM, blkno,
- RBM_NORMAL, true, &buffer);
+ action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true,
+ &buffer);
if (action == BLK_NEEDS_REDO)
{
Page page = (Page) BufferGetPage(buffer);
@@ -7226,11 +7046,13 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
int nredirected;
int ndead;
int nunused;
+ Size datalen;
+
+ redirected = (OffsetNumber *) XLogRecGetBlockData(record, 0, &datalen);
nredirected = xlrec->nredirected;
ndead = xlrec->ndead;
- end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
- redirected = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
+ end = (OffsetNumber *) ((char *) redirected + datalen);
nowdead = redirected + (nredirected * 2);
nowunused = nowdead + ndead;
nunused = (end - nowunused);
@@ -7263,7 +7085,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
* totally accurate anyway.
*/
if (action == BLK_NEEDS_REDO)
- XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
+ XLogRecordPageWithFreeSpace(rnode, blkno, freespace);
}
/*
@@ -7275,17 +7097,18 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
* page modification would fail to clear the visibility map bit.
*/
static void
-heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_visible(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
+ Buffer vmbuffer = InvalidBuffer;
Buffer buffer;
Page page;
RelFileNode rnode;
BlockNumber blkno;
XLogRedoAction action;
- rnode = xlrec->node;
- blkno = xlrec->block;
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
/*
* If there are any Hot Standby transactions running that have an xmin
@@ -7304,7 +7127,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
* truncated later in recovery, we don't need to update the page, but we'd
* better still update the visibility map.
*/
- action = XLogReadBufferForRedo(lsn, record, 1, rnode, blkno, &buffer);
+ action = XLogReadBufferForRedo(record, 1, &buffer);
if (action == BLK_NEEDS_REDO)
{
/*
@@ -7341,12 +7164,21 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
* the visibility map bit does so before checking the page LSN, so any
* bits that need to be cleared will still be cleared.
*/
- if (record->xl_info & XLR_BKP_BLOCK(0))
- (void) RestoreBackupBlock(lsn, record, 0, false, false);
- else
+ if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
+ &vmbuffer) == BLK_NEEDS_REDO)
{
+ Page vmpage = BufferGetPage(vmbuffer);
Relation reln;
- Buffer vmbuffer = InvalidBuffer;
+
+ /* initialize the page if it was read as zeros */
+ if (PageIsNew(vmpage))
+ PageInit(vmpage, BLCKSZ, 0);
+
+ /*
+ * XLogReadBufferForRedoExtended locked the buffer. But visibilitymap_set
+ * will handle locking itself.
+ */
+ LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
reln = CreateFakeRelcacheEntry(rnode);
visibilitymap_pin(reln, blkno, &vmbuffer);
@@ -7362,25 +7194,27 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
* we did for the heap page. If this results in a dropped bit, no
* real harm is done; and the next VACUUM will fix it.
*/
- if (lsn > PageGetLSN(BufferGetPage(vmbuffer)))
+ if (lsn > PageGetLSN(vmpage))
visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
xlrec->cutoff_xid);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
}
+ else if (BufferIsValid(vmbuffer))
+ UnlockReleaseBuffer(vmbuffer);
}
/*
* Replay XLOG_HEAP2_FREEZE_PAGE records
*/
static void
-heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_freeze_page(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) XLogRecGetData(record);
TransactionId cutoff_xid = xlrec->cutoff_xid;
Buffer buffer;
- Page page;
int ntup;
/*
@@ -7388,12 +7222,19 @@ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
* consider the frozen xids as running.
*/
if (InHotStandby)
- ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node);
+ {
+ RelFileNode rnode;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
- &buffer) == BLK_NEEDS_REDO)
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
+ ResolveRecoveryConflictWithSnapshot(cutoff_xid, rnode);
+ }
+
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
- page = BufferGetPage(buffer);
+ Page page = BufferGetPage(buffer);
+ xl_heap_freeze_tuple *tuples;
+
+ tuples = (xl_heap_freeze_tuple *) XLogRecGetBlockData(record, 0, NULL);
/* now execute freeze plan for each frozen tuple */
for (ntup = 0; ntup < xlrec->ntuples; ntup++)
@@ -7402,7 +7243,7 @@ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
ItemId lp;
HeapTupleHeader tuple;
- xlrec_tp = &xlrec->tuples[ntup];
+ xlrec_tp = &tuples[ntup];
lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */
tuple = (HeapTupleHeader) PageGetItem(page, lp);
@@ -7444,19 +7285,21 @@ fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
}
static void
-heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_delete(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
Buffer buffer;
Page page;
- OffsetNumber offnum;
ItemId lp = NULL;
HeapTupleHeader htup;
BlockNumber blkno;
RelFileNode target_node;
+ ItemPointerData target_tid;
- blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
- target_node = xlrec->target.node;
+ XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+ ItemPointerSetBlockNumber(&target_tid, blkno);
+ ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
/*
* The visibility map may need to be fixed even if the heap page is
@@ -7473,16 +7316,14 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
FreeFakeRelcacheEntry(reln);
}
- if (XLogReadBufferForRedo(lsn, record, 0, target_node, blkno, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
- page = (Page) BufferGetPage(buffer);
+ page = BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
- if (PageGetMaxOffsetNumber(page) >= offnum)
- lp = PageGetItemId(page, offnum);
+ if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
+ lp = PageGetItemId(page, xlrec->offnum);
- if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
elog(PANIC, "heap_delete_redo: invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
@@ -7496,13 +7337,13 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Mark the page as a candidate for pruning */
- PageSetPrunable(page, record->xl_xid);
+ PageSetPrunable(page, XLogRecGetXid(record));
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
/* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = xlrec->target.tid;
+ htup->t_ctid = target_tid;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
@@ -7511,12 +7352,12 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
}
static void
-heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_insert(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
Buffer buffer;
Page page;
- OffsetNumber offnum;
struct
{
HeapTupleHeaderData hdr;
@@ -7528,10 +7369,12 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
Size freespace = 0;
RelFileNode target_node;
BlockNumber blkno;
+ ItemPointerData target_tid;
XLogRedoAction action;
- target_node = xlrec->target.node;
- blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
+ XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+ ItemPointerSetBlockNumber(&target_tid, blkno);
+ ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
/*
* The visibility map may need to be fixed even if the heap page is
@@ -7549,51 +7392,51 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
}
/*
- * If we inserted the first and only tuple on the page, re-initialize
- * the page from scratch.
+ * If we inserted the first and only tuple on the page, re-initialize the
+ * page from scratch.
*/
- if (record->xl_info & XLOG_HEAP_INIT_PAGE)
+ if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
{
- XLogReadBufferForRedoExtended(lsn, record, 0,
- target_node, MAIN_FORKNUM, blkno,
- RBM_ZERO_AND_LOCK, false, &buffer);
+ buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
PageInit(page, BufferGetPageSize(buffer), 0);
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 0, target_node, blkno,
- &buffer);
-
+ action = XLogReadBufferForRedo(record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
+ Size datalen;
+ char *data;
+
page = BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
- if (PageGetMaxOffsetNumber(page) + 1 < offnum)
+ if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
elog(PANIC, "heap_insert_redo: invalid max offset number");
- newlen = record->xl_len - SizeOfHeapInsert - SizeOfHeapHeader;
- Assert(newlen <= MaxHeapTupleSize);
- memcpy((char *) &xlhdr,
- (char *) xlrec + SizeOfHeapInsert,
- SizeOfHeapHeader);
+ data = XLogRecGetBlockData(record, 0, &datalen);
+
+ newlen = datalen - SizeOfHeapHeader;
+ Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
+ memcpy((char *) &xlhdr, data, SizeOfHeapHeader);
+ data += SizeOfHeapHeader;
+
htup = &tbuf.hdr;
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
- (char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader,
+ data,
newlen);
newlen += offsetof(HeapTupleHeaderData, t_bits);
htup->t_infomask2 = xlhdr.t_infomask2;
htup->t_infomask = xlhdr.t_infomask;
htup->t_hoff = xlhdr.t_hoff;
- HeapTupleHeaderSetXmin(htup, record->xl_xid);
+ HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
HeapTupleHeaderSetCmin(htup, FirstCommandId);
- htup->t_ctid = xlrec->target.tid;
+ htup->t_ctid = target_tid;
- offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
- if (offnum == InvalidOffsetNumber)
+ if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
+ true, true) == InvalidOffsetNumber)
elog(PANIC, "heap_insert_redo: failed to add tuple");
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
@@ -7618,16 +7461,16 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
* totally accurate anyway.
*/
if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
- XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace);
+ XLogRecordPageWithFreeSpace(target_node, blkno, freespace);
}
/*
* Handles MULTI_INSERT record type.
*/
static void
-heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_multi_insert(XLogReaderState *record)
{
- char *recdata = XLogRecGetData(record);
+ XLogRecPtr lsn = record->EndRecPtr;
xl_heap_multi_insert *xlrec;
RelFileNode rnode;
BlockNumber blkno;
@@ -7642,27 +7485,16 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
uint32 newlen;
Size freespace = 0;
int i;
- bool isinit = (record->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
+ bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
XLogRedoAction action;
/*
* Insertion doesn't overwrite MVCC data, so no conflict processing is
* required.
*/
+ xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
- xlrec = (xl_heap_multi_insert *) recdata;
- recdata += SizeOfHeapMultiInsert;
-
- rnode = xlrec->node;
- blkno = xlrec->blkno;
-
- /*
- * If we're reinitializing the page, the tuples are stored in order from
- * FirstOffsetNumber. Otherwise there's an array of offsets in the WAL
- * record.
- */
- if (!isinit)
- recdata += sizeof(OffsetNumber) * xlrec->ntuples;
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
/*
* The visibility map may need to be fixed even if the heap page is
@@ -7681,24 +7513,35 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
if (isinit)
{
- XLogReadBufferForRedoExtended(lsn, record, 0,
- rnode, MAIN_FORKNUM, blkno,
- RBM_ZERO_AND_LOCK, false, &buffer);
+ buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
PageInit(page, BufferGetPageSize(buffer), 0);
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 0, rnode, blkno, &buffer);
-
+ action = XLogReadBufferForRedo(record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
- page = BufferGetPage(buffer);
+ char *tupdata;
+ char *endptr;
+ Size len;
+
+ /* Tuples are stored as block data */
+ tupdata = XLogRecGetBlockData(record, 0, &len);
+ endptr = tupdata + len;
+
+ page = (Page) BufferGetPage(buffer);
+
for (i = 0; i < xlrec->ntuples; i++)
{
OffsetNumber offnum;
xl_multi_insert_tuple *xlhdr;
+ /*
+ * If we're reinitializing the page, the tuples are stored in
+ * order from FirstOffsetNumber. Otherwise there's an array of
+ * offsets in the WAL record, and the tuples come after that.
+ */
if (isinit)
offnum = FirstOffsetNumber + i;
else
@@ -7706,8 +7549,8 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "heap_multi_insert_redo: invalid max offset number");
- xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(recdata);
- recdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
+ xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
+ tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
newlen = xlhdr->datalen;
Assert(newlen <= MaxHeapTupleSize);
@@ -7715,15 +7558,15 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
- (char *) recdata,
+ (char *) tupdata,
newlen);
- recdata += newlen;
+ tupdata += newlen;
newlen += offsetof(HeapTupleHeaderData, t_bits);
htup->t_infomask2 = xlhdr->t_infomask2;
htup->t_infomask = xlhdr->t_infomask;
htup->t_hoff = xlhdr->t_hoff;
- HeapTupleHeaderSetXmin(htup, record->xl_xid);
+ HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
HeapTupleHeaderSetCmin(htup, FirstCommandId);
ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
@@ -7732,6 +7575,8 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_multi_insert_redo: failed to add tuple");
}
+ if (tupdata != endptr)
+ elog(PANIC, "heap_multi_insert_redo: total tuple length mismatch");
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
@@ -7755,19 +7600,21 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
* totally accurate anyway.
*/
if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
- XLogRecordPageWithFreeSpace(xlrec->node, blkno, freespace);
+ XLogRecordPageWithFreeSpace(rnode, blkno, freespace);
}
/*
* Handles UPDATE and HOT_UPDATE
*/
static void
-heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
+heap_xlog_update(XLogReaderState *record, bool hot_update)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
RelFileNode rnode;
BlockNumber oldblk;
BlockNumber newblk;
+ ItemPointerData newtid;
Buffer obuffer,
nbuffer;
Page page;
@@ -7775,7 +7622,6 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
ItemId lp = NULL;
HeapTupleData oldtup;
HeapTupleHeader htup;
- char *recdata;
uint16 prefixlen = 0,
suffixlen = 0;
char *newp;
@@ -7784,7 +7630,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
HeapTupleHeaderData hdr;
char data[MaxHeapTupleSize];
} tbuf;
- xl_heap_header_len xlhdr;
+ xl_heap_header xlhdr;
uint32 newlen;
Size freespace = 0;
XLogRedoAction oldaction;
@@ -7794,9 +7640,16 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
oldtup.t_data = NULL;
oldtup.t_len = 0;
- rnode = xlrec->target.node;
- newblk = ItemPointerGetBlockNumber(&xlrec->newtid);
- oldblk = ItemPointerGetBlockNumber(&xlrec->target.tid);
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &newblk);
+ if (XLogRecGetBlockTag(record, 1, NULL, NULL, &oldblk))
+ {
+ /* HOT updates are never done across pages */
+ Assert(!hot_update);
+ }
+ else
+ oldblk = newblk;
+
+ ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
/*
* The visibility map may need to be fixed even if the heap page is
@@ -7824,12 +7677,12 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
*/
/* Deal with old tuple version */
- oldaction = XLogReadBufferForRedo(lsn, record, 0, rnode, oldblk, &obuffer);
+ oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
+ &obuffer);
if (oldaction == BLK_NEEDS_REDO)
{
- page = (Page) BufferGetPage(obuffer);
-
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ page = BufferGetPage(obuffer);
+ offnum = xlrec->old_offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
@@ -7852,10 +7705,10 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Set forward chain link in t_ctid */
- htup->t_ctid = xlrec->newtid;
+ htup->t_ctid = newtid;
/* Mark the page as a candidate for pruning */
- PageSetPrunable(page, record->xl_xid);
+ PageSetPrunable(page, XLogRecGetXid(record));
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
@@ -7872,18 +7725,15 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
nbuffer = obuffer;
newaction = oldaction;
}
- else if (record->xl_info & XLOG_HEAP_INIT_PAGE)
+ else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
{
- XLogReadBufferForRedoExtended(lsn, record, 1,
- rnode, MAIN_FORKNUM, newblk,
- RBM_ZERO_AND_LOCK, false, &nbuffer);
+ nbuffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(nbuffer);
PageInit(page, BufferGetPageSize(nbuffer), 0);
newaction = BLK_NEEDS_REDO;
}
else
- newaction = XLogReadBufferForRedo(lsn, record, 1, rnode, newblk,
- &nbuffer);
+ newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
/*
* The visibility map may need to be fixed even if the heap page is
@@ -7891,7 +7741,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
*/
if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED)
{
- Relation reln = CreateFakeRelcacheEntry(xlrec->target.node);
+ Relation reln = CreateFakeRelcacheEntry(rnode);
Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, newblk, &vmbuffer);
@@ -7903,14 +7753,20 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
/* Deal with new tuple */
if (newaction == BLK_NEEDS_REDO)
{
- page = (Page) BufferGetPage(nbuffer);
+ char *recdata;
+ char *recdata_end;
+ Size datalen;
+ Size tuplen;
+
+ recdata = XLogRecGetBlockData(record, 0, &datalen);
+ recdata_end = recdata + datalen;
- offnum = ItemPointerGetOffsetNumber(&(xlrec->newtid));
+ page = BufferGetPage(nbuffer);
+
+ offnum = xlrec->new_offnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "heap_update_redo: invalid max offset number");
- recdata = (char *) xlrec + SizeOfHeapUpdate;
-
if (xlrec->flags & XLOG_HEAP_PREFIX_FROM_OLD)
{
Assert(newblk == oldblk);
@@ -7924,10 +7780,12 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
recdata += sizeof(uint16);
}
- memcpy((char *) &xlhdr, recdata, SizeOfHeapHeaderLen);
- recdata += SizeOfHeapHeaderLen;
+ memcpy((char *) &xlhdr, recdata, SizeOfHeapHeader);
+ recdata += SizeOfHeapHeader;
+
+ tuplen = recdata_end - recdata;
+ Assert(tuplen <= MaxHeapTupleSize);
- Assert(xlhdr.t_len + prefixlen + suffixlen <= MaxHeapTupleSize);
htup = &tbuf.hdr;
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
@@ -7941,7 +7799,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
int len;
/* copy bitmap [+ padding] [+ oid] from WAL record */
- len = xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits);
+ len = xlhdr.t_hoff - offsetof(HeapTupleHeaderData, t_bits);
memcpy(newp, recdata, len);
recdata += len;
newp += len;
@@ -7951,7 +7809,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
newp += prefixlen;
/* copy new tuple data from WAL record */
- len = xlhdr.t_len - (xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits));
+ len = tuplen - (xlhdr.t_hoff - offsetof(HeapTupleHeaderData, t_bits));
memcpy(newp, recdata, len);
recdata += len;
newp += len;
@@ -7962,24 +7820,26 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
* copy bitmap [+ padding] [+ oid] + data from record, all in one
* go
*/
- memcpy(newp, recdata, xlhdr.t_len);
- recdata += xlhdr.t_len;
- newp += xlhdr.t_len;
+ memcpy(newp, recdata, tuplen);
+ recdata += tuplen;
+ newp += tuplen;
}
+ Assert(recdata == recdata_end);
+
/* copy suffix from old tuple */
if (suffixlen > 0)
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
- newlen = offsetof(HeapTupleHeaderData, t_bits) + xlhdr.t_len + prefixlen + suffixlen;
- htup->t_infomask2 = xlhdr.header.t_infomask2;
- htup->t_infomask = xlhdr.header.t_infomask;
- htup->t_hoff = xlhdr.header.t_hoff;
+ newlen = offsetof(HeapTupleHeaderData, t_bits) + tuplen + prefixlen + suffixlen;
+ htup->t_infomask2 = xlhdr.t_infomask2;
+ htup->t_infomask = xlhdr.t_infomask;
+ htup->t_hoff = xlhdr.t_hoff;
- HeapTupleHeaderSetXmin(htup, record->xl_xid);
+ HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
HeapTupleHeaderSetCmin(htup, FirstCommandId);
HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
/* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = xlrec->newtid;
+ htup->t_ctid = newtid;
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
@@ -7993,6 +7853,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
PageSetLSN(page, lsn);
MarkBufferDirty(nbuffer);
}
+
if (BufferIsValid(nbuffer) && nbuffer != obuffer)
UnlockReleaseBuffer(nbuffer);
if (BufferIsValid(obuffer))
@@ -8014,14 +7875,13 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
* totally accurate anyway.
*/
if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
- XLogRecordPageWithFreeSpace(xlrec->target.node,
- ItemPointerGetBlockNumber(&(xlrec->newtid)),
- freespace);
+ XLogRecordPageWithFreeSpace(rnode, newblk, freespace);
}
static void
-heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_lock(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
Buffer buffer;
Page page;
@@ -8029,13 +7889,11 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
ItemId lp = NULL;
HeapTupleHeader htup;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
- ItemPointerGetBlockNumber(&xlrec->target.tid),
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
@@ -8055,7 +7913,9 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
{
HeapTupleHeaderClearHotUpdated(htup);
/* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = xlrec->target.tid;
+ ItemPointerSet(&htup->t_ctid,
+ BufferGetBlockNumber(buffer),
+ offnum);
}
HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
@@ -8067,22 +7927,23 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
}
static void
-heap_xlog_lock_updated(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_lock_updated(XLogReaderState *record)
{
- xl_heap_lock_updated *xlrec =
- (xl_heap_lock_updated *) XLogRecGetData(record);
+ XLogRecPtr lsn = record->EndRecPtr;
+ xl_heap_lock_updated *xlrec;
Buffer buffer;
Page page;
OffsetNumber offnum;
ItemId lp = NULL;
HeapTupleHeader htup;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
- ItemPointerGetBlockNumber(&(xlrec->target.tid)),
- &buffer) == BLK_NEEDS_REDO)
+ xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
+
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
@@ -8103,8 +7964,9 @@ heap_xlog_lock_updated(XLogRecPtr lsn, XLogRecord *record)
}
static void
-heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_inplace(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
Buffer buffer;
Page page;
@@ -8112,15 +7974,15 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
ItemId lp = NULL;
HeapTupleHeader htup;
uint32 oldlen;
- uint32 newlen;
+ Size newlen;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
- ItemPointerGetBlockNumber(&(xlrec->target.tid)),
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
+ char *newtup = XLogRecGetBlockData(record, 0, &newlen);
+
page = BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
@@ -8130,13 +7992,10 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
htup = (HeapTupleHeader) PageGetItem(page, lp);
oldlen = ItemIdGetLength(lp) - htup->t_hoff;
- newlen = record->xl_len - SizeOfHeapInplace;
if (oldlen != newlen)
elog(PANIC, "heap_inplace_redo: wrong tuple length");
- memcpy((char *) htup + htup->t_hoff,
- (char *) xlrec + SizeOfHeapInplace,
- newlen);
+ memcpy((char *) htup + htup->t_hoff, newtup, newlen);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -8146,9 +8005,9 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
}
void
-heap_redo(XLogRecPtr lsn, XLogRecord *record)
+heap_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/*
* These operations don't overwrite MVCC data so no conflict processing is
@@ -8158,22 +8017,22 @@ heap_redo(XLogRecPtr lsn, XLogRecord *record)
switch (info & XLOG_HEAP_OPMASK)
{
case XLOG_HEAP_INSERT:
- heap_xlog_insert(lsn, record);
+ heap_xlog_insert(record);
break;
case XLOG_HEAP_DELETE:
- heap_xlog_delete(lsn, record);
+ heap_xlog_delete(record);
break;
case XLOG_HEAP_UPDATE:
- heap_xlog_update(lsn, record, false);
+ heap_xlog_update(record, false);
break;
case XLOG_HEAP_HOT_UPDATE:
- heap_xlog_update(lsn, record, true);
+ heap_xlog_update(record, true);
break;
case XLOG_HEAP_LOCK:
- heap_xlog_lock(lsn, record);
+ heap_xlog_lock(record);
break;
case XLOG_HEAP_INPLACE:
- heap_xlog_inplace(lsn, record);
+ heap_xlog_inplace(record);
break;
default:
elog(PANIC, "heap_redo: unknown op code %u", info);
@@ -8181,29 +8040,29 @@ heap_redo(XLogRecPtr lsn, XLogRecord *record)
}
void
-heap2_redo(XLogRecPtr lsn, XLogRecord *record)
+heap2_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info & XLOG_HEAP_OPMASK)
{
case XLOG_HEAP2_CLEAN:
- heap_xlog_clean(lsn, record);
+ heap_xlog_clean(record);
break;
case XLOG_HEAP2_FREEZE_PAGE:
- heap_xlog_freeze_page(lsn, record);
+ heap_xlog_freeze_page(record);
break;
case XLOG_HEAP2_CLEANUP_INFO:
- heap_xlog_cleanup_info(lsn, record);
+ heap_xlog_cleanup_info(record);
break;
case XLOG_HEAP2_VISIBLE:
- heap_xlog_visible(lsn, record);
+ heap_xlog_visible(record);
break;
case XLOG_HEAP2_MULTI_INSERT:
- heap_xlog_multi_insert(lsn, record);
+ heap_xlog_multi_insert(record);
break;
case XLOG_HEAP2_LOCK_UPDATED:
- heap_xlog_lock_updated(lsn, record);
+ heap_xlog_lock_updated(record);
break;
case XLOG_HEAP2_NEW_CID:
@@ -8213,7 +8072,7 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record)
*/
break;
case XLOG_HEAP2_REWRITE:
- heap_xlog_logical_rewrite(lsn, record);
+ heap_xlog_logical_rewrite(record);
break;
default:
elog(PANIC, "heap2_redo: unknown op code %u", info);
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index bea52460a08..4b132b7d016 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -865,7 +865,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
hash_seq_init(&seq_status, state->rs_logical_mappings);
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
{
- XLogRecData rdata[2];
char *waldata;
char *waldata_start;
xl_heap_rewrite_mapping xlrec;
@@ -889,11 +888,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
xlrec.offset = src->off;
xlrec.start_lsn = state->rs_begin_lsn;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = sizeof(xlrec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
/* write all mappings consecutively */
len = src->num_mappings * sizeof(LogicalRewriteMappingData);
waldata_start = waldata = palloc(len);
@@ -934,13 +928,12 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
written, len)));
src->off += len;
- rdata[1].data = waldata_start;
- rdata[1].len = len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
+ XLogRegisterData(waldata_start, len);
/* write xlog record */
- XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, rdata);
+ XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE);
pfree(waldata_start);
}
@@ -1123,7 +1116,7 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,
* Replay XLOG_HEAP2_REWRITE records
*/
void
-heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
+heap_xlog_logical_rewrite(XLogReaderState *r)
{
char path[MAXPGPATH];
int fd;
@@ -1138,7 +1131,7 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
xlrec->mapped_db, xlrec->mapped_rel,
(uint32) (xlrec->start_lsn >> 32),
(uint32) xlrec->start_lsn,
- xlrec->mapped_xid, r->xl_xid);
+ xlrec->mapped_xid, XLogRecGetXid(r));
fd = OpenTransientFile(path,
O_CREAT | O_WRONLY | PG_BINARY,
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index bcaba7e5e84..2c4f9904e1a 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -837,37 +837,25 @@ _bt_insertonpg(Relation rel,
if (RelationNeedsWAL(rel))
{
xl_btree_insert xlrec;
- BlockNumber xlleftchild;
xl_btree_metadata xlmeta;
uint8 xlinfo;
XLogRecPtr recptr;
- XLogRecData rdata[4];
- XLogRecData *nextrdata;
IndexTupleData trunctuple;
- xlrec.target.node = rel->rd_node;
- ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off);
+ xlrec.offnum = itup_off;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeInsert;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = nextrdata = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
if (P_ISLEAF(lpageop))
xlinfo = XLOG_BTREE_INSERT_LEAF;
else
{
/*
- * Include the block number of the left child, whose
- * INCOMPLETE_SPLIT flag was cleared.
+ * Register the left child whose INCOMPLETE_SPLIT flag was
+ * cleared.
*/
- xlleftchild = BufferGetBlockNumber(cbuf);
- nextrdata->data = (char *) &xlleftchild;
- nextrdata->len = sizeof(BlockNumber);
- nextrdata->buffer = cbuf;
- nextrdata->buffer_std = true;
- nextrdata->next = nextrdata + 1;
- nextrdata++;
+ XLogRegisterBuffer(1, cbuf, REGBUF_STANDARD);
xlinfo = XLOG_BTREE_INSERT_UPPER;
}
@@ -879,33 +867,25 @@ _bt_insertonpg(Relation rel,
xlmeta.fastroot = metad->btm_fastroot;
xlmeta.fastlevel = metad->btm_fastlevel;
- nextrdata->data = (char *) &xlmeta;
- nextrdata->len = sizeof(xl_btree_metadata);
- nextrdata->buffer = InvalidBuffer;
- nextrdata->next = nextrdata + 1;
- nextrdata++;
+ XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+ XLogRegisterBufData(2, (char *) &xlmeta, sizeof(xl_btree_metadata));
xlinfo = XLOG_BTREE_INSERT_META;
}
/* Read comments in _bt_pgaddtup */
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop))
{
trunctuple = *itup;
trunctuple.t_info = sizeof(IndexTupleData);
- nextrdata->data = (char *) &trunctuple;
- nextrdata->len = sizeof(IndexTupleData);
+ XLogRegisterBufData(0, (char *) &trunctuple,
+ sizeof(IndexTupleData));
}
else
- {
- nextrdata->data = (char *) itup;
- nextrdata->len = IndexTupleDSize(*itup);
- }
- nextrdata->buffer = buf;
- nextrdata->buffer_std = true;
- nextrdata->next = NULL;
+ XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup));
- recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, xlinfo);
if (BufferIsValid(metabuf))
{
@@ -1260,56 +1240,37 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
xl_btree_split xlrec;
uint8 xlinfo;
XLogRecPtr recptr;
- XLogRecData rdata[7];
- XLogRecData *lastrdata;
- BlockNumber cblkno;
-
- xlrec.node = rel->rd_node;
- xlrec.leftsib = origpagenumber;
- xlrec.rightsib = rightpagenumber;
- xlrec.rnext = ropaque->btpo_next;
+
xlrec.level = ropaque->btpo.level;
xlrec.firstright = firstright;
+ xlrec.newitemoff = newitemoff;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeSplit;
- rdata[0].buffer = InvalidBuffer;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeSplit);
- lastrdata = &rdata[0];
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBuffer(1, rbuf, REGBUF_WILL_INIT);
+ /* Log the right sibling, because we've changed its prev-pointer. */
+ if (!P_RIGHTMOST(ropaque))
+ XLogRegisterBuffer(2, sbuf, REGBUF_STANDARD);
+ if (BufferIsValid(cbuf))
+ XLogRegisterBuffer(3, cbuf, REGBUF_STANDARD);
/*
- * Log the new item and its offset, if it was inserted on the left
- * page. (If it was put on the right page, we don't need to explicitly
- * WAL log it because it's included with all the other items on the
- * right page.) Show the new item as belonging to the left page
- * buffer, so that it is not stored if XLogInsert decides it needs a
- * full-page image of the left page. We store the offset anyway,
- * though, to support archive compression of these records.
+ * Log the new item, if it was inserted on the left page. (If it was
+ * put on the right page, we don't need to explicitly WAL log it
+ * because it's included with all the other items on the right page.)
+ * Show the new item as belonging to the left page buffer, so that it
+ * is not stored if XLogInsert decides it needs a full-page image of
+ * the left page. We store the offset anyway, though, to support
+ * archive compression of these records.
*/
if (newitemonleft)
- {
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- lastrdata->data = (char *) &newitemoff;
- lastrdata->len = sizeof(OffsetNumber);
- lastrdata->buffer = InvalidBuffer;
-
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- lastrdata->data = (char *) newitem;
- lastrdata->len = MAXALIGN(newitemsz);
- lastrdata->buffer = buf; /* backup block 0 */
- lastrdata->buffer_std = true;
- }
+ XLogRegisterBufData(0, (char *) newitem, MAXALIGN(newitemsz));
/* Log left page */
if (!isleaf)
{
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
/*
* We must also log the left page's high key, because the right
* page's leftmost key is suppressed on non-leaf levels. Show it
@@ -1319,43 +1280,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
*/
itemid = PageGetItemId(origpage, P_HIKEY);
item = (IndexTuple) PageGetItem(origpage, itemid);
- lastrdata->data = (char *) item;
- lastrdata->len = MAXALIGN(IndexTupleSize(item));
- lastrdata->buffer = buf; /* backup block 0 */
- lastrdata->buffer_std = true;
- }
-
- if (isleaf && !newitemonleft)
- {
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- /*
- * Although we don't need to WAL-log anything on the left page, we
- * still need XLogInsert to consider storing a full-page image of
- * the left page, so make an empty entry referencing that buffer.
- * This also ensures that the left page is always backup block 0.
- */
- lastrdata->data = NULL;
- lastrdata->len = 0;
- lastrdata->buffer = buf; /* backup block 0 */
- lastrdata->buffer_std = true;
- }
-
- /*
- * Log block number of left child, whose INCOMPLETE_SPLIT flag this
- * insertion clears.
- */
- if (!isleaf)
- {
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- cblkno = BufferGetBlockNumber(cbuf);
- lastrdata->data = (char *) &cblkno;
- lastrdata->len = sizeof(BlockNumber);
- lastrdata->buffer = cbuf; /* backup block 1 */
- lastrdata->buffer_std = true;
+ XLogRegisterBufData(0, (char *) item, MAXALIGN(IndexTupleSize(item)));
}
/*
@@ -1370,35 +1295,16 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
* and so the item pointers can be reconstructed. See comments for
* _bt_restore_page().
*/
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- lastrdata->data = (char *) rightpage +
- ((PageHeader) rightpage)->pd_upper;
- lastrdata->len = ((PageHeader) rightpage)->pd_special -
- ((PageHeader) rightpage)->pd_upper;
- lastrdata->buffer = InvalidBuffer;
-
- /* Log the right sibling, because we've changed its' prev-pointer. */
- if (!P_RIGHTMOST(ropaque))
- {
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- lastrdata->data = NULL;
- lastrdata->len = 0;
- lastrdata->buffer = sbuf; /* bkp block 1 (leaf) or 2 (non-leaf) */
- lastrdata->buffer_std = true;
- }
-
- lastrdata->next = NULL;
+ XLogRegisterBufData(1,
+ (char *) rightpage + ((PageHeader) rightpage)->pd_upper,
+ ((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper);
if (isroot)
xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT;
else
xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R;
- recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, xlinfo);
PageSetLSN(origpage, recptr);
PageSetLSN(rightpage, recptr);
@@ -2090,34 +1996,35 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
{
xl_btree_newroot xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[3];
+ xl_btree_metadata md;
- xlrec.node = rel->rd_node;
xlrec.rootblk = rootblknum;
xlrec.level = metad->btm_level;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeNewroot;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
+
+ XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
+ XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+
+ md.root = rootblknum;
+ md.level = metad->btm_level;
+ md.fastroot = rootblknum;
+ md.fastlevel = metad->btm_level;
+
+ XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
/*
* Direct access to page is not good but faster - we should implement
* some new func in page API.
*/
- rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper;
- rdata[1].len = ((PageHeader) rootpage)->pd_special -
- ((PageHeader) rootpage)->pd_upper;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &(rdata[2]);
-
- /* Make a full-page image of the left child if needed */
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].buffer = lbuf;
- rdata[2].next = NULL;
-
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
+ XLogRegisterBufData(0,
+ (char *) rootpage + ((PageHeader) rootpage)->pd_upper,
+ ((PageHeader) rootpage)->pd_special -
+ ((PageHeader) rootpage)->pd_upper);
+
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
PageSetLSN(lpage, recptr);
PageSetLSN(rootpage, recptr);
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index ea95ce6e1ec..a25dafeb400 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -236,18 +236,25 @@ _bt_getroot(Relation rel, int access)
{
xl_btree_newroot xlrec;
XLogRecPtr recptr;
- XLogRecData rdata;
+ xl_btree_metadata md;
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+
+ md.root = rootblkno;
+ md.level = 0;
+ md.fastroot = rootblkno;
+ md.fastlevel = 0;
+
+ XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
- xlrec.node = rel->rd_node;
xlrec.rootblk = rootblkno;
xlrec.level = 0;
- rdata.data = (char *) &xlrec;
- rdata.len = SizeOfBtreeNewroot;
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata);
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
PageSetLSN(rootpage, recptr);
PageSetLSN(metapg, recptr);
@@ -528,39 +535,23 @@ _bt_checkpage(Relation rel, Buffer buf)
static void
_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
{
- if (!RelationNeedsWAL(rel))
- return;
-
- /* No ereport(ERROR) until changes are logged */
- START_CRIT_SECTION();
+ xl_btree_reuse_page xlrec_reuse;
/*
- * We don't do MarkBufferDirty here because we're about to initialise the
- * page, and nobody else can see it yet.
+ * Note that we don't register the buffer with the record, because this
+ * operation doesn't modify the page. This record only exists to provide a
+ * conflict point for Hot Standby.
*/
/* XLOG stuff */
- {
- XLogRecData rdata[1];
- xl_btree_reuse_page xlrec_reuse;
+ xlrec_reuse.node = rel->rd_node;
+ xlrec_reuse.block = blkno;
+ xlrec_reuse.latestRemovedXid = latestRemovedXid;
- xlrec_reuse.node = rel->rd_node;
- xlrec_reuse.block = blkno;
- xlrec_reuse.latestRemovedXid = latestRemovedXid;
- rdata[0].data = (char *) &xlrec_reuse;
- rdata[0].len = SizeOfBtreeReusePage;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage);
- XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);
-
- /*
- * We don't do PageSetLSN here because we're about to initialise the
- * page, so no need.
- */
- }
-
- END_CRIT_SECTION();
+ XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE);
}
/*
@@ -633,7 +624,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
* WAL record that will allow us to conflict with queries
* running on standby.
*/
- if (XLogStandbyInfoActive())
+ if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
{
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -830,17 +821,13 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
if (RelationNeedsWAL(rel))
{
XLogRecPtr recptr;
- XLogRecData rdata[2];
xl_btree_vacuum xlrec_vacuum;
- xlrec_vacuum.node = rel->rd_node;
- xlrec_vacuum.block = BufferGetBlockNumber(buf);
-
xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
- rdata[0].data = (char *) &xlrec_vacuum;
- rdata[0].len = SizeOfBtreeVacuum;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
/*
* The target-offsets array is not in the buffer, but pretend that it
@@ -848,20 +835,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
* need not be stored too.
*/
if (nitems > 0)
- {
- rdata[1].data = (char *) itemnos;
- rdata[1].len = nitems * sizeof(OffsetNumber);
- }
- else
- {
- rdata[1].data = NULL;
- rdata[1].len = 0;
- }
- rdata[1].buffer = buf;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterBufData(0, (char *) itemnos, nitems * sizeof(OffsetNumber));
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
PageSetLSN(page, recptr);
}
@@ -919,36 +895,23 @@ _bt_delitems_delete(Relation rel, Buffer buf,
if (RelationNeedsWAL(rel))
{
XLogRecPtr recptr;
- XLogRecData rdata[3];
xl_btree_delete xlrec_delete;
- xlrec_delete.node = rel->rd_node;
xlrec_delete.hnode = heapRel->rd_node;
- xlrec_delete.block = BufferGetBlockNumber(buf);
xlrec_delete.nitems = nitems;
- rdata[0].data = (char *) &xlrec_delete;
- rdata[0].len = SizeOfBtreeDelete;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
/*
* We need the target-offsets array whether or not we store the whole
* buffer, to allow us to find the latestRemovedXid on a standby
* server.
*/
- rdata[1].data = (char *) itemnos;
- rdata[1].len = nitems * sizeof(OffsetNumber);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &(rdata[2]);
-
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].buffer = buf;
- rdata[2].buffer_std = true;
- rdata[2].next = NULL;
+ XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
PageSetLSN(page, recptr);
}
@@ -1493,33 +1456,26 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
{
xl_btree_mark_page_halfdead xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
- xlrec.target.node = rel->rd_node;
- ItemPointerSet(&(xlrec.target.tid), BufferGetBlockNumber(topparent), topoff);
+ xlrec.poffset = topoff;
xlrec.leafblk = leafblkno;
if (target != leafblkno)
xlrec.topparent = target;
else
xlrec.topparent = InvalidBlockNumber;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, leafbuf, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(1, topparent, REGBUF_STANDARD);
+
page = BufferGetPage(leafbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
xlrec.leftblk = opaque->btpo_prev;
xlrec.rightblk = opaque->btpo_next;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeMarkPageHalfDead;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = topparent;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeMarkPageHalfDead);
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD);
page = BufferGetPage(topparent);
PageSetLSN(page, recptr);
@@ -1826,63 +1782,44 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
xl_btree_metadata xlmeta;
uint8 xlinfo;
XLogRecPtr recptr;
- XLogRecData rdata[4];
- XLogRecData *nextrdata;
- xlrec.node = rel->rd_node;
+ XLogBeginInsert();
+
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+ if (BufferIsValid(lbuf))
+ XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
+ XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
+ if (target != leafblkno)
+ XLogRegisterBuffer(3, leafbuf, REGBUF_WILL_INIT);
/* information on the unlinked block */
- xlrec.deadblk = target;
xlrec.leftsib = leftsib;
xlrec.rightsib = rightsib;
xlrec.btpo_xact = opaque->btpo.xact;
/* information needed to recreate the leaf block (if not the target) */
- xlrec.leafblk = leafblkno;
xlrec.leafleftsib = leafleftsib;
xlrec.leafrightsib = leafrightsib;
xlrec.topparent = nextchild;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeUnlinkPage;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = nextrdata = &(rdata[1]);
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeUnlinkPage);
if (BufferIsValid(metabuf))
{
+ XLogRegisterBuffer(4, metabuf, REGBUF_WILL_INIT);
+
xlmeta.root = metad->btm_root;
xlmeta.level = metad->btm_level;
xlmeta.fastroot = metad->btm_fastroot;
xlmeta.fastlevel = metad->btm_fastlevel;
- nextrdata->data = (char *) &xlmeta;
- nextrdata->len = sizeof(xl_btree_metadata);
- nextrdata->buffer = InvalidBuffer;
- nextrdata->next = nextrdata + 1;
- nextrdata++;
+ XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
}
else
xlinfo = XLOG_BTREE_UNLINK_PAGE;
- nextrdata->data = NULL;
- nextrdata->len = 0;
- nextrdata->buffer = rbuf;
- nextrdata->buffer_std = true;
- nextrdata->next = NULL;
-
- if (BufferIsValid(lbuf))
- {
- nextrdata->next = nextrdata + 1;
- nextrdata++;
- nextrdata->data = NULL;
- nextrdata->len = 0;
- nextrdata->buffer = lbuf;
- nextrdata->buffer_std = true;
- nextrdata->next = NULL;
- }
-
- recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, xlinfo);
if (BufferIsValid(metabuf))
{
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 13951be62af..52aef9b9836 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -72,17 +72,23 @@ _bt_restore_page(Page page, char *from, int len)
}
static void
-_bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
- BlockNumber root, uint32 level,
- BlockNumber fastroot, uint32 fastlevel)
+_bt_restore_meta(XLogReaderState *record, uint8 block_id)
{
+ XLogRecPtr lsn = record->EndRecPtr;
Buffer metabuf;
Page metapg;
BTMetaPageData *md;
BTPageOpaque pageop;
+ xl_btree_metadata *xlrec;
+ char *ptr;
+ Size len;
- metabuf = XLogReadBuffer(rnode, BTREE_METAPAGE, true);
- Assert(BufferIsValid(metabuf));
+ metabuf = XLogInitBufferForRedo(record, block_id);
+ ptr = XLogRecGetBlockData(record, block_id, &len);
+
+ Assert(len == sizeof(xl_btree_metadata));
+ Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
+ xlrec = (xl_btree_metadata *) ptr;
metapg = BufferGetPage(metabuf);
_bt_pageinit(metapg, BufferGetPageSize(metabuf));
@@ -90,10 +96,10 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
md = BTPageGetMeta(metapg);
md->btm_magic = BTREE_MAGIC;
md->btm_version = BTREE_VERSION;
- md->btm_root = root;
- md->btm_level = level;
- md->btm_fastroot = fastroot;
- md->btm_fastlevel = fastlevel;
+ md->btm_root = xlrec->root;
+ md->btm_level = xlrec->level;
+ md->btm_fastroot = xlrec->fastroot;
+ md->btm_fastlevel = xlrec->fastlevel;
pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
pageop->btpo_flags = BTP_META;
@@ -117,14 +123,12 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
* types that can insert a downlink: insert, split, and newroot.
*/
static void
-_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
- int block_index,
- RelFileNode rnode, BlockNumber cblock)
+_bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
{
+ XLogRecPtr lsn = record->EndRecPtr;
Buffer buf;
- if (XLogReadBufferForRedo(lsn, record, block_index, rnode, cblock, &buf)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
{
Page page = (Page) BufferGetPage(buf);
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -140,38 +144,12 @@ _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
}
static void
-btree_xlog_insert(bool isleaf, bool ismeta,
- XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
Buffer buffer;
Page page;
- char *datapos;
- int datalen;
- xl_btree_metadata md;
- BlockNumber cblkno = 0;
- int main_blk_index;
-
- datapos = (char *) xlrec + SizeOfBtreeInsert;
- datalen = record->xl_len - SizeOfBtreeInsert;
-
- /*
- * if this insert finishes a split at lower level, extract the block
- * number of the (left) child.
- */
- if (!isleaf && (record->xl_info & XLR_BKP_BLOCK(0)) == 0)
- {
- memcpy(&cblkno, datapos, sizeof(BlockNumber));
- Assert(cblkno != 0);
- datapos += sizeof(BlockNumber);
- datalen -= sizeof(BlockNumber);
- }
- if (ismeta)
- {
- memcpy(&md, datapos, sizeof(xl_btree_metadata));
- datapos += sizeof(xl_btree_metadata);
- datalen -= sizeof(xl_btree_metadata);
- }
/*
* Insertion to an internal page finishes an incomplete split at the child
@@ -183,21 +161,15 @@ btree_xlog_insert(bool isleaf, bool ismeta,
* cannot be updates happening.
*/
if (!isleaf)
+ _bt_clear_incomplete_split(record, 1);
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
- _bt_clear_incomplete_split(lsn, record, 0, xlrec->target.node, cblkno);
- main_blk_index = 1;
- }
- else
- main_blk_index = 0;
+ Size datalen;
+ char *datapos = XLogRecGetBlockData(record, 0, &datalen);
- if (XLogReadBufferForRedo(lsn, record, main_blk_index, xlrec->target.node,
- ItemPointerGetBlockNumber(&(xlrec->target.tid)),
- &buffer) == BLK_NEEDS_REDO)
- {
page = BufferGetPage(buffer);
- if (PageAddItem(page, (Item) datapos, datalen,
- ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
+ if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
false, false) == InvalidOffsetNumber)
elog(PANIC, "btree_insert_redo: failed to add item");
@@ -215,15 +187,13 @@ btree_xlog_insert(bool isleaf, bool ismeta,
* obsolete link from the metapage.
*/
if (ismeta)
- _bt_restore_meta(xlrec->target.node, lsn,
- md.root, md.level,
- md.fastroot, md.fastlevel);
+ _bt_restore_meta(record, 2);
}
static void
-btree_xlog_split(bool onleft, bool isroot,
- XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_split(bool onleft, bool isroot, XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
bool isleaf = (xlrec->level == 0);
Buffer lbuf;
@@ -231,56 +201,17 @@ btree_xlog_split(bool onleft, bool isroot,
Page rpage;
BTPageOpaque ropaque;
char *datapos;
- int datalen;
- OffsetNumber newitemoff = 0;
- Item newitem = NULL;
- Size newitemsz = 0;
+ Size datalen;
Item left_hikey = NULL;
Size left_hikeysz = 0;
- BlockNumber cblkno = InvalidBlockNumber;
-
- datapos = (char *) xlrec + SizeOfBtreeSplit;
- datalen = record->xl_len - SizeOfBtreeSplit;
-
- /* Extract newitemoff and newitem, if present */
- if (onleft)
- {
- memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
- datapos += sizeof(OffsetNumber);
- datalen -= sizeof(OffsetNumber);
- }
- if (onleft && !(record->xl_info & XLR_BKP_BLOCK(0)))
- {
- /*
- * We assume that 16-bit alignment is enough to apply IndexTupleSize
- * (since it's fetching from a uint16 field) and also enough for
- * PageAddItem to insert the tuple.
- */
- newitem = (Item) datapos;
- newitemsz = MAXALIGN(IndexTupleSize(newitem));
- datapos += newitemsz;
- datalen -= newitemsz;
- }
-
- /* Extract left hikey and its size (still assuming 16-bit alignment) */
- if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(0)))
- {
- left_hikey = (Item) datapos;
- left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
- datapos += left_hikeysz;
- datalen -= left_hikeysz;
- }
+ BlockNumber leftsib;
+ BlockNumber rightsib;
+ BlockNumber rnext;
- /*
- * If this insertion finishes an incomplete split, get the block number of
- * the child.
- */
- if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(1)))
- {
- memcpy(&cblkno, datapos, sizeof(BlockNumber));
- datapos += sizeof(BlockNumber);
- datalen -= sizeof(BlockNumber);
- }
+ XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib);
+ XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib);
+ if (!XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext))
+ rnext = P_NONE;
/*
* Clear the incomplete split flag on the left sibling of the child page
@@ -288,18 +219,18 @@ btree_xlog_split(bool onleft, bool isroot,
* before locking the other pages)
*/
if (!isleaf)
- _bt_clear_incomplete_split(lsn, record, 1, xlrec->node, cblkno);
+ _bt_clear_incomplete_split(record, 3);
/* Reconstruct right (new) sibling page from scratch */
- rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true);
- Assert(BufferIsValid(rbuf));
+ rbuf = XLogInitBufferForRedo(record, 1);
+ datapos = XLogRecGetBlockData(record, 1, &datalen);
rpage = (Page) BufferGetPage(rbuf);
_bt_pageinit(rpage, BufferGetPageSize(rbuf));
ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
- ropaque->btpo_prev = xlrec->leftsib;
- ropaque->btpo_next = xlrec->rnext;
+ ropaque->btpo_prev = leftsib;
+ ropaque->btpo_next = rnext;
ropaque->btpo.level = xlrec->level;
ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
ropaque->btpo_cycleid = 0;
@@ -324,8 +255,7 @@ btree_xlog_split(bool onleft, bool isroot,
/* don't release the buffer yet; we touch right page's first item below */
/* Now reconstruct left (original) sibling page */
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->leftsib,
- &lbuf) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &lbuf) == BLK_NEEDS_REDO)
{
/*
* To retain the same physical order of the tuples that they had, we
@@ -339,9 +269,31 @@ btree_xlog_split(bool onleft, bool isroot,
Page lpage = (Page) BufferGetPage(lbuf);
BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
OffsetNumber off;
+ Item newitem;
+ Size newitemsz = 0;
Page newlpage;
OffsetNumber leftoff;
+ datapos = XLogRecGetBlockData(record, 0, &datalen);
+
+ if (onleft)
+ {
+ newitem = (Item) datapos;
+ newitemsz = MAXALIGN(IndexTupleSize(newitem));
+ datapos += newitemsz;
+ datalen -= newitemsz;
+ }
+
+ /* Extract left hikey and its size (assuming 16-bit alignment) */
+ if (!isleaf)
+ {
+ left_hikey = (Item) datapos;
+ left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
+ datapos += left_hikeysz;
+ datalen -= left_hikeysz;
+ }
+ Assert(datalen == 0);
+
newlpage = PageGetTempPageCopySpecial(lpage);
/* Set high key */
@@ -358,7 +310,7 @@ btree_xlog_split(bool onleft, bool isroot,
Item item;
/* add the new item if it was inserted on left page */
- if (onleft && off == newitemoff)
+ if (onleft && off == xlrec->newitemoff)
{
if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
false, false) == InvalidOffsetNumber)
@@ -376,7 +328,7 @@ btree_xlog_split(bool onleft, bool isroot,
}
/* cope with possibility that newitem goes at the end */
- if (onleft && off == newitemoff)
+ if (onleft && off == xlrec->newitemoff)
{
if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
false, false) == InvalidOffsetNumber)
@@ -390,7 +342,7 @@ btree_xlog_split(bool onleft, bool isroot,
lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
if (isleaf)
lopaque->btpo_flags |= BTP_LEAF;
- lopaque->btpo_next = xlrec->rightsib;
+ lopaque->btpo_next = rightsib;
lopaque->btpo_cycleid = 0;
PageSetLSN(lpage, lsn);
@@ -410,22 +362,16 @@ btree_xlog_split(bool onleft, bool isroot,
* replay, because no other index update can be in progress, and readers
* will cope properly when following an obsolete left-link.
*/
- if (xlrec->rnext != P_NONE)
+ if (rnext != P_NONE)
{
- /*
- * the backup block containing right sibling is 1 or 2, depending
- * whether this was a leaf or internal page.
- */
- int rnext_index = isleaf ? 1 : 2;
Buffer buffer;
- if (XLogReadBufferForRedo(lsn, record, rnext_index, xlrec->node,
- xlrec->rnext, &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
{
Page page = (Page) BufferGetPage(buffer);
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
- pageop->btpo_prev = xlrec->rightsib;
+ pageop->btpo_prev = rightsib;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -436,8 +382,9 @@ btree_xlog_split(bool onleft, bool isroot,
}
static void
-btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_vacuum(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
Buffer buffer;
Page page;
@@ -466,9 +413,13 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
*/
if (HotStandbyActiveInReplay())
{
+ RelFileNode thisrnode;
+ BlockNumber thisblkno;
BlockNumber blkno;
- for (blkno = xlrec->lastBlockVacuumed + 1; blkno < xlrec->block; blkno++)
+ XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
+
+ for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
{
/*
* We use RBM_NORMAL_NO_LOG mode because it's not an error
@@ -483,7 +434,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
* buffer manager we could optimise this so that if the block is
* not in shared_buffers we confirm it as unpinned.
*/
- buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno,
+ buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
RBM_NORMAL_NO_LOG);
if (BufferIsValid(buffer))
{
@@ -497,20 +448,23 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
* Like in btvacuumpage(), we need to take a cleanup lock on every leaf
* page. See nbtree/README for details.
*/
- if (XLogReadBufferForRedoExtended(lsn, record, 0,
- xlrec->node, MAIN_FORKNUM, xlrec->block,
- RBM_NORMAL, true, &buffer)
+ if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
== BLK_NEEDS_REDO)
{
+ char *ptr;
+ Size len;
+
+ ptr = XLogRecGetBlockData(record, 0, &len);
+
page = (Page) BufferGetPage(buffer);
- if (record->xl_len > SizeOfBtreeVacuum)
+ if (len > 0)
{
OffsetNumber *unused;
OffsetNumber *unend;
- unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeVacuum);
- unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
+ unused = (OffsetNumber *) ptr;
+ unend = (OffsetNumber *) ((char *) ptr + len);
if ((unend - unused) > 0)
PageIndexMultiDelete(page, unused, unend - unused);
@@ -542,13 +496,16 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
* XXX optimise later with something like XLogPrefetchBuffer()
*/
static TransactionId
-btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
+btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record)
{
+ xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
OffsetNumber *unused;
Buffer ibuffer,
hbuffer;
Page ipage,
hpage;
+ RelFileNode rnode;
+ BlockNumber blkno;
ItemId iitemid,
hitemid;
IndexTuple itup;
@@ -588,9 +545,11 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
* InvalidTransactionId to cancel all HS transactions. That's probably
* overkill, but it's safe, and certainly better than panicking here.
*/
- ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL);
if (!BufferIsValid(ibuffer))
return InvalidTransactionId;
+ LockBuffer(ibuffer, BT_READ);
ipage = (Page) BufferGetPage(ibuffer);
/*
@@ -611,12 +570,13 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
* Locate the heap page that the index tuple points at
*/
hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
- hbuffer = XLogReadBuffer(xlrec->hnode, hblkno, false);
+ hbuffer = XLogReadBufferExtended(xlrec->hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL);
if (!BufferIsValid(hbuffer))
{
UnlockReleaseBuffer(ibuffer);
return InvalidTransactionId;
}
+ LockBuffer(hbuffer, BUFFER_LOCK_SHARE);
hpage = (Page) BufferGetPage(hbuffer);
/*
@@ -678,8 +638,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
}
static void
-btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_delete(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
Buffer buffer;
Page page;
@@ -698,21 +659,23 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
*/
if (InHotStandby)
{
- TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(xlrec);
+ TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(record);
+ RelFileNode rnode;
- ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node);
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
+
+ ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode);
}
/*
* We don't need to take a cleanup lock to apply these changes. See
* nbtree/README for details.
*/
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
- if (record->xl_len > SizeOfBtreeDelete)
+ if (XLogRecGetDataLen(record) > SizeOfBtreeDelete)
{
OffsetNumber *unused;
@@ -736,17 +699,15 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
}
static void
-btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
- BlockNumber parent;
Buffer buffer;
Page page;
BTPageOpaque pageop;
IndexTupleData trunctuple;
- parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
-
/*
* In normal operation, we would lock all the pages this WAL record
* touches before changing any of them. In WAL replay, it should be okay
@@ -756,8 +717,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
*/
/* parent page */
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node, parent,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
{
OffsetNumber poffset;
ItemId itemid;
@@ -768,7 +728,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
- poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ poffset = xlrec->poffset;
nextoffset = OffsetNumberNext(poffset);
itemid = PageGetItemId(page, nextoffset);
@@ -788,8 +748,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(buffer);
/* Rewrite the leaf page as a halfdead page */
- buffer = XLogReadBuffer(xlrec->target.node, xlrec->leafblk, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(buffer);
_bt_pageinit(page, BufferGetPageSize(buffer));
@@ -822,17 +781,16 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
static void
-btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
- BlockNumber target;
BlockNumber leftsib;
BlockNumber rightsib;
Buffer buffer;
Page page;
BTPageOpaque pageop;
- target = xlrec->deadblk;
leftsib = xlrec->leftsib;
rightsib = xlrec->rightsib;
@@ -845,8 +803,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
*/
/* Fix left-link of right sibling */
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, rightsib, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -861,8 +818,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
/* Fix right-link of left sibling, if any */
if (leftsib != P_NONE)
{
- if (XLogReadBufferForRedo(lsn, record, 1, xlrec->node, leftsib, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -876,8 +832,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
}
/* Rewrite target page as empty deleted page */
- buffer = XLogReadBuffer(xlrec->node, target, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(buffer);
_bt_pageinit(page, BufferGetPageSize(buffer));
@@ -898,7 +853,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
* itself, update the leaf to point to the next remaining child in the
* branch.
*/
- if (target != xlrec->leafblk)
+ if (XLogRecHasBlockRef(record, 3))
{
/*
* There is no real data on the page, so we just re-create it from
@@ -906,8 +861,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
*/
IndexTupleData trunctuple;
- buffer = XLogReadBuffer(xlrec->node, xlrec->leafblk, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 3);
page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -936,27 +890,21 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
/* Update metapage if needed */
if (info == XLOG_BTREE_UNLINK_PAGE_META)
- {
- xl_btree_metadata md;
-
- memcpy(&md, (char *) xlrec + SizeOfBtreeUnlinkPage,
- sizeof(xl_btree_metadata));
- _bt_restore_meta(xlrec->node, lsn,
- md.root, md.level,
- md.fastroot, md.fastlevel);
- }
+ _bt_restore_meta(record, 4);
}
static void
-btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_newroot(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
Buffer buffer;
Page page;
BTPageOpaque pageop;
+ char *ptr;
+ Size len;
- buffer = XLogReadBuffer(xlrec->node, xlrec->rootblk, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(buffer);
_bt_pageinit(page, BufferGetPageSize(buffer));
@@ -969,34 +917,24 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
pageop->btpo_flags |= BTP_LEAF;
pageop->btpo_cycleid = 0;
- if (record->xl_len > SizeOfBtreeNewroot)
+ if (xlrec->level > 0)
{
- IndexTuple itup;
- BlockNumber cblkno;
-
- _bt_restore_page(page,
- (char *) xlrec + SizeOfBtreeNewroot,
- record->xl_len - SizeOfBtreeNewroot);
- /* extract block number of the left-hand split page */
- itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_HIKEY));
- cblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
- Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
+ ptr = XLogRecGetBlockData(record, 0, &len);
+ _bt_restore_page(page, ptr, len);
/* Clear the incomplete-split flag in left child */
- _bt_clear_incomplete_split(lsn, record, 0, xlrec->node, cblkno);
+ _bt_clear_incomplete_split(record, 1);
}
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
- _bt_restore_meta(xlrec->node, lsn,
- xlrec->rootblk, xlrec->level,
- xlrec->rootblk, xlrec->level);
+ _bt_restore_meta(record, 2);
}
static void
-btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_reuse_page(XLogReaderState *record)
{
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
@@ -1015,58 +953,55 @@ btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record)
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
xlrec->node);
}
-
- /* Backup blocks are not used in reuse_page records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
}
void
-btree_redo(XLogRecPtr lsn, XLogRecord *record)
+btree_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info)
{
case XLOG_BTREE_INSERT_LEAF:
- btree_xlog_insert(true, false, lsn, record);
+ btree_xlog_insert(true, false, record);
break;
case XLOG_BTREE_INSERT_UPPER:
- btree_xlog_insert(false, false, lsn, record);
+ btree_xlog_insert(false, false, record);
break;
case XLOG_BTREE_INSERT_META:
- btree_xlog_insert(false, true, lsn, record);
+ btree_xlog_insert(false, true, record);
break;
case XLOG_BTREE_SPLIT_L:
- btree_xlog_split(true, false, lsn, record);
+ btree_xlog_split(true, false, record);
break;
case XLOG_BTREE_SPLIT_R:
- btree_xlog_split(false, false, lsn, record);
+ btree_xlog_split(false, false, record);
break;
case XLOG_BTREE_SPLIT_L_ROOT:
- btree_xlog_split(true, true, lsn, record);
+ btree_xlog_split(true, true, record);
break;
case XLOG_BTREE_SPLIT_R_ROOT:
- btree_xlog_split(false, true, lsn, record);
+ btree_xlog_split(false, true, record);
break;
case XLOG_BTREE_VACUUM:
- btree_xlog_vacuum(lsn, record);
+ btree_xlog_vacuum(record);
break;
case XLOG_BTREE_DELETE:
- btree_xlog_delete(lsn, record);
+ btree_xlog_delete(record);
break;
case XLOG_BTREE_MARK_PAGE_HALFDEAD:
- btree_xlog_mark_page_halfdead(info, lsn, record);
+ btree_xlog_mark_page_halfdead(info, record);
break;
case XLOG_BTREE_UNLINK_PAGE:
case XLOG_BTREE_UNLINK_PAGE_META:
- btree_xlog_unlink_page(info, lsn, record);
+ btree_xlog_unlink_page(info, record);
break;
case XLOG_BTREE_NEWROOT:
- btree_xlog_newroot(lsn, record);
+ btree_xlog_newroot(record);
break;
case XLOG_BTREE_REUSE_PAGE:
- btree_xlog_reuse_page(lsn, record);
+ btree_xlog_reuse_page(record);
break;
default:
elog(PANIC, "btree_redo: unknown op code %u", info);
diff --git a/src/backend/access/rmgrdesc/brindesc.c b/src/backend/access/rmgrdesc/brindesc.c
index 97dc3c0fa91..6cda6f8ffd9 100644
--- a/src/backend/access/rmgrdesc/brindesc.c
+++ b/src/backend/access/rmgrdesc/brindesc.c
@@ -17,64 +17,49 @@
#include "access/brin_xlog.h"
void
-brin_desc(StringInfo buf, XLogRecord *record)
+brin_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
info &= XLOG_BRIN_OPMASK;
if (info == XLOG_BRIN_CREATE_INDEX)
{
xl_brin_createidx *xlrec = (xl_brin_createidx *) rec;
- appendStringInfo(buf, "v%d pagesPerRange %u rel %u/%u/%u",
- xlrec->version, xlrec->pagesPerRange,
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode);
+ appendStringInfo(buf, "v%d pagesPerRange %u",
+ xlrec->version, xlrec->pagesPerRange);
}
else if (info == XLOG_BRIN_INSERT)
{
xl_brin_insert *xlrec = (xl_brin_insert *) rec;
- appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u TID (%u,%u)",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode,
- xlrec->heapBlk, xlrec->revmapBlk,
+ appendStringInfo(buf, "heapBlk %u pagesPerRange %u offnum %u",
+ xlrec->heapBlk,
xlrec->pagesPerRange,
- ItemPointerGetBlockNumber(&xlrec->tid),
- ItemPointerGetOffsetNumber(&xlrec->tid));
+ xlrec->offnum);
}
else if (info == XLOG_BRIN_UPDATE)
{
xl_brin_update *xlrec = (xl_brin_update *) rec;
- appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u old TID (%u,%u) TID (%u,%u)",
- xlrec->insert.node.spcNode, xlrec->insert.node.dbNode,
- xlrec->insert.node.relNode,
- xlrec->insert.heapBlk, xlrec->insert.revmapBlk,
+ appendStringInfo(buf, "heapBlk %u pagesPerRange %u old offnum %u, new offnum %u",
+ xlrec->insert.heapBlk,
xlrec->insert.pagesPerRange,
- ItemPointerGetBlockNumber(&xlrec->oldtid),
- ItemPointerGetOffsetNumber(&xlrec->oldtid),
- ItemPointerGetBlockNumber(&xlrec->insert.tid),
- ItemPointerGetOffsetNumber(&xlrec->insert.tid));
+ xlrec->oldOffnum,
+ xlrec->insert.offnum);
}
else if (info == XLOG_BRIN_SAMEPAGE_UPDATE)
{
xl_brin_samepage_update *xlrec = (xl_brin_samepage_update *) rec;
- appendStringInfo(buf, "rel %u/%u/%u TID (%u,%u)",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode,
- ItemPointerGetBlockNumber(&xlrec->tid),
- ItemPointerGetOffsetNumber(&xlrec->tid));
+ appendStringInfo(buf, "offnum %u", xlrec->offnum);
}
else if (info == XLOG_BRIN_REVMAP_EXTEND)
{
xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) rec;
- appendStringInfo(buf, "rel %u/%u/%u targetBlk %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->targetBlk);
+ appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk);
}
}
diff --git a/src/backend/access/rmgrdesc/clogdesc.c b/src/backend/access/rmgrdesc/clogdesc.c
index 4a12e286e4a..8de72963e6f 100644
--- a/src/backend/access/rmgrdesc/clogdesc.c
+++ b/src/backend/access/rmgrdesc/clogdesc.c
@@ -18,10 +18,10 @@
void
-clog_desc(StringInfo buf, XLogRecord *record)
+clog_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == CLOG_ZEROPAGE || info == CLOG_TRUNCATE)
{
diff --git a/src/backend/access/rmgrdesc/dbasedesc.c b/src/backend/access/rmgrdesc/dbasedesc.c
index 446e5f97f41..ee1d83baa4c 100644
--- a/src/backend/access/rmgrdesc/dbasedesc.c
+++ b/src/backend/access/rmgrdesc/dbasedesc.c
@@ -19,10 +19,10 @@
void
-dbase_desc(StringInfo buf, XLogRecord *record)
+dbase_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_DBASE_CREATE)
{
diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c
index 2f783cee2bb..8754214f644 100644
--- a/src/backend/access/rmgrdesc/gindesc.c
+++ b/src/backend/access/rmgrdesc/gindesc.c
@@ -15,17 +15,11 @@
#include "postgres.h"
#include "access/gin_private.h"
+#include "access/xlogutils.h"
#include "lib/stringinfo.h"
#include "storage/relfilenode.h"
static void
-desc_node(StringInfo buf, RelFileNode node, BlockNumber blkno)
-{
- appendStringInfo(buf, "node: %u/%u/%u blkno: %u",
- node.spcNode, node.dbNode, node.relNode, blkno);
-}
-
-static void
desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
{
int i;
@@ -77,26 +71,25 @@ desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
}
void
-gin_desc(StringInfo buf, XLogRecord *record)
+gin_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info)
{
case XLOG_GIN_CREATE_INDEX:
- desc_node(buf, *(RelFileNode *) rec, GIN_ROOT_BLKNO);
+ /* no further information */
break;
case XLOG_GIN_CREATE_PTREE:
- desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
+ /* no further information */
break;
case XLOG_GIN_INSERT:
{
ginxlogInsert *xlrec = (ginxlogInsert *) rec;
char *payload = rec + sizeof(ginxlogInsert);
- desc_node(buf, xlrec->node, xlrec->blkno);
- appendStringInfo(buf, " isdata: %c isleaf: %c",
+ appendStringInfo(buf, "isdata: %c isleaf: %c",
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
if (!(xlrec->flags & GIN_INSERT_ISLEAF))
@@ -119,7 +112,7 @@ gin_desc(StringInfo buf, XLogRecord *record)
ginxlogRecompressDataLeaf *insertData =
(ginxlogRecompressDataLeaf *) payload;
- if (record->xl_info & XLR_BKP_BLOCK(0))
+ if (XLogRecHasBlockImage(record, 0))
appendStringInfo(buf, " (full page image)");
else
desc_recompress_leaf(buf, insertData);
@@ -139,39 +132,38 @@ gin_desc(StringInfo buf, XLogRecord *record)
{
ginxlogSplit *xlrec = (ginxlogSplit *) rec;
- desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
- appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
+ appendStringInfo(buf, "isrootsplit: %c",
+ (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
appendStringInfo(buf, " isdata: %c isleaf: %c",
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
}
break;
case XLOG_GIN_VACUUM_PAGE:
- desc_node(buf, ((ginxlogVacuumPage *) rec)->node, ((ginxlogVacuumPage *) rec)->blkno);
+ /* no further information */
break;
case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
{
ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) rec;
- desc_node(buf, xlrec->node, xlrec->blkno);
- if (record->xl_info & XLR_BKP_BLOCK(0))
+ if (XLogRecHasBlockImage(record, 0))
appendStringInfo(buf, " (full page image)");
else
desc_recompress_leaf(buf, &xlrec->data);
}
break;
case XLOG_GIN_DELETE_PAGE:
- desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
+ /* no further information */
break;
case XLOG_GIN_UPDATE_META_PAGE:
- desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, GIN_METAPAGE_BLKNO);
+ /* no further information */
break;
case XLOG_GIN_INSERT_LISTPAGE:
- desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
+ /* no further information */
break;
case XLOG_GIN_DELETE_LISTPAGE:
- appendStringInfo(buf, "%d pages, ", ((ginxlogDeleteListPages *) rec)->ndeleted);
- desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, GIN_METAPAGE_BLKNO);
+ appendStringInfo(buf, "ndeleted: %d",
+ ((ginxlogDeleteListPages *) rec)->ndeleted);
break;
}
}
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index db3ba13ccdd..576c644c2ac 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -19,33 +19,22 @@
#include "storage/relfilenode.h"
static void
-out_target(StringInfo buf, RelFileNode node)
-{
- appendStringInfo(buf, "rel %u/%u/%u",
- node.spcNode, node.dbNode, node.relNode);
-}
-
-static void
out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
{
- out_target(buf, xlrec->node);
- appendStringInfo(buf, "; block number %u", xlrec->blkno);
}
static void
out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
{
- appendStringInfoString(buf, "page_split: ");
- out_target(buf, xlrec->node);
- appendStringInfo(buf, "; block number %u splits to %d pages",
- xlrec->origblkno, xlrec->npage);
+ appendStringInfo(buf, "page_split: splits to %d pages",
+ xlrec->npage);
}
void
-gist_desc(StringInfo buf, XLogRecord *record)
+gist_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info)
{
@@ -56,10 +45,6 @@ gist_desc(StringInfo buf, XLogRecord *record)
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
break;
case XLOG_GIST_CREATE_INDEX:
- appendStringInfo(buf, "rel %u/%u/%u",
- ((RelFileNode *) rec)->spcNode,
- ((RelFileNode *) rec)->dbNode,
- ((RelFileNode *) rec)->relNode);
break;
}
}
diff --git a/src/backend/access/rmgrdesc/hashdesc.c b/src/backend/access/rmgrdesc/hashdesc.c
index c58461c6ffc..71afaa9cbd6 100644
--- a/src/backend/access/rmgrdesc/hashdesc.c
+++ b/src/backend/access/rmgrdesc/hashdesc.c
@@ -17,7 +17,7 @@
#include "access/hash.h"
void
-hash_desc(StringInfo buf, XLogRecord *record)
+hash_desc(StringInfo buf, XLogReaderState *record)
{
}
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index ee2c073f71f..958b0b0e85c 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -17,15 +17,6 @@
#include "access/heapam_xlog.h"
static void
-out_target(StringInfo buf, xl_heaptid *target)
-{
- appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
- target->node.spcNode, target->node.dbNode, target->node.relNode,
- ItemPointerGetBlockNumber(&(target->tid)),
- ItemPointerGetOffsetNumber(&(target->tid)));
-}
-
-static void
out_infobits(StringInfo buf, uint8 infobits)
{
if (infobits & XLHL_XMAX_IS_MULTI)
@@ -41,23 +32,23 @@ out_infobits(StringInfo buf, uint8 infobits)
}
void
-heap_desc(StringInfo buf, XLogRecord *record)
+heap_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
info &= XLOG_HEAP_OPMASK;
if (info == XLOG_HEAP_INSERT)
{
xl_heap_insert *xlrec = (xl_heap_insert *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
}
else if (info == XLOG_HEAP_DELETE)
{
xl_heap_delete *xlrec = (xl_heap_delete *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
appendStringInfoChar(buf, ' ');
out_infobits(buf, xlrec->infobits_set);
}
@@ -65,24 +56,24 @@ heap_desc(StringInfo buf, XLogRecord *record)
{
xl_heap_update *xlrec = (xl_heap_update *) rec;
- out_target(buf, &(xlrec->target));
- appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+ appendStringInfo(buf, "off %u xmax %u ",
+ xlrec->old_offnum,
+ xlrec->old_xmax); /* trailing space keeps xmax apart from the infobits text, as the old " xmax %u " did */
out_infobits(buf, xlrec->old_infobits_set);
- appendStringInfo(buf, "; new tid %u/%u xmax %u",
- ItemPointerGetBlockNumber(&(xlrec->newtid)),
- ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+ appendStringInfo(buf, "; new off %u xmax %u",
+ xlrec->new_offnum,
xlrec->new_xmax);
}
else if (info == XLOG_HEAP_HOT_UPDATE)
{
xl_heap_update *xlrec = (xl_heap_update *) rec;
- out_target(buf, &(xlrec->target));
- appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+ appendStringInfo(buf, "off %u xmax %u ",
+ xlrec->old_offnum,
+ xlrec->old_xmax); /* trailing space keeps xmax apart from the infobits text, as the old " xmax %u " did */
out_infobits(buf, xlrec->old_infobits_set);
- appendStringInfo(buf, "; new tid %u/%u xmax %u",
- ItemPointerGetBlockNumber(&(xlrec->newtid)),
- ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+ appendStringInfo(buf, "; new off %u xmax %u",
+ xlrec->new_offnum,
xlrec->new_xmax);
}
else if (info == XLOG_HEAP_LOCK)
@@ -90,40 +81,34 @@ heap_desc(StringInfo buf, XLogRecord *record)
xl_heap_lock *xlrec = (xl_heap_lock *) rec;
appendStringInfo(buf, "xid %u: ", xlrec->locking_xid);
- out_target(buf, &(xlrec->target));
- appendStringInfoChar(buf, ' ');
+ appendStringInfo(buf, "off %u ", xlrec->offnum);
out_infobits(buf, xlrec->infobits_set);
}
else if (info == XLOG_HEAP_INPLACE)
{
xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
}
}
void
-heap2_desc(StringInfo buf, XLogRecord *record)
+heap2_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
info &= XLOG_HEAP_OPMASK;
if (info == XLOG_HEAP2_CLEAN)
{
xl_heap_clean *xlrec = (xl_heap_clean *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u remxid %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->block,
- xlrec->latestRemovedXid);
+ appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid);
}
else if (info == XLOG_HEAP2_FREEZE_PAGE)
{
xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u; cutoff xid %u ntuples %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->block,
+ appendStringInfo(buf, "cutoff xid %u ntuples %u",
xlrec->cutoff_xid, xlrec->ntuples);
}
else if (info == XLOG_HEAP2_CLEANUP_INFO)
@@ -136,17 +121,13 @@ heap2_desc(StringInfo buf, XLogRecord *record)
{
xl_heap_visible *xlrec = (xl_heap_visible *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->block);
+ appendStringInfo(buf, "cutoff xid %u", xlrec->cutoff_xid);
}
else if (info == XLOG_HEAP2_MULTI_INSERT)
{
xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u; %d tuples",
- xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
- xlrec->blkno, xlrec->ntuples);
+ appendStringInfo(buf, "%d tuples", xlrec->ntuples);
}
else if (info == XLOG_HEAP2_LOCK_UPDATED)
{
@@ -154,13 +135,18 @@ heap2_desc(StringInfo buf, XLogRecord *record)
appendStringInfo(buf, "xmax %u msk %04x; ", xlrec->xmax,
xlrec->infobits_set);
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
}
else if (info == XLOG_HEAP2_NEW_CID)
{
xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
+ xlrec->target_node.spcNode,
+ xlrec->target_node.dbNode,
+ xlrec->target_node.relNode,
+ ItemPointerGetBlockNumber(&(xlrec->target_tid)),
+ ItemPointerGetOffsetNumber(&(xlrec->target_tid)));
appendStringInfo(buf, "; cmin: %u, cmax: %u, combo: %u",
xlrec->cmin, xlrec->cmax, xlrec->combocid);
}
diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c
index afc5aca1972..0902cb73c6a 100644
--- a/src/backend/access/rmgrdesc/mxactdesc.c
+++ b/src/backend/access/rmgrdesc/mxactdesc.c
@@ -47,10 +47,10 @@ out_member(StringInfo buf, MultiXactMember *member)
}
void
-multixact_desc(StringInfo buf, XLogRecord *record)
+multixact_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE ||
info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c
index 8b63f2b6ba9..85795f6409d 100644
--- a/src/backend/access/rmgrdesc/nbtdesc.c
+++ b/src/backend/access/rmgrdesc/nbtdesc.c
@@ -16,20 +16,11 @@
#include "access/nbtree.h"
-static void
-out_target(StringInfo buf, xl_btreetid *target)
-{
- appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
- target->node.spcNode, target->node.dbNode, target->node.relNode,
- ItemPointerGetBlockNumber(&(target->tid)),
- ItemPointerGetOffsetNumber(&(target->tid)));
-}
-
void
-btree_desc(StringInfo buf, XLogRecord *record)
+btree_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info)
{
@@ -39,7 +30,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
{
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
break;
}
case XLOG_BTREE_SPLIT_L:
@@ -49,11 +40,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
{
xl_btree_split *xlrec = (xl_btree_split *) rec;
- appendStringInfo(buf, "rel %u/%u/%u ",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode);
- appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
- xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
+ appendStringInfo(buf, "level %u, firstright %d",
xlrec->level, xlrec->firstright);
break;
}
@@ -61,9 +48,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
{
xl_btree_vacuum *xlrec = (xl_btree_vacuum *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u, lastBlockVacuumed %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->block,
+ appendStringInfo(buf, "lastBlockVacuumed %u",
xlrec->lastBlockVacuumed);
break;
}
@@ -71,18 +56,14 @@ btree_desc(StringInfo buf, XLogRecord *record)
{
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
- appendStringInfo(buf, "index %u/%u/%u; iblk %u, heap %u/%u/%u;",
- xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
- xlrec->block,
- xlrec->hnode.spcNode, xlrec->hnode.dbNode, xlrec->hnode.relNode);
+ appendStringInfo(buf, "%d items", xlrec->nitems);
break;
}
case XLOG_BTREE_MARK_PAGE_HALFDEAD:
{
xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) rec;
- out_target(buf, &(xlrec->target));
- appendStringInfo(buf, "; topparent %u; leaf %u; left %u; right %u",
+ appendStringInfo(buf, "topparent %u; leaf %u; left %u; right %u",
xlrec->topparent, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk);
break;
}
@@ -91,22 +72,19 @@ btree_desc(StringInfo buf, XLogRecord *record)
{
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; ",
- xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
- appendStringInfo(buf, "dead %u; left %u; right %u; btpo_xact %u; ",
- xlrec->deadblk, xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact);
- appendStringInfo(buf, "leaf %u; leafleft %u; leafright %u; topparent %u",
- xlrec->leafblk, xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent);
+ appendStringInfo(buf, "left %u; right %u; btpo_xact %u; ",
+ xlrec->leftsib, xlrec->rightsib,
+ xlrec->btpo_xact);
+ appendStringInfo(buf, "leafleft %u; leafright %u; topparent %u",
+ xlrec->leafleftsib, xlrec->leafrightsib,
+ xlrec->topparent);
break;
}
case XLOG_BTREE_NEWROOT:
{
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; root %u lev %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode,
- xlrec->rootblk, xlrec->level);
+ appendStringInfo(buf, "lev %u", xlrec->level);
break;
}
case XLOG_BTREE_REUSE_PAGE:
@@ -115,7 +93,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u",
xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->latestRemovedXid);
+ xlrec->node.relNode, xlrec->latestRemovedXid);
break;
}
}
diff --git a/src/backend/access/rmgrdesc/relmapdesc.c b/src/backend/access/rmgrdesc/relmapdesc.c
index ef7c533fe5f..5bda1da25c4 100644
--- a/src/backend/access/rmgrdesc/relmapdesc.c
+++ b/src/backend/access/rmgrdesc/relmapdesc.c
@@ -17,10 +17,10 @@
#include "utils/relmapper.h"
void
-relmap_desc(StringInfo buf, XLogRecord *record)
+relmap_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_RELMAP_UPDATE)
{
diff --git a/src/backend/access/rmgrdesc/seqdesc.c b/src/backend/access/rmgrdesc/seqdesc.c
index 73de3969df4..b8da96310cb 100644
--- a/src/backend/access/rmgrdesc/seqdesc.c
+++ b/src/backend/access/rmgrdesc/seqdesc.c
@@ -18,10 +18,10 @@
void
-seq_desc(StringInfo buf, XLogRecord *record)
+seq_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
xl_seq_rec *xlrec = (xl_seq_rec *) rec;
if (info == XLOG_SEQ_LOG)
diff --git a/src/backend/access/rmgrdesc/smgrdesc.c b/src/backend/access/rmgrdesc/smgrdesc.c
index 109e3eaf04d..4e8c06f5b90 100644
--- a/src/backend/access/rmgrdesc/smgrdesc.c
+++ b/src/backend/access/rmgrdesc/smgrdesc.c
@@ -19,10 +19,10 @@
void
-smgr_desc(StringInfo buf, XLogRecord *record)
+smgr_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_SMGR_CREATE)
{
diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c
index 3ee0427dcb6..319c5f9d709 100644
--- a/src/backend/access/rmgrdesc/spgdesc.c
+++ b/src/backend/access/rmgrdesc/spgdesc.c
@@ -16,70 +16,66 @@
#include "access/spgist_private.h"
-static void
-out_target(StringInfo buf, RelFileNode node)
-{
- appendStringInfo(buf, "rel %u/%u/%u ",
- node.spcNode, node.dbNode, node.relNode);
-}
-
void
-spg_desc(StringInfo buf, XLogRecord *record)
+spg_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info)
{
case XLOG_SPGIST_CREATE_INDEX:
- appendStringInfo(buf, "rel %u/%u/%u",
- ((RelFileNode *) rec)->spcNode,
- ((RelFileNode *) rec)->dbNode,
- ((RelFileNode *) rec)->relNode);
break;
case XLOG_SPGIST_ADD_LEAF:
- out_target(buf, ((spgxlogAddLeaf *) rec)->node);
- appendStringInfo(buf, "%u",
- ((spgxlogAddLeaf *) rec)->blknoLeaf);
+ {
+ spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
+
+ appendStringInfo(buf, "add leaf to page");
+ appendStringInfo(buf, "; off %u; headoff %u; parentoff %u",
+ xlrec->offnumLeaf, xlrec->offnumHeadLeaf,
+ xlrec->offnumParent);
+ if (xlrec->newPage)
+ appendStringInfo(buf, " (newpage)");
+ if (xlrec->storesNulls)
+ appendStringInfo(buf, " (nulls)");
+ }
break;
case XLOG_SPGIST_MOVE_LEAFS:
- out_target(buf, ((spgxlogMoveLeafs *) rec)->node);
- appendStringInfo(buf, "%u leafs from page %u to page %u",
- ((spgxlogMoveLeafs *) rec)->nMoves,
- ((spgxlogMoveLeafs *) rec)->blknoSrc,
- ((spgxlogMoveLeafs *) rec)->blknoDst);
+ appendStringInfo(buf, "%u leafs",
+ ((spgxlogMoveLeafs *) rec)->nMoves);
break;
case XLOG_SPGIST_ADD_NODE:
- out_target(buf, ((spgxlogAddNode *) rec)->node);
- appendStringInfo(buf, "%u:%u",
- ((spgxlogAddNode *) rec)->blkno,
+ appendStringInfo(buf, "off %u",
((spgxlogAddNode *) rec)->offnum);
break;
case XLOG_SPGIST_SPLIT_TUPLE:
- out_target(buf, ((spgxlogSplitTuple *) rec)->node);
- appendStringInfo(buf, "%u:%u to %u:%u",
- ((spgxlogSplitTuple *) rec)->blknoPrefix,
+ appendStringInfo(buf, "prefix off: %u, postfix off: %u (same %d, new %d)",
((spgxlogSplitTuple *) rec)->offnumPrefix,
- ((spgxlogSplitTuple *) rec)->blknoPostfix,
- ((spgxlogSplitTuple *) rec)->offnumPostfix);
+ ((spgxlogSplitTuple *) rec)->offnumPostfix,
+ ((spgxlogSplitTuple *) rec)->postfixBlkSame,
+ ((spgxlogSplitTuple *) rec)->newPage
+ );
break;
case XLOG_SPGIST_PICKSPLIT:
- out_target(buf, ((spgxlogPickSplit *) rec)->node);
+ {
+ spgxlogPickSplit *xlrec = (spgxlogPickSplit *) rec;
+
+ appendStringInfo(buf, "ndel %u; nins %u",
+ xlrec->nDelete, xlrec->nInsert);
+ if (xlrec->innerIsParent)
+ appendStringInfo(buf, " (innerIsParent)");
+ if (xlrec->isRootSplit)
+ appendStringInfo(buf, " (isRootSplit)");
+ }
break;
case XLOG_SPGIST_VACUUM_LEAF:
- out_target(buf, ((spgxlogVacuumLeaf *) rec)->node);
- appendStringInfo(buf, "page %u",
- ((spgxlogVacuumLeaf *) rec)->blkno);
+ /* no further information */
break;
case XLOG_SPGIST_VACUUM_ROOT:
- out_target(buf, ((spgxlogVacuumRoot *) rec)->node);
- appendStringInfo(buf, "page %u",
- ((spgxlogVacuumRoot *) rec)->blkno);
+ /* no further information */
break;
case XLOG_SPGIST_VACUUM_REDIRECT:
- out_target(buf, ((spgxlogVacuumRedirect *) rec)->node);
- appendStringInfo(buf, "page %u, newest XID %u",
- ((spgxlogVacuumRedirect *) rec)->blkno,
+ appendStringInfo(buf, "newest XID %u",
((spgxlogVacuumRedirect *) rec)->newestRedirectXid);
break;
}
diff --git a/src/backend/access/rmgrdesc/standbydesc.c b/src/backend/access/rmgrdesc/standbydesc.c
index d09041f8dfc..0ce1aa325c4 100644
--- a/src/backend/access/rmgrdesc/standbydesc.c
+++ b/src/backend/access/rmgrdesc/standbydesc.c
@@ -37,10 +37,10 @@ standby_desc_running_xacts(StringInfo buf, xl_running_xacts *xlrec)
}
void
-standby_desc(StringInfo buf, XLogRecord *record)
+standby_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_STANDBY_LOCK)
{
diff --git a/src/backend/access/rmgrdesc/tblspcdesc.c b/src/backend/access/rmgrdesc/tblspcdesc.c
index b6b0e6394df..8b2ebb4d926 100644
--- a/src/backend/access/rmgrdesc/tblspcdesc.c
+++ b/src/backend/access/rmgrdesc/tblspcdesc.c
@@ -18,10 +18,10 @@
void
-tblspc_desc(StringInfo buf, XLogRecord *record)
+tblspc_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_TBLSPC_CREATE)
{
diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c
index 22a22efc731..f5450a9b250 100644
--- a/src/backend/access/rmgrdesc/xactdesc.c
+++ b/src/backend/access/rmgrdesc/xactdesc.c
@@ -137,10 +137,10 @@ xact_desc_assignment(StringInfo buf, xl_xact_assignment *xlrec)
}
void
-xact_desc(StringInfo buf, XLogRecord *record)
+xact_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_XACT_COMMIT_COMPACT)
{
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index e0957ff3a8c..4088ba99b7f 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -32,10 +32,10 @@ const struct config_enum_entry wal_level_options[] = {
};
void
-xlog_desc(StringInfo buf, XLogRecord *record)
+xlog_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (info == XLOG_CHECKPOINT_SHUTDOWN ||
info == XLOG_CHECKPOINT_ONLINE)
@@ -76,11 +76,7 @@ xlog_desc(StringInfo buf, XLogRecord *record)
}
else if (info == XLOG_FPI)
{
- BkpBlock *bkp = (BkpBlock *) rec;
-
- appendStringInfo(buf, "%s block %u",
- relpathperm(bkp->node, bkp->fork),
- bkp->block);
+ /* no further information to print */
}
else if (info == XLOG_BACKUP_END)
{
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 21a071ab199..1a17cc467ed 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -16,8 +16,8 @@
#include "postgres.h"
#include "access/genam.h"
-#include "access/xloginsert.h"
#include "access/spgist_private.h"
+#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
@@ -202,25 +202,17 @@ static void
addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
{
- XLogRecData rdata[4];
spgxlogAddLeaf xlrec;
- xlrec.node = index->rd_node;
- xlrec.blknoLeaf = current->blkno;
xlrec.newPage = isNew;
xlrec.storesNulls = isNulls;
/* these will be filled below as needed */
xlrec.offnumLeaf = InvalidOffsetNumber;
xlrec.offnumHeadLeaf = InvalidOffsetNumber;
- xlrec.blknoParent = InvalidBlockNumber;
xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0;
- ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
- ACCEPT_RDATA_DATA(leafTuple, leafTuple->size, 1);
- ACCEPT_RDATA_BUFFER(current->buffer, 2);
-
START_CRIT_SECTION();
if (current->offnum == InvalidOffsetNumber ||
@@ -237,13 +229,10 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
/* Must update parent's downlink if any */
if (parent->buffer != InvalidBuffer)
{
- xlrec.blknoParent = parent->blkno;
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum);
-
- ACCEPT_RDATA_BUFFER(parent->buffer, 3);
}
}
else
@@ -303,12 +292,20 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF, rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogRegisterData((char *) leafTuple, leafTuple->size);
+
+ XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+ if (xlrec.offnumParent != InvalidOffsetNumber)
+ XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF);
PageSetLSN(current->page, recptr);
/* update parent only if we actually changed it */
- if (xlrec.blknoParent != InvalidBlockNumber)
+ if (xlrec.offnumParent != InvalidOffsetNumber)
{
PageSetLSN(parent->page, recptr);
}
@@ -399,7 +396,6 @@ moveLeafs(Relation index, SpGistState *state,
OffsetNumber *toDelete;
OffsetNumber *toInsert;
BlockNumber nblkno;
- XLogRecData rdata[7];
spgxlogMoveLeafs xlrec;
char *leafdata,
*leafptr;
@@ -455,20 +451,6 @@ moveLeafs(Relation index, SpGistState *state,
nblkno = BufferGetBlockNumber(nbuf);
Assert(nblkno != current->blkno);
- /* prepare WAL info */
- xlrec.node = index->rd_node;
- STORE_STATE(state, xlrec.stateSrc);
-
- xlrec.blknoSrc = current->blkno;
- xlrec.blknoDst = nblkno;
- xlrec.nMoves = nDelete;
- xlrec.replaceDead = replaceDead;
- xlrec.storesNulls = isNulls;
-
- xlrec.blknoParent = parent->blkno;
- xlrec.offnumParent = parent->offnum;
- xlrec.nodeI = parent->node;
-
leafdata = leafptr = palloc(size);
START_CRIT_SECTION();
@@ -533,15 +515,29 @@ moveLeafs(Relation index, SpGistState *state,
{
XLogRecPtr recptr;
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogMoveLeafs, 0);
- ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * nDelete, 1);
- ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nInsert, 2);
- ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, 3);
- ACCEPT_RDATA_BUFFER(current->buffer, 4);
- ACCEPT_RDATA_BUFFER(nbuf, 5);
- ACCEPT_RDATA_BUFFER(parent->buffer, 6);
+ /* prepare WAL info */
+ STORE_STATE(state, xlrec.stateSrc);
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS, rdata);
+ xlrec.nMoves = nDelete;
+ xlrec.replaceDead = replaceDead;
+ xlrec.storesNulls = isNulls;
+
+ xlrec.offnumParent = parent->offnum;
+ xlrec.nodeI = parent->node;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogMoveLeafs);
+ XLogRegisterData((char *) toDelete,
+ sizeof(OffsetNumber) * nDelete);
+ XLogRegisterData((char *) toInsert,
+ sizeof(OffsetNumber) * nInsert);
+ XLogRegisterData((char *) leafdata, leafptr - leafdata);
+
+ XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(1, nbuf, REGBUF_STANDARD | (xlrec.newPage ? REGBUF_WILL_INIT : 0));
+ XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS);
PageSetLSN(current->page, recptr);
PageSetLSN(npage, recptr);
@@ -701,8 +697,6 @@ doPickSplit(Relation index, SpGistState *state,
int currentFreeSpace;
int totalLeafSizes;
bool allTheSame;
- XLogRecData rdata[10];
- int nRdata;
spgxlogPickSplit xlrec;
char *leafdata,
*leafptr;
@@ -725,7 +719,6 @@ doPickSplit(Relation index, SpGistState *state,
newLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n);
leafPageSelect = (uint8 *) palloc(sizeof(uint8) * n);
- xlrec.node = index->rd_node;
STORE_STATE(state, xlrec.stateSrc);
/*
@@ -971,10 +964,6 @@ doPickSplit(Relation index, SpGistState *state,
}
/*
- * Because a WAL record can't involve more than four buffers, we can only
- * afford to deal with two leaf pages in each picksplit action, ie the
- * current page and at most one other.
- *
* The new leaf tuples converted from the existing ones should require the
* same or less space, and therefore should all fit onto one page
* (although that's not necessarily the current page, since we can't
@@ -1108,17 +1097,13 @@ doPickSplit(Relation index, SpGistState *state,
}
/* Start preparing WAL record */
- xlrec.blknoSrc = current->blkno;
- xlrec.blknoDest = InvalidBlockNumber;
xlrec.nDelete = 0;
xlrec.initSrc = isNew;
xlrec.storesNulls = isNulls;
+ xlrec.isRootSplit = SpGistBlockIsRoot(current->blkno);
leafdata = leafptr = (char *) palloc(totalLeafSizes);
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogPickSplit, 0);
- nRdata = 1;
-
/* Here we begin making the changes to the target pages */
START_CRIT_SECTION();
@@ -1150,12 +1135,6 @@ doPickSplit(Relation index, SpGistState *state,
else
{
xlrec.nDelete = nToDelete;
- ACCEPT_RDATA_DATA(toDelete,
- sizeof(OffsetNumber) * nToDelete,
- nRdata);
- nRdata++;
- ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
- nRdata++;
if (!state->isBuild)
{
@@ -1240,25 +1219,8 @@ doPickSplit(Relation index, SpGistState *state,
if (newLeafBuffer != InvalidBuffer)
{
MarkBufferDirty(newLeafBuffer);
- /* also save block number for WAL */
- xlrec.blknoDest = BufferGetBlockNumber(newLeafBuffer);
- if (!xlrec.initDest)
- {
- ACCEPT_RDATA_BUFFER(newLeafBuffer, nRdata);
- nRdata++;
- }
}
- xlrec.nInsert = nToInsert;
- ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nToInsert, nRdata);
- nRdata++;
- ACCEPT_RDATA_DATA(leafPageSelect, sizeof(uint8) * nToInsert, nRdata);
- nRdata++;
- ACCEPT_RDATA_DATA(innerTuple, innerTuple->size, nRdata);
- nRdata++;
- ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, nRdata);
- nRdata++;
-
/* Remember current buffer, since we're about to change "current" */
saveCurrent = *current;
@@ -1276,7 +1238,6 @@ doPickSplit(Relation index, SpGistState *state,
current->blkno = parent->blkno;
current->buffer = parent->buffer;
current->page = parent->page;
- xlrec.blknoInner = current->blkno;
xlrec.offnumInner = current->offnum =
SpGistPageAddNewItem(state, current->page,
(Item) innerTuple, innerTuple->size,
@@ -1285,14 +1246,11 @@ doPickSplit(Relation index, SpGistState *state,
/*
* Update parent node link and mark parent page dirty
*/
- xlrec.blknoParent = parent->blkno;
+ xlrec.innerIsParent = true;
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum);
- ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
- nRdata++;
-
/*
* Update redirection link (in old current buffer)
*/
@@ -1314,7 +1272,6 @@ doPickSplit(Relation index, SpGistState *state,
current->buffer = newInnerBuffer;
current->blkno = BufferGetBlockNumber(current->buffer);
current->page = BufferGetPage(current->buffer);
- xlrec.blknoInner = current->blkno;
xlrec.offnumInner = current->offnum =
SpGistPageAddNewItem(state, current->page,
(Item) innerTuple, innerTuple->size,
@@ -1326,16 +1283,11 @@ doPickSplit(Relation index, SpGistState *state,
/*
* Update parent node link and mark parent page dirty
*/
- xlrec.blknoParent = parent->blkno;
+ xlrec.innerIsParent = (parent->buffer == current->buffer);
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum);
- ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
- nRdata++;
- ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
- nRdata++;
-
/*
* Update redirection link (in old current buffer)
*/
@@ -1357,8 +1309,8 @@ doPickSplit(Relation index, SpGistState *state,
SpGistInitBuffer(current->buffer, (isNulls ? SPGIST_NULLS : 0));
xlrec.initInner = true;
+ xlrec.innerIsParent = false;
- xlrec.blknoInner = current->blkno;
xlrec.offnumInner = current->offnum =
PageAddItem(current->page, (Item) innerTuple, innerTuple->size,
InvalidOffsetNumber, false, false);
@@ -1367,7 +1319,6 @@ doPickSplit(Relation index, SpGistState *state,
innerTuple->size);
/* No parent link to update, nor redirection to do */
- xlrec.blknoParent = InvalidBlockNumber;
xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0;
@@ -1381,9 +1332,46 @@ doPickSplit(Relation index, SpGistState *state,
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
+ int flags;
+
+ XLogBeginInsert();
+
+ xlrec.nInsert = nToInsert;
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogPickSplit);
+
+ XLogRegisterData((char *) toDelete,
+ sizeof(OffsetNumber) * xlrec.nDelete);
+ XLogRegisterData((char *) toInsert,
+ sizeof(OffsetNumber) * xlrec.nInsert);
+ XLogRegisterData((char *) leafPageSelect,
+ sizeof(uint8) * xlrec.nInsert);
+ XLogRegisterData((char *) innerTuple, innerTuple->size);
+ XLogRegisterData(leafdata, leafptr - leafdata);
+
+ flags = REGBUF_STANDARD;
+ if (xlrec.initSrc)
+ flags |= REGBUF_WILL_INIT;
+ if (BufferIsValid(saveCurrent.buffer))
+ XLogRegisterBuffer(0, saveCurrent.buffer, flags);
+
+ if (BufferIsValid(newLeafBuffer))
+ {
+ flags = REGBUF_STANDARD;
+ if (xlrec.initDest)
+ flags |= REGBUF_WILL_INIT;
+ XLogRegisterBuffer(1, newLeafBuffer, flags);
+ }
+ XLogRegisterBuffer(2, current->buffer, REGBUF_STANDARD);
+ if (parent->buffer != InvalidBuffer)
+ {
+ if (parent->buffer != current->buffer)
+ XLogRegisterBuffer(3, parent->buffer, REGBUF_STANDARD);
+ else
+ Assert(xlrec.innerIsParent);
+ }
/* Issue the WAL record */
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT, rdata);
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT);
/* Update page LSNs on all affected pages */
if (newLeafBuffer != InvalidBuffer)
@@ -1489,7 +1477,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
int nodeN, Datum nodeLabel)
{
SpGistInnerTuple newInnerTuple;
- XLogRecData rdata[5];
spgxlogAddNode xlrec;
/* Should not be applied to nulls */
@@ -1499,25 +1486,18 @@ spgAddNodeAction(Relation index, SpGistState *state,
newInnerTuple = addNode(state, innerTuple, nodeLabel, nodeN);
/* Prepare WAL record */
- xlrec.node = index->rd_node;
STORE_STATE(state, xlrec.stateSrc);
- xlrec.blkno = current->blkno;
xlrec.offnum = current->offnum;
/* we don't fill these unless we need to change the parent downlink */
- xlrec.blknoParent = InvalidBlockNumber;
+ xlrec.parentBlk = -1;
xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0;
/* we don't fill these unless tuple has to be moved */
- xlrec.blknoNew = InvalidBlockNumber;
xlrec.offnumNew = InvalidOffsetNumber;
xlrec.newPage = false;
- ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
- ACCEPT_RDATA_DATA(newInnerTuple, newInnerTuple->size, 1);
- ACCEPT_RDATA_BUFFER(current->buffer, 2);
-
if (PageGetExactFreeSpace(current->page) >=
newInnerTuple->size - innerTuple->size)
{
@@ -1539,7 +1519,13 @@ spgAddNodeAction(Relation index, SpGistState *state,
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
+
+ XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
PageSetLSN(current->page, recptr);
}
@@ -1565,7 +1551,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
saveCurrent = *current;
- xlrec.blknoParent = parent->blkno;
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
@@ -1580,8 +1565,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
current->blkno = BufferGetBlockNumber(current->buffer);
current->page = BufferGetPage(current->buffer);
- xlrec.blknoNew = current->blkno;
-
/*
* Let's just make real sure new current isn't same as old. Right now
* that's impossible, but if SpGistGetBuffer ever got smart enough to
@@ -1590,17 +1573,19 @@ spgAddNodeAction(Relation index, SpGistState *state,
* replay would be subtly wrong, so I think a mere assert isn't enough
* here.
*/
- if (xlrec.blknoNew == xlrec.blkno)
+ if (current->blkno == saveCurrent.blkno)
elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer");
/*
* New current and parent buffer will both be modified; but note that
* parent buffer could be same as either new or old current.
*/
- ACCEPT_RDATA_BUFFER(current->buffer, 3);
- if (parent->buffer != current->buffer &&
- parent->buffer != saveCurrent.buffer)
- ACCEPT_RDATA_BUFFER(parent->buffer, 4);
+ if (parent->buffer == saveCurrent.buffer)
+ xlrec.parentBlk = 0;
+ else if (parent->buffer == current->buffer)
+ xlrec.parentBlk = 1;
+ else
+ xlrec.parentBlk = 2;
START_CRIT_SECTION();
@@ -1647,7 +1632,20 @@ spgAddNodeAction(Relation index, SpGistState *state,
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
+ XLogBeginInsert();
+
+ /* orig page */
+ XLogRegisterBuffer(0, saveCurrent.buffer, REGBUF_STANDARD);
+ /* new page */
+ XLogRegisterBuffer(1, current->buffer, REGBUF_STANDARD);
+ /* parent page (if different from orig and new) */
+ if (xlrec.parentBlk == 2)
+ XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
+
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
/* we don't bother to check if any of these are redundant */
PageSetLSN(current->page, recptr);
@@ -1682,7 +1680,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
BlockNumber postfixBlkno;
OffsetNumber postfixOffset;
int i;
- XLogRecData rdata[5];
spgxlogSplitTuple xlrec;
Buffer newBuffer = InvalidBuffer;
@@ -1725,14 +1722,8 @@ spgSplitNodeAction(Relation index, SpGistState *state,
postfixTuple->allTheSame = innerTuple->allTheSame;
/* prep data for WAL record */
- xlrec.node = index->rd_node;
xlrec.newPage = false;
- ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
- ACCEPT_RDATA_DATA(prefixTuple, prefixTuple->size, 1);
- ACCEPT_RDATA_DATA(postfixTuple, postfixTuple->size, 2);
- ACCEPT_RDATA_BUFFER(current->buffer, 3);
-
/*
* If we can't fit both tuples on the current page, get a new page for the
* postfix tuple. In particular, can't split to the root page.
@@ -1752,7 +1743,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
GBUF_INNER_PARITY(current->blkno + 1),
postfixTuple->size + sizeof(ItemIdData),
&xlrec.newPage);
- ACCEPT_RDATA_BUFFER(newBuffer, 4);
}
START_CRIT_SECTION();
@@ -1767,27 +1757,28 @@ spgSplitNodeAction(Relation index, SpGistState *state,
if (xlrec.offnumPrefix != current->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
prefixTuple->size);
- xlrec.blknoPrefix = current->blkno;
/*
* put postfix tuple into appropriate page
*/
if (newBuffer == InvalidBuffer)
{
- xlrec.blknoPostfix = postfixBlkno = current->blkno;
+ postfixBlkno = current->blkno;
xlrec.offnumPostfix = postfixOffset =
SpGistPageAddNewItem(state, current->page,
(Item) postfixTuple, postfixTuple->size,
NULL, false);
+ xlrec.postfixBlkSame = true;
}
else
{
- xlrec.blknoPostfix = postfixBlkno = BufferGetBlockNumber(newBuffer);
+ postfixBlkno = BufferGetBlockNumber(newBuffer);
xlrec.offnumPostfix = postfixOffset =
SpGistPageAddNewItem(state, BufferGetPage(newBuffer),
(Item) postfixTuple, postfixTuple->size,
NULL, false);
MarkBufferDirty(newBuffer);
+ xlrec.postfixBlkSame = false;
}
/*
@@ -1808,7 +1799,23 @@ spgSplitNodeAction(Relation index, SpGistState *state,
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE, rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogRegisterData((char *) prefixTuple, prefixTuple->size);
+ XLogRegisterData((char *) postfixTuple, postfixTuple->size);
+
+ XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+ if (newBuffer != InvalidBuffer)
+ {
+ int flags;
+
+ flags = REGBUF_STANDARD;
+ if (xlrec.newPage)
+ flags |= REGBUF_WILL_INIT;
+ XLogRegisterBuffer(1, newBuffer, flags);
+ }
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE);
PageSetLSN(current->page, recptr);
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index e1dfc8e3580..f168ac5c5cf 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -105,15 +105,18 @@ spgbuild(PG_FUNCTION_ARGS)
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
- XLogRecData rdata;
- /* WAL data is just the relfilenode */
- rdata.data = (char *) &(index->rd_node);
- rdata.len = sizeof(RelFileNode);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, &rdata);
+ /*
+ * Replay will re-initialize the pages, so don't take full-page
+ * images. No other data to log.
+ */
+ XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
+ XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
PageSetLSN(BufferGetPage(metabuffer), recptr);
PageSetLSN(BufferGetPage(rootbuffer), recptr);
diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c
index 2e05d22b749..c95b80b5c7c 100644
--- a/src/backend/access/spgist/spgvacuum.c
+++ b/src/backend/access/spgist/spgvacuum.c
@@ -127,7 +127,6 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
{
Page page = BufferGetPage(buffer);
spgxlogVacuumLeaf xlrec;
- XLogRecData rdata[8];
OffsetNumber toDead[MaxIndexTuplesPerPage];
OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
OffsetNumber moveSrc[MaxIndexTuplesPerPage];
@@ -323,20 +322,6 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
elog(ERROR, "inconsistent counts of deletable tuples");
- /* Prepare WAL record */
- xlrec.node = index->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
- STORE_STATE(&bds->spgstate, xlrec.stateSrc);
-
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumLeaf, 0);
- ACCEPT_RDATA_DATA(toDead, sizeof(OffsetNumber) * xlrec.nDead, 1);
- ACCEPT_RDATA_DATA(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder, 2);
- ACCEPT_RDATA_DATA(moveSrc, sizeof(OffsetNumber) * xlrec.nMove, 3);
- ACCEPT_RDATA_DATA(moveDest, sizeof(OffsetNumber) * xlrec.nMove, 4);
- ACCEPT_RDATA_DATA(chainSrc, sizeof(OffsetNumber) * xlrec.nChain, 5);
- ACCEPT_RDATA_DATA(chainDest, sizeof(OffsetNumber) * xlrec.nChain, 6);
- ACCEPT_RDATA_BUFFER(buffer, 7);
-
/* Do the updates */
START_CRIT_SECTION();
@@ -389,7 +374,22 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, rdata);
+ XLogBeginInsert();
+
+ STORE_STATE(&bds->spgstate, xlrec.stateSrc);
+
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
+ /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
+ XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
+ XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
+ XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
+ XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
+ XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
+ XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
PageSetLSN(page, recptr);
}
@@ -407,12 +407,10 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
{
Page page = BufferGetPage(buffer);
spgxlogVacuumRoot xlrec;
- XLogRecData rdata[3];
OffsetNumber toDelete[MaxIndexTuplesPerPage];
OffsetNumber i,
max = PageGetMaxOffsetNumber(page);
- xlrec.blkno = BufferGetBlockNumber(buffer);
xlrec.nDelete = 0;
/* Scan page, identify tuples to delete, accumulate stats */
@@ -448,15 +446,6 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
if (xlrec.nDelete == 0)
return; /* nothing more to do */
- /* Prepare WAL record */
- xlrec.node = index->rd_node;
- STORE_STATE(&bds->spgstate, xlrec.stateSrc);
-
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRoot, 0);
- /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
- ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * xlrec.nDelete, 1);
- ACCEPT_RDATA_BUFFER(buffer, 2);
-
/* Do the update */
START_CRIT_SECTION();
@@ -469,7 +458,19 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, rdata);
+ XLogBeginInsert();
+
+ /* Prepare WAL record */
+ STORE_STATE(&bds->spgstate, xlrec.stateSrc);
+
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
+ /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
+ XLogRegisterData((char *) toDelete,
+ sizeof(OffsetNumber) * xlrec.nDelete);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
PageSetLSN(page, recptr);
}
@@ -499,10 +500,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
OffsetNumber itemnos[MaxIndexTuplesPerPage];
spgxlogVacuumRedirect xlrec;
- XLogRecData rdata[3];
- xlrec.node = index->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
xlrec.nToPlaceholder = 0;
xlrec.newestRedirectXid = InvalidTransactionId;
@@ -585,11 +583,15 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
{
XLogRecPtr recptr;
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRedirect, 0);
- ACCEPT_RDATA_DATA(itemToPlaceholder, sizeof(OffsetNumber) * xlrec.nToPlaceholder, 1);
- ACCEPT_RDATA_BUFFER(buffer, 2);
+ XLogBeginInsert();
+
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
+ XLogRegisterData((char *) itemToPlaceholder,
+ sizeof(OffsetNumber) * xlrec.nToPlaceholder);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, rdata);
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
PageSetLSN(page, recptr);
}
diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c
index 920739436ac..ac6d4bd369a 100644
--- a/src/backend/access/spgist/spgxlog.c
+++ b/src/backend/access/spgist/spgxlog.c
@@ -71,33 +71,30 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
}
static void
-spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
+spgRedoCreateIndex(XLogReaderState *record)
{
- RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+ XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
Page page;
- /* Backup blocks are not used in create_index records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- buffer = XLogReadBuffer(*node, SPGIST_METAPAGE_BLKNO, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 0);
+ Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
page = (Page) BufferGetPage(buffer);
SpGistInitMetapage(page);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
- buffer = XLogReadBuffer(*node, SPGIST_ROOT_BLKNO, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 1);
+ Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
SpGistInitBuffer(buffer, SPGIST_LEAF);
page = (Page) BufferGetPage(buffer);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
- buffer = XLogReadBuffer(*node, SPGIST_NULL_BLKNO, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 2);
+ Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
page = (Page) BufferGetPage(buffer);
PageSetLSN(page, lsn);
@@ -106,8 +103,9 @@ spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
}
static void
-spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
+spgRedoAddLeaf(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record);
spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr;
char *leafTuple;
@@ -128,15 +126,13 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
*/
if (xldata->newPage)
{
- buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, true);
+ buffer = XLogInitBufferForRedo(record, 0);
SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 0,
- xldata->node, xldata->blknoLeaf,
- &buffer);
+ action = XLogReadBufferForRedo(record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
@@ -164,7 +160,8 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
{
/* replacing a DEAD tuple */
PageIndexTupleDelete(page, xldata->offnumLeaf);
- if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size,
+ if (PageAddItem(page,
+ (Item) leafTuple, leafTupleHdr.size,
xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
leafTupleHdr.size);
@@ -177,13 +174,14 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(buffer);
/* update parent downlink if necessary */
- if (xldata->blknoParent != InvalidBlockNumber)
+ if (xldata->offnumParent != InvalidOffsetNumber)
{
- if (XLogReadBufferForRedo(lsn, record, 1,
- xldata->node, xldata->blknoParent,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
{
SpGistInnerTuple tuple;
+ BlockNumber blknoLeaf;
+
+ XLogRecGetBlockTag(record, 0, NULL, NULL, &blknoLeaf);
page = BufferGetPage(buffer);
@@ -191,7 +189,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(tuple, xldata->nodeI,
- xldata->blknoLeaf, xldata->offnumLeaf);
+ blknoLeaf, xldata->offnumLeaf);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -202,8 +200,9 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
}
static void
-spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
+spgRedoMoveLeafs(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record);
spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr;
SpGistState state;
@@ -213,6 +212,9 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
Buffer buffer;
Page page;
XLogRedoAction action;
+ BlockNumber blknoDst;
+
+ XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst);
fillFakeState(&state, xldata->stateSrc);
@@ -235,15 +237,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
/* Insert tuples on the dest page (do first, so redirect is valid) */
if (xldata->newPage)
{
- buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true);
+ buffer = XLogInitBufferForRedo(record, 1);
SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 1,
- xldata->node, xldata->blknoDst,
- &buffer);
+ action = XLogReadBufferForRedo(record, 1, &buffer);
+
if (action == BLK_NEEDS_REDO)
{
int i;
@@ -260,7 +261,8 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
* field.
*/
leafTuple = ptr;
- memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));
+ memcpy(&leafTupleHdr, leafTuple,
+ sizeof(SpGistLeafTupleData));
addOrReplaceTuple(page, (Item) leafTuple,
leafTupleHdr.size, toInsert[i]);
@@ -274,14 +276,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(buffer);
/* Delete tuples from the source page, inserting a redirection pointer */
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoSrc,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
+
spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
SPGIST_PLACEHOLDER,
- xldata->blknoDst,
+ blknoDst,
toInsert[nInsert - 1]);
PageSetLSN(page, lsn);
@@ -291,8 +293,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(buffer);
/* And update the parent downlink */
- if (XLogReadBufferForRedo(lsn, record, 2, xldata->node, xldata->blknoParent,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
{
SpGistInnerTuple tuple;
@@ -302,7 +303,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(tuple, xldata->nodeI,
- xldata->blknoDst, toInsert[nInsert - 1]);
+ blknoDst, toInsert[nInsert - 1]);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -312,8 +313,9 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
}
static void
-spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
+spgRedoAddNode(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record);
spgxlogAddNode *xldata = (spgxlogAddNode *) ptr;
char *innerTuple;
@@ -321,7 +323,6 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
SpGistState state;
Buffer buffer;
Page page;
- int bbi;
XLogRedoAction action;
ptr += sizeof(spgxlogAddNode);
@@ -331,17 +332,18 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
fillFakeState(&state, xldata->stateSrc);
- if (xldata->blknoNew == InvalidBlockNumber)
+ if (!XLogRecHasBlockRef(record, 1))
{
/* update in place */
- Assert(xldata->blknoParent == InvalidBlockNumber);
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ Assert(xldata->parentBlk == -1);
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
+
PageIndexTupleDelete(page, xldata->offnum);
if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size,
- xldata->offnum, false, false) != xldata->offnum)
+ xldata->offnum,
+ false, false) != xldata->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
innerTupleHdr.size);
@@ -353,30 +355,30 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
}
else
{
+ BlockNumber blkno;
+ BlockNumber blknoNew;
+
+ XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno);
+ XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew);
+
/*
* In normal operation we would have all three pages (source, dest,
* and parent) locked simultaneously; but in WAL replay it should be
* safe to update them one at a time, as long as we do it in the right
- * order.
- *
- * The logic here depends on the assumption that blkno != blknoNew,
- * else we can't tell which BKP bit goes with which page, and the LSN
- * checks could go wrong too.
+ * order. We must insert the new tuple before replacing the old tuple
+ * with the redirect tuple.
*/
- Assert(xldata->blkno != xldata->blknoNew);
/* Install new tuple first so redirect is valid */
if (xldata->newPage)
{
- buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, true);
/* AddNode is not used for nulls pages */
+ buffer = XLogInitBufferForRedo(record, 1);
SpGistInitBuffer(buffer, 0);
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 1,
- xldata->node, xldata->blknoNew,
- &buffer);
+ action = XLogReadBufferForRedo(record, 1, &buffer);
if (action == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
@@ -385,22 +387,26 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
innerTupleHdr.size, xldata->offnumNew);
/*
- * If parent is in this same page, don't advance LSN; doing so
- * would fool us into not applying the parent downlink update
- * below. We'll update the LSN when we fix the parent downlink.
+ * If parent is in this same page, update it now.
*/
- if (xldata->blknoParent != xldata->blknoNew)
+ if (xldata->parentBlk == 1)
{
- PageSetLSN(page, lsn);
+ SpGistInnerTuple parentTuple;
+
+ parentTuple = (SpGistInnerTuple) PageGetItem(page,
+ PageGetItemId(page, xldata->offnumParent));
+
+ spgUpdateNodeLink(parentTuple, xldata->nodeI,
+ blknoNew, xldata->offnumNew);
}
+ PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* Delete old tuple, replacing it with redirect or placeholder tuple */
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
SpGistDeadTuple dt;
@@ -412,11 +418,12 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
InvalidOffsetNumber);
else
dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
- xldata->blknoNew,
+ blknoNew,
xldata->offnumNew);
PageIndexTupleDelete(page, xldata->offnum);
- if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum,
+ if (PageAddItem(page, (Item) dt, dt->size,
+ xldata->offnum,
false, false) != xldata->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
dt->size);
@@ -427,67 +434,55 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
SpGistPageGetOpaque(page)->nRedirection++;
/*
- * If parent is in this same page, don't advance LSN; doing so
- * would fool us into not applying the parent downlink update
- * below. We'll update the LSN when we fix the parent downlink.
+ * If parent is in this same page, update it now.
*/
- if (xldata->blknoParent != xldata->blkno)
+ if (xldata->parentBlk == 0)
{
- PageSetLSN(page, lsn);
+ SpGistInnerTuple parentTuple;
+
+ parentTuple = (SpGistInnerTuple) PageGetItem(page,
+ PageGetItemId(page, xldata->offnumParent));
+
+ spgUpdateNodeLink(parentTuple, xldata->nodeI,
+ blknoNew, xldata->offnumNew);
}
+ PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/*
- * Update parent downlink. Since parent could be in either of the
- * previous two buffers, it's a bit tricky to determine which BKP bit
- * applies.
+ * Update parent downlink (if we didn't do it as part of the source or
+ * destination page update already).
*/
- if (xldata->blknoParent == xldata->blkno)
- bbi = 0;
- else if (xldata->blknoParent == xldata->blknoNew)
- bbi = 1;
- else
- bbi = 2;
-
- if (record->xl_info & XLR_BKP_BLOCK(bbi))
+ if (xldata->parentBlk == 2)
{
- if (bbi == 2) /* else we already did it */
- (void) RestoreBackupBlock(lsn, record, bbi, false, false);
- action = BLK_RESTORED;
- buffer = InvalidBuffer;
- }
- else
- {
- action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
- xldata->blknoParent, &buffer);
- Assert(action != BLK_RESTORED);
- }
- if (action == BLK_NEEDS_REDO)
- {
- SpGistInnerTuple innerTuple;
+ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
+ {
+ SpGistInnerTuple parentTuple;
- page = BufferGetPage(buffer);
+ page = BufferGetPage(buffer);
- innerTuple = (SpGistInnerTuple) PageGetItem(page,
+ parentTuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
- spgUpdateNodeLink(innerTuple, xldata->nodeI,
- xldata->blknoNew, xldata->offnumNew);
+ spgUpdateNodeLink(parentTuple, xldata->nodeI,
+ blknoNew, xldata->offnumNew);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
+ PageSetLSN(page, lsn);
+ MarkBufferDirty(buffer);
+ }
+ if (BufferIsValid(buffer))
+ UnlockReleaseBuffer(buffer);
}
- if (BufferIsValid(buffer))
- UnlockReleaseBuffer(buffer);
}
}
static void
-spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
+spgRedoSplitTuple(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record);
spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr;
char *prefixTuple;
@@ -496,6 +491,7 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
SpGistInnerTupleData postfixTupleHdr;
Buffer buffer;
Page page;
+ XLogRedoAction action;
ptr += sizeof(spgxlogSplitTuple);
prefixTuple = ptr;
@@ -513,22 +509,17 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
*/
/* insert postfix tuple first to avoid dangling link */
- if (xldata->blknoPostfix != xldata->blknoPrefix)
+ if (!xldata->postfixBlkSame)
{
- XLogRedoAction action;
-
if (xldata->newPage)
{
- buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, true);
+ buffer = XLogInitBufferForRedo(record, 1);
/* SplitTuple is not used for nulls pages */
SpGistInitBuffer(buffer, 0);
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 1,
- xldata->node, xldata->blknoPostfix,
- &buffer);
-
+ action = XLogReadBufferForRedo(record, 1, &buffer);
if (action == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
@@ -544,18 +535,19 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
}
/* now handle the original page */
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoPrefix,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
+
PageIndexTupleDelete(page, xldata->offnumPrefix);
if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size,
xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
prefixTupleHdr.size);
- if (xldata->blknoPostfix == xldata->blknoPrefix)
- addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size,
+ if (xldata->postfixBlkSame)
+ addOrReplaceTuple(page, (Item) postfixTuple,
+ postfixTupleHdr.size,
xldata->offnumPostfix);
PageSetLSN(page, lsn);
@@ -566,8 +558,9 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
}
static void
-spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
+spgRedoPickSplit(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record);
spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr;
char *innerTuple;
@@ -578,14 +571,16 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
uint8 *leafPageSelect;
Buffer srcBuffer;
Buffer destBuffer;
+ Buffer innerBuffer;
Page srcPage;
Page destPage;
- Buffer innerBuffer;
Page page;
- int bbi;
int i;
+ BlockNumber blknoInner;
XLogRedoAction action;
+ XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner);
+
fillFakeState(&state, xldata->stateSrc);
ptr += SizeOfSpgxlogPickSplit;
@@ -603,13 +598,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
/* now ptr points to the list of leaf tuples */
- /*
- * It's a bit tricky to identify which pages have been handled as
- * full-page images, so we explicitly count each referenced buffer.
- */
- bbi = 0;
-
- if (SpGistBlockIsRoot(xldata->blknoSrc))
+ if (xldata->isRootSplit)
{
/* when splitting root, we touch it only in the guise of new inner */
srcBuffer = InvalidBuffer;
@@ -618,8 +607,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
else if (xldata->initSrc)
{
/* just re-init the source page */
- srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true);
- Assert(BufferIsValid(srcBuffer));
+ srcBuffer = XLogInitBufferForRedo(record, 0);
srcPage = (Page) BufferGetPage(srcBuffer);
SpGistInitBuffer(srcBuffer,
@@ -634,9 +622,8 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
* inserting leaf tuples and the new inner tuple, else the added
* redirect tuple will be a dangling link.)
*/
- if (XLogReadBufferForRedo(lsn, record, bbi,
- xldata->node, xldata->blknoSrc,
- &srcBuffer) == BLK_NEEDS_REDO)
+ srcPage = NULL;
+ if (XLogReadBufferForRedo(record, 0, &srcBuffer) == BLK_NEEDS_REDO)
{
srcPage = BufferGetPage(srcBuffer);
@@ -650,7 +637,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
toDelete, xldata->nDelete,
SPGIST_REDIRECT,
SPGIST_PLACEHOLDER,
- xldata->blknoInner,
+ blknoInner,
xldata->offnumInner);
else
spgPageIndexMultiDelete(&state, srcPage,
@@ -662,15 +649,10 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
/* don't update LSN etc till we're done with it */
}
- else
- {
- srcPage = NULL; /* don't do any page updates */
- }
- bbi++;
}
/* try to access dest page if any */
- if (xldata->blknoDest == InvalidBlockNumber)
+ if (!XLogRecHasBlockRef(record, 1))
{
destBuffer = InvalidBuffer;
destPage = NULL;
@@ -678,8 +660,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
else if (xldata->initDest)
{
/* just re-init the dest page */
- destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true);
- Assert(BufferIsValid(destBuffer));
+ destBuffer = XLogInitBufferForRedo(record, 1);
destPage = (Page) BufferGetPage(destBuffer);
SpGistInitBuffer(destBuffer,
@@ -692,17 +673,10 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
* We could probably release the page lock immediately in the
* full-page-image case, but for safety let's hold it till later.
*/
- if (XLogReadBufferForRedo(lsn, record, bbi,
- xldata->node, xldata->blknoDest,
- &destBuffer) == BLK_NEEDS_REDO)
- {
+ if (XLogReadBufferForRedo(record, 1, &destBuffer) == BLK_NEEDS_REDO)
destPage = (Page) BufferGetPage(destBuffer);
- }
else
- {
destPage = NULL; /* don't do any page updates */
- }
- bbi++;
}
/* restore leaf tuples to src and/or dest page */
@@ -739,14 +713,12 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
/* restore new inner tuple */
if (xldata->initInner)
{
- innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true);
- SpGistInitBuffer(innerBuffer,
- (xldata->storesNulls ? SPGIST_NULLS : 0));
+ innerBuffer = XLogInitBufferForRedo(record, 2);
+ SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
- xldata->blknoInner, &innerBuffer);
+ action = XLogReadBufferForRedo(record, 2, &innerBuffer);
if (action == BLK_NEEDS_REDO)
{
@@ -756,14 +728,14 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
xldata->offnumInner);
/* if inner is also parent, update link while we're here */
- if (xldata->blknoInner == xldata->blknoParent)
+ if (xldata->innerIsParent)
{
SpGistInnerTuple parent;
parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI,
- xldata->blknoInner, xldata->offnumInner);
+ blknoInner, xldata->offnumInner);
}
PageSetLSN(page, lsn);
@@ -771,7 +743,6 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
}
if (BufferIsValid(innerBuffer))
UnlockReleaseBuffer(innerBuffer);
- bbi++;
/*
* Now we can release the leaf-page locks. It's okay to do this before
@@ -783,18 +754,11 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
UnlockReleaseBuffer(destBuffer);
/* update parent downlink, unless we did it above */
- if (xldata->blknoParent == InvalidBlockNumber)
- {
- /* no parent cause we split the root */
- Assert(SpGistBlockIsRoot(xldata->blknoInner));
- }
- else if (xldata->blknoInner != xldata->blknoParent)
+ if (XLogRecHasBlockRef(record, 3))
{
Buffer parentBuffer;
- if (XLogReadBufferForRedo(lsn, record, bbi,
- xldata->node, xldata->blknoParent,
- &parentBuffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 3, &parentBuffer) == BLK_NEEDS_REDO)
{
SpGistInnerTuple parent;
@@ -803,7 +767,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI,
- xldata->blknoInner, xldata->offnumInner);
+ blknoInner, xldata->offnumInner);
PageSetLSN(page, lsn);
MarkBufferDirty(parentBuffer);
@@ -811,11 +775,14 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
if (BufferIsValid(parentBuffer))
UnlockReleaseBuffer(parentBuffer);
}
+ else
+ Assert(xldata->innerIsParent || xldata->isRootSplit);
}
static void
-spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
+spgRedoVacuumLeaf(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record);
spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) ptr;
OffsetNumber *toDead;
@@ -844,8 +811,7 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
ptr += sizeof(OffsetNumber) * xldata->nChain;
chainDest = (OffsetNumber *) ptr;
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
@@ -897,8 +863,9 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
}
static void
-spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
+spgRedoVacuumRoot(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record);
spgxlogVacuumRoot *xldata = (spgxlogVacuumRoot *) ptr;
OffsetNumber *toDelete;
@@ -907,8 +874,7 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
toDelete = xldata->offsets;
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
@@ -923,8 +889,9 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
}
static void
-spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
+spgRedoVacuumRedirect(XLogReaderState *record)
{
+ XLogRecPtr lsn = record->EndRecPtr;
char *ptr = XLogRecGetData(record);
spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr;
OffsetNumber *itemToPlaceholder;
@@ -939,12 +906,16 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
if (InHotStandby)
{
if (TransactionIdIsValid(xldata->newestRedirectXid))
+ {
+ RelFileNode node;
+
+ XLogRecGetBlockTag(record, 0, &node, NULL, NULL);
ResolveRecoveryConflictWithSnapshot(xldata->newestRedirectXid,
- xldata->node);
+ node);
+ }
}
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
Page page = BufferGetPage(buffer);
SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
@@ -995,40 +966,40 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
}
void
-spg_redo(XLogRecPtr lsn, XLogRecord *record)
+spg_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
MemoryContext oldCxt;
oldCxt = MemoryContextSwitchTo(opCtx);
switch (info)
{
case XLOG_SPGIST_CREATE_INDEX:
- spgRedoCreateIndex(lsn, record);
+ spgRedoCreateIndex(record);
break;
case XLOG_SPGIST_ADD_LEAF:
- spgRedoAddLeaf(lsn, record);
+ spgRedoAddLeaf(record);
break;
case XLOG_SPGIST_MOVE_LEAFS:
- spgRedoMoveLeafs(lsn, record);
+ spgRedoMoveLeafs(record);
break;
case XLOG_SPGIST_ADD_NODE:
- spgRedoAddNode(lsn, record);
+ spgRedoAddNode(record);
break;
case XLOG_SPGIST_SPLIT_TUPLE:
- spgRedoSplitTuple(lsn, record);
+ spgRedoSplitTuple(record);
break;
case XLOG_SPGIST_PICKSPLIT:
- spgRedoPickSplit(lsn, record);
+ spgRedoPickSplit(record);
break;
case XLOG_SPGIST_VACUUM_LEAF:
- spgRedoVacuumLeaf(lsn, record);
+ spgRedoVacuumLeaf(record);
break;
case XLOG_SPGIST_VACUUM_ROOT:
- spgRedoVacuumRoot(lsn, record);
+ spgRedoVacuumRoot(record);
break;
case XLOG_SPGIST_VACUUM_REDIRECT:
- spgRedoVacuumRedirect(lsn, record);
+ spgRedoVacuumRedirect(record);
break;
default:
elog(PANIC, "spg_redo: unknown op code %u", info);
diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README
index 92b12fbb6c2..ba6ae05d653 100644
--- a/src/backend/access/transam/README
+++ b/src/backend/access/transam/README
@@ -440,96 +440,164 @@ happen before the WAL record is inserted; see notes in SyncOneBuffer().)
Note that marking a buffer dirty with MarkBufferDirty() should only
happen iff you write a WAL record; see Writing Hints below.
-5. If the relation requires WAL-logging, build a WAL log record and pass it
-to XLogInsert(); then update the page's LSN using the returned XLOG
-location. For instance,
+5. If the relation requires WAL-logging, build a WAL record using
+XLogBeginInsert and XLogRegister* functions, and insert it. (See
+"Constructing a WAL record" below.) Then update the page's LSN using the
+returned XLOG location. For instance,
- recptr = XLogInsert(rmgr_id, info, rdata);
+ XLogBeginInsert();
+ XLogRegisterBuffer(...)
+ XLogRegisterData(...)
+ recptr = XLogInsert(rmgr_id, info);
PageSetLSN(dp, recptr);
- // Note that we no longer do PageSetTLI() from 9.3 onwards
- // since that field on a page has now changed its meaning.
6. END_CRIT_SECTION()
7. Unlock and unpin the buffer(s).
-XLogInsert's "rdata" argument is an array of pointer/size items identifying
-chunks of data to be written in the XLOG record, plus optional shared-buffer
-IDs for chunks that are in shared buffers rather than temporary variables.
-The "rdata" array must mention (at least once) each of the shared buffers
-being modified, unless the action is such that the WAL replay routine can
-reconstruct the entire page contents. XLogInsert includes the logic that
-tests to see whether a shared buffer has been modified since the last
-checkpoint. If not, the entire page contents are logged rather than just the
-portion(s) pointed to by "rdata".
-
-Because XLogInsert drops the rdata components associated with buffers it
-chooses to log in full, the WAL replay routines normally need to test to see
-which buffers were handled that way --- otherwise they may be misled about
-what the XLOG record actually contains. XLOG records that describe multi-page
-changes therefore require some care to design: you must be certain that you
-know what data is indicated by each "BKP" bit. An example of the trickiness
-is that in a HEAP_UPDATE record, BKP(0) normally is associated with the source
-page and BKP(1) is associated with the destination page --- but if these are
-the same page, only BKP(0) would have been set.
-
-For this reason as well as the risk of deadlocking on buffer locks, it's best
-to design WAL records so that they reflect small atomic actions involving just
-one or a few pages. The current XLOG infrastructure cannot handle WAL records
-involving references to more than four shared buffers, anyway.
-
-In the case where the WAL record contains enough information to re-generate
-the entire contents of a page, do *not* show that page's buffer ID in the
-rdata array, even if some of the rdata items point into the buffer. This is
-because you don't want XLogInsert to log the whole page contents. The
-standard replay-routine pattern for this case is
-
- buffer = XLogReadBuffer(rnode, blkno, true);
- Assert(BufferIsValid(buffer));
- page = (Page) BufferGetPage(buffer);
-
- ... initialize the page ...
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
-In the case where the WAL record provides only enough information to
-incrementally update the page, the rdata array *must* mention the buffer
-ID at least once; otherwise there is no defense against torn-page problems.
-The standard replay-routine pattern for this case is
-
- if (XLogReadBufferForRedo(lsn, record, N, rnode, blkno, &buffer) == BLK_NEEDS_REDO)
- {
- page = (Page) BufferGetPage(buffer);
-
- ... apply the change ...
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- }
- if (BufferIsValid(buffer))
- UnlockReleaseBuffer(buffer);
-
-XLogReadBufferForRedo reads the page from disk, and checks what action needs to
-be taken to the page. If the XLR_BKP_BLOCK(N) flag is set, it restores the
-full page image and returns BLK_RESTORED. If there is no full page image, but
-page cannot be found or if the change has already been replayed (i.e. the
-page's LSN >= the record we're replaying), it returns BLK_NOTFOUND or BLK_DONE,
-respectively. Usually, the redo routine only needs to pay attention to the
-BLK_NEEDS_REDO return code, which means that the routine should apply the
-incremental change. In any case, the caller is responsible for unlocking and
-releasing the buffer. Note that XLogReadBufferForRedo returns the buffer
-locked even if no redo is required, unless the page does not exist.
-
-As noted above, for a multi-page update you need to be able to determine
-which XLR_BKP_BLOCK(N) flag applies to each page. If a WAL record reflects
-a combination of fully-rewritable and incremental updates, then the rewritable
-pages don't count for the XLR_BKP_BLOCK(N) numbering. (XLR_BKP_BLOCK(N) is
-associated with the N'th distinct buffer ID seen in the "rdata" array, and
-per the above discussion, fully-rewritable buffers shouldn't be mentioned in
-"rdata".)
+Complex changes (such as a multilevel index insertion) normally need to be
+described by a series of atomic-action WAL records. The intermediate states
+must be self-consistent, so that if the replay is interrupted between any
+two actions, the system is fully functional. In btree indexes, for example,
+a page split requires a new page to be allocated, and an insertion of a new
+key in the parent btree level, but for locking reasons this has to be
+reflected by two separate WAL records. Replaying the first record, to
+allocate the new page and move tuples to it, sets a flag on the page to
+indicate that the key has not been inserted to the parent yet. Replaying the
+second record clears the flag. This intermediate state is never seen by
+other backends during normal operation, because the lock on the child page
+is held across the two actions, but will be seen if the operation is
+interrupted before writing the second WAL record. The search algorithm works
+with the intermediate state as normal, but if an insertion encounters a page
+with the incomplete-split flag set, it will finish the interrupted split by
+inserting the key to the parent, before proceeding.
+
+
+Constructing a WAL record
+-------------------------
+
+A WAL record consists of a header common to all WAL record types,
+record-specific data, and information about the data blocks modified. Each
+modified data block is identified by an ID number, and can optionally have
+more record-specific data associated with the block. If XLogInsert decides
+that a full-page image of a block needs to be taken, the data associated
+with that block is not included.
+
+The API for constructing a WAL record consists of five functions:
+XLogBeginInsert, XLogRegisterBuffer, XLogRegisterData, XLogRegisterBufData,
+and XLogInsert. First, call XLogBeginInsert(). Then register all the buffers
+modified, and data needed to replay the changes, using XLogRegister*
+functions. Finally, insert the constructed record to the WAL by calling
+XLogInsert().
+
+ XLogBeginInsert();
+
+ /* register buffers modified as part of this WAL-logged action */
+ XLogRegisterBuffer(0, lbuffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(1, rbuffer, REGBUF_STANDARD);
+
+ /* register data that is always included in the WAL record */
+ XLogRegisterData(&xlrec, SizeOfFictionalAction);
+
+ /*
+ * register data associated with a buffer. This will not be included
+ * in the record if a full-page image is taken.
+ */
+ XLogRegisterBufData(0, tuple->data, tuple->len);
+
+ /* more data associated with the buffer */
+ XLogRegisterBufData(0, data2, len2);
+
+ /*
+ * Ok, all the data and buffers to include in the WAL record have
+ * been registered. Insert the record.
+ */
+ recptr = XLogInsert(RM_FOO_ID, XLOG_FOOBAR_DO_STUFF);
+
+Details of the API functions:
+
+void XLogBeginInsert(void)
+
+ Must be called before XLogRegisterBuffer and XLogRegisterData.
+
+void XLogResetInsertion(void)
+
+ Clear any currently registered data and buffers from the WAL record
+ construction workspace. This is only needed if you have already called
+ XLogBeginInsert(), but decide to not insert the record after all.
+
+void XLogEnsureRecordSpace(int max_block_id, int nrdatas)
+
+ Normally, the WAL record construction buffers have the following limits:
+
+ * highest block ID that can be used is 4 (allowing five block references)
+ * Max 20 chunks of registered data
+
+ These default limits are enough for most record types that change some
+ on-disk structures. For the odd case that requires more data, or needs to
+ modify more buffers, these limits can be raised by calling
+ XLogEnsureRecordSpace(). XLogEnsureRecordSpace() must be called before
+ XLogBeginInsert(), and outside a critical section.
+
+void XLogRegisterBuffer(uint8 block_id, Buffer buf, uint8 flags);
+
+ XLogRegisterBuffer adds information about a data block to the WAL record.
+ block_id is an arbitrary number used to identify this page reference in
+ the redo routine. The information needed to re-find the page at redo -
+ relfilenode, fork, and block number - is included in the WAL record.
+
+ XLogInsert will automatically include a full copy of the page contents, if
+ this is the first modification of the buffer since the last checkpoint.
+ It is important to register every buffer modified by the action with
+ XLogRegisterBuffer, to avoid torn-page hazards.
+
+ The flags control when and how the buffer contents are included in the
+ WAL record. Normally, a full-page image is taken only if the page has not
+ been modified since the last checkpoint, and only if full_page_writes=on
+ or an online backup is in progress. The REGBUF_FORCE_IMAGE flag can be
+ used to force a full-page image to always be included; that is useful
+ e.g. for an operation that rewrites most of the page, so that tracking the
+ details is not worth it. For the rare case where it is not necessary to
+ protect from torn pages, the REGBUF_NO_IMAGE flag can be used to suppress a
+ full-page image from being taken. REGBUF_WILL_INIT also suppresses a full
+ page image, but the redo routine must re-generate the page from scratch,
+ without looking at the old page contents. Re-initializing the page
+ protects from torn page hazards like a full page image does.
+
+ The REGBUF_STANDARD flag can be specified together with the other flags to
+ indicate that the page follows the standard page layout. It causes the
+ area between pd_lower and pd_upper to be left out from the image, reducing
+ WAL volume.
+
+ If the REGBUF_KEEP_DATA flag is given, any per-buffer data registered with
+ XLogRegisterBufData() is included in the WAL record even if a full-page
+ image is taken.
+
+void XLogRegisterData(char *data, int len);
+
+ XLogRegisterData is used to include arbitrary data in the WAL record. If
+ XLogRegisterData() is called multiple times, the data are appended, and
+ will be made available to the redo routine as one contiguous chunk.
+
+void XLogRegisterBufData(uint8 block_id, char *data, int len);
+
+ XLogRegisterBufData is used to include data associated with a particular
+ buffer that was registered earlier with XLogRegisterBuffer(). If
+ XLogRegisterBufData() is called multiple times with the same block ID, the
+ data are appended, and will be made available to the redo routine as one
+ contiguous chunk.
+
+ If a full-page image of the buffer is taken at insertion, the data is not
+ included in the WAL record, unless the REGBUF_KEEP_DATA flag is used.
+
+
+Writing a REDO routine
+----------------------
+
+A REDO routine uses the data and page references included in the WAL record
+to reconstruct the new state of the page. The record decoding functions
+and macros in xlogreader.c/h can be used to extract the data from the record.
When replaying a WAL record that describes changes on multiple pages, you
must be careful to lock the pages properly to prevent concurrent Hot Standby
@@ -545,23 +613,6 @@ either an exclusive buffer lock or a shared lock plus buffer header lock,
or be writing the data block directly rather than through shared buffers
while holding AccessExclusiveLock on the relation.
-Due to all these constraints, complex changes (such as a multilevel index
-insertion) normally need to be described by a series of atomic-action WAL
-records. The intermediate states must be self-consistent, so that if the
-replay is interrupted between any two actions, the system is fully
-functional. In btree indexes, for example, a page split requires a new page
-to be allocated, and an insertion of a new key in the parent btree level,
-but for locking reasons this has to be reflected by two separate WAL
-records. Replaying the first record, to allocate the new page and move
-tuples to it, sets a flag on the page to indicate that the key has not been
-inserted to the parent yet. Replaying the second record clears the flag.
-This intermediate state is never seen by other backends during normal
-operation, because the lock on the child page is held across the two
-actions, but will be seen if the operation is interrupted before writing
-the second WAL record. The search algorithm works with the intermediate
-state as normal, but if an insertion encounters a page with the
-incomplete-split flag set, it will finish the interrupted split by
-inserting the key to the parent, before proceeding.
Writing Hints
-------------
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 5ee070bd0a9..313bd042404 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -699,13 +699,9 @@ CLOGPagePrecedes(int page1, int page2)
static void
WriteZeroPageXlogRec(int pageno)
{
- XLogRecData rdata;
-
- rdata.data = (char *) (&pageno);
- rdata.len = sizeof(int);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&pageno), sizeof(int));
+ (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
}
/*
@@ -717,14 +713,11 @@ WriteZeroPageXlogRec(int pageno)
static void
WriteTruncateXlogRec(int pageno)
{
- XLogRecData rdata;
XLogRecPtr recptr;
- rdata.data = (char *) (&pageno);
- rdata.len = sizeof(int);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&pageno), sizeof(int));
+ recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE);
XLogFlush(recptr);
}
@@ -732,12 +725,12 @@ WriteTruncateXlogRec(int pageno)
* CLOG resource manager's routines
*/
void
-clog_redo(XLogRecPtr lsn, XLogRecord *record)
+clog_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in clog records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == CLOG_ZEROPAGE)
{
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 3c20bb37e4c..fff9f837330 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -720,7 +720,6 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
{
MultiXactId multi;
MultiXactOffset offset;
- XLogRecData rdata[2];
xl_multixact_create xlrec;
debug_elog3(DEBUG2, "Create: %s",
@@ -796,17 +795,11 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
* the status flags in one XLogRecData, then all the xids in another one?
* Not clear that it's worth the trouble though.
*/
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = SizeOfMultiXactCreate;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate);
+ XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember));
- rdata[1].data = (char *) members;
- rdata[1].len = nmembers * sizeof(MultiXactMember);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
-
- (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
+ (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
/* Now enter the information into the OFFSETs and MEMBERs logs */
RecordNewMultiXact(multi, offset, nmembers, members);
@@ -2705,25 +2698,21 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
static void
WriteMZeroPageXlogRec(int pageno, uint8 info)
{
- XLogRecData rdata;
-
- rdata.data = (char *) (&pageno);
- rdata.len = sizeof(int);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- (void) XLogInsert(RM_MULTIXACT_ID, info, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&pageno), sizeof(int));
+ (void) XLogInsert(RM_MULTIXACT_ID, info);
}
/*
* MULTIXACT resource manager's routines
*/
void
-multixact_redo(XLogRecPtr lsn, XLogRecord *record)
+multixact_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in multixact records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
{
@@ -2775,7 +2764,7 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record)
* should be unnecessary, since any XID found here ought to have other
* evidence in the XLOG, but let's be safe.
*/
- max_xid = record->xl_xid;
+ max_xid = XLogRecGetXid(record);
for (i = 0; i < xlrec->nmembers; i++)
{
if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index d23c292edcd..40de84e934e 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -889,14 +889,21 @@ typedef struct TwoPhaseRecordOnDisk
/*
* During prepare, the state file is assembled in memory before writing it
- * to WAL and the actual state file. We use a chain of XLogRecData blocks
- * so that we will be able to pass the state file contents directly to
- * XLogInsert.
+ * to WAL and the actual state file. We use a chain of StateFileChunk blocks
+ * for that.
*/
+typedef struct StateFileChunk
+{
+ char *data;
+ uint32 len;
+ struct StateFileChunk *next;
+} StateFileChunk;
+
static struct xllist
{
- XLogRecData *head; /* first data block in the chain */
- XLogRecData *tail; /* last block in chain */
+ StateFileChunk *head; /* first data block in the chain */
+ StateFileChunk *tail; /* last block in chain */
+ uint32 num_chunks;
uint32 bytes_free; /* free bytes left in tail block */
uint32 total_len; /* total data bytes in chain */
} records;
@@ -917,11 +924,11 @@ save_state_data(const void *data, uint32 len)
if (padlen > records.bytes_free)
{
- records.tail->next = palloc0(sizeof(XLogRecData));
+ records.tail->next = palloc0(sizeof(StateFileChunk));
records.tail = records.tail->next;
- records.tail->buffer = InvalidBuffer;
records.tail->len = 0;
records.tail->next = NULL;
+ records.num_chunks++;
records.bytes_free = Max(padlen, 512);
records.tail->data = palloc(records.bytes_free);
@@ -951,8 +958,7 @@ StartPrepare(GlobalTransaction gxact)
SharedInvalidationMessage *invalmsgs;
/* Initialize linked list */
- records.head = palloc0(sizeof(XLogRecData));
- records.head->buffer = InvalidBuffer;
+ records.head = palloc0(sizeof(StateFileChunk));
records.head->len = 0;
records.head->next = NULL;
@@ -960,6 +966,7 @@ StartPrepare(GlobalTransaction gxact)
records.head->data = palloc(records.bytes_free);
records.tail = records.head;
+ records.num_chunks = 1;
records.total_len = 0;
@@ -1019,7 +1026,7 @@ EndPrepare(GlobalTransaction gxact)
TransactionId xid = pgxact->xid;
TwoPhaseFileHeader *hdr;
char path[MAXPGPATH];
- XLogRecData *record;
+ StateFileChunk *record;
pg_crc32 statefile_crc;
pg_crc32 bogus_crc;
int fd;
@@ -1117,12 +1124,16 @@ EndPrepare(GlobalTransaction gxact)
* We save the PREPARE record's location in the gxact for later use by
* CheckPointTwoPhase.
*/
+ XLogEnsureRecordSpace(0, records.num_chunks);
+
START_CRIT_SECTION();
MyPgXact->delayChkpt = true;
- gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE,
- records.head);
+ XLogBeginInsert();
+ for (record = records.head; record != NULL; record = record->next)
+ XLogRegisterData(record->data, record->len);
+ gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE);
XLogFlush(gxact->prepare_lsn);
/* If we crash now, we have prepared: WAL replay will fix things */
@@ -1180,6 +1191,7 @@ EndPrepare(GlobalTransaction gxact)
SyncRepWaitForLSN(gxact->prepare_lsn);
records.tail = records.head = NULL;
+ records.num_chunks = 0;
}
/*
@@ -2071,8 +2083,6 @@ RecordTransactionCommitPrepared(TransactionId xid,
SharedInvalidationMessage *invalmsgs,
bool initfileinval)
{
- XLogRecData rdata[4];
- int lastrdata = 0;
xl_xact_commit_prepared xlrec;
XLogRecPtr recptr;
@@ -2094,39 +2104,24 @@ RecordTransactionCommitPrepared(TransactionId xid,
xlrec.crec.nsubxacts = nchildren;
xlrec.crec.nmsgs = ninvalmsgs;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactCommitPrepared;
- rdata[0].buffer = InvalidBuffer;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitPrepared);
+
/* dump rels to delete */
if (nrels > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rels;
- rdata[1].len = nrels * sizeof(RelFileNode);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[lastrdata].next = &(rdata[2]);
- rdata[2].data = (char *) children;
- rdata[2].len = nchildren * sizeof(TransactionId);
- rdata[2].buffer = InvalidBuffer;
- lastrdata = 2;
- }
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
+
/* dump cache invalidation messages */
if (ninvalmsgs > 0)
- {
- rdata[lastrdata].next = &(rdata[3]);
- rdata[3].data = (char *) invalmsgs;
- rdata[3].len = ninvalmsgs * sizeof(SharedInvalidationMessage);
- rdata[3].buffer = InvalidBuffer;
- lastrdata = 3;
- }
- rdata[lastrdata].next = NULL;
+ XLogRegisterData((char *) invalmsgs,
+ ninvalmsgs * sizeof(SharedInvalidationMessage));
- recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED, rdata);
+ recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED);
/*
* We don't currently try to sleep before flush here ... nor is there any
@@ -2169,8 +2164,6 @@ RecordTransactionAbortPrepared(TransactionId xid,
int nrels,
RelFileNode *rels)
{
- XLogRecData rdata[3];
- int lastrdata = 0;
xl_xact_abort_prepared xlrec;
XLogRecPtr recptr;
@@ -2189,30 +2182,20 @@ RecordTransactionAbortPrepared(TransactionId xid,
xlrec.arec.xact_time = GetCurrentTimestamp();
xlrec.arec.nrels = nrels;
xlrec.arec.nsubxacts = nchildren;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactAbortPrepared;
- rdata[0].buffer = InvalidBuffer;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbortPrepared);
+
/* dump rels to delete */
if (nrels > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rels;
- rdata[1].len = nrels * sizeof(RelFileNode);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[lastrdata].next = &(rdata[2]);
- rdata[2].data = (char *) children;
- rdata[2].len = nchildren * sizeof(TransactionId);
- rdata[2].buffer = InvalidBuffer;
- lastrdata = 2;
- }
- rdata[lastrdata].next = NULL;
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
- recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED, rdata);
+ recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED);
/* Always flush, since we're about to remove the 2PC state file */
XLogFlush(recptr);
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 6f92bad07ca..763e9deb6f5 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -571,7 +571,6 @@ AssignTransactionId(TransactionState s)
if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS ||
log_unknown_top)
{
- XLogRecData rdata[2];
xl_xact_assignment xlrec;
/*
@@ -582,17 +581,12 @@ AssignTransactionId(TransactionState s)
Assert(TransactionIdIsValid(xlrec.xtop));
xlrec.nsubxacts = nUnreportedXids;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = MinSizeOfXactAssignment;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &rdata[1];
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, MinSizeOfXactAssignment);
+ XLogRegisterData((char *) unreportedXids,
+ nUnreportedXids * sizeof(TransactionId));
- rdata[1].data = (char *) unreportedXids;
- rdata[1].len = nUnreportedXids * sizeof(TransactionId);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
-
- (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);
+ (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT);
nUnreportedXids = 0;
/* mark top, not current xact as having been logged */
@@ -1087,8 +1081,6 @@ RecordTransactionCommit(void)
if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit ||
XLogLogicalInfoActive())
{
- XLogRecData rdata[4];
- int lastrdata = 0;
xl_xact_commit xlrec;
/*
@@ -1107,63 +1099,38 @@ RecordTransactionCommit(void)
xlrec.nrels = nrels;
xlrec.nsubxacts = nchildren;
xlrec.nmsgs = nmsgs;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactCommit;
- rdata[0].buffer = InvalidBuffer;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommit);
/* dump rels to delete */
if (nrels > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rels;
- rdata[1].len = nrels * sizeof(RelFileNode);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) rels,
+ nrels * sizeof(RelFileNode));
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[lastrdata].next = &(rdata[2]);
- rdata[2].data = (char *) children;
- rdata[2].len = nchildren * sizeof(TransactionId);
- rdata[2].buffer = InvalidBuffer;
- lastrdata = 2;
- }
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
/* dump shared cache invalidation messages */
if (nmsgs > 0)
- {
- rdata[lastrdata].next = &(rdata[3]);
- rdata[3].data = (char *) invalMessages;
- rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
- rdata[3].buffer = InvalidBuffer;
- lastrdata = 3;
- }
- rdata[lastrdata].next = NULL;
-
- (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
+ XLogRegisterData((char *) invalMessages,
+ nmsgs * sizeof(SharedInvalidationMessage));
+ (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT);
}
else
{
- XLogRecData rdata[2];
- int lastrdata = 0;
xl_xact_commit_compact xlrec;
xlrec.xact_time = xactStopTimestamp;
xlrec.nsubxacts = nchildren;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactCommitCompact;
- rdata[0].buffer = InvalidBuffer;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitCompact);
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) children;
- rdata[1].len = nchildren * sizeof(TransactionId);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
- rdata[lastrdata].next = NULL;
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
- (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata);
+ (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT);
}
}
@@ -1436,8 +1403,6 @@ RecordTransactionAbort(bool isSubXact)
RelFileNode *rels;
int nchildren;
TransactionId *children;
- XLogRecData rdata[3];
- int lastrdata = 0;
xl_xact_abort xlrec;
/*
@@ -1486,30 +1451,20 @@ RecordTransactionAbort(bool isSubXact)
}
xlrec.nrels = nrels;
xlrec.nsubxacts = nchildren;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactAbort;
- rdata[0].buffer = InvalidBuffer;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbort);
+
/* dump rels to delete */
if (nrels > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rels;
- rdata[1].len = nrels * sizeof(RelFileNode);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[lastrdata].next = &(rdata[2]);
- rdata[2].data = (char *) children;
- rdata[2].len = nchildren * sizeof(TransactionId);
- rdata[2].buffer = InvalidBuffer;
- lastrdata = 2;
- }
- rdata[lastrdata].next = NULL;
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
- (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
+ (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT);
/*
* Report the latest async abort LSN, so that the WAL writer knows to
@@ -2351,6 +2306,9 @@ AbortTransaction(void)
AbortBufferIO();
UnlockBuffers();
+ /* Reset WAL record construction state */
+ XLogResetInsertion();
+
/*
* Also clean up any open wait for lock, since the lock manager will choke
* if we try to wait for another lock before doing this.
@@ -4299,6 +4257,9 @@ AbortSubTransaction(void)
AbortBufferIO();
UnlockBuffers();
+ /* Reset WAL record construction state */
+ XLogResetInsertion();
+
/*
* Also clean up any open wait for lock, since the lock manager will choke
* if we try to wait for another lock before doing this.
@@ -4938,42 +4899,42 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
}
void
-xact_redo(XLogRecPtr lsn, XLogRecord *record)
+xact_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in xact records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_XACT_COMMIT_COMPACT)
{
xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) XLogRecGetData(record);
- xact_redo_commit_compact(xlrec, record->xl_xid, lsn);
+ xact_redo_commit_compact(xlrec, XLogRecGetXid(record), record->EndRecPtr);
}
else if (info == XLOG_XACT_COMMIT)
{
xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
- xact_redo_commit(xlrec, record->xl_xid, lsn);
+ xact_redo_commit(xlrec, XLogRecGetXid(record), record->EndRecPtr);
}
else if (info == XLOG_XACT_ABORT)
{
xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
- xact_redo_abort(xlrec, record->xl_xid);
+ xact_redo_abort(xlrec, XLogRecGetXid(record));
}
else if (info == XLOG_XACT_PREPARE)
{
/* the record contents are exactly the 2PC file */
- RecreateTwoPhaseFile(record->xl_xid,
- XLogRecGetData(record), record->xl_len);
+ RecreateTwoPhaseFile(XLogRecGetXid(record),
+ XLogRecGetData(record), XLogRecGetDataLen(record));
}
else if (info == XLOG_XACT_COMMIT_PREPARED)
{
xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
- xact_redo_commit(&xlrec->crec, xlrec->xid, lsn);
+ xact_redo_commit(&xlrec->crec, xlrec->xid, record->EndRecPtr);
RemoveTwoPhaseFile(xlrec->xid, false);
}
else if (info == XLOG_XACT_ABORT_PREPARED)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 60531277dc6..2059bbeda4a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -757,10 +757,10 @@ static MemoryContext walDebugCxt = NULL;
static void readRecoveryCommandFile(void);
static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
-static bool recoveryStopsBefore(XLogRecord *record);
-static bool recoveryStopsAfter(XLogRecord *record);
+static bool recoveryStopsBefore(XLogReaderState *record);
+static bool recoveryStopsAfter(XLogReaderState *record);
static void recoveryPausesHere(void);
-static bool recoveryApplyDelay(XLogRecord *record);
+static bool recoveryApplyDelay(XLogReaderState *record);
static void SetLatestXTime(TimestampTz xtime);
static void SetCurrentChunkStartTime(TimestampTz xtime);
static void CheckRequiredParameterValues(void);
@@ -807,9 +807,9 @@ static char *str_time(pg_time_t tnow);
static bool CheckForStandbyTrigger(void);
#ifdef WAL_DEBUG
-static void xlog_outrec(StringInfo buf, XLogRecord *record);
+static void xlog_outrec(StringInfo buf, XLogReaderState *record);
#endif
-static void xlog_outdesc(StringInfo buf, RmgrId rmid, XLogRecord *record);
+static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
@@ -861,7 +861,6 @@ XLogRecPtr
XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
{
XLogCtlInsert *Insert = &XLogCtl->Insert;
- XLogRecData *rdt;
pg_crc32 rdata_crc;
bool inserted;
XLogRecord *rechdr = (XLogRecord *) rdata->data;
@@ -870,28 +869,13 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
XLogRecPtr StartPos;
XLogRecPtr EndPos;
+ /* we assume that all of the record header is in the first chunk */
+ Assert(rdata->len >= SizeOfXLogRecord);
+
/* cross-check on whether we should be here or not */
if (!XLogInsertAllowed())
elog(ERROR, "cannot make new WAL entries during recovery");
- /*
- * Calculate CRC of the data, including all the backup blocks
- *
- * Note that the record header isn't added into the CRC initially since we
- * don't know the prev-link yet. Thus, the CRC will represent the CRC of
- * the whole record in the order: rdata, then backup blocks, then record
- * header.
- */
- INIT_CRC32C(rdata_crc);
- for (rdt = rdata->next; rdt != NULL; rdt = rdt->next)
- COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
-
- /*
- * Calculate CRC of the header, except for prev-link, because we don't
- * know it yet. It will be added later.
- */
- COMP_CRC32C(rdata_crc, ((char *) rechdr), offsetof(XLogRecord, xl_prev));
-
/*----------
*
* We have now done all the preparatory work we can without holding a
@@ -976,10 +960,11 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
if (inserted)
{
/*
- * Now that xl_prev has been filled in, finish CRC calculation of the
- * record header.
+ * Now that xl_prev has been filled in, calculate CRC of the record
+ * header.
*/
- COMP_CRC32C(rdata_crc, ((char *) &rechdr->xl_prev), sizeof(XLogRecPtr));
+ rdata_crc = rechdr->xl_crc;
+ COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(rdata_crc);
rechdr->xl_crc = rdata_crc;
@@ -1053,34 +1038,47 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
#ifdef WAL_DEBUG
if (XLOG_DEBUG)
{
+ static XLogReaderState *debug_reader = NULL;
StringInfoData buf;
- MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
+ StringInfoData recordBuf;
+ char *errormsg = NULL;
+ MemoryContext oldCxt;
+
+ oldCxt = MemoryContextSwitchTo(walDebugCxt);
initStringInfo(&buf);
appendStringInfo(&buf, "INSERT @ %X/%X: ",
(uint32) (EndPos >> 32), (uint32) EndPos);
- xlog_outrec(&buf, rechdr);
- if (rdata->data != NULL)
- {
- StringInfoData recordbuf;
- /*
- * We have to piece together the WAL record data from the
- * XLogRecData entries, so that we can pass it to the rm_desc
- * function as one contiguous chunk.
- */
- initStringInfo(&recordbuf);
- appendBinaryStringInfo(&recordbuf, (char *) rechdr, sizeof(XLogRecord));
- for (; rdata != NULL; rdata = rdata->next)
- appendBinaryStringInfo(&recordbuf, rdata->data, rdata->len);
+ /*
+ * We have to piece together the WAL record data from the XLogRecData
+ * entries, so that we can pass it to the rm_desc function as one
+ * contiguous chunk.
+ */
+ initStringInfo(&recordBuf);
+ for (; rdata != NULL; rdata = rdata->next)
+ appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
+
+ if (!debug_reader)
+ debug_reader = XLogReaderAllocate(NULL, NULL);
+ if (!debug_reader ||
+ !DecodeXLogRecord(debug_reader, (XLogRecord *) recordBuf.data,
+ &errormsg))
+ {
+ appendStringInfo(&buf, "error decoding record: %s",
+ errormsg ? errormsg : "no error message");
+ }
+ else
+ {
appendStringInfoString(&buf, " - ");
- xlog_outdesc(&buf, rechdr->xl_rmid, (XLogRecord *) recordbuf.data);
+ xlog_outdesc(&buf, debug_reader);
}
elog(LOG, "%s", buf.data);
+ pfree(buf.data);
+ pfree(recordBuf.data);
MemoryContextSwitchTo(oldCxt);
- MemoryContextReset(walDebugCxt);
}
#endif
@@ -1170,7 +1168,7 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
uint64 startbytepos;
uint64 endbytepos;
uint64 prevbytepos;
- uint32 size = SizeOfXLogRecord;
+ uint32 size = MAXALIGN(SizeOfXLogRecord);
XLogRecPtr ptr;
uint32 segleft;
@@ -1234,9 +1232,6 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
XLogRecPtr CurrPos;
XLogPageHeader pagehdr;
- /* The first chunk is the record header */
- Assert(rdata->len == SizeOfXLogRecord);
-
/*
* Get a pointer to the right place in the right WAL buffer to start
* inserting to.
@@ -1309,9 +1304,6 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
}
Assert(written == write_len);
- /* Align the end position, so that the next record starts aligned */
- CurrPos = MAXALIGN64(CurrPos);
-
/*
* If this was an xlog-switch, it's not enough to write the switch record,
* we also have to consume all the remaining space in the WAL segment. We
@@ -1341,6 +1333,11 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
CurrPos += XLOG_BLCKSZ;
}
}
+ else
+ {
+ /* Align the end position, so that the next record starts aligned */
+ CurrPos = MAXALIGN64(CurrPos);
+ }
if (CurrPos != EndPos)
elog(PANIC, "space reserved for WAL record does not match what was written");
@@ -4470,6 +4467,7 @@ BootStrapXLOG(void)
XLogPageHeader page;
XLogLongPageHeader longpage;
XLogRecord *record;
+ char *recptr;
bool use_existent;
uint64 sysidentifier;
struct timeval tv;
@@ -4541,17 +4539,23 @@ BootStrapXLOG(void)
longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
/* Insert the initial checkpoint record */
- record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
+ recptr = ((char *) page + SizeOfXLogLongPHD);
+ record = (XLogRecord *) recptr;
record->xl_prev = 0;
record->xl_xid = InvalidTransactionId;
- record->xl_tot_len = SizeOfXLogRecord + sizeof(checkPoint);
- record->xl_len = sizeof(checkPoint);
+ record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
record->xl_rmid = RM_XLOG_ID;
- memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint));
+ recptr += SizeOfXLogRecord;
+ /* fill the XLogRecordDataHeaderShort struct */
+ *(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
+ *(recptr++) = sizeof(checkPoint);
+ memcpy(recptr, &checkPoint, sizeof(checkPoint));
+ recptr += sizeof(checkPoint);
+ Assert(recptr - (char *) record == record->xl_tot_len);
INIT_CRC32C(crc);
- COMP_CRC32C(crc, &checkPoint, sizeof(checkPoint));
+ COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(crc);
record->xl_crc = crc;
@@ -4984,36 +4988,37 @@ exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo)
* timestamps.
*/
static bool
-getRecordTimestamp(XLogRecord *record, TimestampTz *recordXtime)
+getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
{
- uint8 record_info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+ uint8 rmid = XLogRecGetRmid(record);
- if (record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
+ if (rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
{
*recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time;
return true;
}
- if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_COMPACT)
+ if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_COMPACT)
{
*recordXtime = ((xl_xact_commit_compact *) XLogRecGetData(record))->xact_time;
return true;
}
- if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
+ if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
{
*recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time;
return true;
}
- if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED)
+ if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED)
{
*recordXtime = ((xl_xact_commit_prepared *) XLogRecGetData(record))->crec.xact_time;
return true;
}
- if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
+ if (rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
{
*recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time;
return true;
}
- if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED)
+ if (rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED)
{
*recordXtime = ((xl_xact_abort_prepared *) XLogRecGetData(record))->arec.xact_time;
return true;
@@ -5030,7 +5035,7 @@ getRecordTimestamp(XLogRecord *record, TimestampTz *recordXtime)
* new timeline's history file.
*/
static bool
-recoveryStopsBefore(XLogRecord *record)
+recoveryStopsBefore(XLogReaderState *record)
{
bool stopsHere = false;
uint8 record_info;
@@ -5052,14 +5057,14 @@ recoveryStopsBefore(XLogRecord *record)
}
/* Otherwise we only consider stopping before COMMIT or ABORT records. */
- if (record->xl_rmid != RM_XACT_ID)
+ if (XLogRecGetRmid(record) != RM_XACT_ID)
return false;
- record_info = record->xl_info & ~XLR_INFO_MASK;
+ record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
if (record_info == XLOG_XACT_COMMIT_COMPACT || record_info == XLOG_XACT_COMMIT)
{
isCommit = true;
- recordXid = record->xl_xid;
+ recordXid = XLogRecGetXid(record);
}
else if (record_info == XLOG_XACT_COMMIT_PREPARED)
{
@@ -5069,7 +5074,7 @@ recoveryStopsBefore(XLogRecord *record)
else if (record_info == XLOG_XACT_ABORT)
{
isCommit = false;
- recordXid = record->xl_xid;
+ recordXid = XLogRecGetXid(record);
}
else if (record_info == XLOG_XACT_ABORT_PREPARED)
{
@@ -5140,19 +5145,21 @@ recoveryStopsBefore(XLogRecord *record)
* record in XLogCtl->recoveryLastXTime.
*/
static bool
-recoveryStopsAfter(XLogRecord *record)
+recoveryStopsAfter(XLogReaderState *record)
{
uint8 record_info;
+ uint8 rmid;
TimestampTz recordXtime;
- record_info = record->xl_info & ~XLR_INFO_MASK;
+ record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+ rmid = XLogRecGetRmid(record);
/*
* There can be many restore points that share the same name; we stop at
* the first one.
*/
if (recoveryTarget == RECOVERY_TARGET_NAME &&
- record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
+ rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
{
xl_restore_point *recordRestorePointData;
@@ -5173,7 +5180,7 @@ recoveryStopsAfter(XLogRecord *record)
}
}
- if (record->xl_rmid == RM_XACT_ID &&
+ if (rmid == RM_XACT_ID &&
(record_info == XLOG_XACT_COMMIT_COMPACT ||
record_info == XLOG_XACT_COMMIT ||
record_info == XLOG_XACT_COMMIT_PREPARED ||
@@ -5192,7 +5199,7 @@ recoveryStopsAfter(XLogRecord *record)
else if (record_info == XLOG_XACT_ABORT_PREPARED)
recordXid = ((xl_xact_abort_prepared *) XLogRecGetData(record))->xid;
else
- recordXid = record->xl_xid;
+ recordXid = XLogRecGetXid(record);
/*
* There can be only one transaction end record with this exact
@@ -5307,7 +5314,7 @@ SetRecoveryPause(bool recoveryPause)
* usability.
*/
static bool
-recoveryApplyDelay(XLogRecord *record)
+recoveryApplyDelay(XLogReaderState *record)
{
uint8 record_info;
TimestampTz xtime;
@@ -5326,8 +5333,8 @@ recoveryApplyDelay(XLogRecord *record)
* so there is already opportunity for issues caused by early conflicts on
* standbys.
*/
- record_info = record->xl_info & ~XLR_INFO_MASK;
- if (!(record->xl_rmid == RM_XACT_ID &&
+ record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+ if (!(XLogRecGetRmid(record) == RM_XACT_ID &&
(record_info == XLOG_XACT_COMMIT_COMPACT ||
record_info == XLOG_XACT_COMMIT ||
record_info == XLOG_XACT_COMMIT_PREPARED)))
@@ -5696,7 +5703,7 @@ StartupXLOG(void)
record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true);
if (record != NULL)
{
- memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
+ memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
ereport(DEBUG1,
(errmsg("checkpoint record is at %X/%X",
@@ -5793,7 +5800,7 @@ StartupXLOG(void)
ereport(PANIC,
(errmsg("could not locate a valid checkpoint record")));
}
- memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
+ memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
}
@@ -6230,9 +6237,9 @@ StartupXLOG(void)
appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
(uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr,
(uint32) (EndRecPtr >> 32), (uint32) EndRecPtr);
- xlog_outrec(&buf, record);
+ xlog_outrec(&buf, xlogreader);
appendStringInfoString(&buf, " - ");
- xlog_outdesc(&buf, record->xl_rmid, record);
+ xlog_outdesc(&buf, xlogreader);
elog(LOG, "%s", buf.data);
pfree(buf.data);
}
@@ -6260,7 +6267,7 @@ StartupXLOG(void)
/*
* Have we reached our recovery target?
*/
- if (recoveryStopsBefore(record))
+ if (recoveryStopsBefore(xlogreader))
{
reachedStopPoint = true; /* see below */
break;
@@ -6270,7 +6277,7 @@ StartupXLOG(void)
* If we've been asked to lag the master, wait on latch until
* enough time has passed.
*/
- if (recoveryApplyDelay(record))
+ if (recoveryApplyDelay(xlogreader))
{
/*
* We test for paused recovery again here. If user sets
@@ -6285,7 +6292,7 @@ StartupXLOG(void)
/* Setup error traceback support for ereport() */
errcallback.callback = rm_redo_error_callback;
- errcallback.arg = (void *) record;
+ errcallback.arg = (void *) xlogreader;
errcallback.previous = error_context_stack;
error_context_stack = &errcallback;
@@ -6324,7 +6331,7 @@ StartupXLOG(void)
{
CheckPoint checkPoint;
- memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
+ memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
newTLI = checkPoint.ThisTimeLineID;
prevTLI = checkPoint.PrevTimeLineID;
}
@@ -6332,7 +6339,7 @@ StartupXLOG(void)
{
xl_end_of_recovery xlrec;
- memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
+ memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
newTLI = xlrec.ThisTimeLineID;
prevTLI = xlrec.PrevTimeLineID;
}
@@ -6366,7 +6373,7 @@ StartupXLOG(void)
RecordKnownAssignedTransactionIds(record->xl_xid);
/* Now apply the WAL record itself */
- RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
+ RmgrTable[record->xl_rmid].rm_redo(xlogreader);
/* Pop the error context stack */
error_context_stack = errcallback.previous;
@@ -6394,7 +6401,7 @@ StartupXLOG(void)
WalSndWakeup();
/* Exit loop if we reached inclusive recovery target */
- if (recoveryStopsAfter(record))
+ if (recoveryStopsAfter(xlogreader))
{
reachedStopPoint = true;
break;
@@ -7148,8 +7155,7 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
}
return NULL;
}
- if (record->xl_len != sizeof(CheckPoint) ||
- record->xl_tot_len != SizeOfXLogRecord + sizeof(CheckPoint))
+ if (record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
{
switch (whichChkpt)
{
@@ -7194,6 +7200,9 @@ InitXLOGAccess(void)
(void) GetRedoRecPtr();
/* Also update our copy of doPageWrites. */
doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites);
+
+ /* Also initialize the working areas for constructing WAL records */
+ InitXLogInsert();
}
/*
@@ -7490,7 +7499,6 @@ CreateCheckPoint(int flags)
CheckPoint checkPoint;
XLogRecPtr recptr;
XLogCtlInsert *Insert = &XLogCtl->Insert;
- XLogRecData rdata;
uint32 freespace;
XLogSegNo _logSegNo;
XLogRecPtr curInsert;
@@ -7760,15 +7768,11 @@ CreateCheckPoint(int flags)
/*
* Now insert the checkpoint record into XLOG.
*/
- rdata.data = (char *) (&checkPoint);
- rdata.len = sizeof(checkPoint);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
-
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
recptr = XLogInsert(RM_XLOG_ID,
shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
- XLOG_CHECKPOINT_ONLINE,
- &rdata);
+ XLOG_CHECKPOINT_ONLINE);
XLogFlush(recptr);
@@ -7908,7 +7912,6 @@ static void
CreateEndOfRecoveryRecord(void)
{
xl_end_of_recovery xlrec;
- XLogRecData rdata;
XLogRecPtr recptr;
/* sanity check */
@@ -7926,12 +7929,9 @@ CreateEndOfRecoveryRecord(void)
START_CRIT_SECTION();
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xl_end_of_recovery);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
-
- recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
XLogFlush(recptr);
@@ -8307,13 +8307,9 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
void
XLogPutNextOid(Oid nextOid)
{
- XLogRecData rdata;
-
- rdata.data = (char *) (&nextOid);
- rdata.len = sizeof(Oid);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&nextOid), sizeof(Oid));
+ (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
/*
* We need not flush the NEXTOID record immediately, because any of the
@@ -8349,15 +8345,10 @@ XLogRecPtr
RequestXLogSwitch(void)
{
XLogRecPtr RecPtr;
- XLogRecData rdata;
-
- /* XLOG SWITCH, alone among xlog record types, has no data */
- rdata.buffer = InvalidBuffer;
- rdata.data = NULL;
- rdata.len = 0;
- rdata.next = NULL;
- RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH, &rdata);
+ /* XLOG SWITCH has no data */
+ XLogBeginInsert();
+ RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
return RecPtr;
}
@@ -8369,18 +8360,15 @@ XLogRecPtr
XLogRestorePoint(const char *rpName)
{
XLogRecPtr RecPtr;
- XLogRecData rdata;
xl_restore_point xlrec;
xlrec.rp_time = GetCurrentTimestamp();
strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xl_restore_point);
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
- RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT, &rdata);
+ RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
ereport(LOG,
(errmsg("restore point \"%s\" created at %X/%X",
@@ -8412,7 +8400,6 @@ XLogReportParameters(void)
*/
if (wal_level != ControlFile->wal_level || XLogIsNeeded())
{
- XLogRecData rdata;
xl_parameter_change xlrec;
XLogRecPtr recptr;
@@ -8423,12 +8410,10 @@ XLogReportParameters(void)
xlrec.wal_level = wal_level;
xlrec.wal_log_hints = wal_log_hints;
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xlrec);
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
- recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE, &rdata);
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
XLogFlush(recptr);
}
@@ -8486,14 +8471,10 @@ UpdateFullPageWrites(void)
*/
if (XLogStandbyInfoActive() && !RecoveryInProgress())
{
- XLogRecData rdata;
-
- rdata.data = (char *) (&fullPageWrites);
- rdata.len = sizeof(bool);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
- XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata);
+ XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
}
if (!fullPageWrites)
@@ -8558,12 +8539,13 @@ checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI)
* not all record types are related to control file updates.
*/
void
-xlog_redo(XLogRecPtr lsn, XLogRecord *record)
+xlog_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+ XLogRecPtr lsn = record->EndRecPtr;
- /* Backup blocks are not used by XLOG rmgr */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ /* in XLOG rmgr, backup blocks are only used by XLOG_FPI records */
+ Assert(!XLogRecHasAnyBlockRefs(record) || info == XLOG_FPI);
if (info == XLOG_NEXTOID)
{
@@ -8750,14 +8732,12 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
}
else if (info == XLOG_FPI)
{
- char *data;
- BkpBlock bkpb;
+ Buffer buffer;
/*
- * Full-page image (FPI) records contain a backup block stored
- * "inline" in the normal data since the locking when writing hint
- * records isn't sufficient to use the normal backup block mechanism,
- * which assumes exclusive lock on the buffer supplied.
+ * Full-page image (FPI) records contain nothing else but a backup
+ * block. The block reference must include a full-page image -
+ * otherwise there would be no point in this record.
*
* Since the only change in these backup block are hint bits, there
* are no recovery conflicts generated.
@@ -8766,11 +8746,9 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
* smgr implementation has no need to implement anything. Which means
* nothing is needed in md.c etc
*/
- data = XLogRecGetData(record);
- memcpy(&bkpb, data, sizeof(BkpBlock));
- data += sizeof(BkpBlock);
-
- RestoreBackupBlockContents(lsn, bkpb, data, false, false);
+ if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
+ elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
+ UnlockReleaseBuffer(buffer);
}
else if (info == XLOG_BACKUP_END)
{
@@ -8867,22 +8845,42 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
#ifdef WAL_DEBUG
static void
-xlog_outrec(StringInfo buf, XLogRecord *record)
+xlog_outrec(StringInfo buf, XLogReaderState *record)
{
- int i;
+ int block_id;
appendStringInfo(buf, "prev %X/%X; xid %u",
- (uint32) (record->xl_prev >> 32),
- (uint32) record->xl_prev,
- record->xl_xid);
+ (uint32) (XLogRecGetPrev(record) >> 32),
+ (uint32) XLogRecGetPrev(record),
+ XLogRecGetXid(record));
appendStringInfo(buf, "; len %u",
- record->xl_len);
+ XLogRecGetDataLen(record));
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+ /* decode block references */
+ for (block_id = 0; block_id <= record->max_block_id; block_id++)
{
- if (record->xl_info & XLR_BKP_BLOCK(i))
- appendStringInfo(buf, "; bkpb%d", i);
+ RelFileNode rnode;
+ ForkNumber forknum;
+ BlockNumber blk;
+
+ if (!XLogRecHasBlockRef(record, block_id))
+ continue;
+
+ XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
+ if (forknum != MAIN_FORKNUM)
+ appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, fork %u, blk %u",
+ block_id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ forknum,
+ blk);
+ else
+ appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, blk %u",
+ block_id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ blk);
+ if (XLogRecHasBlockImage(record, block_id))
+ appendStringInfo(buf, " FPW");
}
}
#endif /* WAL_DEBUG */
@@ -8892,17 +8890,18 @@ xlog_outrec(StringInfo buf, XLogRecord *record)
* optionally followed by a colon, a space, and a further description.
*/
static void
-xlog_outdesc(StringInfo buf, RmgrId rmid, XLogRecord *record)
+xlog_outdesc(StringInfo buf, XLogReaderState *record)
{
+ RmgrId rmid = XLogRecGetRmid(record);
+ uint8 info = XLogRecGetInfo(record);
const char *id;
appendStringInfoString(buf, RmgrTable[rmid].rm_name);
appendStringInfoChar(buf, '/');
- id = RmgrTable[rmid].rm_identify(record->xl_info);
+ id = RmgrTable[rmid].rm_identify(info);
if (id == NULL)
- appendStringInfo(buf, "UNKNOWN (%X): ",
- record->xl_info & ~XLR_INFO_MASK);
+ appendStringInfo(buf, "UNKNOWN (%X): ", info & ~XLR_INFO_MASK);
else
appendStringInfo(buf, "%s: ", id);
@@ -9411,7 +9410,6 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
XLogRecPtr startpoint;
XLogRecPtr stoppoint;
TimeLineID stoptli;
- XLogRecData rdata;
pg_time_t stamp_time;
char strfbuf[128];
char histfilepath[MAXPGPATH];
@@ -9618,11 +9616,9 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
/*
* Write the backup-end xlog record
*/
- rdata.data = (char *) (&startpoint);
- rdata.len = sizeof(startpoint);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
+ stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
stoptli = ThisTimeLineID;
/*
@@ -9930,15 +9926,13 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
static void
rm_redo_error_callback(void *arg)
{
- XLogRecord *record = (XLogRecord *) arg;
+ XLogReaderState *record = (XLogReaderState *) arg;
StringInfoData buf;
initStringInfo(&buf);
- xlog_outdesc(&buf, record->xl_rmid, record);
+ xlog_outdesc(&buf, record);
- /* don't bother emitting empty description */
- if (buf.len > 0)
- errcontext("xlog redo %s", buf.data);
+ errcontext("xlog redo %s", buf.data);
pfree(buf.data);
}
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index b83343bf5bd..89c407e521b 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -3,6 +3,12 @@
* xloginsert.c
* Functions for constructing WAL records
*
+ * Constructing a WAL record begins with a call to XLogBeginInsert,
+ * followed by a number of XLogRegister* calls. The registered data is
+ * collected in private working memory, and finally assembled into a chain
+ * of XLogRecData structs by a call to XLogRecordAssemble(). See
+ * access/transam/README for details.
+ *
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -24,39 +30,366 @@
#include "utils/memutils.h"
#include "pg_trace.h"
+/*
+ * For each block reference registered with XLogRegisterBuffer, we fill in
+ * a registered_buffer struct.
+ *
+ * XLogRegisterBlock fills in the same struct for pages that are not in the
+ * shared buffer pool. The structs live in the registered_buffers array and
+ * are indexed by block_id.
+ */
+typedef struct
+{
+ bool in_use; /* is this slot in use? */
+ uint8 flags; /* REGBUF_* flags */
+ RelFileNode rnode; /* identifies the relation and block */
+ ForkNumber forkno; /* fork of the relation the block belongs to */
+ BlockNumber block; /* block number within that fork */
+ Page page; /* page content */
+ uint32 rdata_len; /* total length of data in rdata chain */
+ XLogRecData *rdata_head; /* head of the chain of data registered with
+ * this block */
+ XLogRecData *rdata_tail; /* last entry in the chain, or &rdata_head if
+ * empty (the cast of &rdata_head to
+ * XLogRecData * presumably relies on 'next'
+ * being the first member of XLogRecData --
+ * confirm) */
+
+ XLogRecData bkp_rdatas[2]; /* temporary rdatas used to hold references to
+ * backup block data in XLogRecordAssemble();
+ * two entries, because a page image with a
+ * hole is emitted as two separate chunks */
+} registered_buffer;
+
+static registered_buffer *registered_buffers;
+static int max_registered_buffers; /* allocated size */
+static int max_registered_block_id = 0; /* highest block_id + 1
+ * currently registered */
+
+/*
+ * A chain of XLogRecDatas to hold the "main data" of a WAL record, registered
+ * with XLogRegisterData(...).
+ */
+static XLogRecData *mainrdata_head;
+static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head;
+static uint32 mainrdata_len; /* total # of bytes in chain */
+
+/*
+ * These are used to hold the record header while constructing a record.
+ * 'hdr_scratch' is not a plain variable, but is palloc'd at initialization,
+ * because we want it to be MAXALIGNed and padding bytes zeroed.
+ *
+ * For simplicity, it's allocated large enough to hold the headers for any
+ * WAL record.
+ */
+static XLogRecData hdr_rdt;
+static char *hdr_scratch = NULL;
+
+#define HEADER_SCRATCH_SIZE \
+ (SizeOfXLogRecord + \
+ MaxSizeOfXLogRecordBlockHeader * (XLR_MAX_BLOCK_ID + 1) + \
+ SizeOfXLogRecordDataHeaderLong)
+
+/*
+ * An array of XLogRecData structs, to hold registered data.
+ */
+static XLogRecData *rdatas;
+static int num_rdatas; /* entries currently used */
+static int max_rdatas; /* allocated size */
+
+static bool begininsert_called = false;
+
+/* Memory context to hold the registered buffer and data references. */
+static MemoryContext xloginsert_cxt;
+
static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
- XLogRecData *rdata,
XLogRecPtr RedoRecPtr, bool doPageWrites,
- XLogRecPtr *fpw_lsn, XLogRecData **rdt_lastnormal);
-static void XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb);
+ XLogRecPtr *fpw_lsn);
+
+/*
+ * Start building a new WAL record.
+ *
+ * Every XLogRegister* call, and the final XLogInsert(), must be preceded by
+ * a call to this function. The working area is expected to be in its reset
+ * state, i.e. no other record may be half-constructed.
+ */
+void
+XLogBeginInsert(void)
+{
+	/* the working area must have been left clean by the previous record */
+	Assert(!begininsert_called);
+	Assert(mainrdata_len == 0);
+	Assert(mainrdata_last == (XLogRecData *) &mainrdata_head);
+	Assert(max_registered_block_id == 0);
+
+	/* cross-check on whether we should be here or not */
+	if (XLogInsertAllowed())
+	{
+		begininsert_called = true;
+		return;
+	}
+
+	elog(ERROR, "cannot make new WAL entries during recovery");
+}
/*
- * Insert an XLOG record having the specified RMID and info bytes,
- * with the body of the record being the data chunk(s) described by
- * the rdata chain (see xloginsert.h for notes about rdata).
+ * Ensure that there are enough buffer and data slots in the working area,
+ * for subsequent XLogRegisterBuffer, XLogRegisterData and XLogRegisterBufData
+ * calls.
+ *
+ * There is always space for a small number of buffers and data chunks, enough
+ * for most record types. This function is for the exceptional cases that need
+ * more.
+ */
+void
+XLogEnsureRecordSpace(int max_block_id, int ndatas)
+{
+	int			wanted_block_id;
+	int			wanted_rdatas;
+	int			wanted_buffers;
+
+	/*
+	 * Growing the arrays allocates memory, and allocating memory inside a
+	 * critical section can fail, so this has to run before one is entered.
+	 * repalloc() checks the same thing, but checking here as well makes the
+	 * failure consistent even when the arrays are already large enough.
+	 */
+	Assert(CritSectionCount == 0);
+
+	/* requests below the built-in minimums are raised to the minimums */
+	wanted_block_id = (max_block_id < XLR_NORMAL_MAX_BLOCK_ID) ?
+		XLR_NORMAL_MAX_BLOCK_ID : max_block_id;
+	wanted_rdatas = (ndatas < XLR_NORMAL_RDATAS) ? XLR_NORMAL_RDATAS : ndatas;
+
+	if (wanted_block_id > XLR_MAX_BLOCK_ID)
+		elog(ERROR, "maximum number of WAL record block references exceeded");
+	wanted_buffers = wanted_block_id + 1;
+
+	if (max_registered_buffers < wanted_buffers)
+	{
+		int			nadded = wanted_buffers - max_registered_buffers;
+
+		registered_buffers = (registered_buffer *)
+			repalloc(registered_buffers,
+					 sizeof(registered_buffer) * wanted_buffers);
+
+		/*
+		 * Zero the newly added slots. Strictly only the padding bytes need
+		 * it, because they are included in WAL data, but clearing everything
+		 * is tidier.
+		 */
+		MemSet(&registered_buffers[max_registered_buffers], 0,
+			   nadded * sizeof(registered_buffer));
+		max_registered_buffers = wanted_buffers;
+	}
+
+	if (max_rdatas < wanted_rdatas)
+	{
+		rdatas = (XLogRecData *)
+			repalloc(rdatas, sizeof(XLogRecData) * wanted_rdatas);
+		max_rdatas = wanted_rdatas;
+	}
+}
+
+/*
+ * Throw away whatever has been registered for the record under construction
+ * and return the working area to the state XLogBeginInsert() expects.
+ */
+void
+XLogResetInsertion(void)
+{
+	int			slot = max_registered_block_id;
+
+	/* release every buffer slot that may have been claimed */
+	while (--slot >= 0)
+		registered_buffers[slot].in_use = false;
+	max_registered_block_id = 0;
+
+	/* forget all registered data chunks and empty the main-data chain */
+	num_rdatas = 0;
+	mainrdata_len = 0;
+	mainrdata_last = (XLogRecData *) &mainrdata_head;
+
+	begininsert_called = false;
+}
+
+/*
+ * Attach a buffer to the WAL record being constructed, under the given
+ * block_id. Every page modified by the WAL-logged operation must be
+ * registered this way.
+ */
+void
+XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
+{
+	registered_buffer *slot;
+
+	/* NO_IMAGE doesn't make sense with FORCE_IMAGE */
+	Assert((flags & REGBUF_FORCE_IMAGE) == 0 ||
+		   (flags & REGBUF_NO_IMAGE) == 0);
+	Assert(begininsert_called);
+
+	/* extend the range of block ids in use if this one lies beyond it */
+	if (max_registered_block_id <= block_id)
+	{
+		if (max_registered_buffers <= block_id)
+			elog(ERROR, "too many registered buffers");
+		max_registered_block_id = block_id + 1;
+	}
+
+	slot = registered_buffers + block_id;
+
+	BufferGetTag(buffer, &slot->rnode, &slot->forkno, &slot->block);
+	slot->page = BufferGetPage(buffer);
+	slot->flags = flags;
+
+	/* start out with an empty per-block data chain */
+	slot->rdata_len = 0;
+	slot->rdata_tail = (XLogRecData *) &slot->rdata_head;
+
+	/*
+	 * In assert-enabled builds, verify that no other block_id already refers
+	 * to the very same page.
+	 */
+#ifdef USE_ASSERT_CHECKING
+	{
+		int			other;
+
+		for (other = 0; other < max_registered_block_id; other++)
+		{
+			registered_buffer *peer = &registered_buffers[other];
+
+			if (other != block_id && peer->in_use)
+			{
+				Assert(!RelFileNodeEquals(peer->rnode, slot->rnode) ||
+					   peer->forkno != slot->forkno ||
+					   peer->block != slot->block);
+			}
+		}
+	}
+#endif
+
+	slot->in_use = true;
+}
+
+/*
+ * Like XLogRegisterBuffer, but for registering a block that's not in the
+ * shared buffer pool (i.e. when you don't have a Buffer for it).
+ *
+ * The caller identifies the block explicitly with rnode/forknum/blknum and
+ * passes the page contents in 'page'. Only the pointer is remembered here,
+ * so the page must stay valid until the record has been inserted.
+ *
+ * Raises ERROR if block_id does not fit in the currently allocated
+ * registered_buffers array (see XLogEnsureRecordSpace).
+ */
+void
+XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
+				  BlockNumber blknum, Page page, uint8 flags)
+{
+	registered_buffer *regbuf;
+
+	/* This is currently only used to WAL-log a full-page image of a page */
+	Assert(flags & REGBUF_FORCE_IMAGE);
+	Assert(begininsert_called);
+
+	/*
+	 * Validate block_id before raising max_registered_block_id, exactly as
+	 * XLogRegisterBuffer does. If the counter were bumped first and the
+	 * check then failed, max_registered_block_id would point past the end of
+	 * registered_buffers, and XLogResetInsertion() would write beyond the
+	 * array when clearing the in_use flags.
+	 */
+	if (block_id >= max_registered_block_id)
+	{
+		if (block_id >= max_registered_buffers)
+			elog(ERROR, "too many registered buffers");
+		max_registered_block_id = block_id + 1;
+	}
+
+	regbuf = &registered_buffers[block_id];
+
+	regbuf->rnode = *rnode;
+	regbuf->forkno = forknum;
+	regbuf->block = blknum;
+	regbuf->page = page;
+	regbuf->flags = flags;
+	/* start out with an empty per-block data chain */
+	regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
+	regbuf->rdata_len = 0;
+
+	/*
+	 * Check that this page hasn't already been registered with some other
+	 * block_id.
+	 */
+#ifdef USE_ASSERT_CHECKING
+	{
+		int			i;
+
+		for (i = 0; i < max_registered_block_id; i++)
+		{
+			registered_buffer *regbuf_old = &registered_buffers[i];
+
+			if (i == block_id || !regbuf_old->in_use)
+				continue;
+
+			Assert(!RelFileNodeEquals(regbuf_old->rnode, regbuf->rnode) ||
+				   regbuf_old->forkno != regbuf->forkno ||
+				   regbuf_old->block != regbuf->block);
+		}
+	}
+#endif
+
+	regbuf->in_use = true;
+}
+
+/*
+ * Append a chunk to the "main data" of the WAL record being constructed,
+ * i.e. the data that is not associated with any registered block.
+ *
+ * Only the pointer and length are recorded here; the bytes themselves are
+ * read later, when the record is assembled.
+ */
+void
+XLogRegisterData(char *data, int len)
+{
+	XLogRecData *chunk;
+
+	Assert(begininsert_called);
+
+	if (num_rdatas >= max_rdatas)
+		elog(ERROR, "too much WAL data");
+
+	/* claim the next free entry of the rdatas array */
+	chunk = rdatas + num_rdatas;
+	num_rdatas++;
+
+	chunk->len = len;
+	chunk->data = data;
+
+	/*
+	 * The end of the chain is tracked with mainrdata_last, so the new
+	 * entry's 'next' field is deliberately left untouched.
+	 */
+	mainrdata_len += len;
+	mainrdata_last->next = chunk;
+	mainrdata_last = chunk;
+}
+
+/*
+ * Add buffer-specific data to the WAL record that's being constructed.
+ *
+ * Block_id must reference a block previously registered with
+ * XLogRegisterBuffer(). If this is called more than once for the same
+ * block_id, the data is appended.
+ *
+ * The maximum amount of data that can be registered per block is 65535
+ * bytes. That should be plenty; if you need more than BLCKSZ bytes to
+ * reconstruct the changes to the page, you might as well just log a full
+ * copy of it. (the "main data" that's not associated with a block is not
+ * limited)
+ *
+ * NOTE(review): the 65535-byte limit is not enforced here. Presumably it
+ * stems from the width of the block header's data_length field -- confirm,
+ * and consider adding an explicit check.
+ *
+ * Raises ERROR if block_id is not currently registered, or if the rdatas
+ * array is full (see XLogEnsureRecordSpace).
+ */
+void
+XLogRegisterBufData(uint8 block_id, char *data, int len)
+{
+	registered_buffer *regbuf;
+	XLogRecData *rdata;
+
+	Assert(begininsert_called);
+
+	/*
+	 * Find the registered buffer struct. block_id can be as large as 255
+	 * while registered_buffers may be much smaller, so the id must be
+	 * range-checked before it is used as an index. Every id at or above
+	 * max_registered_block_id is unregistered by definition.
+	 */
+	if (block_id >= max_registered_block_id ||
+		!registered_buffers[block_id].in_use)
+		elog(ERROR, "no block with id %d registered with WAL insertion",
+			 block_id);
+	regbuf = &registered_buffers[block_id];
+
+	if (num_rdatas >= max_rdatas)
+		elog(ERROR, "too much WAL data");
+	rdata = &rdatas[num_rdatas++];
+
+	rdata->data = data;
+	rdata->len = len;
+
+	/* append to this block's chain; 'next' is fixed up at assembly time */
+	regbuf->rdata_tail->next = rdata;
+	regbuf->rdata_tail = rdata;
+	regbuf->rdata_len += len;
+}
+
+/*
+ * Insert an XLOG record having the specified RMID and info bytes, with the
+ * body of the record being the data and buffer references registered earlier
+ * with XLogRegister* calls.
*
* Returns XLOG pointer to end of record (beginning of next record).
* This can be used as LSN for data pages affected by the logged action.
* (LSN is the XLOG point up to which the XLOG must be flushed to disk
* before the data page can be written out. This implements the basic
* WAL rule "write the log before the data".)
- *
- * NB: this routine feels free to scribble on the XLogRecData structs,
- * though not on the data they reference. This is OK since the XLogRecData
- * structs are always just temporaries in the calling code.
*/
XLogRecPtr
-XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
+XLogInsert(RmgrId rmid, uint8 info)
{
- XLogRecPtr RedoRecPtr;
- bool doPageWrites;
XLogRecPtr EndPos;
- XLogRecPtr fpw_lsn;
- XLogRecData *rdt;
- XLogRecData *rdt_lastnormal;
- /* info's high bits are reserved for use by me */
- if (info & XLR_INFO_MASK)
+ /* XLogBeginInsert() must have been called. */
+ if (!begininsert_called)
+ elog(ERROR, "XLogBeginInsert was not called");
+
+ /*
+ * The caller can set rmgr bits and XLR_SPECIAL_REL_UPDATE; the rest are
+ * reserved for use by me.
+ */
+ if ((info & ~(XLR_RMGR_INFO_MASK | XLR_SPECIAL_REL_UPDATE)) != 0)
elog(PANIC, "invalid xlog info mask %02X", info);
TRACE_POSTGRESQL_XLOG_INSERT(rmid, info);
@@ -67,292 +400,282 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
*/
if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
{
+ XLogResetInsertion();
EndPos = SizeOfXLogLongPHD; /* start of 1st chkpt record */
return EndPos;
}
- /*
- * Get values needed to decide whether to do full-page writes. Since we
- * don't yet have an insertion lock, these could change under us, but
- * XLogInsertRecord will recheck them once it has a lock.
- */
- GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
-
- /*
- * Assemble an XLogRecData chain representing the WAL record, including
- * any backup blocks needed.
- *
- * We may have to loop back to here if a race condition is detected in
- * XLogInsertRecord. We could prevent the race by doing all this work
- * while holding an insertion lock, but it seems better to avoid doing CRC
- * calculations while holding one.
- */
-retry:
- rdt = XLogRecordAssemble(rmid, info, rdata, RedoRecPtr, doPageWrites,
- &fpw_lsn, &rdt_lastnormal);
-
- EndPos = XLogInsertRecord(rdt, fpw_lsn);
-
- if (EndPos == InvalidXLogRecPtr)
+ do
{
+ XLogRecPtr RedoRecPtr;
+ bool doPageWrites;
+ XLogRecPtr fpw_lsn;
+ XLogRecData *rdt;
+
/*
- * Undo the changes we made to the rdata chain, and retry.
- *
- * XXX: This doesn't undo *all* the changes; the XLogRecData
- * entries for buffers that we had already decided to back up have
- * had their data-pointers cleared. That's OK, as long as we
- * decide to back them up on the next iteration as well. Hence,
- * don't allow "doPageWrites" value to go from true to false after
- * we've modified the rdata chain.
+ * Get values needed to decide whether to do full-page writes. Since
+ * we don't yet have an insertion lock, these could change under us,
+ * but XLogInsertRecord will recheck them once it has a lock.
*/
- bool newDoPageWrites;
+ GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
- GetFullPageWriteInfo(&RedoRecPtr, &newDoPageWrites);
- doPageWrites = doPageWrites || newDoPageWrites;
- rdt_lastnormal->next = NULL;
+ rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites,
+ &fpw_lsn);
- goto retry;
- }
+ EndPos = XLogInsertRecord(rdt, fpw_lsn);
+ } while (EndPos == InvalidXLogRecPtr);
+
+ XLogResetInsertion();
return EndPos;
}
/*
- * Assemble a full WAL record, including backup blocks, from an XLogRecData
- * chain, ready for insertion with XLogInsertRecord(). The record header
- * fields are filled in, except for the xl_prev field and CRC.
+ * Assemble a WAL record from the registered data and buffers into an
+ * XLogRecData chain, ready for insertion with XLogInsertRecord().
*
- * The rdata chain is modified, adding entries for full-page images.
- * *rdt_lastnormal is set to point to the last normal (ie. not added by
- * this function) entry. It can be used to reset the chain to its original
- * state.
+ * The record header fields are filled in, except for the xl_prev field. The
+ * calculated CRC does not include xl_prev either.
*
- * If the rdata chain contains any buffer references, and a full-page image
- * was not taken of all the buffers, *fpw_lsn is set to the lowest LSN among
- * such pages. This signals that the assembled record is only good for
- * insertion on the assumption that the RedoRecPtr and doPageWrites values
- * were up-to-date.
+ * If there are any registered buffers, and a full-page image was not taken
+ * of all of them, *fpw_lsn is set to the lowest LSN among the pages that
+ * were skipped because their LSN is newer than RedoRecPtr (otherwise it is
+ * InvalidXLogRecPtr). This signals that the assembled record is only good
+ * for insertion on the assumption that the RedoRecPtr and doPageWrites
+ * values were up-to-date.
*/
static XLogRecData *
-XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecData *rdata,
+XLogRecordAssemble(RmgrId rmid, uint8 info,
XLogRecPtr RedoRecPtr, bool doPageWrites,
- XLogRecPtr *fpw_lsn, XLogRecData **rdt_lastnormal)
+ XLogRecPtr *fpw_lsn)
{
- bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
XLogRecData *rdt;
- Buffer dtbuf[XLR_MAX_BKP_BLOCKS];
- bool dtbuf_bkp[XLR_MAX_BKP_BLOCKS];
- uint32 len,
- total_len;
- unsigned i;
+ uint32 total_len = 0;
+ int block_id;
+ pg_crc32 rdata_crc;
+ registered_buffer *prev_regbuf = NULL;
+ XLogRecData *rdt_datas_last;
+ XLogRecord *rechdr;
+ char *scratch = hdr_scratch;
/*
- * These need to be static because they are returned to the caller as part
- * of the XLogRecData chain.
+ * Note: this function can be called multiple times for the same record.
+ * All the modifications we do to the rdata chains below must handle that.
*/
- static BkpBlock dtbuf_xlg[XLR_MAX_BKP_BLOCKS];
- static XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS];
- static XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS];
- static XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS];
- static XLogRecData hdr_rdt;
- static XLogRecord *rechdr;
-
- if (rechdr == NULL)
- {
- static char rechdrbuf[SizeOfXLogRecord + MAXIMUM_ALIGNOF];
- rechdr = (XLogRecord *) MAXALIGN(&rechdrbuf);
- MemSet(rechdr, 0, SizeOfXLogRecord);
- }
+ /* The record begins with the fixed-size header */
+ rechdr = (XLogRecord *) scratch;
+ scratch += SizeOfXLogRecord;
- /* The record begins with the header */
- hdr_rdt.data = (char *) rechdr;
- hdr_rdt.len = SizeOfXLogRecord;
- hdr_rdt.next = rdata;
- total_len = SizeOfXLogRecord;
+ hdr_rdt.next = NULL;
+ rdt_datas_last = &hdr_rdt;
+ hdr_rdt.data = hdr_scratch;
/*
- * Here we scan the rdata chain, to determine which buffers must be backed
- * up.
- *
- * We add entries for backup blocks to the chain, so that they don't need
- * any special treatment in the critical section where the chunks are
- * copied into the WAL buffers. Those entries have to be unlinked from the
- * chain if we have to loop back here.
+ * Make an rdata chain containing all the data portions of all block
+ * references. This includes the data for full-page images. Also append
+ * the headers for the block references in the scratch buffer.
*/
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
- {
- dtbuf[i] = InvalidBuffer;
- dtbuf_bkp[i] = false;
- }
-
*fpw_lsn = InvalidXLogRecPtr;
- len = 0;
- for (rdt = rdata;;)
+ for (block_id = 0; block_id < max_registered_block_id; block_id++)
{
- if (rdt->buffer == InvalidBuffer)
+ registered_buffer *regbuf = &registered_buffers[block_id];
+ bool needs_backup;
+ bool needs_data;
+ XLogRecordBlockHeader bkpb;
+ XLogRecordBlockImageHeader bimg;
+ bool samerel;
+
+ if (!regbuf->in_use)
+ continue;
+
+ /* Determine if this block needs to be backed up */
+ if (regbuf->flags & REGBUF_FORCE_IMAGE)
+ needs_backup = true;
+ else if (regbuf->flags & REGBUF_NO_IMAGE)
+ needs_backup = false;
+ else if (!doPageWrites)
+ needs_backup = false;
+ else
{
- /* Simple data, just include it */
- len += rdt->len;
+ /*
+ * We assume page LSN is first data on *every* page that can be
+ * passed to XLogInsert, whether it has the standard page layout
+ * or not.
+ */
+ XLogRecPtr page_lsn = PageGetLSN(regbuf->page);
+
+ needs_backup = (page_lsn <= RedoRecPtr);
+ if (!needs_backup)
+ {
+ if (*fpw_lsn == InvalidXLogRecPtr || page_lsn < *fpw_lsn)
+ *fpw_lsn = page_lsn;
+ }
}
+
+ /* Determine if the buffer data needs to be included */
+ if (regbuf->rdata_len == 0)
+ needs_data = false;
+ else if ((regbuf->flags & REGBUF_KEEP_DATA) != 0)
+ needs_data = true;
else
+ needs_data = !needs_backup;
+
+ bkpb.id = block_id;
+ bkpb.fork_flags = regbuf->forkno;
+ bkpb.data_length = 0;
+
+ if ((regbuf->flags & REGBUF_WILL_INIT) == REGBUF_WILL_INIT)
+ bkpb.fork_flags |= BKPBLOCK_WILL_INIT;
+
+ if (needs_backup)
{
- /* Find info for buffer */
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+ Page page = regbuf->page;
+
+ /*
+ * The page needs to be backed up, so set up *bimg
+ */
+ if (regbuf->flags & REGBUF_STANDARD)
{
- if (rdt->buffer == dtbuf[i])
+ /* Assume we can omit data between pd_lower and pd_upper */
+ uint16 lower = ((PageHeader) page)->pd_lower;
+ uint16 upper = ((PageHeader) page)->pd_upper;
+
+ if (lower >= SizeOfPageHeaderData &&
+ upper > lower &&
+ upper <= BLCKSZ)
{
- /* Buffer already referenced by earlier chain item */
- if (dtbuf_bkp[i])
- {
- rdt->data = NULL;
- rdt->len = 0;
- }
- else if (rdt->data)
- len += rdt->len;
- break;
+ bimg.hole_offset = lower;
+ bimg.hole_length = upper - lower;
}
- if (dtbuf[i] == InvalidBuffer)
+ else
{
- /* OK, put it in this slot */
- XLogRecPtr page_lsn;
- bool needs_backup;
-
- dtbuf[i] = rdt->buffer;
-
- /*
- * Determine whether the buffer has to be backed up.
- *
- * We assume page LSN is first data on *every* page that
- * can be passed to XLogInsert, whether it has the
- * standard page layout or not. We don't need to take the
- * buffer header lock for PageGetLSN because we hold an
- * exclusive lock on the page and/or the relation.
- */
- page_lsn = PageGetLSN(BufferGetPage(rdt->buffer));
- if (!doPageWrites)
- needs_backup = false;
- else if (page_lsn <= RedoRecPtr)
- needs_backup = true;
- else
- needs_backup = false;
-
- if (needs_backup)
- {
- /*
- * The page needs to be backed up, so set up BkpBlock
- */
- XLogFillBkpBlock(rdt->buffer, rdt->buffer_std,
- &(dtbuf_xlg[i]));
- dtbuf_bkp[i] = true;
- rdt->data = NULL;
- rdt->len = 0;
- }
- else
- {
- if (rdt->data)
- len += rdt->len;
- if (*fpw_lsn == InvalidXLogRecPtr ||
- page_lsn < *fpw_lsn)
- {
- *fpw_lsn = page_lsn;
- }
- }
- break;
+ /* No "hole" to compress out */
+ bimg.hole_offset = 0;
+ bimg.hole_length = 0;
}
}
- if (i >= XLR_MAX_BKP_BLOCKS)
- elog(PANIC, "can backup at most %d blocks per xlog record",
- XLR_MAX_BKP_BLOCKS);
- }
- /* Break out of loop when rdt points to last chain item */
- if (rdt->next == NULL)
- break;
- rdt = rdt->next;
- }
- total_len += len;
+ else
+ {
+ /* Not a standard page header, don't try to eliminate "hole" */
+ bimg.hole_offset = 0;
+ bimg.hole_length = 0;
+ }
- /*
- * Make additional rdata chain entries for the backup blocks, so that we
- * don't need to special-case them in the write loop. This modifies the
- * original rdata chain, but we keep a pointer to the last regular entry,
- * rdt_lastnormal, so that we can undo this if we have to start over.
- *
- * At the exit of this loop, total_len includes the backup block data.
- *
- * Also set the appropriate info bits to show which buffers were backed
- * up. The XLR_BKP_BLOCK(N) bit corresponds to the N'th distinct buffer
- * value (ignoring InvalidBuffer) appearing in the rdata chain.
- */
- *rdt_lastnormal = rdt;
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
- {
- BkpBlock *bkpb;
- char *page;
+ /* Fill in the remaining fields in the XLogRecordBlockData struct */
+ bkpb.fork_flags |= BKPBLOCK_HAS_IMAGE;
- if (!dtbuf_bkp[i])
- continue;
+ total_len += BLCKSZ - bimg.hole_length;
+
+ /*
+ * Construct XLogRecData entries for the page content.
+ */
+ rdt_datas_last->next = &regbuf->bkp_rdatas[0];
+ rdt_datas_last = rdt_datas_last->next;
+ if (bimg.hole_length == 0)
+ {
+ rdt_datas_last->data = page;
+ rdt_datas_last->len = BLCKSZ;
+ }
+ else
+ {
+ /* must skip the hole */
+ rdt_datas_last->data = page;
+ rdt_datas_last->len = bimg.hole_offset;
- info |= XLR_BKP_BLOCK(i);
+ rdt_datas_last->next = &regbuf->bkp_rdatas[1];
+ rdt_datas_last = rdt_datas_last->next;
- bkpb = &(dtbuf_xlg[i]);
- page = (char *) BufferGetBlock(dtbuf[i]);
+ rdt_datas_last->data = page + (bimg.hole_offset + bimg.hole_length);
+ rdt_datas_last->len = BLCKSZ - (bimg.hole_offset + bimg.hole_length);
+ }
+ }
- rdt->next = &(dtbuf_rdt1[i]);
- rdt = rdt->next;
+ if (needs_data)
+ {
+ /*
+ * Link the caller-supplied rdata chain for this buffer to the
+ * overall list.
+ */
+ bkpb.fork_flags |= BKPBLOCK_HAS_DATA;
+ bkpb.data_length = regbuf->rdata_len;
+ total_len += regbuf->rdata_len;
+
+ rdt_datas_last->next = regbuf->rdata_head;
+ rdt_datas_last = regbuf->rdata_tail;
+ }
- rdt->data = (char *) bkpb;
- rdt->len = sizeof(BkpBlock);
- total_len += sizeof(BkpBlock);
+		/*
+		 * If this block belongs to the same relation as the previous block
+		 * reference in the record, flag it so that the RelFileNode is not
+		 * written out again below.
+		 *
+		 * prev_regbuf must be advanced for every block reference, not only
+		 * on a match. It starts out NULL, so updating it only inside the
+		 * matching branch would leave it NULL forever and the SAME_REL flag
+		 * would never be set.
+		 */
+		if (prev_regbuf && RelFileNodeEquals(regbuf->rnode, prev_regbuf->rnode))
+		{
+			samerel = true;
+			bkpb.fork_flags |= BKPBLOCK_SAME_REL;
+		}
+		else
+			samerel = false;
+		prev_regbuf = regbuf;
- rdt->next = &(dtbuf_rdt2[i]);
- rdt = rdt->next;
+ /* Ok, copy the header to the scratch buffer */
+ memcpy(scratch, &bkpb, SizeOfXLogRecordBlockHeader);
+ scratch += SizeOfXLogRecordBlockHeader;
+ if (needs_backup)
+ {
+ memcpy(scratch, &bimg, SizeOfXLogRecordBlockImageHeader);
+ scratch += SizeOfXLogRecordBlockImageHeader;
+ }
+ if (!samerel)
+ {
+ memcpy(scratch, &regbuf->rnode, sizeof(RelFileNode));
+ scratch += sizeof(RelFileNode);
+ }
+ memcpy(scratch, &regbuf->block, sizeof(BlockNumber));
+ scratch += sizeof(BlockNumber);
+ }
- if (bkpb->hole_length == 0)
+ /* followed by main data, if any */
+ if (mainrdata_len > 0)
+ {
+ if (mainrdata_len > 255)
{
- rdt->data = page;
- rdt->len = BLCKSZ;
- total_len += BLCKSZ;
- rdt->next = NULL;
+ *(scratch++) = XLR_BLOCK_ID_DATA_LONG;
+ memcpy(scratch, &mainrdata_len, sizeof(uint32));
+ scratch += sizeof(uint32);
}
else
{
- /* must skip the hole */
- rdt->data = page;
- rdt->len = bkpb->hole_offset;
- total_len += bkpb->hole_offset;
-
- rdt->next = &(dtbuf_rdt3[i]);
- rdt = rdt->next;
-
- rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
- rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
- total_len += rdt->len;
- rdt->next = NULL;
+ *(scratch++) = XLR_BLOCK_ID_DATA_SHORT;
+ *(scratch++) = (uint8) mainrdata_len;
}
+ rdt_datas_last->next = mainrdata_head;
+ rdt_datas_last = mainrdata_last;
+ total_len += mainrdata_len;
}
+ rdt_datas_last->next = NULL;
+
+ hdr_rdt.len = (scratch - hdr_scratch);
+ total_len += hdr_rdt.len;
/*
- * We disallow len == 0 because it provides a useful bit of extra error
- * checking in ReadRecord. This means that all callers of XLogInsert
- * must supply at least some not-in-a-buffer data. However, we make an
- * exception for XLOG SWITCH records because we don't want them to ever
- * cross a segment boundary.
+ * Calculate CRC of the data
+ *
+ * Note that the record header isn't added into the CRC initially since we
+ * don't know the prev-link yet. Thus, the CRC will represent the CRC of
+ * the whole record in the order: rdata, then backup blocks, then record
+ * header.
*/
- if (len == 0 && !isLogSwitch)
- elog(PANIC, "invalid xlog record length %u", rechdr->xl_len);
+ INIT_CRC32C(rdata_crc);
+ COMP_CRC32C(rdata_crc, hdr_scratch + SizeOfXLogRecord, hdr_rdt.len - SizeOfXLogRecord);
+ for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next)
+ COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
/*
* Fill in the fields in the record header. Prev-link is filled in later,
- * once we know where in the WAL the record will be inserted. CRC is also
- * not calculated yet.
+ * once we know where in the WAL the record will be inserted. The CRC does
+ * not include the record header yet.
*/
rechdr->xl_xid = GetCurrentTransactionIdIfAny();
rechdr->xl_tot_len = total_len;
- rechdr->xl_len = len; /* doesn't include backup blocks */
rechdr->xl_info = info;
rechdr->xl_rmid = rmid;
rechdr->xl_prev = InvalidXLogRecPtr;
+ rechdr->xl_crc = rdata_crc;
return &hdr_rdt;
}
@@ -429,45 +752,41 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
if (lsn <= RedoRecPtr)
{
- XLogRecData rdata[2];
- BkpBlock bkpb;
+ int flags;
char copied_buffer[BLCKSZ];
char *origdata = (char *) BufferGetBlock(buffer);
-
- /* Make a BkpBlock struct representing the buffer */
- XLogFillBkpBlock(buffer, buffer_std, &bkpb);
+ RelFileNode rnode;
+ ForkNumber forkno;
+ BlockNumber blkno;
/*
* Copy buffer so we don't have to worry about concurrent hint bit or
* lsn updates. We assume pd_lower/upper cannot be changed without an
* exclusive lock, so the contents bkp are not racy.
- *
- * With buffer_std set to false, XLogFillBkpBlock() sets hole_length
- * and hole_offset to 0; so the following code is safe for either
- * case.
*/
- memcpy(copied_buffer, origdata, bkpb.hole_offset);
- memcpy(copied_buffer + bkpb.hole_offset,
- origdata + bkpb.hole_offset + bkpb.hole_length,
- BLCKSZ - bkpb.hole_offset - bkpb.hole_length);
+ if (buffer_std)
+ {
+ /* Assume we can omit data between pd_lower and pd_upper */
+ Page page = BufferGetPage(buffer);
+ uint16 lower = ((PageHeader) page)->pd_lower;
+ uint16 upper = ((PageHeader) page)->pd_upper;
- /*
- * Header for backup block.
- */
- rdata[0].data = (char *) &bkpb;
- rdata[0].len = sizeof(BkpBlock);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ memcpy(copied_buffer, origdata, lower);
+ memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper);
+ }
+ else
+ memcpy(copied_buffer, origdata, BLCKSZ);
- /*
- * Save copy of the buffer.
- */
- rdata[1].data = copied_buffer;
- rdata[1].len = BLCKSZ - bkpb.hole_length;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
- recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
+ flags = REGBUF_FORCE_IMAGE;
+ if (buffer_std)
+ flags |= REGBUF_STANDARD;
+
+ BufferGetTag(buffer, &rnode, &forkno, &blkno);
+ XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags);
+
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
}
return recptr;
@@ -489,71 +808,16 @@ XLogRecPtr
log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
Page page, bool page_std)
{
- BkpBlock bkpb;
+ int flags;
XLogRecPtr recptr;
- XLogRecData rdata[3];
-
- /* NO ELOG(ERROR) from here till newpage op is logged */
- START_CRIT_SECTION();
-
- bkpb.node = *rnode;
- bkpb.fork = forkNum;
- bkpb.block = blkno;
+ flags = REGBUF_FORCE_IMAGE;
if (page_std)
- {
- /* Assume we can omit data between pd_lower and pd_upper */
- uint16 lower = ((PageHeader) page)->pd_lower;
- uint16 upper = ((PageHeader) page)->pd_upper;
-
- if (lower >= SizeOfPageHeaderData &&
- upper > lower &&
- upper <= BLCKSZ)
- {
- bkpb.hole_offset = lower;
- bkpb.hole_length = upper - lower;
- }
- else
- {
- /* No "hole" to compress out */
- bkpb.hole_offset = 0;
- bkpb.hole_length = 0;
- }
- }
- else
- {
- /* Not a standard page header, don't try to eliminate "hole" */
- bkpb.hole_offset = 0;
- bkpb.hole_length = 0;
- }
-
- rdata[0].data = (char *) &bkpb;
- rdata[0].len = sizeof(BkpBlock);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- if (bkpb.hole_length == 0)
- {
- rdata[1].data = (char *) page;
- rdata[1].len = BLCKSZ;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
- }
- else
- {
- /* must skip the hole */
- rdata[1].data = (char *) page;
- rdata[1].len = bkpb.hole_offset;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &rdata[2];
-
- rdata[2].data = (char *) page + (bkpb.hole_offset + bkpb.hole_length);
- rdata[2].len = BLCKSZ - (bkpb.hole_offset + bkpb.hole_length);
- rdata[2].buffer = InvalidBuffer;
- rdata[2].next = NULL;
- }
+ flags |= REGBUF_STANDARD;
- recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
+ XLogBeginInsert();
+ XLogRegisterBlock(0, rnode, forkNum, blkno, page, flags);
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
/*
* The page may be uninitialized. If so, we can't set the LSN because that
@@ -564,8 +828,6 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
PageSetLSN(page, recptr);
}
- END_CRIT_SECTION();
-
return recptr;
}
@@ -596,38 +858,38 @@ log_newpage_buffer(Buffer buffer, bool page_std)
}
/*
- * Fill a BkpBlock for a buffer.
+ * Allocate working buffers needed for WAL record construction.
*/
-static void
-XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb)
+void
+InitXLogInsert(void)
{
- BufferGetTag(buffer, &bkpb->node, &bkpb->fork, &bkpb->block);
+ /*
+ * Every step below is guarded by a NULL check, so calling this function
+ * more than once only allocates whatever is still missing.
+ */
+
+ /* Initialize the working areas */
+ if (xloginsert_cxt == NULL)
+ {
+ xloginsert_cxt = AllocSetContextCreate(TopMemoryContext,
+ "WAL record construction",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ }
- if (buffer_std)
+ /* Zero-filled array of registered-buffer slots, at its normal size. */
+ if (registered_buffers == NULL)
{
- /* Assume we can omit data between pd_lower and pd_upper */
- Page page = BufferGetPage(buffer);
- uint16 lower = ((PageHeader) page)->pd_lower;
- uint16 upper = ((PageHeader) page)->pd_upper;
-
- if (lower >= SizeOfPageHeaderData &&
- upper > lower &&
- upper <= BLCKSZ)
- {
- bkpb->hole_offset = lower;
- bkpb->hole_length = upper - lower;
- }
- else
- {
- /* No "hole" to compress out */
- bkpb->hole_offset = 0;
- bkpb->hole_length = 0;
- }
+ registered_buffers = (registered_buffer *)
+ MemoryContextAllocZero(xloginsert_cxt,
+ sizeof(registered_buffer) * (XLR_NORMAL_MAX_BLOCK_ID + 1));
+ max_registered_buffers = XLR_NORMAL_MAX_BLOCK_ID + 1;
}
- else
+ /* Array of XLogRecData entries; not zeroed. */
+ if (rdatas == NULL)
{
- /* Not a standard page header, don't try to eliminate "hole" */
- bkpb->hole_offset = 0;
- bkpb->hole_length = 0;
+ rdatas = MemoryContextAlloc(xloginsert_cxt,
+ sizeof(XLogRecData) * XLR_NORMAL_RDATAS);
+ max_rdatas = XLR_NORMAL_RDATAS;
}
+
+ /*
+ * Allocate a buffer to hold the header information for a WAL record.
+ *
+ * Like the other working areas, it must live in xloginsert_cxt rather than
+ * in whatever memory context happens to be current: the pointer is kept
+ * in a variable that is only filled in once, so memory from a context
+ * that is later reset or deleted would leave it dangling.
+ */
+ if (hdr_scratch == NULL)
+ hdr_scratch = MemoryContextAllocZero(xloginsert_cxt, HEADER_SCRATCH_SIZE);
}
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 7d573cc585d..67d62234369 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -37,6 +37,8 @@ report_invalid_record(XLogReaderState *state, const char *fmt,...)
the supplied arguments. */
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
+static void ResetDecoder(XLogReaderState *state);
+
/* size of the buffer allocated for error message. */
#define MAX_ERRORMSG_LEN 1000
@@ -59,46 +61,33 @@ report_invalid_record(XLogReaderState *state, const char *fmt,...)
/*
* Allocate and initialize a new XLogReader.
*
- * Returns NULL if the xlogreader couldn't be allocated.
+ * The returned XLogReader is palloc'd. (In FRONTEND code, that means that
+ * running out-of-memory causes an immediate exit(1).)
*/
XLogReaderState *
XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
{
XLogReaderState *state;
- AssertArg(pagereadfunc != NULL);
+ state = (XLogReaderState *) palloc0(sizeof(XLogReaderState));
- state = (XLogReaderState *) malloc(sizeof(XLogReaderState));
- if (!state)
- return NULL;
- MemSet(state, 0, sizeof(XLogReaderState));
+ state->max_block_id = -1;
/*
* Permanently allocate readBuf. We do it this way, rather than just
* making a static array, for two reasons: (1) no need to waste the
* storage in most instantiations of the backend; (2) a static char array
- * isn't guaranteed to have any particular alignment, whereas malloc()
+ * isn't guaranteed to have any particular alignment, whereas palloc()
* will provide MAXALIGN'd storage.
*/
- state->readBuf = (char *) malloc(XLOG_BLCKSZ);
- if (!state->readBuf)
- {
- free(state);
- return NULL;
- }
+ state->readBuf = (char *) palloc(XLOG_BLCKSZ);
state->read_page = pagereadfunc;
/* system_identifier initialized to zeroes above */
state->private_data = private_data;
/* ReadRecPtr and EndRecPtr initialized to zeroes above */
/* readSegNo, readOff, readLen, readPageTLI initialized to zeroes above */
- state->errormsg_buf = malloc(MAX_ERRORMSG_LEN + 1);
- if (!state->errormsg_buf)
- {
- free(state->readBuf);
- free(state);
- return NULL;
- }
+ state->errormsg_buf = palloc(MAX_ERRORMSG_LEN + 1);
state->errormsg_buf[0] = '\0';
/*
@@ -107,9 +96,9 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
*/
if (!allocate_recordbuf(state, 0))
{
- free(state->errormsg_buf);
- free(state->readBuf);
- free(state);
+ pfree(state->errormsg_buf);
+ pfree(state->readBuf);
+ pfree(state);
return NULL;
}
@@ -119,11 +108,24 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
void
XLogReaderFree(XLogReaderState *state)
{
- free(state->errormsg_buf);
+ int block_id;
+
+ /*
+ * Release every per-block data buffer the decoder ever allocated.
+ *
+ * The in_use flags and max_block_id only describe the most recently
+ * decoded record and are cleared before each new record, whereas the
+ * data buffers are kept for reuse across records. So walk all possible
+ * block IDs and free whatever is still allocated, otherwise buffers
+ * belonging to IDs that the last record did not use would be leaked.
+ */
+ for (block_id = 0; block_id <= XLR_MAX_BLOCK_ID; block_id++)
+ {
+ if (state->blocks[block_id].data)
+ pfree(state->blocks[block_id].data);
+ }
+ /* The main-data buffer is likewise kept between records. */
+ if (state->main_data)
+ pfree(state->main_data);
+
+ pfree(state->errormsg_buf);
if (state->readRecordBuf)
- free(state->readRecordBuf);
- free(state->readBuf);
- free(state);
+ pfree(state->readRecordBuf);
+ pfree(state->readBuf);
+ pfree(state);
}
/*
@@ -146,14 +148,8 @@ allocate_recordbuf(XLogReaderState *state, uint32 reclength)
newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ));
if (state->readRecordBuf)
- free(state->readRecordBuf);
- state->readRecordBuf = (char *) malloc(newSize);
- if (!state->readRecordBuf)
- {
- state->readRecordBufSize = 0;
- return false;
- }
-
+ pfree(state->readRecordBuf);
+ state->readRecordBuf = (char *) palloc(newSize);
state->readRecordBufSize = newSize;
return true;
}
@@ -191,6 +187,8 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
*errormsg = NULL;
state->errormsg_buf[0] = '\0';
+ ResetDecoder(state);
+
if (RecPtr == InvalidXLogRecPtr)
{
RecPtr = state->EndRecPtr;
@@ -440,7 +438,10 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
state->EndRecPtr -= state->EndRecPtr % XLogSegSize;
}
- return record;
+ if (DecodeXLogRecord(state, record, errormsg))
+ return record;
+ else
+ return NULL;
err:
@@ -579,30 +580,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
XLogRecPtr PrevRecPtr, XLogRecord *record,
bool randAccess)
{
- /*
- * xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
- * required.
- */
- if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
- {
- if (record->xl_len != 0)
- {
- report_invalid_record(state,
- "invalid xlog switch record at %X/%X",
- (uint32) (RecPtr >> 32), (uint32) RecPtr);
- return false;
- }
- }
- else if (record->xl_len == 0)
- {
- report_invalid_record(state,
- "record with zero length at %X/%X",
- (uint32) (RecPtr >> 32), (uint32) RecPtr);
- return false;
- }
- if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
- record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
- XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+ if (record->xl_tot_len < SizeOfXLogRecord)
{
report_invalid_record(state,
"invalid record length at %X/%X",
@@ -663,79 +641,17 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
* We assume all of the record (that is, xl_tot_len bytes) has been read
* into memory at *record. Also, ValidXLogRecordHeader() has accepted the
* record's header, which means in particular that xl_tot_len is at least
- * SizeOfXlogRecord, so it is safe to fetch xl_len.
+ * SizeOfXlogRecord.
*/
static bool
ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
{
pg_crc32 crc;
- int i;
- uint32 len = record->xl_len;
- BkpBlock bkpb;
- char *blk;
- size_t remaining = record->xl_tot_len;
- /* First the rmgr data */
- if (remaining < SizeOfXLogRecord + len)
- {
- /* ValidXLogRecordHeader() should've caught this already... */
- report_invalid_record(state, "invalid record length at %X/%X",
- (uint32) (recptr >> 32), (uint32) recptr);
- return false;
- }
- remaining -= SizeOfXLogRecord + len;
+ /* Calculate the CRC */
INIT_CRC32C(crc);
- COMP_CRC32C(crc, XLogRecGetData(record), len);
-
- /* Add in the backup blocks, if any */
- blk = (char *) XLogRecGetData(record) + len;
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
- {
- uint32 blen;
-
- if (!(record->xl_info & XLR_BKP_BLOCK(i)))
- continue;
-
- if (remaining < sizeof(BkpBlock))
- {
- report_invalid_record(state,
- "invalid backup block size in record at %X/%X",
- (uint32) (recptr >> 32), (uint32) recptr);
- return false;
- }
- memcpy(&bkpb, blk, sizeof(BkpBlock));
-
- if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
- {
- report_invalid_record(state,
- "incorrect hole size in record at %X/%X",
- (uint32) (recptr >> 32), (uint32) recptr);
- return false;
- }
- blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
-
- if (remaining < blen)
- {
- report_invalid_record(state,
- "invalid backup block size in record at %X/%X",
- (uint32) (recptr >> 32), (uint32) recptr);
- return false;
- }
- remaining -= blen;
- COMP_CRC32C(crc, blk, blen);
- blk += blen;
- }
-
- /* Check that xl_tot_len agrees with our calculation */
- if (remaining != 0)
- {
- report_invalid_record(state,
- "incorrect total length in record at %X/%X",
- (uint32) (recptr >> 32), (uint32) recptr);
- return false;
- }
-
- /* Finally include the record header */
+ COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
+ /* include the record header last */
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(crc);
@@ -985,3 +901,321 @@ out:
}
#endif /* FRONTEND */
+
+
+/* ----------------------------------------
+ * Functions for decoding the data and block references in a record.
+ * ----------------------------------------
+ */
+
+/*
+ * Private helper: forget everything that was decoded from the previous
+ * record, so the reader state is ready for the next one.
+ *
+ * Only the bookkeeping is cleared (record pointer, main-data length and the
+ * per-block flags up to max_block_id); the data buffers themselves are not
+ * touched here.
+ */
+static void
+ResetDecoder(XLogReaderState *state)
+{
+ int slot = state->max_block_id;
+
+ /* Clear the flags of every slot the last record could have used. */
+ while (slot >= 0)
+ {
+ DecodedBkpBlock *blk = &state->blocks[slot];
+
+ blk->has_data = false;
+ blk->has_image = false;
+ blk->in_use = false;
+ slot--;
+ }
+
+ /* -1 means "no block references decoded". */
+ state->max_block_id = -1;
+ state->main_data_len = 0;
+ state->decoded_record = NULL;
+}
+
+/*
+ * Decode the previously read record.
+ *
+ * Walks the fragment headers that follow the fixed-size record header,
+ * filling in state->blocks[] and the main-data length, and then copies each
+ * block's data and the main data into separate buffers held in *state.
+ * Full-page images are not copied; bkp_image points into the record itself.
+ *
+ * On error, a human-readable error message is returned in *errormsg, and
+ * the return value is false. On success *errormsg is left untouched.
+ */
+bool
+DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
+{
+ /*
+ * read next _size bytes from record buffer, but check for overrun first.
+ */
+#define COPY_HEADER_FIELD(_dst, _size) \
+ do { \
+ if (remaining < _size) \
+ goto shortdata_err; \
+ memcpy(_dst, ptr, _size); \
+ ptr += _size; \
+ remaining -= _size; \
+ } while(0)
+
+ char *ptr;
+ uint32 remaining;
+ uint32 datatotal;
+ RelFileNode *rnode = NULL;
+ uint8 block_id;
+
+ ResetDecoder(state);
+
+ state->decoded_record = record;
+
+ ptr = (char *) record;
+ ptr += SizeOfXLogRecord;
+ remaining = record->xl_tot_len - SizeOfXLogRecord;
+
+ /*
+ * Decode the headers. 'datatotal' accumulates the payload size announced
+ * by the headers seen so far; headers continue for as long as more bytes
+ * remain than that payload accounts for.
+ */
+ datatotal = 0;
+ while (remaining > datatotal)
+ {
+ COPY_HEADER_FIELD(&block_id, sizeof(uint8));
+
+ if (block_id == XLR_BLOCK_ID_DATA_SHORT)
+ {
+ /* XLogRecordDataHeaderShort */
+ uint8 main_data_len;
+
+ COPY_HEADER_FIELD(&main_data_len, sizeof(uint8));
+
+ state->main_data_len = main_data_len;
+ datatotal += main_data_len;
+ break; /* by convention, the main data fragment is
+ * always last */
+ }
+ else if (block_id == XLR_BLOCK_ID_DATA_LONG)
+ {
+ /* XLogRecordDataHeaderLong */
+ uint32 main_data_len;
+
+ COPY_HEADER_FIELD(&main_data_len, sizeof(uint32));
+ state->main_data_len = main_data_len;
+ datatotal += main_data_len;
+ break; /* by convention, the main data fragment is
+ * always last */
+ }
+ else if (block_id <= XLR_MAX_BLOCK_ID)
+ {
+ /* XLogRecordBlockHeader */
+ DecodedBkpBlock *blk;
+ uint8 fork_flags;
+
+ /* block references must appear in strictly increasing ID order */
+ if (block_id <= state->max_block_id)
+ {
+ report_invalid_record(state,
+ "out-of-order block_id %u at %X/%X",
+ block_id,
+ (uint32) (state->ReadRecPtr >> 32),
+ (uint32) state->ReadRecPtr);
+ goto err;
+ }
+ state->max_block_id = block_id;
+
+ blk = &state->blocks[block_id];
+ blk->in_use = true;
+
+ COPY_HEADER_FIELD(&fork_flags, sizeof(uint8));
+ blk->forknum = fork_flags & BKPBLOCK_FORK_MASK;
+ blk->flags = fork_flags;
+ blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0);
+ blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0);
+
+ COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16));
+
+ /*
+ * Cross-check that the HAS_DATA flag is set iff data_length > 0.
+ * A mismatch makes the record invalid, so stop decoding instead
+ * of merely recording the message and carrying on.
+ */
+ if (blk->has_data && blk->data_len == 0)
+ {
+ report_invalid_record(state,
+ "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
+ (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+ goto err;
+ }
+ if (!blk->has_data && blk->data_len != 0)
+ {
+ report_invalid_record(state,
+ "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
+ (unsigned int) blk->data_len,
+ (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+ goto err;
+ }
+ datatotal += blk->data_len;
+
+ if (blk->has_image)
+ {
+ COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
+ COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
+
+ /*
+ * The hole must lie within the page. Otherwise the image size
+ * computed below would wrap around, and restoring the image
+ * would copy outside the page.
+ */
+ if (blk->hole_offset + blk->hole_length > BLCKSZ)
+ {
+ report_invalid_record(state,
+ "incorrect hole size in record at %X/%X",
+ (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+ goto err;
+ }
+ datatotal += BLCKSZ - blk->hole_length;
+ }
+ if (!(fork_flags & BKPBLOCK_SAME_REL))
+ {
+ COPY_HEADER_FIELD(&blk->rnode, sizeof(RelFileNode));
+ rnode = &blk->rnode;
+ }
+ else
+ {
+ /* reuse the relation of the preceding block reference */
+ if (rnode == NULL)
+ {
+ report_invalid_record(state,
+ "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
+ (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+ goto err;
+ }
+
+ blk->rnode = *rnode;
+ }
+ COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber));
+ }
+ else
+ {
+ report_invalid_record(state,
+ "invalid block_id %u at %X/%X",
+ block_id,
+ (uint32) (state->ReadRecPtr >> 32),
+ (uint32) state->ReadRecPtr);
+ goto err;
+ }
+ }
+
+ if (remaining != datatotal)
+ goto shortdata_err;
+
+ /*
+ * Ok, we've parsed the fragment headers, and verified that the total
+ * length of the payload in the fragments is equal to the amount of data
+ * left. Copy the data of each fragment to a separate buffer.
+ *
+ * We could just set up pointers into readRecordBuf, but we want to align
+ * the data for the convenience of the callers. Backup images are not
+ * copied, however; they don't need alignment.
+ */
+
+ /* block data first */
+ for (block_id = 0; block_id <= state->max_block_id; block_id++)
+ {
+ DecodedBkpBlock *blk = &state->blocks[block_id];
+
+ if (!blk->in_use)
+ continue;
+ if (blk->has_image)
+ {
+ blk->bkp_image = ptr;
+ ptr += BLCKSZ - blk->hole_length;
+ }
+ if (blk->has_data)
+ {
+ /* keep the existing buffer unless it is missing or too small */
+ if (!blk->data || blk->data_len > blk->data_bufsz)
+ {
+ if (blk->data)
+ pfree(blk->data);
+ blk->data_bufsz = blk->data_len;
+ blk->data = palloc(blk->data_bufsz);
+ }
+ memcpy(blk->data, ptr, blk->data_len);
+ ptr += blk->data_len;
+ }
+ }
+
+ /* and finally, the main data */
+ if (state->main_data_len > 0)
+ {
+ /* same buffer-reuse policy as for the block data above */
+ if (!state->main_data || state->main_data_len > state->main_data_bufsz)
+ {
+ if (state->main_data)
+ pfree(state->main_data);
+ state->main_data_bufsz = state->main_data_len;
+ state->main_data = palloc(state->main_data_bufsz);
+ }
+ memcpy(state->main_data, ptr, state->main_data_len);
+ ptr += state->main_data_len;
+ }
+
+ return true;
+
+shortdata_err:
+ report_invalid_record(state,
+ "record with invalid length at %X/%X",
+ (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+err:
+ *errormsg = state->errormsg_buf;
+
+ return false;
+}
+
+/*
+ * Look up which relation, fork and block the block reference 'block_id' of
+ * a decoded record points at.
+ *
+ * Returns false if the record has no block reference with that ID; the
+ * output arguments are then left untouched. Otherwise returns true, after
+ * storing the tag into whichever of *rnode, *forknum and *blknum are
+ * non-NULL.
+ */
+bool
+XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
+ RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
+{
+ DecodedBkpBlock *ref = &record->blocks[block_id];
+
+ if (ref->in_use)
+ {
+ /* Each output is optional; callers pass NULL for what they skip. */
+ if (rnode != NULL)
+ *rnode = ref->rnode;
+ if (forknum != NULL)
+ *forknum = ref->forknum;
+ if (blknum != NULL)
+ *blknum = ref->blkno;
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Returns the data associated with a block reference, or NULL if there is
+ * no data (e.g. because a full-page image was taken instead) or if the
+ * record has no block reference with the given ID. The returned pointer
+ * points to a MAXALIGNed buffer.
+ *
+ * If 'len' is not NULL, *len is set to the length of the returned data, and
+ * to 0 whenever NULL is returned, so callers never see an unset length.
+ */
+char *
+XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
+{
+ DecodedBkpBlock *bkpb = &record->blocks[block_id];
+
+ /* Unused slot and "no data" are reported the same way: NULL, length 0. */
+ if (!bkpb->in_use || !bkpb->has_data)
+ {
+ if (len)
+ *len = 0;
+ return NULL;
+ }
+
+ if (len)
+ *len = bkpb->data_len;
+ return bkpb->data;
+}
+
+/*
+ * Restore a full-page image from a backup block attached to an XLOG record.
+ *
+ * Copies the image of block reference 'block_id' into 'page', zero-filling
+ * the "hole" that was left out of the stored image, if any.
+ *
+ * Returns true on success, or false if the record has no such block
+ * reference or the reference carries no image; 'page' is not modified in
+ * that case.
+ */
+bool
+RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
+{
+ DecodedBkpBlock *blk = &record->blocks[block_id];
+
+ if (!blk->in_use || !blk->has_image)
+ return false;
+
+ if (blk->hole_length != 0)
+ {
+ /* part before the hole */
+ memcpy(page, blk->bkp_image, blk->hole_offset);
+ /* must zero-fill the hole */
+ MemSet(page + blk->hole_offset, 0, blk->hole_length);
+ /* part after the hole; it directly follows the first part in the image */
+ memcpy(page + (blk->hole_offset + blk->hole_length),
+ blk->bkp_image + blk->hole_offset,
+ BLCKSZ - (blk->hole_offset + blk->hole_length));
+ }
+ else
+ {
+ /* no hole: the image is a complete page */
+ memcpy(page, blk->bkp_image, BLCKSZ);
+ }
+
+ return true;
+}
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index cf04081c19e..ae323a0db87 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -253,9 +253,8 @@ XLogCheckInvalidPages(void)
*
* 'lsn' is the LSN of the record being replayed. It is compared with the
* page's LSN to determine if the record has already been replayed.
- * 'rnode' and 'blkno' point to the block being replayed (main fork number
- * is implied, use XLogReadBufferForRedoExtended for other forks).
- * 'block_index' identifies the backup block in the record for the page.
+ * 'block_id' is the ID number the block was registered with, when the WAL
+ * record was created.
*
* Returns one of the following:
*
@@ -272,15 +271,36 @@ XLogCheckInvalidPages(void)
* single-process crash recovery, but some subroutines such as MarkBufferDirty
* will complain if we don't have the lock. In hot standby mode it's
* definitely necessary.)
+ *
+ * Note: when a backup block is available in XLOG, we restore it
+ * unconditionally, even if the page in the database appears newer. This is
+ * to protect ourselves against database pages that were partially or
+ * incorrectly written during a crash. We assume that the XLOG data must be
+ * good because it has passed a CRC check, while the database page might not
+ * be. This will force us to replay all subsequent modifications of the page
+ * that appear in XLOG, rather than possibly ignoring them as already
+ * applied, but that's not a huge drawback.
*/
XLogRedoAction
-XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
- RelFileNode rnode, BlockNumber blkno,
+XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id,
Buffer *buf)
{
- return XLogReadBufferForRedoExtended(lsn, record, block_index,
- rnode, MAIN_FORKNUM, blkno,
- RBM_NORMAL, false, buf);
+ XLogRedoAction action;
+
+ /* Common case: RBM_NORMAL read mode, and no cleanup lock requested. */
+ action = XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
+ false, buf);
+
+ return action;
+}
+
+/*
+ * Pin and lock a buffer referenced by a WAL record, for the purpose of
+ * re-initializing it.
+ *
+ * The block is fetched in RBM_ZERO_AND_LOCK mode without asking for a
+ * cleanup lock. The action code reported by the underlying call is
+ * ignored; only the buffer is handed back.
+ */
+Buffer
+XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
+{
+ Buffer target;
+
+ (void) XLogReadBufferForRedoExtended(record, block_id, RBM_ZERO_AND_LOCK,
+ false, &target);
+
+ return target;
}
/*
@@ -299,21 +319,54 @@ XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
* using LockBufferForCleanup(), instead of a regular exclusive lock.
*/
XLogRedoAction
-XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
- int block_index, RelFileNode rnode,
- ForkNumber forkno, BlockNumber blkno,
+XLogReadBufferForRedoExtended(XLogReaderState *record,
+ uint8 block_id,
ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf)
{
- if (record->xl_info & XLR_BKP_BLOCK(block_index))
+ XLogRecPtr lsn = record->EndRecPtr;
+ RelFileNode rnode;
+ ForkNumber forknum;
+ BlockNumber blkno;
+ Page page;
+
+ if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
+ {
+ /* Caller specified a bogus block_id */
+ elog(PANIC, "failed to locate backup block with ID %d", block_id);
+ }
+
+ /* If it's a full-page image, restore it. */
+ if (XLogRecHasBlockImage(record, block_id))
{
- *buf = RestoreBackupBlock(lsn, record, block_index,
- get_cleanup_lock, true);
+ *buf = XLogReadBufferExtended(rnode, forknum, blkno,
+ get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
+ page = BufferGetPage(*buf);
+ if (!RestoreBlockImage(record, block_id, page))
+ elog(ERROR, "failed to restore block image");
+
+ /*
+ * The page may be uninitialized. If so, we can't set the LSN because
+ * that would corrupt the page.
+ */
+ if (!PageIsNew(page))
+ {
+ PageSetLSN(page, lsn);
+ }
+
+ MarkBufferDirty(*buf);
+
return BLK_RESTORED;
}
else
{
- *buf = XLogReadBufferExtended(rnode, forkno, blkno, mode);
+ if ((record->blocks[block_id].flags & BKPBLOCK_WILL_INIT) != 0 &&
+ mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
+ {
+ elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
+ }
+
+ *buf = XLogReadBufferExtended(rnode, forknum, blkno, mode);
if (BufferIsValid(*buf))
{
if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
@@ -334,37 +387,6 @@ XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
}
/*
- * XLogReadBuffer
- * Read a page during XLOG replay.
- *
- * This is a shorthand of XLogReadBufferExtended() followed by
- * LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), for reading from the main
- * fork.
- *
- * (Getting the buffer lock is not really necessary during single-process
- * crash recovery, but some subroutines such as MarkBufferDirty will complain
- * if we don't have the lock. In hot standby mode it's definitely necessary.)
- *
- * The returned buffer is exclusively-locked.
- *
- * For historical reasons, instead of a ReadBufferMode argument, this only
- * supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
- * modes.
- */
-Buffer
-XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
-{
- Buffer buf;
-
- buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
- init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
- if (BufferIsValid(buf) && !init)
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
-
- return buf;
-}
-
-/*
* XLogReadBufferExtended
* Read a page during XLOG replay
*
@@ -383,6 +405,11 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
* In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
* exist, and we don't check for all-zeroes. Thus, no log entry is made
* to imply that the page should be dropped or truncated later.
+ *
+ * NB: A redo function should normally not call this directly. To get a page
+ * to modify, use XLogReadBufferForRedo instead. It is important that all pages
+ * modified by a WAL record are registered in the WAL records, or they will be
+ * invisible to tools that need to know which pages are modified.
*/
Buffer
XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
@@ -474,124 +501,6 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
}
/*
- * Restore a full-page image from a backup block attached to an XLOG record.
- *
- * lsn: LSN of the XLOG record being replayed
- * record: the complete XLOG record
- * block_index: which backup block to restore (0 .. XLR_MAX_BKP_BLOCKS - 1)
- * get_cleanup_lock: TRUE to get a cleanup rather than plain exclusive lock
- * keep_buffer: TRUE to return the buffer still locked and pinned
- *
- * Returns the buffer number containing the page. Note this is not terribly
- * useful unless keep_buffer is specified as TRUE.
- *
- * Note: when a backup block is available in XLOG, we restore it
- * unconditionally, even if the page in the database appears newer.
- * This is to protect ourselves against database pages that were partially
- * or incorrectly written during a crash. We assume that the XLOG data
- * must be good because it has passed a CRC check, while the database
- * page might not be. This will force us to replay all subsequent
- * modifications of the page that appear in XLOG, rather than possibly
- * ignoring them as already applied, but that's not a huge drawback.
- *
- * If 'get_cleanup_lock' is true, a cleanup lock is obtained on the buffer,
- * else a normal exclusive lock is used. During crash recovery, that's just
- * pro forma because there can't be any regular backends in the system, but
- * in hot standby mode the distinction is important.
- *
- * If 'keep_buffer' is true, return without releasing the buffer lock and pin;
- * then caller is responsible for doing UnlockReleaseBuffer() later. This
- * is needed in some cases when replaying XLOG records that touch multiple
- * pages, to prevent inconsistent states from being visible to other backends.
- * (Again, that's only important in hot standby mode.)
- */
-Buffer
-RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
- bool get_cleanup_lock, bool keep_buffer)
-{
- BkpBlock bkpb;
- char *blk;
- int i;
-
- /* Locate requested BkpBlock in the record */
- blk = (char *) XLogRecGetData(record) + record->xl_len;
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
- {
- if (!(record->xl_info & XLR_BKP_BLOCK(i)))
- continue;
-
- memcpy(&bkpb, blk, sizeof(BkpBlock));
- blk += sizeof(BkpBlock);
-
- if (i == block_index)
- {
- /* Found it, apply the update */
- return RestoreBackupBlockContents(lsn, bkpb, blk, get_cleanup_lock,
- keep_buffer);
- }
-
- blk += BLCKSZ - bkpb.hole_length;
- }
-
- /* Caller specified a bogus block_index */
- elog(ERROR, "failed to restore block_index %d", block_index);
- return InvalidBuffer; /* keep compiler quiet */
-}
-
-/*
- * Workhorse for RestoreBackupBlock usable without an xlog record
- *
- * Restores a full-page image from BkpBlock and a data pointer.
- */
-Buffer
-RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
- bool get_cleanup_lock, bool keep_buffer)
-{
- Buffer buffer;
- Page page;
-
- buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
- get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
- Assert(BufferIsValid(buffer));
-
- page = (Page) BufferGetPage(buffer);
-
- if (bkpb.hole_length == 0)
- {
- memcpy((char *) page, blk, BLCKSZ);
- }
- else
- {
- memcpy((char *) page, blk, bkpb.hole_offset);
- /* must zero-fill the hole */
- MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
- memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
- blk + bkpb.hole_offset,
- BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
- }
-
- /*
- * The checksum value on this page is currently invalid. We don't need to
- * reset it here since it will be set before being written.
- */
-
- /*
- * The page may be uninitialized. If so, we can't set the LSN because that
- * would corrupt the page.
- */
- if (!PageIsNew(page))
- {
- PageSetLSN(page, lsn);
- }
- MarkBufferDirty(buffer);
-
- if (!keep_buffer)
- UnlockReleaseBuffer(buffer);
-
- return buffer;
-}
-
-/*
* Struct actually returned by XLogFakeRelcacheEntry, though the declared
* return type is Relation.
*/
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 46780e71d69..3f5e1700f06 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -125,7 +125,6 @@ void
log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
{
xl_smgr_create xlrec;
- XLogRecData rdata;
/*
* Make an XLOG entry reporting the file creation.
@@ -133,12 +132,9 @@ log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
xlrec.rnode = *rnode;
xlrec.forkNum = forkNum;
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xlrec);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
-
- XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
}
/*
@@ -268,18 +264,16 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
* Make an XLOG entry reporting the file truncation.
*/
XLogRecPtr lsn;
- XLogRecData rdata;
xl_smgr_truncate xlrec;
xlrec.blkno = nblocks;
xlrec.rnode = rel->rd_node;
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xlrec);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
- lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE, &rdata);
+ lsn = XLogInsert(RM_SMGR_ID,
+ XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
/*
* Flush, because otherwise the truncation of the main relation might
@@ -479,12 +473,13 @@ AtSubAbort_smgr(void)
}
void
-smgr_redo(XLogRecPtr lsn, XLogRecord *record)
+smgr_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ XLogRecPtr lsn = record->EndRecPtr;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in smgr records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_SMGR_CREATE)
{
@@ -505,8 +500,8 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
/*
* Forcibly create relation if it doesn't exist (which suggests that
* it was dropped somewhere later in the WAL sequence). As in
- * XLogReadBuffer, we prefer to recreate the rel and replay the log as
- * best we can until the drop is seen.
+ * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
+ * log as best we can until the drop is seen.
*/
smgrcreate(reln, MAIN_FORKNUM, true);
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 94c82d37410..1a5244cade2 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -619,19 +619,17 @@ createdb(const CreatedbStmt *stmt)
/* Record the filesystem change in XLOG */
{
xl_dbase_create_rec xlrec;
- XLogRecData rdata[1];
xlrec.db_id = dboid;
xlrec.tablespace_id = dsttablespace;
xlrec.src_db_id = src_dboid;
xlrec.src_tablespace_id = srctablespace;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_dbase_create_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
- (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
+ (void) XLogInsert(RM_DBASE_ID,
+ XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
}
}
heap_endscan(scan);
@@ -1226,19 +1224,17 @@ movedb(const char *dbname, const char *tblspcname)
*/
{
xl_dbase_create_rec xlrec;
- XLogRecData rdata[1];
xlrec.db_id = db_id;
xlrec.tablespace_id = dst_tblspcoid;
xlrec.src_db_id = db_id;
xlrec.src_tablespace_id = src_tblspcoid;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_dbase_create_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
- (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
+ (void) XLogInsert(RM_DBASE_ID,
+ XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
}
/*
@@ -1330,17 +1326,15 @@ movedb(const char *dbname, const char *tblspcname)
*/
{
xl_dbase_drop_rec xlrec;
- XLogRecData rdata[1];
xlrec.db_id = db_id;
xlrec.tablespace_id = src_tblspcoid;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_dbase_drop_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
- (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
+ (void) XLogInsert(RM_DBASE_ID,
+ XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
}
/* Now it's safe to release the database lock */
@@ -1870,17 +1864,15 @@ remove_dbtablespaces(Oid db_id)
/* Record the filesystem change in XLOG */
{
xl_dbase_drop_rec xlrec;
- XLogRecData rdata[1];
xlrec.db_id = db_id;
xlrec.tablespace_id = dsttablespace;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_dbase_drop_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
- (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
+ (void) XLogInsert(RM_DBASE_ID,
+ XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
}
pfree(dstpath);
@@ -2043,12 +2035,12 @@ get_database_name(Oid dbid)
* DATABASE resource manager's routines
*/
void
-dbase_redo(XLogRecPtr lsn, XLogRecord *record)
+dbase_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in dbase records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_DBASE_CREATE)
{
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index cb8b27a93c0..ba5b938863c 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -372,20 +372,16 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
{
xl_seq_rec xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
xlrec.node = rel->rd_node;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_seq_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) tuple->t_data;
- rdata[1].len = tuple->t_len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) tuple->t_data, tuple->t_len);
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr);
}
@@ -454,21 +450,17 @@ AlterSequence(AlterSeqStmt *stmt)
{
xl_seq_rec xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
Page page = BufferGetPage(buf);
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
xlrec.node = seqrel->rd_node;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_seq_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
- rdata[1].data = (char *) seqtuple.t_data;
- rdata[1].len = seqtuple.t_len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr);
}
@@ -706,7 +698,6 @@ nextval_internal(Oid relid)
{
xl_seq_rec xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
/*
* We don't log the current state of the tuple, but rather the state
@@ -714,6 +705,8 @@ nextval_internal(Oid relid)
* that many future WAL records, at the cost that we lose those
* sequence values if we crash.
*/
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
/* set values that will be saved in xlog */
seq->last_value = next;
@@ -721,17 +714,11 @@ nextval_internal(Oid relid)
seq->log_cnt = 0;
xlrec.node = seqrel->rd_node;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_seq_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) seqtuple.t_data;
- rdata[1].len = seqtuple.t_len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr);
}
@@ -894,21 +881,16 @@ do_setval(Oid relid, int64 next, bool iscalled)
{
xl_seq_rec xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
Page page = BufferGetPage(buf);
- xlrec.node = seqrel->rd_node;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_seq_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
- rdata[1].data = (char *) seqtuple.t_data;
- rdata[1].len = seqtuple.t_len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ xlrec.node = seqrel->rd_node;
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr);
}
@@ -1552,9 +1534,10 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
void
-seq_redo(XLogRecPtr lsn, XLogRecord *record)
+seq_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ XLogRecPtr lsn = record->EndRecPtr;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
Buffer buffer;
Page page;
Page localpage;
@@ -1563,14 +1546,10 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
sequence_magic *sm;
- /* Backup blocks are not used in seq records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
if (info != XLOG_SEQ_LOG)
elog(PANIC, "seq_redo: unknown op code %u", info);
- buffer = XLogReadBuffer(xlrec->node, 0, true);
- Assert(BufferIsValid(buffer));
+ buffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(buffer);
/*
@@ -1589,7 +1568,7 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
sm->magic = SEQ_MAGIC;
item = (char *) xlrec + sizeof(xl_seq_rec);
- itemsz = record->xl_len - sizeof(xl_seq_rec);
+ itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec);
if (PageAddItem(localpage, (Item) item, itemsz,
FirstOffsetNumber, false, false) == InvalidOffsetNumber)
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index 616308bc2d3..3c9af5776a0 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -354,20 +354,15 @@ CreateTableSpace(CreateTableSpaceStmt *stmt)
/* Record the filesystem change in XLOG */
{
xl_tblspc_create_rec xlrec;
- XLogRecData rdata[2];
xlrec.ts_id = tablespaceoid;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) location;
- rdata[1].len = strlen(location) + 1;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec,
+ offsetof(xl_tblspc_create_rec, ts_path));
+ XLogRegisterData((char *) location, strlen(location) + 1);
- (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata);
+ (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE);
}
/*
@@ -515,15 +510,13 @@ DropTableSpace(DropTableSpaceStmt *stmt)
/* Record the filesystem change in XLOG */
{
xl_tblspc_drop_rec xlrec;
- XLogRecData rdata[1];
xlrec.ts_id = tablespaceoid;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_tblspc_drop_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
- (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec));
+
+ (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP);
}
/*
@@ -1408,12 +1401,12 @@ get_tablespace_name(Oid spc_oid)
* TABLESPACE resource manager's routines
*/
void
-tblspc_redo(XLogRecPtr lsn, XLogRecord *record)
+tblspc_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in tblspc records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_TBLSPC_CREATE)
{
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 8e78aafda7c..1c7dac38fc9 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -31,7 +31,9 @@
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
+#include "access/xlogutils.h"
#include "access/xlogreader.h"
+#include "access/xlogrecord.h"
#include "catalog/pg_control.h"
@@ -46,8 +48,7 @@ typedef struct XLogRecordBuffer
{
XLogRecPtr origptr;
XLogRecPtr endptr;
- XLogRecord record;
- char *record_data;
+ XLogReaderState *record;
} XLogRecordBuffer;
/* RMGR Handlers */
@@ -79,17 +80,16 @@ static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tup);
* context.
*/
void
-LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
+LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *record)
{
XLogRecordBuffer buf;
buf.origptr = ctx->reader->ReadRecPtr;
buf.endptr = ctx->reader->EndRecPtr;
- buf.record = *record;
- buf.record_data = XLogRecGetData(record);
+ buf.record = record;
/* cast so we get a warning when new rmgrs are added */
- switch ((RmgrIds) buf.record.xl_rmid)
+ switch ((RmgrIds) XLogRecGetRmid(record))
{
/*
* Rmgrs we care about for logical decoding. Add new rmgrs in
@@ -135,7 +135,7 @@ LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
case RM_BRIN_ID:
break;
case RM_NEXT_ID:
- elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record.xl_rmid);
+ elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) XLogRecGetRmid(buf.record));
}
}
@@ -146,7 +146,7 @@ static void
DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
SnapBuild *builder = ctx->snapshot_builder;
- uint8 info = buf->record.xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(buf->record) & ~XLR_INFO_MASK;
switch (info)
{
@@ -185,8 +185,8 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
SnapBuild *builder = ctx->snapshot_builder;
ReorderBuffer *reorder = ctx->reorder;
- XLogRecord *r = &buf->record;
- uint8 info = r->xl_info & ~XLR_INFO_MASK;
+ XLogReaderState *r = buf->record;
+ uint8 info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
/* no point in doing anything yet, data could not be decoded anyway */
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
@@ -200,12 +200,12 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
TransactionId *subxacts = NULL;
SharedInvalidationMessage *invals = NULL;
- xlrec = (xl_xact_commit *) buf->record_data;
+ xlrec = (xl_xact_commit *) XLogRecGetData(r);
subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
- DecodeCommit(ctx, buf, r->xl_xid, xlrec->dbId,
+ DecodeCommit(ctx, buf, XLogRecGetXid(r), xlrec->dbId,
xlrec->xact_time,
xlrec->nsubxacts, subxacts,
xlrec->nmsgs, invals);
@@ -220,7 +220,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
SharedInvalidationMessage *invals = NULL;
/* Prepared commits contain a normal commit record... */
- prec = (xl_xact_commit_prepared *) buf->record_data;
+ prec = (xl_xact_commit_prepared *) XLogRecGetData(r);
xlrec = &prec->crec;
subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
@@ -237,9 +237,9 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
xl_xact_commit_compact *xlrec;
- xlrec = (xl_xact_commit_compact *) buf->record_data;
+ xlrec = (xl_xact_commit_compact *) XLogRecGetData(r);
- DecodeCommit(ctx, buf, r->xl_xid, InvalidOid,
+ DecodeCommit(ctx, buf, XLogRecGetXid(r), InvalidOid,
xlrec->xact_time,
xlrec->nsubxacts, xlrec->subxacts,
0, NULL);
@@ -250,11 +250,11 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
xl_xact_abort *xlrec;
TransactionId *sub_xids;
- xlrec = (xl_xact_abort *) buf->record_data;
+ xlrec = (xl_xact_abort *) XLogRecGetData(r);
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
- DecodeAbort(ctx, buf->origptr, r->xl_xid,
+ DecodeAbort(ctx, buf->origptr, XLogRecGetXid(r),
sub_xids, xlrec->nsubxacts);
break;
}
@@ -265,7 +265,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
TransactionId *sub_xids;
/* prepared abort contain a normal commit abort... */
- prec = (xl_xact_abort_prepared *) buf->record_data;
+ prec = (xl_xact_abort_prepared *) XLogRecGetData(r);
xlrec = &prec->arec;
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
@@ -282,7 +282,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
int i;
TransactionId *sub_xid;
- xlrec = (xl_xact_assignment *) buf->record_data;
+ xlrec = (xl_xact_assignment *) XLogRecGetData(r);
sub_xid = &xlrec->xsub[0];
@@ -316,14 +316,14 @@ static void
DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
SnapBuild *builder = ctx->snapshot_builder;
- XLogRecord *r = &buf->record;
- uint8 info = r->xl_info & ~XLR_INFO_MASK;
+ XLogReaderState *r = buf->record;
+ uint8 info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
switch (info)
{
case XLOG_RUNNING_XACTS:
{
- xl_running_xacts *running = (xl_running_xacts *) buf->record_data;
+ xl_running_xacts *running = (xl_running_xacts *) XLogRecGetData(r);
SnapBuildProcessRunningXacts(builder, buf->origptr, running);
@@ -352,8 +352,8 @@ DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void
DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK;
- TransactionId xid = buf->record.xl_xid;
+ uint8 info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
+ TransactionId xid = XLogRecGetXid(buf->record);
SnapBuild *builder = ctx->snapshot_builder;
/* no point in doing anything yet */
@@ -370,7 +370,7 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
xl_heap_new_cid *xlrec;
- xlrec = (xl_heap_new_cid *) buf->record_data;
+ xlrec = (xl_heap_new_cid *) XLogRecGetData(buf->record);
SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
break;
@@ -405,8 +405,8 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void
DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK;
- TransactionId xid = buf->record.xl_xid;
+ uint8 info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
+ TransactionId xid = XLogRecGetXid(buf->record);
SnapBuild *builder = ctx->snapshot_builder;
/* no point in doing anything yet */
@@ -576,34 +576,35 @@ DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
static void
DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- XLogRecord *r = &buf->record;
+ XLogReaderState *r = buf->record;
xl_heap_insert *xlrec;
ReorderBufferChange *change;
+ RelFileNode target_node;
- xlrec = (xl_heap_insert *) buf->record_data;
+ xlrec = (xl_heap_insert *) XLogRecGetData(r);
/* only interested in our database */
- if (xlrec->target.node.dbNode != ctx->slot->data.database)
+ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+ if (target_node.dbNode != ctx->slot->data.database)
return;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_INSERT;
- memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+ memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
{
- Assert(r->xl_len > (SizeOfHeapInsert + SizeOfHeapHeader));
+ Size tuplelen;
+ char *tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
- DecodeXLogTuple((char *) xlrec + SizeOfHeapInsert,
- r->xl_len - SizeOfHeapInsert,
- change->data.tp.newtuple);
+ DecodeXLogTuple(tupledata, tuplelen, change->data.tp.newtuple);
}
change->data.tp.clear_toast_afterwards = true;
- ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
+ ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
}
/*
@@ -615,62 +616,47 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void
DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- XLogRecord *r = &buf->record;
+ XLogReaderState *r = buf->record;
xl_heap_update *xlrec;
- xl_heap_header_len xlhdr;
ReorderBufferChange *change;
char *data;
+ Size datalen;
+ RelFileNode target_node;
- xlrec = (xl_heap_update *) buf->record_data;
+ xlrec = (xl_heap_update *) XLogRecGetData(r);
/* only interested in our database */
- if (xlrec->target.node.dbNode != ctx->slot->data.database)
+ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+ if (target_node.dbNode != ctx->slot->data.database)
return;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_UPDATE;
- memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
-
- /* caution, remaining data in record is not aligned */
- data = buf->record_data + SizeOfHeapUpdate;
+ memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
{
- Assert(r->xl_len > (SizeOfHeapUpdate + SizeOfHeapHeaderLen));
-
- memcpy(&xlhdr, data, sizeof(xlhdr));
- data += offsetof(xl_heap_header_len, header);
+ data = XLogRecGetBlockData(r, 0, &datalen);
change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
- DecodeXLogTuple(data,
- xlhdr.t_len + SizeOfHeapHeader,
- change->data.tp.newtuple);
- /* skip over the rest of the tuple header */
- data += SizeOfHeapHeader;
- /* skip over the tuple data */
- data += xlhdr.t_len;
+ DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
}
if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
{
- memcpy(&xlhdr, data, sizeof(xlhdr));
- data += offsetof(xl_heap_header_len, header);
+ /* caution, remaining data in record is not aligned */
+ data = XLogRecGetData(r) + SizeOfHeapUpdate;
+ datalen = XLogRecGetDataLen(r) - SizeOfHeapUpdate;
change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
- DecodeXLogTuple(data,
- xlhdr.t_len + SizeOfHeapHeader,
- change->data.tp.oldtuple);
-#ifdef NOT_USED
- data += SizeOfHeapHeader;
- data += xlhdr.t_len;
-#endif
+ DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
}
change->data.tp.clear_toast_afterwards = true;
- ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
+ ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
}
/*
@@ -681,36 +667,38 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void
DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- XLogRecord *r = &buf->record;
+ XLogReaderState *r = buf->record;
xl_heap_delete *xlrec;
ReorderBufferChange *change;
+ RelFileNode target_node;
- xlrec = (xl_heap_delete *) buf->record_data;
+ xlrec = (xl_heap_delete *) XLogRecGetData(r);
/* only interested in our database */
- if (xlrec->target.node.dbNode != ctx->slot->data.database)
+ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+ if (target_node.dbNode != ctx->slot->data.database)
return;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_DELETE;
- memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+ memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
/* old primary key stored */
if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
{
- Assert(r->xl_len > (SizeOfHeapDelete + SizeOfHeapHeader));
+ Assert(XLogRecGetDataLen(r) > (SizeOfHeapDelete + SizeOfHeapHeader));
change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
DecodeXLogTuple((char *) xlrec + SizeOfHeapDelete,
- r->xl_len - SizeOfHeapDelete,
+ XLogRecGetDataLen(r) - SizeOfHeapDelete,
change->data.tp.oldtuple);
}
change->data.tp.clear_toast_afterwards = true;
- ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
+ ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
}
/*
@@ -721,27 +709,24 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
static void
DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- XLogRecord *r = &buf->record;
+ XLogReaderState *r = buf->record;
xl_heap_multi_insert *xlrec;
int i;
char *data;
- bool isinit = (r->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
+ char *tupledata;
+ Size tuplelen;
+ RelFileNode rnode;
- xlrec = (xl_heap_multi_insert *) buf->record_data;
+ xlrec = (xl_heap_multi_insert *) XLogRecGetData(r);
/* only interested in our database */
- if (xlrec->node.dbNode != ctx->slot->data.database)
+ XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL);
+ if (rnode.dbNode != ctx->slot->data.database)
return;
- data = buf->record_data + SizeOfHeapMultiInsert;
-
- /*
- * OffsetNumbers (which are not of interest to us) are stored when
- * XLOG_HEAP_INIT_PAGE is not set -- skip over them.
- */
- if (!isinit)
- data += sizeof(OffsetNumber) * xlrec->ntuples;
+ tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
+ data = tupledata;
for (i = 0; i < xlrec->ntuples; i++)
{
ReorderBufferChange *change;
@@ -751,7 +736,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_INSERT;
- memcpy(&change->data.tp.relnode, &xlrec->node, sizeof(RelFileNode));
+ memcpy(&change->data.tp.relnode, &rnode, sizeof(RelFileNode));
/*
* CONTAINS_NEW_TUPLE will always be set currently as multi_insert
@@ -806,9 +791,10 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
else
change->data.tp.clear_toast_afterwards = false;
- ReorderBufferQueueChange(ctx->reorder, r->xl_xid,
+ ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
buf->origptr, change);
}
+ Assert(data == tupledata + tuplelen);
}
/*
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index 875b89a6288..8c318cd4b51 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -34,6 +34,7 @@
#include "miscadmin.h"
#include "access/xact.h"
+#include "access/xlog_internal.h"
#include "replication/decode.h"
#include "replication/logical.h"
@@ -455,12 +456,12 @@ DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
record = XLogReadRecord(ctx->reader, startptr, &err);
if (err)
elog(ERROR, "%s", err);
-
- Assert(record);
+ if (!record)
+ elog(ERROR, "no record found"); /* shouldn't happen */
startptr = InvalidXLogRecPtr;
- LogicalDecodingProcessRecord(ctx, record);
+ LogicalDecodingProcessRecord(ctx, ctx->reader);
/* only continue till we found a consistent spot */
if (DecodingContextReady(ctx))
diff --git a/src/backend/replication/logical/logicalfuncs.c b/src/backend/replication/logical/logicalfuncs.c
index 3a5ec2f61d9..1977f098c79 100644
--- a/src/backend/replication/logical/logicalfuncs.c
+++ b/src/backend/replication/logical/logicalfuncs.c
@@ -21,6 +21,8 @@
#include "funcapi.h"
#include "miscadmin.h"
+#include "access/xlog_internal.h"
+
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
@@ -431,7 +433,7 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin
* store the description into our tuplestore.
*/
if (record != NULL)
- LogicalDecodingProcessRecord(ctx, record);
+ LogicalDecodingProcessRecord(ctx, ctx->reader);
/* check limits */
if (upto_lsn != InvalidXLogRecPtr &&
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 7d8f40738d4..6e75398eabe 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -54,6 +54,7 @@
#include "access/transam.h"
#include "access/tuptoaster.h"
#include "access/xact.h"
+#include "access/xlog_internal.h"
#include "catalog/catalog.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 200b54d7c2a..20f9b04adfa 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -699,7 +699,7 @@ SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
- xlrec->target.node, xlrec->target.tid,
+ xlrec->target_node, xlrec->target_tid,
xlrec->cmin, xlrec->cmax,
xlrec->combocid);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 385d18ba1bb..addae8f6ce5 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -2444,7 +2444,7 @@ XLogSendLogical(void)
if (record != NULL)
{
- LogicalDecodingProcessRecord(logical_decoding_ctx, record);
+ LogicalDecodingProcessRecord(logical_decoding_ctx, logical_decoding_ctx->reader);
sentPtr = logical_decoding_ctx->reader->EndRecPtr;
}
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 8c3720bc737..4269dda66b6 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -759,12 +759,12 @@ StandbyReleaseOldLocks(int nxids, TransactionId *xids)
*/
void
-standby_redo(XLogRecPtr lsn, XLogRecord *record)
+standby_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in standby records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
/* Do nothing if we're not in hot standby mode */
if (standbyState == STANDBY_DISABLED)
@@ -928,8 +928,6 @@ static XLogRecPtr
LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
{
xl_running_xacts xlrec;
- XLogRecData rdata[2];
- int lastrdata = 0;
XLogRecPtr recptr;
xlrec.xcnt = CurrRunningXacts->xcnt;
@@ -940,23 +938,15 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
/* Header */
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactRunningXacts;
- rdata[0].buffer = InvalidBuffer;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
/* array of TransactionIds */
if (xlrec.xcnt > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) CurrRunningXacts->xids;
- rdata[1].len = (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) CurrRunningXacts->xids,
+ (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
- rdata[lastrdata].next = NULL;
-
- recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS, rdata);
+ recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
if (CurrRunningXacts->subxid_overflow)
elog(trace_recovery(DEBUG2),
@@ -996,22 +986,15 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
static void
LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
{
- XLogRecData rdata[2];
xl_standby_locks xlrec;
xlrec.nlocks = nlocks;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = offsetof(xl_standby_locks, locks);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &rdata[1];
-
- rdata[1].data = (char *) locks;
- rdata[1].len = nlocks * sizeof(xl_standby_lock);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
+ XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
- (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK, rdata);
+ (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
}
/*
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index d1f64e58c8c..b90d6b5c7fd 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -754,7 +754,6 @@ write_relmap_file(bool shared, RelMapFile *newmap,
if (write_wal)
{
xl_relmap_update xlrec;
- XLogRecData rdata[2];
XLogRecPtr lsn;
/* now errors are fatal ... */
@@ -764,16 +763,11 @@ write_relmap_file(bool shared, RelMapFile *newmap,
xlrec.tsid = tsid;
xlrec.nbytes = sizeof(RelMapFile);
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfRelmapUpdate;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) newmap;
- rdata[1].len = sizeof(RelMapFile);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate);
+ XLogRegisterData((char *) newmap, sizeof(RelMapFile));
- lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE, rdata);
+ lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE);
/* As always, WAL must hit the disk before the data update does */
XLogFlush(lsn);
@@ -907,12 +901,12 @@ perform_relmap_update(bool shared, const RelMapFile *updates)
* RELMAP resource manager's routines
*/
void
-relmap_redo(XLogRecPtr lsn, XLogRecord *record)
+relmap_redo(XLogReaderState *record)
{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
/* Backup blocks are not used in relmap records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_RELMAP_UPDATE)
{
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 2ba99469825..666e8dbaa24 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -1006,6 +1006,7 @@ WriteEmptyXLOG(void)
char path[MAXPGPATH];
int fd;
int nbytes;
+ char *recptr;
/* Use malloc() to ensure buffer is MAXALIGNED */
buffer = (char *) pg_malloc(XLOG_BLCKSZ);
@@ -1023,18 +1024,21 @@ WriteEmptyXLOG(void)
longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
/* Insert the initial checkpoint record */
- record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
+ recptr = (char *) page + SizeOfXLogLongPHD;
+ record = (XLogRecord *) recptr;
record->xl_prev = 0;
record->xl_xid = InvalidTransactionId;
- record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
- record->xl_len = sizeof(CheckPoint);
+ record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
record->xl_rmid = RM_XLOG_ID;
- memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
+ recptr += SizeOfXLogRecord;
+ *(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
+ *(recptr++) = sizeof(CheckPoint);
+ memcpy(recptr, &ControlFile.checkPointCopy,
sizeof(CheckPoint));
INIT_CRC32C(crc);
- COMP_CRC32C(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
+ COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(crc);
record->xl_crc = crc;
diff --git a/src/include/access/brin_xlog.h b/src/include/access/brin_xlog.h
index d748db4d0c6..6dc9eb3eca8 100644
--- a/src/include/access/brin_xlog.h
+++ b/src/include/access/brin_xlog.h
@@ -14,7 +14,7 @@
#ifndef BRIN_XLOG_H
#define BRIN_XLOG_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/bufpage.h"
#include "storage/itemptr.h"
@@ -42,59 +42,82 @@
*/
#define XLOG_BRIN_INIT_PAGE 0x80
-/* This is what we need to know about a BRIN index create */
+/*
+ * This is what we need to know about a BRIN index create.
+ *
+ * Backup block 0: metapage
+ */
typedef struct xl_brin_createidx
{
BlockNumber pagesPerRange;
- RelFileNode node;
uint16 version;
} xl_brin_createidx;
#define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16))
/*
* This is what we need to know about a BRIN tuple insert
+ *
+ * Backup block 0: main page, block data is the new BrinTuple.
+ * Backup block 1: revmap page
*/
typedef struct xl_brin_insert
{
- RelFileNode node;
BlockNumber heapBlk;
/* extra information needed to update the revmap */
- BlockNumber revmapBlk;
BlockNumber pagesPerRange;
- uint16 tuplen;
- ItemPointerData tid;
- /* tuple data follows at end of struct */
+ /* offset number on the main page at which to insert the tuple */
+ OffsetNumber offnum;
} xl_brin_insert;
-#define SizeOfBrinInsert (offsetof(xl_brin_insert, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinInsert (offsetof(xl_brin_insert, offnum) + sizeof(OffsetNumber))
/*
- * A cross-page update is the same as an insert, but also store the old tid.
+ * A cross-page update is the same as an insert, but also stores information
+ * about the old tuple.
+ *
+ * Like in xl_brin_insert:
+ * Backup block 0: new page, block data includes the new BrinTuple.
+ * Backup block 1: revmap page
+ *
+ * And in addition:
+ * Backup block 2: old page
*/
typedef struct xl_brin_update
{
- ItemPointerData oldtid;
+ /* offset number of old tuple on old page */
+ OffsetNumber oldOffnum;
+
xl_brin_insert insert;
} xl_brin_update;
#define SizeOfBrinUpdate (offsetof(xl_brin_update, insert) + SizeOfBrinInsert)
-/* This is what we need to know about a BRIN tuple samepage update */
+/*
+ * This is what we need to know about a BRIN tuple samepage update
+ *
+ * Backup block 0: updated page, with new BrinTuple as block data
+ */
typedef struct xl_brin_samepage_update
{
- RelFileNode node;
- ItemPointerData tid;
- /* tuple data follows at end of struct */
+ OffsetNumber offnum;
} xl_brin_samepage_update;
-#define SizeOfBrinSamepageUpdate (offsetof(xl_brin_samepage_update, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinSamepageUpdate (sizeof(OffsetNumber))
-/* This is what we need to know about a revmap extension */
+/*
+ * This is what we need to know about a revmap extension
+ *
+ * Backup block 0: metapage
+ * Backup block 1: new revmap page
+ */
typedef struct xl_brin_revmap_extend
{
- RelFileNode node;
+ /*
+ * XXX: This is actually redundant - the block number is stored as part of
+ * backup block 1.
+ */
BlockNumber targetBlk;
} xl_brin_revmap_extend;
@@ -102,8 +125,8 @@ typedef struct xl_brin_revmap_extend
sizeof(BlockNumber))
-extern void brin_desc(StringInfo buf, XLogRecord *record);
-extern void brin_redo(XLogRecPtr lsn, XLogRecord *record);
+extern void brin_redo(XLogReaderState *record);
+extern void brin_desc(StringInfo buf, XLogReaderState *record);
extern const char *brin_identify(uint8 info);
#endif /* BRIN_XLOG_H */
diff --git a/src/include/access/clog.h b/src/include/access/clog.h
index 04ac4ba3119..fe5e4c634d1 100644
--- a/src/include/access/clog.h
+++ b/src/include/access/clog.h
@@ -11,7 +11,7 @@
#ifndef CLOG_H
#define CLOG_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
/*
@@ -48,8 +48,8 @@ extern void TruncateCLOG(TransactionId oldestXact);
#define CLOG_ZEROPAGE 0x00
#define CLOG_TRUNCATE 0x10
-extern void clog_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void clog_desc(StringInfo buf, XLogRecord *record);
+extern void clog_redo(XLogReaderState *record);
+extern void clog_desc(StringInfo buf, XLogReaderState *record);
extern const char *clog_identify(uint8 info);
#endif /* CLOG_H */
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 433e56f20df..fe5f77b1736 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -10,7 +10,7 @@
#ifndef GIN_H
#define GIN_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/block.h"
#include "utils/relcache.h"
@@ -74,8 +74,8 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
extern void ginUpdateStats(Relation index, const GinStatsData *stats);
/* ginxlog.c */
-extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void gin_desc(StringInfo buf, XLogRecord *record);
+extern void gin_redo(XLogReaderState *record);
+extern void gin_desc(StringInfo buf, XLogReaderState *record);
extern const char *gin_identify(uint8 info);
extern void gin_xlog_startup(void);
extern void gin_xlog_cleanup(void);
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 333316d78e2..3d46f20bb83 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -13,7 +13,6 @@
#include "access/genam.h"
#include "access/gin.h"
#include "access/itup.h"
-#include "access/xloginsert.h"
#include "fmgr.h"
#include "storage/bufmgr.h"
#include "utils/rbtree.h"
@@ -397,22 +396,22 @@ typedef struct
typedef struct ginxlogCreatePostingTree
{
- RelFileNode node;
- BlockNumber blkno;
uint32 size;
/* A compressed posting list follows */
} ginxlogCreatePostingTree;
-#define XLOG_GIN_INSERT 0x20
-
/*
* The format of the insertion record varies depending on the page type.
* ginxlogInsert is the common part between all variants.
+ *
+ * Backup Blk 0: target page
+ * Backup Blk 1: left child, if this insertion finishes an incomplete split
*/
+
+#define XLOG_GIN_INSERT 0x20
+
typedef struct
{
- RelFileNode node;
- BlockNumber blkno;
uint16 flags; /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
/*
@@ -477,14 +476,17 @@ typedef struct
PostingItem newitem;
} ginxlogInsertDataInternal;
-
+/*
+ * Backup Blk 0: new left page (= original page, if not root split)
+ * Backup Blk 1: new right page
+ * Backup Blk 2: original page / new root page, if root split
+ * Backup Blk 3: left child, if this insertion completes an earlier split
+ */
#define XLOG_GIN_SPLIT 0x30
typedef struct ginxlogSplit
{
RelFileNode node;
- BlockNumber lblkno;
- BlockNumber rblkno;
BlockNumber rrlink; /* right link, or root's blocknumber if root
* split */
BlockNumber leftChildBlkno; /* valid on a non-leaf split */
@@ -538,15 +540,6 @@ typedef struct
*/
#define XLOG_GIN_VACUUM_PAGE 0x40
-typedef struct ginxlogVacuumPage
-{
- RelFileNode node;
- BlockNumber blkno;
- uint16 hole_offset; /* number of bytes before "hole" */
- uint16 hole_length; /* number of bytes in "hole" */
- /* entire page contents (minus the hole) follow at end of record */
-} ginxlogVacuumPage;
-
/*
* Vacuuming posting tree leaf page is WAL-logged like recompression caused
* by insertion.
@@ -555,26 +548,28 @@ typedef struct ginxlogVacuumPage
typedef struct ginxlogVacuumDataLeafPage
{
- RelFileNode node;
- BlockNumber blkno;
-
ginxlogRecompressDataLeaf data;
} ginxlogVacuumDataLeafPage;
+/*
+ * Backup Blk 0: deleted page
+ * Backup Blk 1: parent
+ * Backup Blk 2: left sibling
+ */
#define XLOG_GIN_DELETE_PAGE 0x50
typedef struct ginxlogDeletePage
{
- RelFileNode node;
- BlockNumber blkno;
- BlockNumber parentBlkno;
OffsetNumber parentOffset;
- BlockNumber leftBlkno;
BlockNumber rightLink;
} ginxlogDeletePage;
#define XLOG_GIN_UPDATE_META_PAGE 0x60
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1: tail page
+ */
typedef struct ginxlogUpdateMeta
{
RelFileNode node;
@@ -591,22 +586,29 @@ typedef struct ginxlogUpdateMeta
typedef struct ginxlogInsertListPage
{
- RelFileNode node;
- BlockNumber blkno;
BlockNumber rightlink;
int32 ntuples;
/* array of inserted tuples follows */
} ginxlogInsertListPage;
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1 to ndeleted: deleted pages
+ */
+
#define XLOG_GIN_DELETE_LISTPAGE 0x80
-#define GIN_NDELETE_AT_ONCE 16
+/*
+ * The WAL record for deleting list pages must contain a block reference to
+ * all the deleted pages, so the number of pages that can be deleted in one
+ * record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
+ * metapage.)
+ */
+#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
typedef struct ginxlogDeleteListPages
{
- RelFileNode node;
GinMetaPageData metadata;
int32 ndeleted;
- BlockNumber toDelete[GIN_NDELETE_AT_ONCE];
} ginxlogDeleteListPages;
@@ -673,7 +675,7 @@ typedef struct GinBtreeData
/* insert methods */
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
- GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, XLogRecData **, Page *, Page *);
+ GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, Page *, Page *);
void *(*prepareDownlink) (GinBtree, Buffer);
void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 21daf3b2b6a..2cbc918ad1a 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -16,7 +16,7 @@
#include "access/gist.h"
#include "access/itup.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "fmgr.h"
#include "storage/bufmgr.h"
#include "storage/buffile.h"
@@ -185,34 +185,33 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
#define XLOG_GIST_CREATE_INDEX 0x50
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
+/*
+ * Backup Blk 0: updated page.
+ * Backup Blk 1: If this operation completes a page split, by inserting a
+ * downlink for the split page, the left half of the split
+ */
typedef struct gistxlogPageUpdate
{
- RelFileNode node;
- BlockNumber blkno;
-
- /*
- * If this operation completes a page split, by inserting a downlink for
- * the split page, leftchild points to the left half of the split.
- */
- BlockNumber leftchild;
-
/* number of deleted offsets */
uint16 ntodelete;
+ uint16 ntoinsert;
/*
- * follow: 1. todelete OffsetNumbers 2. tuples to insert
+ * In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert
*/
} gistxlogPageUpdate;
+/*
+ * Backup Blk 0: If this operation completes a page split, by inserting a
+ * downlink for the split page, the left half of the split
+ * Backup Blk 1 - npage: split pages (1 is the original page)
+ */
typedef struct gistxlogPageSplit
{
- RelFileNode node;
- BlockNumber origblkno; /* splitted page */
BlockNumber origrlink; /* rightlink of the page before split */
GistNSN orignsn; /* NSN of the page before split */
bool origleaf; /* was splitted page a leaf page? */
- BlockNumber leftchild; /* like in gistxlogPageUpdate */
uint16 npage; /* # of pages in the split */
bool markfollowright; /* set F_FOLLOW_RIGHT flags */
@@ -451,8 +450,8 @@ extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
/* gistxlog.c */
-extern void gist_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void gist_desc(StringInfo buf, XLogRecord *record);
+extern void gist_redo(XLogReaderState *record);
+extern void gist_desc(StringInfo buf, XLogReaderState *record);
extern const char *gist_identify(uint8 info);
extern void gist_xlog_startup(void);
extern void gist_xlog_cleanup(void);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index c175a5c1822..afd06ff7def 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -20,7 +20,7 @@
#include "access/genam.h"
#include "access/itup.h"
#include "access/sdir.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
@@ -356,8 +356,8 @@ extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value);
/* hash.c */
-extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void hash_desc(StringInfo buf, XLogRecord *record);
+extern void hash_redo(XLogReaderState *record);
+extern void hash_desc(StringInfo buf, XLogReaderState *record);
extern const char *hash_identify(uint8 info);
#endif /* HASH_H */
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 1d64264b010..853e2dd491f 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -15,7 +15,7 @@
#define HEAPAM_XLOG_H
#include "access/htup.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
@@ -78,27 +78,11 @@
#define XLOG_HEAP_CONTAINS_OLD \
(XLOG_HEAP_CONTAINS_OLD_TUPLE | XLOG_HEAP_CONTAINS_OLD_KEY)
-/*
- * All what we need to find changed tuple
- *
- * NB: on most machines, sizeof(xl_heaptid) will include some trailing pad
- * bytes for alignment. We don't want to store the pad space in the XLOG,
- * so use SizeOfHeapTid for space calculations. Similar comments apply for
- * the other xl_FOO structs.
- */
-typedef struct xl_heaptid
-{
- RelFileNode node;
- ItemPointerData tid; /* changed tuple id */
-} xl_heaptid;
-
-#define SizeOfHeapTid (offsetof(xl_heaptid, tid) + SizeOfIptrData)
-
/* This is what we need to know about delete */
typedef struct xl_heap_delete
{
- xl_heaptid target; /* deleted tuple id */
TransactionId xmax; /* xmax of the deleted tuple */
+ OffsetNumber offnum; /* deleted tuple's offset */
uint8 infobits_set; /* infomask bits */
uint8 flags;
} xl_heap_delete;
@@ -122,45 +106,33 @@ typedef struct xl_heap_header
#define SizeOfHeapHeader (offsetof(xl_heap_header, t_hoff) + sizeof(uint8))
-/*
- * Variant of xl_heap_header that contains the length of the tuple, which is
- * useful if the length of the tuple cannot be computed using the overall
- * record length. E.g. because there are several tuples inside a single
- * record.
- */
-typedef struct xl_heap_header_len
-{
- uint16 t_len;
- xl_heap_header header;
-} xl_heap_header_len;
-
-#define SizeOfHeapHeaderLen (offsetof(xl_heap_header_len, header) + SizeOfHeapHeader)
-
/* This is what we need to know about insert */
typedef struct xl_heap_insert
{
- xl_heaptid target; /* inserted tuple id */
+ OffsetNumber offnum; /* inserted tuple's offset */
uint8 flags;
- /* xl_heap_header & TUPLE DATA FOLLOWS AT END OF STRUCT */
+
+ /* xl_heap_header & TUPLE DATA in backup block 0 */
} xl_heap_insert;
#define SizeOfHeapInsert (offsetof(xl_heap_insert, flags) + sizeof(uint8))
/*
- * This is what we need to know about a multi-insert. The record consists of
- * xl_heap_multi_insert header, followed by a xl_multi_insert_tuple and tuple
- * data for each tuple. 'offsets' array is omitted if the whole page is
- * reinitialized (XLOG_HEAP_INIT_PAGE)
+ * This is what we need to know about a multi-insert.
+ *
+ * The main data of the record consists of this xl_heap_multi_insert header.
+ * 'offsets' array is omitted if the whole page is reinitialized
+ * (XLOG_HEAP_INIT_PAGE).
+ *
+ * In block 0's data portion, there is an xl_multi_insert_tuple struct,
+ * followed by the tuple data for each tuple. There is padding to align
+ * each xl_multi_insert_tuple struct.
*/
typedef struct xl_heap_multi_insert
{
- RelFileNode node;
- BlockNumber blkno;
uint8 flags;
uint16 ntuples;
OffsetNumber offsets[1];
-
- /* TUPLE DATA (xl_multi_insert_tuples) FOLLOW AT END OF STRUCT */
} xl_heap_multi_insert;
#define SizeOfHeapMultiInsert offsetof(xl_heap_multi_insert, offsets)
@@ -176,34 +148,39 @@ typedef struct xl_multi_insert_tuple
#define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))
-/* This is what we need to know about update|hot_update */
+/*
+ * This is what we need to know about update|hot_update
+ *
+ * Backup blk 0: new page
+ *
+ * If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are set,
+ * the prefix and/or suffix come first, as one or two uint16s.
+ *
+ * After that, xl_heap_header and new tuple data follow. The new tuple
+ * data doesn't include the prefix and suffix, which are copied from the
+ * old tuple on replay.
+ *
+ * If XLOG_HEAP_CONTAINS_NEW_TUPLE flag is given, the tuple data is
+ * included even if a full-page image was taken.
+ *
+ * Backup blk 1: old page, if different. (no data, just a reference to the blk)
+ */
typedef struct xl_heap_update
{
- xl_heaptid target; /* deleted tuple id */
TransactionId old_xmax; /* xmax of the old tuple */
- TransactionId new_xmax; /* xmax of the new tuple */
- ItemPointerData newtid; /* new inserted tuple id */
+ OffsetNumber old_offnum; /* old tuple's offset */
uint8 old_infobits_set; /* infomask bits to set on old tuple */
uint8 flags;
+ TransactionId new_xmax; /* xmax of the new tuple */
+ OffsetNumber new_offnum; /* new tuple's offset */
/*
- * If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are
- * set, the prefix and/or suffix come next, as one or two uint16s.
- *
- * After that, xl_heap_header_len and new tuple data follow. The new
- * tuple data and length don't include the prefix and suffix, which are
- * copied from the old tuple on replay. The new tuple data is omitted if
- * a full-page image of the page was taken (unless the
- * XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set, in which case it's included
- * anyway).
- *
* If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are
- * set, another xl_heap_header_len struct and tuple data for the old tuple
- * follows.
+ * set, an xl_heap_header struct and tuple data for the old tuple follow.
*/
} xl_heap_update;
-#define SizeOfHeapUpdate (offsetof(xl_heap_update, flags) + sizeof(uint8))
+#define SizeOfHeapUpdate (offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber))
/*
* This is what we need to know about vacuum page cleanup/redirect
@@ -218,12 +195,10 @@ typedef struct xl_heap_update
*/
typedef struct xl_heap_clean
{
- RelFileNode node;
- BlockNumber block;
TransactionId latestRemovedXid;
uint16 nredirected;
uint16 ndead;
- /* OFFSET NUMBERS FOLLOW */
+ /* OFFSET NUMBERS are in the block reference 0 */
} xl_heap_clean;
#define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
@@ -251,8 +226,8 @@ typedef struct xl_heap_cleanup_info
/* This is what we need to know about lock */
typedef struct xl_heap_lock
{
- xl_heaptid target; /* locked tuple id */
TransactionId locking_xid; /* might be a MultiXactId not xid */
+ OffsetNumber offnum; /* locked tuple's offset on page */
int8 infobits_set; /* infomask and infomask2 bits to set */
} xl_heap_lock;
@@ -261,8 +236,8 @@ typedef struct xl_heap_lock
/* This is what we need to know about locking an updated version of a row */
typedef struct xl_heap_lock_updated
{
- xl_heaptid target;
TransactionId xmax;
+ OffsetNumber offnum;
uint8 infobits_set;
} xl_heap_lock_updated;
@@ -271,11 +246,11 @@ typedef struct xl_heap_lock_updated
/* This is what we need to know about in-place update */
typedef struct xl_heap_inplace
{
- xl_heaptid target; /* updated tuple id */
+ OffsetNumber offnum; /* updated tuple's offset on page */
/* TUPLE DATA FOLLOWS AT END OF STRUCT */
} xl_heap_inplace;
-#define SizeOfHeapInplace (offsetof(xl_heap_inplace, target) + SizeOfHeapTid)
+#define SizeOfHeapInplace (offsetof(xl_heap_inplace, offnum) + sizeof(OffsetNumber))
/*
* This struct represents a 'freeze plan', which is what we need to know about
@@ -296,23 +271,26 @@ typedef struct xl_heap_freeze_tuple
/*
* This is what we need to know about a block being frozen during vacuum
+ *
+ * Backup block 0's data contains an array of xl_heap_freeze_tuple structs,
+ * one for each tuple.
*/
typedef struct xl_heap_freeze_page
{
- RelFileNode node;
- BlockNumber block;
TransactionId cutoff_xid;
uint16 ntuples;
- xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER];
} xl_heap_freeze_page;
-#define SizeOfHeapFreezePage offsetof(xl_heap_freeze_page, tuples)
+#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, ntuples) + sizeof(uint16))
-/* This is what we need to know about setting a visibility map bit */
+/*
+ * This is what we need to know about setting a visibility map bit
+ *
+ * Backup blk 0: visibility map buffer
+ * Backup blk 1: heap buffer
+ */
typedef struct xl_heap_visible
{
- RelFileNode node;
- BlockNumber block;
TransactionId cutoff_xid;
} xl_heap_visible;
@@ -338,10 +316,11 @@ typedef struct xl_heap_new_cid
/*
* Store the relfilenode/ctid pair to facilitate lookups.
*/
- xl_heaptid target;
+ RelFileNode target_node;
+ ItemPointerData target_tid;
} xl_heap_new_cid;
-#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target) + SizeOfHeapTid)
+#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target_tid) + sizeof(ItemPointerData))
/* logical rewrite xlog record header */
typedef struct xl_heap_rewrite_mapping
@@ -357,13 +336,13 @@ typedef struct xl_heap_rewrite_mapping
extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
TransactionId *latestRemovedXid);
-extern void heap_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void heap_desc(StringInfo buf, XLogRecord *record);
+extern void heap_redo(XLogReaderState *record);
+extern void heap_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap_identify(uint8 info);
-extern void heap2_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void heap2_desc(StringInfo buf, XLogRecord *record);
+extern void heap2_redo(XLogReaderState *record);
+extern void heap2_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap2_identify(uint8 info);
-extern void heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r);
+extern void heap_xlog_logical_rewrite(XLogReaderState *r);
extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode,
TransactionId latestRemovedXid);
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index 294d21bd180..300c2a52f02 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -498,6 +498,7 @@ do { \
* you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page.
*/
#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)))
+#define MinHeapTupleSize MAXALIGN(offsetof(HeapTupleHeaderData, t_bits))
/*
* MaxHeapTuplesPerPage is an upper bound on the number of tuples that can
diff --git a/src/include/access/itup.h b/src/include/access/itup.h
index de17936b106..e4dc51e8720 100644
--- a/src/include/access/itup.h
+++ b/src/include/access/itup.h
@@ -133,6 +133,7 @@ typedef IndexAttributeBitMapData *IndexAttributeBitMap;
* IndexTupleData struct. We arrive at the divisor because each tuple
* must be maxaligned, and it must have an associated item pointer.
*/
+#define MinIndexTupleSize MAXALIGN(sizeof(IndexTupleData) + 1)
#define MaxIndexTuplesPerPage \
((int) ((BLCKSZ - SizeOfPageHeaderData) / \
(MAXALIGN(sizeof(IndexTupleData) + 1) + sizeof(ItemIdData))))
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 43d737505d2..ac58a3766d5 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -11,7 +11,7 @@
#ifndef MULTIXACT_H
#define MULTIXACT_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
@@ -135,8 +135,8 @@ extern void multixact_twophase_postcommit(TransactionId xid, uint16 info,
extern void multixact_twophase_postabort(TransactionId xid, uint16 info,
void *recdata, uint32 len);
-extern void multixact_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void multixact_desc(StringInfo buf, XLogRecord *record);
+extern void multixact_redo(XLogReaderState *record);
+extern void multixact_desc(StringInfo buf, XLogReaderState *record);
extern const char *multixact_identify(uint8 info);
extern char *mxid_to_string(MultiXactId multi, int nmembers,
MultiXactMember *members);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 6ecd2ced62d..d3d258bcc9f 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -17,7 +17,7 @@
#include "access/genam.h"
#include "access/itup.h"
#include "access/sdir.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "catalog/pg_index.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
@@ -228,15 +228,6 @@ typedef struct BTMetaPageData
* FSM */
/*
- * All that we need to find changed index tuple
- */
-typedef struct xl_btreetid
-{
- RelFileNode node;
- ItemPointerData tid; /* changed tuple id */
-} xl_btreetid;
-
-/*
* All that we need to regenerate the meta-data page
*/
typedef struct xl_btree_metadata
@@ -252,16 +243,17 @@ typedef struct xl_btree_metadata
*
* This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
* Note that INSERT_META implies it's not a leaf page.
+ *
+ * Backup Blk 0: original page (data contains the inserted tuple)
+ * Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
+ * Backup Blk 2: xl_btree_metadata, if INSERT_META
*/
typedef struct xl_btree_insert
{
- xl_btreetid target; /* inserted tuple id */
- /* BlockNumber finishes_split field FOLLOWS IF NOT XLOG_BTREE_INSERT_LEAF */
- /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */
- /* INDEX TUPLE FOLLOWS AT END OF STRUCT */
+ OffsetNumber offnum;
} xl_btree_insert;
-#define SizeOfBtreeInsert (offsetof(xl_btreetid, tid) + SizeOfIptrData)
+#define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
/*
* On insert with split, we save all the items going into the right sibling
@@ -278,45 +270,41 @@ typedef struct xl_btree_insert
* the root page, and thus that a newroot record rather than an insert or
* split record should follow. Note that a split record never carries a
* metapage update --- we'll do that in the parent-level update.
+ *
+ * Backup Blk 0: original page / new left page
+ *
+ * The left page's data portion contains the new item, if it's the _L variant.
+ * (In the _R variants, the new item is one of the right page's tuples.)
+ * If level > 0, an IndexTuple representing the HIKEY of the left page
+ * follows. We don't need this on leaf pages, because it's the same as the
+ * leftmost key in the new right page.
+ *
+ * Backup Blk 1: new right page
+ *
+ * The right page's data portion contains the right page's tuples in the
+ * form used by _bt_restore_page.
+ *
+ * Backup Blk 2: next block (orig page's rightlink), if any
+ * Backup Blk 3: child's left sibling, if non-leaf split
*/
typedef struct xl_btree_split
{
- RelFileNode node;
- BlockNumber leftsib; /* orig page / new left page */
- BlockNumber rightsib; /* new right page */
- BlockNumber rnext; /* next block (orig page's rightlink) */
uint32 level; /* tree level of page being split */
OffsetNumber firstright; /* first item moved to right page */
-
- /*
- * In the _L variants, next are OffsetNumber newitemoff and the new item.
- * (In the _R variants, the new item is one of the right page's tuples.)
- * The new item, but not newitemoff, is suppressed if XLogInsert chooses
- * to store the left page's whole page image.
- *
- * If level > 0, an IndexTuple representing the HIKEY of the left page
- * follows. We don't need this on leaf pages, because it's the same as
- * the leftmost key in the new right page. Also, it's suppressed if
- * XLogInsert chooses to store the left page's whole page image.
- *
- * If level > 0, BlockNumber of the page whose incomplete-split flag this
- * insertion clears. (not aligned)
- *
- * Last are the right page's tuples in the form used by _bt_restore_page.
- */
+ OffsetNumber newitemoff; /* new item's offset (if placed on left page) */
} xl_btree_split;
-#define SizeOfBtreeSplit (offsetof(xl_btree_split, firstright) + sizeof(OffsetNumber))
+#define SizeOfBtreeSplit (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
/*
* This is what we need to know about delete of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a
* single index page when *not* executed by VACUUM.
+ *
+ * Backup Blk 0: index page
*/
typedef struct xl_btree_delete
{
- RelFileNode node; /* RelFileNode of the index */
- BlockNumber block;
RelFileNode hnode; /* RelFileNode of the heap the index currently
* points at */
int nitems;
@@ -361,8 +349,6 @@ typedef struct xl_btree_reuse_page
*/
typedef struct xl_btree_vacuum
{
- RelFileNode node;
- BlockNumber block;
BlockNumber lastBlockVacuumed;
/* TARGET OFFSET NUMBERS FOLLOW */
@@ -376,10 +362,13 @@ typedef struct xl_btree_vacuum
* remove this tuple's downlink and the *following* tuple's key). Note that
* the leaf page is empty, so we don't need to store its content --- it is
* just reinitialized during recovery using the rest of the fields.
+ *
+ * Backup Blk 0: leaf block
+ * Backup Blk 1: top parent
*/
typedef struct xl_btree_mark_page_halfdead
{
- xl_btreetid target; /* deleted tuple id in parent page */
+ OffsetNumber poffset; /* deleted tuple's offset in parent page */
/* information needed to recreate the leaf page: */
BlockNumber leafblk; /* leaf block ultimately being deleted */
@@ -394,11 +383,15 @@ typedef struct xl_btree_mark_page_halfdead
* This is what we need to know about deletion of a btree page. Note we do
* not store any content for the deleted page --- it is just rewritten as empty
* during recovery, apart from resetting the btpo.xact.
+ *
+ * Backup Blk 0: target block being deleted
+ * Backup Blk 1: target block's left sibling, if any
+ * Backup Blk 2: target block's right sibling
+ * Backup Blk 3: leaf block (if different from target)
+ * Backup Blk 4: metapage (if rightsib becomes new fast root)
*/
typedef struct xl_btree_unlink_page
{
- RelFileNode node;
- BlockNumber deadblk; /* target block being deleted */
BlockNumber leftsib; /* target block's left sibling, if any */
BlockNumber rightsib; /* target block's right sibling */
@@ -406,7 +399,6 @@ typedef struct xl_btree_unlink_page
* Information needed to recreate the leaf page, when target is an
* internal page.
*/
- BlockNumber leafblk;
BlockNumber leafleftsib;
BlockNumber leafrightsib;
BlockNumber topparent; /* next child down in the branch */
@@ -423,13 +415,15 @@ typedef struct xl_btree_unlink_page
*
* Note that although this implies rewriting the metadata page, we don't need
* an xl_btree_metadata record --- the rootblk and level are sufficient.
+ *
+ * Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
+ * Backup Blk 1: left child (if splitting an old root)
+ * Backup Blk 2: metapage
*/
typedef struct xl_btree_newroot
{
- RelFileNode node;
- BlockNumber rootblk; /* location of new root */
+ BlockNumber rootblk; /* location of new root (redundant with blk 0) */
uint32 level; /* its tree level */
- /* 0 or 2 INDEX TUPLES FOLLOW AT END OF STRUCT */
} xl_btree_newroot;
#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32))
@@ -726,8 +720,8 @@ extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
/*
* prototypes for functions in nbtxlog.c
*/
-extern void btree_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void btree_desc(StringInfo buf, XLogRecord *record);
+extern void btree_redo(XLogReaderState *record);
+extern void btree_desc(StringInfo buf, XLogReaderState *record);
extern const char *btree_identify(uint8 info);
#endif /* NBTREE_H */
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index ccf1ed77869..3aa96bde86f 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -15,7 +15,7 @@
#define SPGIST_H
#include "access/skey.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
@@ -197,8 +197,8 @@ extern Datum spgbulkdelete(PG_FUNCTION_ARGS);
extern Datum spgvacuumcleanup(PG_FUNCTION_ARGS);
/* spgxlog.c */
-extern void spg_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void spg_desc(StringInfo buf, XLogRecord *record);
+extern void spg_redo(XLogReaderState *record);
+extern void spg_desc(StringInfo buf, XLogReaderState *record);
extern const char *spg_identify(uint8 info);
extern void spg_xlog_startup(void);
extern void spg_xlog_cleanup(void);
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h
index 3330644651c..4b6fdee8017 100644
--- a/src/include/access/spgist_private.h
+++ b/src/include/access/spgist_private.h
@@ -18,7 +18,6 @@
#include "access/spgist.h"
#include "nodes/tidbitmap.h"
#include "storage/buf.h"
-#include "storage/relfilenode.h"
#include "utils/relcache.h"
@@ -351,35 +350,8 @@ typedef SpGistDeadTupleData *SpGistDeadTuple;
/*
* XLOG stuff
- *
- * ACCEPT_RDATA_* can only use fixed-length rdata arrays, because of lengthof
*/
-#define ACCEPT_RDATA_DATA(p, s, i) \
- do { \
- Assert((i) < lengthof(rdata)); \
- rdata[i].data = (char *) (p); \
- rdata[i].len = (s); \
- rdata[i].buffer = InvalidBuffer; \
- rdata[i].buffer_std = true; \
- rdata[i].next = NULL; \
- if ((i) > 0) \
- rdata[(i) - 1].next = rdata + (i); \
- } while(0)
-
-#define ACCEPT_RDATA_BUFFER(b, i) \
- do { \
- Assert((i) < lengthof(rdata)); \
- rdata[i].data = NULL; \
- rdata[i].len = 0; \
- rdata[i].buffer = (b); \
- rdata[i].buffer_std = true; \
- rdata[i].next = NULL; \
- if ((i) > 0) \
- rdata[(i) - 1].next = rdata + (i); \
- } while(0)
-
-
/* XLOG record types for SPGiST */
#define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10
@@ -408,36 +380,36 @@ typedef struct spgxlogState
(d).isBuild = (s)->isBuild; \
} while(0)
-
+/*
+ * Backup Blk 0: destination page for leaf tuple
+ * Backup Blk 1: parent page (if any)
+ */
typedef struct spgxlogAddLeaf
{
- RelFileNode node;
-
- BlockNumber blknoLeaf; /* destination page for leaf tuple */
bool newPage; /* init dest page? */
bool storesNulls; /* page is in the nulls tree? */
OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */
OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */
- BlockNumber blknoParent; /* where the parent downlink is, if any */
- OffsetNumber offnumParent;
+ OffsetNumber offnumParent; /* where the parent downlink is, if any */
uint16 nodeI;
/* new leaf tuple follows (unaligned!) */
} spgxlogAddLeaf;
+/*
+ * Backup Blk 0: source leaf page
+ * Backup Blk 1: destination leaf page
+ * Backup Blk 2: parent page
+ */
typedef struct spgxlogMoveLeafs
{
- RelFileNode node;
-
- BlockNumber blknoSrc; /* source leaf page */
- BlockNumber blknoDst; /* destination leaf page */
uint16 nMoves; /* number of tuples moved from source page */
bool newPage; /* init dest page? */
bool replaceDead; /* are we replacing a DEAD source tuple? */
bool storesNulls; /* pages are in the nulls tree? */
- BlockNumber blknoParent; /* where the parent downlink is */
+ /* where the parent downlink is */
OffsetNumber offnumParent;
uint16 nodeI;
@@ -452,11 +424,6 @@ typedef struct spgxlogMoveLeafs
* Note: if replaceDead is true then there is only one inserted tuple
* number and only one leaf tuple in the data, because we are not copying
* the dead tuple from the source
- *
- * Buffer references in the rdata array are:
- * Src page
- * Dest page
- * Parent page
*----------
*/
OffsetNumber offsets[1];
@@ -464,21 +431,43 @@ typedef struct spgxlogMoveLeafs
#define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets)
+/*
+ * Backup Blk 0: original page
+ * Backup Blk 1: where new tuple goes, if not same place
+ * Backup Blk 2: where parent downlink is, if updated and different from
+ * the old and new
+ */
typedef struct spgxlogAddNode
{
- RelFileNode node;
-
- BlockNumber blkno; /* block number of original inner tuple */
- OffsetNumber offnum; /* offset of original inner tuple */
-
- BlockNumber blknoParent; /* where parent downlink is, if updated */
- OffsetNumber offnumParent;
- uint16 nodeI;
+ /*
+ * Offset of the original inner tuple, in the original page (on backup
+ * block 0).
+ */
+ OffsetNumber offnum;
- BlockNumber blknoNew; /* where new tuple goes, if not same place */
+ /*
+ * Offset of the new tuple, on the new page (on backup block 1). Invalid
+ * if we overwrote the old tuple in the original page.
+ */
OffsetNumber offnumNew;
bool newPage; /* init new page? */
+ /*----
+ * Where is the parent downlink? parentBlk indicates which page it's on,
+ * and offnumParent is the offset within the page. The possible values for
+ * parentBlk are:
+ *
+ * 0: parent == original page
+ * 1: parent == new page
+ * 2: parent == different page (blk ref 2)
+ * -1: parent not updated
+ *----
+ */
+ char parentBlk;
+ OffsetNumber offnumParent; /* offset within the parent page */
+
+ uint16 nodeI;
+
spgxlogState stateSrc;
/*
@@ -486,41 +475,51 @@ typedef struct spgxlogAddNode
*/
} spgxlogAddNode;
+/*
+ * Backup Blk 0: where the prefix tuple goes
+ * Backup Blk 1: where the postfix tuple goes (if different page)
+ */
typedef struct spgxlogSplitTuple
{
- RelFileNode node;
-
- BlockNumber blknoPrefix; /* where the prefix tuple goes */
+ /* where the prefix tuple goes */
OffsetNumber offnumPrefix;
- BlockNumber blknoPostfix; /* where the postfix tuple goes */
+ /* where the postfix tuple goes */
OffsetNumber offnumPostfix;
bool newPage; /* need to init that page? */
+ bool postfixBlkSame; /* was postfix tuple put on same page as
+ * prefix? */
/*
- * new prefix inner tuple follows, then new postfix inner tuple
- * (both are unaligned!)
+ * new prefix inner tuple follows, then new postfix inner tuple (both are
+ * unaligned!)
*/
} spgxlogSplitTuple;
+/*
+ * Backup block references are:
+ * Backup Blk 0: Src page (only if not root)
+ * Backup Blk 1: Dest page (if used)
+ * Backup Blk 2: Inner page
+ * Backup Blk 3: Parent page (if any, and different from Inner)
+ */
typedef struct spgxlogPickSplit
{
- RelFileNode node;
+ bool isRootSplit;
- BlockNumber blknoSrc; /* original leaf page */
- BlockNumber blknoDest; /* other leaf page, if any */
uint16 nDelete; /* n to delete from Src */
uint16 nInsert; /* n to insert on Src and/or Dest */
bool initSrc; /* re-init the Src page? */
bool initDest; /* re-init the Dest page? */
- BlockNumber blknoInner; /* where to put new inner tuple */
+ /* where to put new inner tuple */
OffsetNumber offnumInner;
bool initInner; /* re-init the Inner page? */
bool storesNulls; /* pages are in the nulls tree? */
- BlockNumber blknoParent; /* where the parent downlink is, if any */
+ /* where the parent downlink is, if any */
+ bool innerIsParent; /* is parent the same as inner page? */
OffsetNumber offnumParent;
uint16 nodeI;
@@ -533,24 +532,15 @@ typedef struct spgxlogPickSplit
* array of page selector bytes for inserted tuples, length nInsert
* new inner tuple (unaligned!)
* list of leaf tuples, length nInsert (unaligned!)
- *
- * Buffer references in the rdata array are:
- * Src page (only if not root and not being init'd)
- * Dest page (if used and not being init'd)
- * Inner page (only if not being init'd)
- * Parent page (if any; could be same as Inner)
*----------
*/
- OffsetNumber offsets[1];
+ OffsetNumber offsets[1];
} spgxlogPickSplit;
#define SizeOfSpgxlogPickSplit offsetof(spgxlogPickSplit, offsets)
typedef struct spgxlogVacuumLeaf
{
- RelFileNode node;
-
- BlockNumber blkno; /* block number to clean */
uint16 nDead; /* number of tuples to become DEAD */
uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */
uint16 nMove; /* number of tuples to move */
@@ -576,9 +566,6 @@ typedef struct spgxlogVacuumLeaf
typedef struct spgxlogVacuumRoot
{
/* vacuum a root page when it is also a leaf */
- RelFileNode node;
-
- BlockNumber blkno; /* block number to clean */
uint16 nDelete; /* number of tuples to delete */
spgxlogState stateSrc;
@@ -591,9 +578,6 @@ typedef struct spgxlogVacuumRoot
typedef struct spgxlogVacuumRedirect
{
- RelFileNode node;
-
- BlockNumber blkno; /* block number to clean */
uint16 nToPlaceholder; /* number of redirects to make placeholders */
OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */
TransactionId newestRedirectXid; /* newest XID of removed redirects */
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 11a51b26859..b018aa4f5d8 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -14,7 +14,7 @@
#ifndef XACT_H
#define XACT_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
#include "storage/relfilenode.h"
@@ -256,8 +256,8 @@ extern void UnregisterSubXactCallback(SubXactCallback callback, void *arg);
extern int xactGetCommittedChildren(TransactionId **ptr);
-extern void xact_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void xact_desc(StringInfo buf, XLogRecord *record);
+extern void xact_redo(XLogReaderState *record);
+extern void xact_desc(StringInfo buf, XLogReaderState *record);
extern const char *xact_identify(uint8 info);
#endif /* XACT_H */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 6f8b5f46e10..d06fbc0ec1e 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -14,7 +14,7 @@
#include "access/rmgr.h"
#include "access/xlogdefs.h"
#include "access/xloginsert.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
@@ -186,7 +186,9 @@ typedef struct CheckpointStatsData
extern CheckpointStatsData CheckpointStats;
-extern XLogRecPtr XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn);
+struct XLogRecData;
+
+extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn);
extern void XLogFlush(XLogRecPtr RecPtr);
extern bool XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
@@ -198,8 +200,8 @@ extern XLogSegNo XLogGetLastRemovedSegno(void);
extern void XLogSetAsyncXactLSN(XLogRecPtr record);
extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn);
-extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void xlog_desc(StringInfo buf, XLogRecord *record);
+extern void xlog_redo(XLogReaderState *record);
+extern void xlog_desc(StringInfo buf, XLogReaderState *record);
extern const char *xlog_identify(uint8 info);
extern void issue_xlog_fsync(int fd, XLogSegNo segno);
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 19b2ef8d90d..423ef4d7fa0 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -20,7 +20,7 @@
#define XLOG_INTERNAL_H
#include "access/xlogdefs.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
#include "pgtime.h"
@@ -31,7 +31,7 @@
/*
* Each page of XLOG file has a header like this:
*/
-#define XLOG_PAGE_MAGIC 0xD080 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD081 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{
@@ -204,6 +204,17 @@ typedef struct xl_end_of_recovery
} xl_end_of_recovery;
/*
+ * The functions in xloginsert.c construct a chain of XLogRecData structs
+ * to represent the final WAL record.
+ */
+typedef struct XLogRecData
+{
+ struct XLogRecData *next; /* next struct in chain, or NULL */
+ char *data; /* start of rmgr data to include */
+ uint32 len; /* length of rmgr data to include */
+} XLogRecData;
+
+/*
* Method table for resource managers.
*
* This struct must be kept in sync with the PG_RMGR definition in
@@ -219,8 +230,8 @@ typedef struct xl_end_of_recovery
typedef struct RmgrData
{
const char *rm_name;
- void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr);
- void (*rm_desc) (StringInfo buf, XLogRecord *rptr);
+ void (*rm_redo) (XLogReaderState *record);
+ void (*rm_desc) (StringInfo buf, XLogReaderState *record);
const char *(*rm_identify) (uint8 info);
void (*rm_startup) (void);
void (*rm_cleanup) (void);
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index 30c2e84cbc9..e5ab71e2305 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -18,49 +18,43 @@
#include "storage/relfilenode.h"
/*
- * The rmgr data to be written by XLogInsert() is defined by a chain of
- * one or more XLogRecData structs. (Multiple structs would be used when
- * parts of the source data aren't physically adjacent in memory, or when
- * multiple associated buffers need to be specified.)
- *
- * If buffer is valid then XLOG will check if buffer must be backed up
- * (ie, whether this is first change of that page since last checkpoint).
- * If so, the whole page contents are attached to the XLOG record, and XLOG
- * sets XLR_BKP_BLOCK(N) bit in xl_info. Note that the buffer must be pinned
- * and exclusive-locked by the caller, so that it won't change under us.
- * NB: when the buffer is backed up, we DO NOT insert the data pointed to by
- * this XLogRecData struct into the XLOG record, since we assume it's present
- * in the buffer. Therefore, rmgr redo routines MUST pay attention to
- * XLR_BKP_BLOCK(N) to know what is actually stored in the XLOG record.
- * The N'th XLR_BKP_BLOCK bit corresponds to the N'th distinct buffer
- * value (ignoring InvalidBuffer) appearing in the rdata chain.
- *
- * When buffer is valid, caller must set buffer_std to indicate whether the
- * page uses standard pd_lower/pd_upper header fields. If this is true, then
- * XLOG is allowed to omit the free space between pd_lower and pd_upper from
- * the backed-up page image. Note that even when buffer_std is false, the
- * page MUST have an LSN field as its first eight bytes!
- *
- * Note: data can be NULL to indicate no rmgr data associated with this chain
- * entry. This can be sensible (ie, not a wasted entry) if buffer is valid.
- * The implication is that the buffer has been changed by the operation being
- * logged, and so may need to be backed up, but the change can be redone using
- * only information already present elsewhere in the XLOG entry.
+ * The minimum size of the WAL construction working area. If you need to
+ * register more than XLR_NORMAL_MAX_BLOCK_ID block references or have more
+ * than XLR_NORMAL_RDATAS data chunks in a single WAL record, you must call
+ * XLogEnsureRecordSpace() first to allocate more working memory.
*/
-typedef struct XLogRecData
-{
- char *data; /* start of rmgr data to include */
- uint32 len; /* length of rmgr data to include */
- Buffer buffer; /* buffer associated with data, if any */
- bool buffer_std; /* buffer has standard pd_lower/pd_upper */
- struct XLogRecData *next; /* next struct in chain, or NULL */
-} XLogRecData;
+#define XLR_NORMAL_MAX_BLOCK_ID 4
+#define XLR_NORMAL_RDATAS 20
+
+/* flags for XLogRegisterBuffer */
+#define REGBUF_FORCE_IMAGE 0x01 /* force a full-page image */
+#define REGBUF_NO_IMAGE 0x02 /* don't take a full-page image */
+#define REGBUF_WILL_INIT (0x04 | 0x02) /* page will be re-initialized at
+ * replay (implies NO_IMAGE) */
+#define REGBUF_STANDARD 0x08 /* page follows "standard" page layout,
+ * (data between pd_lower and pd_upper
+ * will be skipped) */
+#define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image
+ * is taken */
+
+/* prototypes for public functions in xloginsert.c: */
+extern void XLogBeginInsert(void);
+extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
+extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
+extern void XLogRegisterData(char *data, int len);
+extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
+extern void XLogRegisterBlock(uint8 block_id, RelFileNode *rnode,
+ ForkNumber forknum, BlockNumber blknum, char *page,
+ uint8 flags);
+extern void XLogRegisterBufData(uint8 block_id, char *data, int len);
+extern void XLogResetInsertion(void);
+extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
-extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
BlockNumber blk, char *page, bool page_std);
extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
-extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
+
+extern void InitXLogInsert(void);
#endif /* XLOGINSERT_H */
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index ea873a2d9c7..eb6cc8996a5 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -14,12 +14,18 @@
*
* The basic idea is to allocate an XLogReaderState via
* XLogReaderAllocate(), and call XLogReadRecord() until it returns NULL.
+ *
+ * After reading a record with XLogReadRecord(), it's decomposed into
+ * the per-block and main data parts, and the parts can be accessed
+ * with the XLogRec* macros and functions. You can also decode a
+ * record that's already constructed in memory, without reading from
+ * disk, by calling the DecodeXLogRecord() function.
*-------------------------------------------------------------------------
*/
#ifndef XLOGREADER_H
#define XLOGREADER_H
-#include "access/xlog_internal.h"
+#include "access/xlogrecord.h"
typedef struct XLogReaderState XLogReaderState;
@@ -31,6 +37,32 @@ typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
char *readBuf,
TimeLineID *pageTLI);
+typedef struct
+{
+ /* Is this block ref in use? */
+ bool in_use;
+
+ /* Identify the block this refers to */
+ RelFileNode rnode;
+ ForkNumber forknum;
+ BlockNumber blkno;
+
+ /* copy of the fork_flags field from the XLogRecordBlockHeader */
+ uint8 flags;
+
+ /* Information on full-page image, if any */
+ bool has_image;
+ char *bkp_image;
+ uint16 hole_offset;
+ uint16 hole_length;
+
+ /* Buffer holding the rmgr-specific data associated with this block */
+ bool has_data;
+ char *data;
+ uint16 data_len;
+ uint16 data_bufsz;
+} DecodedBkpBlock;
+
struct XLogReaderState
{
/* ----------------------------------------
@@ -79,6 +111,25 @@ struct XLogReaderState
XLogRecPtr ReadRecPtr; /* start of last record read */
XLogRecPtr EndRecPtr; /* end+1 of last record read */
+
+ /* ----------------------------------------
+ * Decoded representation of current record
+ *
+ * Use XLogRecGet* functions to investigate the record; these fields
+ * should not be accessed directly.
+ * ----------------------------------------
+ */
+ XLogRecord *decoded_record; /* currently decoded record */
+
+ char *main_data; /* record's main data portion */
+ uint32 main_data_len; /* main data portion's length */
+ uint32 main_data_bufsz; /* allocated size of the buffer */
+
+ /* information about blocks referenced by the record. */
+ DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID + 1];
+
+ int max_block_id; /* highest block_id in use (-1 if none) */
+
/* ----------------------------------------
* private/internal state
* ----------------------------------------
@@ -123,4 +174,28 @@ extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
#endif /* FRONTEND */
+/* Functions for decoding an XLogRecord */
+
+extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record,
+ char **errmsg);
+
+#define XLogRecGetTotalLen(decoder) ((decoder)->decoded_record->xl_tot_len)
+#define XLogRecGetPrev(decoder) ((decoder)->decoded_record->xl_prev)
+#define XLogRecGetInfo(decoder) ((decoder)->decoded_record->xl_info)
+#define XLogRecGetRmid(decoder) ((decoder)->decoded_record->xl_rmid)
+#define XLogRecGetXid(decoder) ((decoder)->decoded_record->xl_xid)
+#define XLogRecGetData(decoder) ((decoder)->main_data)
+#define XLogRecGetDataLen(decoder) ((decoder)->main_data_len)
+#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->max_block_id >= 0)
+#define XLogRecHasBlockRef(decoder, block_id) \
+ ((decoder)->blocks[block_id].in_use)
+#define XLogRecHasBlockImage(decoder, block_id) \
+ ((decoder)->blocks[block_id].has_image)
+
+extern bool RestoreBlockImage(XLogReaderState *recoder, uint8 block_id, char *dst);
+extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
+extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
+ RelFileNode *rnode, ForkNumber *forknum,
+ BlockNumber *blknum);
+
#endif /* XLOGREADER_H */
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index ab0fb1c5004..11ddfac9c7f 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -20,81 +20,161 @@
/*
* The overall layout of an XLOG record is:
* Fixed-size header (XLogRecord struct)
- * rmgr-specific data
- * BkpBlock
- * backup block data
- * BkpBlock
- * backup block data
+ * XLogRecordBlockHeader struct
+ * XLogRecordBlockHeader struct
* ...
+ * XLogRecordDataHeader[Short|Long] struct
+ * block data
+ * block data
+ * ...
+ * main data
*
- * where there can be zero to four backup blocks (as signaled by xl_info flag
- * bits). XLogRecord structs always start on MAXALIGN boundaries in the WAL
- * files, and we round up SizeOfXLogRecord so that the rmgr data is also
- * guaranteed to begin on a MAXALIGN boundary. However, no padding is added
- * to align BkpBlock structs or backup block data.
+ * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of
+ * rmgr-specific data not associated with a block. XLogRecord structs
+ * always start on MAXALIGN boundaries in the WAL files, but the rest of
+ * the fields are not aligned.
*
- * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
- * and also not any backup blocks. xl_tot_len counts everything. Neither
- * length field is rounded up to an alignment boundary.
+ * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
+ * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
+ * used to distinguish between block references, and the main data structs.
*/
typedef struct XLogRecord
{
uint32 xl_tot_len; /* total len of entire record */
TransactionId xl_xid; /* xact id */
- uint32 xl_len; /* total len of rmgr data */
+ XLogRecPtr xl_prev; /* ptr to previous record in log */
uint8 xl_info; /* flag bits, see below */
RmgrId xl_rmid; /* resource manager for this record */
/* 2 bytes of padding here, initialize to zero */
- XLogRecPtr xl_prev; /* ptr to previous record in log */
pg_crc32 xl_crc; /* CRC for this record */
- /* If MAXALIGN==8, there are 4 wasted bytes here */
-
- /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
+ /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
} XLogRecord;
-#define SizeOfXLogRecord MAXALIGN(sizeof(XLogRecord))
-
-#define XLogRecGetData(record) ((char*) (record) + SizeOfXLogRecord)
+#define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32))
/*
- * XLOG uses only low 4 bits of xl_info. High 4 bits may be used by rmgr.
+ * The high 4 bits in xl_info may be used freely by rmgr. The
+ * XLR_SPECIAL_REL_UPDATE bit can be passed by XLogInsert caller. The rest
+ * are set internally by XLogInsert.
*/
#define XLR_INFO_MASK 0x0F
+#define XLR_RMGR_INFO_MASK 0xF0
/*
- * If we backed up any disk blocks with the XLOG record, we use flag bits in
- * xl_info to signal it. We support backup of up to 4 disk blocks per XLOG
- * record.
+ * If a WAL record modifies any relation files, in ways not covered by the
+ * usual block references, this flag is set. This is not used for anything
+ * by PostgreSQL itself, but it allows external tools that read WAL and keep
+ * track of modified blocks to recognize such special record types.
+ */
+#define XLR_SPECIAL_REL_UPDATE 0x01
+
+/*
+ * Header info for block data appended to an XLOG record.
+ *
+ * Note that we don't attempt to align the XLogRecordBlockHeader struct!
+ * So, the struct must be copied to aligned local storage before use.
+ * 'data_length' is the length of the rmgr-specific payload data associated
+ * with this block. It does not include the possible full-page image, nor
+ * the XLogRecordBlockHeader struct itself.
*/
-#define XLR_BKP_BLOCK_MASK 0x0F /* all info bits used for bkp blocks */
-#define XLR_MAX_BKP_BLOCKS 4
-#define XLR_BKP_BLOCK(iblk) (0x08 >> (iblk)) /* iblk in 0..3 */
+typedef struct XLogRecordBlockHeader
+{
+ uint8 id; /* block reference ID */
+ uint8 fork_flags; /* fork within the relation, and flags */
+ uint16 data_length; /* number of payload bytes (not including page
+ * image) */
+
+ /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */
+ /* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */
+ /* BlockNumber follows */
+} XLogRecordBlockHeader;
+
+#define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16))
/*
- * Header info for a backup block appended to an XLOG record.
+ * Additional header information when a full-page image is included
+ * (i.e. when BKPBLOCK_HAS_IMAGE is set).
*
* As a trivial form of data compression, the XLOG code is aware that
* PG data pages usually contain an unused "hole" in the middle, which
* contains only zero bytes. If hole_length > 0 then we have removed
* such a "hole" from the stored data (and it's not counted in the
* XLOG record's CRC, either). Hence, the amount of block data actually
- * present following the BkpBlock struct is BLCKSZ - hole_length bytes.
- *
- * Note that we don't attempt to align either the BkpBlock struct or the
- * block's data. So, the struct must be copied to aligned local storage
- * before use.
+ * present is BLCKSZ - hole_length bytes.
*/
-typedef struct BkpBlock
+typedef struct XLogRecordBlockImageHeader
{
- RelFileNode node; /* relation containing block */
- ForkNumber fork; /* fork within the relation */
- BlockNumber block; /* block number */
uint16 hole_offset; /* number of bytes before "hole" */
uint16 hole_length; /* number of bytes in "hole" */
+} XLogRecordBlockImageHeader;
+
+#define SizeOfXLogRecordBlockImageHeader sizeof(XLogRecordBlockImageHeader)
+
+/*
+ * Maximum size of the header for a block reference. This is used to size a
+ * temporary buffer for constructing the header.
+ */
+#define MaxSizeOfXLogRecordBlockHeader \
+ (SizeOfXLogRecordBlockHeader + \
+ SizeOfXLogRecordBlockImageHeader + \
+ sizeof(RelFileNode) + \
+ sizeof(BlockNumber))
+
+/*
+ * The fork number fits in the lower 4 bits in the fork_flags field. The upper
+ * bits are used for flags.
+ */
+#define BKPBLOCK_FORK_MASK 0x0F
+#define BKPBLOCK_FLAG_MASK 0xF0
+#define BKPBLOCK_HAS_IMAGE 0x10 /* block data is an XLogRecordBlockImage */
+#define BKPBLOCK_HAS_DATA 0x20
+#define BKPBLOCK_WILL_INIT 0x40 /* redo will re-init the page */
+#define BKPBLOCK_SAME_REL 0x80 /* RelFileNode omitted, same as previous */
+
+/*
+ * XLogRecordDataHeaderShort/Long are used for the "main data" portion of
+ * the record. If the length of the data is less than 256 bytes, the short
+ * form is used, with a single byte to hold the length. Otherwise the long
+ * form is used.
+ *
+ * (These structs are currently not used in the code, they are here just for
+ * documentation purposes).
+ */
+typedef struct XLogRecordDataHeaderShort
+{
+ uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */
+ uint8 data_length; /* number of payload bytes */
+} XLogRecordDataHeaderShort;
+
+#define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2)
+
+typedef struct XLogRecordDataHeaderLong
+{
+ uint8 id; /* XLR_BLOCK_ID_DATA_LONG */
+ /* followed by uint32 data_length, unaligned */
+} XLogRecordDataHeaderLong;
+
+#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
+
+/*
+ * Block IDs used to distinguish different kinds of record fragments. Block
+ * references are numbered from 0 to XLR_MAX_BLOCK_ID. A rmgr is free to use
+ * any ID number in that range (although you should stick to small numbers,
+ * because the WAL machinery is optimized for that case). A couple of ID
+ * numbers are reserved to denote the "main" data portion of the record.
+ *
+ * The maximum is currently set at 32, quite arbitrarily. Most records only
+ * need a handful of block references, but there are a few exceptions that
+ * need more.
+ */
+#define XLR_MAX_BLOCK_ID 32
+
+#define XLR_BLOCK_ID_DATA_SHORT 255
+#define XLR_BLOCK_ID_DATA_LONG 254
+
+#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
- /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
-} BkpBlock;
#endif /* XLOGRECORD_H */
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index 8d906967232..68f72cfac6d 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -11,7 +11,7 @@
#ifndef XLOG_UTILS_H
#define XLOG_UTILS_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "storage/bufmgr.h"
@@ -33,26 +33,17 @@ typedef enum
* replayed) */
} XLogRedoAction;
-extern XLogRedoAction XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record,
- int block_index, RelFileNode rnode, BlockNumber blkno,
- Buffer *buf);
-extern XLogRedoAction XLogReadBufferForRedoExtended(XLogRecPtr lsn,
- XLogRecord *record, int block_index,
- RelFileNode rnode, ForkNumber forkno,
- BlockNumber blkno,
+extern XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record,
+ uint8 buffer_id, Buffer *buf);
+extern Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id);
+extern XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record,
+ uint8 buffer_id,
ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf);
-extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, ReadBufferMode mode);
-extern Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
- int block_index,
- bool get_cleanup_lock, bool keep_buffer);
-extern Buffer RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb,
- char *blk, bool get_cleanup_lock, bool keep_buffer);
-
extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
extern void FreeFakeRelcacheEntry(Relation fakerel);
diff --git a/src/include/catalog/storage_xlog.h b/src/include/catalog/storage_xlog.h
index 6c687e3a827..31a51c42f60 100644
--- a/src/include/catalog/storage_xlog.h
+++ b/src/include/catalog/storage_xlog.h
@@ -14,7 +14,7 @@
#ifndef STORAGE_XLOG_H
#define STORAGE_XLOG_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/block.h"
#include "storage/relfilenode.h"
@@ -44,8 +44,8 @@ typedef struct xl_smgr_truncate
extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum);
-extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void smgr_desc(StringInfo buf, XLogRecord *record);
+extern void smgr_redo(XLogReaderState *record);
+extern void smgr_desc(StringInfo buf, XLogReaderState *record);
extern const char *smgr_identify(uint8 info);
#endif /* STORAGE_XLOG_H */
diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h
index b79d9fc8648..bcf0e48cbb2 100644
--- a/src/include/commands/dbcommands.h
+++ b/src/include/commands/dbcommands.h
@@ -14,7 +14,7 @@
#ifndef DBCOMMANDS_H
#define DBCOMMANDS_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "nodes/parsenodes.h"
@@ -63,8 +63,8 @@ extern Oid AlterDatabaseOwner(const char *dbname, Oid newOwnerId);
extern Oid get_database_oid(const char *dbname, bool missingok);
extern char *get_database_name(Oid dbid);
-extern void dbase_redo(XLogRecPtr lsn, XLogRecord *rptr);
-extern void dbase_desc(StringInfo buf, XLogRecord *rptr);
+extern void dbase_redo(XLogReaderState *rptr);
+extern void dbase_desc(StringInfo buf, XLogReaderState *rptr);
extern const char *dbase_identify(uint8 info);
extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype);
diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h
index 7cbe6f9a819..386f1e677cf 100644
--- a/src/include/commands/sequence.h
+++ b/src/include/commands/sequence.h
@@ -13,7 +13,7 @@
#ifndef SEQUENCE_H
#define SEQUENCE_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
#include "nodes/parsenodes.h"
@@ -77,8 +77,8 @@ extern Oid AlterSequence(AlterSeqStmt *stmt);
extern void ResetSequence(Oid seq_relid);
extern void ResetSequenceCaches(void);
-extern void seq_redo(XLogRecPtr lsn, XLogRecord *rptr);
-extern void seq_desc(StringInfo buf, XLogRecord *rptr);
+extern void seq_redo(XLogReaderState *rptr);
+extern void seq_desc(StringInfo buf, XLogReaderState *rptr);
extern const char *seq_identify(uint8 info);
#endif /* SEQUENCE_H */
diff --git a/src/include/commands/tablespace.h b/src/include/commands/tablespace.h
index afd9e05cb78..80e021e2d4c 100644
--- a/src/include/commands/tablespace.h
+++ b/src/include/commands/tablespace.h
@@ -14,7 +14,7 @@
#ifndef TABLESPACE_H
#define TABLESPACE_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "nodes/parsenodes.h"
@@ -56,8 +56,8 @@ extern char *get_tablespace_name(Oid spc_oid);
extern bool directory_is_empty(const char *path);
-extern void tblspc_redo(XLogRecPtr lsn, XLogRecord *rptr);
-extern void tblspc_desc(StringInfo buf, XLogRecord *rptr);
+extern void tblspc_redo(XLogReaderState *rptr);
+extern void tblspc_desc(StringInfo buf, XLogReaderState *rptr);
extern const char *tblspc_identify(uint8 info);
#endif /* TABLESPACE_H */
diff --git a/src/include/replication/decode.h b/src/include/replication/decode.h
index e4185287a1c..385c4a7b508 100644
--- a/src/include/replication/decode.h
+++ b/src/include/replication/decode.h
@@ -15,6 +15,6 @@
#include "replication/logical.h"
void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx,
- XLogRecord *record);
+ XLogReaderState *record);
#endif
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index c89989fd201..d2599be0cfb 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -14,7 +14,7 @@
#ifndef STANDBY_H
#define STANDBY_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/lock.h"
#include "storage/procsignal.h"
@@ -82,8 +82,8 @@ typedef struct xl_running_xacts
/* Recovery handlers for the Standby Rmgr (RM_STANDBY_ID) */
-extern void standby_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void standby_desc(StringInfo buf, XLogRecord *record);
+extern void standby_redo(XLogReaderState *record);
+extern void standby_desc(StringInfo buf, XLogReaderState *record);
extern const char *standby_identify(uint8 info);
/*
diff --git a/src/include/utils/relmapper.h b/src/include/utils/relmapper.h
index bd5836b0d98..1f2c960ebe6 100644
--- a/src/include/utils/relmapper.h
+++ b/src/include/utils/relmapper.h
@@ -14,7 +14,7 @@
#ifndef RELMAPPER_H
#define RELMAPPER_H
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
#include "lib/stringinfo.h"
/* ----------------
@@ -59,8 +59,8 @@ extern void RelationMapInitialize(void);
extern void RelationMapInitializePhase2(void);
extern void RelationMapInitializePhase3(void);
-extern void relmap_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void relmap_desc(StringInfo buf, XLogRecord *record);
+extern void relmap_redo(XLogReaderState *record);
+extern void relmap_desc(StringInfo buf, XLogReaderState *record);
extern const char *relmap_identify(uint8 info);
#endif /* RELMAPPER_H */