aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2014-04-23 10:15:06 +0300
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2014-04-23 10:19:54 +0300
commit: 4fafc4ecd9e4d224d92c4a8549c5646860787a5d (patch)
tree: ff1cb07f1140ed9ae19f6a66ab94d154b9bbf5ee
parent: d26b042ce577a4012b9798528f0b1bcfa6e502eb (diff)
download: postgresql-4fafc4ecd9e4d224d92c4a8549c5646860787a5d.tar.gz
download: postgresql-4fafc4ecd9e4d224d92c4a8549c5646860787a5d.zip
Cleanup of new b-tree page deletion code.
When marking a branch as half-dead, a pointer to the top of the branch is stored in the leaf block's high key. During normal operation, the high key was left in place, and the block number was just stored in the ctid field of the high key tuple, but in WAL replay, the high key was recreated as a truncated tuple with zero columns. For the sake of easier debugging, also truncate the tuple in normal operation, so that the page is identical after WAL replay.

Also, rename the 'downlink' field in the WAL record to 'topparent', as that seems like a more descriptive name. And make sure it's set to invalid when unlinking the leaf page.
-rw-r--r-- src/backend/access/nbtree/nbtpage.c 27
-rw-r--r-- src/backend/access/nbtree/nbtxlog.c 8
-rw-r--r-- src/backend/access/rmgrdesc/nbtdesc.c 8
-rw-r--r-- src/include/access/nbtree.h 8
4 files changed, 33 insertions, 18 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index b95c1b886d4..c0ebb95ba8a 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -1303,6 +1303,10 @@ _bt_pagedel(Relation rel, Buffer buf)
return ndeleted;
}
+/*
+ * First stage of page deletion. Remove the downlink to the top of the
+ * branch being deleted, and mark the leaf page as half-dead.
+ */
static bool
_bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
{
@@ -1317,6 +1321,7 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
OffsetNumber topoff;
OffsetNumber nextoffset;
IndexTuple itup;
+ IndexTupleData trunctuple;
page = BufferGetPage(leafbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1406,12 +1411,17 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
opaque->btpo_flags |= BTP_HALF_DEAD;
- itemid = PageGetItemId(page, P_HIKEY);
- itup = (IndexTuple) PageGetItem(page, itemid);
- if (target == leafblkno)
- ItemPointerSetInvalid(&(itup->t_tid));
+ PageIndexTupleDelete(page, P_HIKEY);
+ Assert(PageGetMaxOffsetNumber(page) == 0);
+ MemSet(&trunctuple, 0, sizeof(IndexTupleData));
+ trunctuple.t_info = sizeof(IndexTupleData);
+ if (target != leafblkno)
+ ItemPointerSet(&trunctuple.t_tid, target, P_HIKEY);
else
- ItemPointerSet(&(itup->t_tid), target, P_HIKEY);
+ ItemPointerSetInvalid(&trunctuple.t_tid);
+ if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
+ false, false) == InvalidOffsetNumber)
+ elog(ERROR, "could not add dummy high key to half-dead page");
/* Must mark buffers dirty before XLogInsert */
MarkBufferDirty(topparent);
@@ -1427,7 +1437,10 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
xlrec.target.node = rel->rd_node;
ItemPointerSet(&(xlrec.target.tid), BufferGetBlockNumber(topparent), topoff);
xlrec.leafblk = leafblkno;
- xlrec.downlink = target;
+ if (target != leafblkno)
+ xlrec.topparent = target;
+ else
+ xlrec.topparent = InvalidBlockNumber;
page = BufferGetPage(leafbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1768,7 +1781,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
xlrec.leafblk = leafblkno;
xlrec.leafleftsib = leafleftsib;
xlrec.leafrightsib = leafrightsib;
- xlrec.downlink = nextchild;
+ xlrec.topparent = nextchild;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfBtreeUnlinkPage;
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index ada2d6691f3..dc73f2a9cd5 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -870,8 +870,8 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
*/
MemSet(&trunctuple, 0, sizeof(IndexTupleData));
trunctuple.t_info = sizeof(IndexTupleData);
- if (xlrec->downlink != InvalidBlockNumber)
- ItemPointerSet(&trunctuple.t_tid, xlrec->downlink, P_HIKEY);
+ if (xlrec->topparent != InvalidBlockNumber)
+ ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY);
else
ItemPointerSetInvalid(&trunctuple.t_tid);
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
@@ -1006,8 +1006,8 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
/* Add a dummy hikey item */
MemSet(&trunctuple, 0, sizeof(IndexTupleData));
trunctuple.t_info = sizeof(IndexTupleData);
- if (xlrec->downlink != InvalidBlockNumber)
- ItemPointerSet(&trunctuple.t_tid, xlrec->downlink, P_HIKEY);
+ if (xlrec->topparent != InvalidBlockNumber)
+ ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY);
else
ItemPointerSetInvalid(&trunctuple.t_tid);
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c
index 89a91a20be1..af7663b8cac 100644
--- a/src/backend/access/rmgrdesc/nbtdesc.c
+++ b/src/backend/access/rmgrdesc/nbtdesc.c
@@ -130,8 +130,8 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfoString(buf, "mark_page_halfdead: ");
out_target(buf, &(xlrec->target));
- appendStringInfo(buf, "; downlink %u; leaf %u; left %u; right %u",
- xlrec->downlink, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk);
+ appendStringInfo(buf, "; topparent %u; leaf %u; left %u; right %u",
+ xlrec->topparent, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk);
break;
}
case XLOG_BTREE_UNLINK_PAGE_META:
@@ -143,8 +143,8 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
appendStringInfo(buf, "dead %u; left %u; right %u; btpo_xact %u; ",
xlrec->deadblk, xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact);
- appendStringInfo(buf, "leaf %u; leafleft %u; leafright %u; downlink %u",
- xlrec->leafblk, xlrec->leafleftsib, xlrec->leafrightsib, xlrec->downlink);
+ appendStringInfo(buf, "leaf %u; leafleft %u; leafright %u; topparent %u",
+ xlrec->leafblk, xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent);
break;
}
case XLOG_BTREE_NEWROOT:
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 64c6982f50e..1a8b16d45e2 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -379,13 +379,15 @@ typedef struct xl_btree_vacuum
typedef struct xl_btree_mark_page_halfdead
{
xl_btreetid target; /* deleted tuple id in parent page */
+
+ /* information needed to recreate the leaf page: */
BlockNumber leafblk; /* leaf block ultimately being deleted */
BlockNumber leftblk; /* leaf block's left sibling, if any */
BlockNumber rightblk; /* leaf block's right sibling */
- BlockNumber downlink; /* next child down in the branch */
+ BlockNumber topparent; /* topmost internal page in the branch */
} xl_btree_mark_page_halfdead;
-#define SizeOfBtreeMarkPageHalfDead (offsetof(xl_btree_mark_page_halfdead, downlink) + sizeof(BlockNumber))
+#define SizeOfBtreeMarkPageHalfDead (offsetof(xl_btree_mark_page_halfdead, topparent) + sizeof(BlockNumber))
/*
* This is what we need to know about deletion of a btree page. Note we do
@@ -406,7 +408,7 @@ typedef struct xl_btree_unlink_page
BlockNumber leafblk;
BlockNumber leafleftsib;
BlockNumber leafrightsib;
- BlockNumber downlink; /* next child down in the branch */
+ BlockNumber topparent; /* next child down in the branch */
TransactionId btpo_xact; /* value of btpo.xact for use in recovery */
/* xl_btree_metadata FOLLOWS IF XLOG_BTREE_UNLINK_PAGE_META */