diff options
author | Peter Geoghegan <pg@bowt.ie> | 2020-01-03 12:18:13 -0800 |
---|---|---|
committer | Peter Geoghegan <pg@bowt.ie> | 2020-01-03 12:18:13 -0800 |
commit | d2e5e20e57111cca9e14f6e5a99a186d4c66a5b7 (patch) | |
tree | 3eb23c2ed6433a48866dac0ecf4cad4b6b7cbfea /src | |
parent | 56a3921a2f5102f804bd0ff741e144a0e6f1c0b6 (diff) | |
download | postgresql-d2e5e20e57111cca9e14f6e5a99a186d4c66a5b7.tar.gz postgresql-d2e5e20e57111cca9e14f6e5a99a186d4c66a5b7.zip |
Add xl_btree_delete optimization.
Commit 558a9165e08 taught _bt_delitems_delete() to produce its own XID
horizon on the primary. Standbys no longer needed to generate their own
latestRemovedXid, since they could just use the explicitly logged value
from the primary instead. The deleted offset numbers array from the
xl_btree_delete WAL record was no longer used by the REDO routine for
anything other than deleting the items.
This enables a minor optimization: We now treat the array as buffer
state, not generic WAL data, following _bt_delitems_vacuum()'s example.
This should be a minor win, since it allows us to avoid including the
deleted items array in cases where XLogInsert() stores the whole buffer
anyway. The primary goal here is to make the code more maintainable,
though. Removing inessential differences between the two functions
highlights the fundamental differences that remain.
Also change xl_btree_delete to use uint32 for the size of the array of
item offsets being deleted. This brings xl_btree_delete closer to
xl_btree_vacuum. Furthermore, it seems like a good idea to use an
explicit-width integer type (the field was previously an "int").
Bump XLOG_PAGE_MAGIC because xl_btree_delete changed.
Discussion: https://postgr.es/m/CAH2-Wzkz4TjmezzfAbaV1zYrh=fr0bCpzuJTvBe5iUQ3aUPsCQ@mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/access/nbtree/nbtpage.c | 48 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtxlog.c | 11 | ||||
-rw-r--r-- | src/backend/access/rmgrdesc/nbtdesc.c | 4 | ||||
-rw-r--r-- | src/include/access/nbtree.h | 3 | ||||
-rw-r--r-- | src/include/access/nbtxlog.h | 6 | ||||
-rw-r--r-- | src/include/access/xlog_internal.h | 2 |
6 files changed, 32 insertions, 42 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 73d28d37a3f..f05cbe74674 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -961,20 +961,15 @@ _bt_page_recyclable(Page page)
 }
 
 /*
- * Delete item(s) from a btree page during VACUUM.
- *
- * This must only be used for deleting leaf items. Deleting an item on a
- * non-leaf page has to be done as part of an atomic action that includes
- * deleting the page it points to.
+ * Delete item(s) from a btree leaf page during VACUUM.
  *
  * This routine assumes that the caller has a super-exclusive write lock on
  * the buffer. Also, the given deletable array *must* be sorted in ascending
  * order.
  *
  * We record VACUUMs and b-tree deletes differently in WAL. Deletes must
- * generate recovery conflicts by accessing the heap inline, whereas VACUUMs
- * can rely on the initial heap scan taking care of the problem (pruning would
- * have generated the conflicts needed for hot standby already).
+ * generate their own latestRemovedXid by accessing the heap directly, whereas
+ * VACUUMs rely on the initial heap scan taking care of it indirectly.
  */
 void
 _bt_delitems_vacuum(Relation rel, Buffer buf,
@@ -1030,9 +1025,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
 		XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
 
 		/*
-		 * The target-offsets array is not in the buffer, but pretend that it
-		 * is. When XLogInsert stores the whole buffer, the offsets array
-		 * need not be stored too.
+		 * The deletable array is not in the buffer, but pretend that it is.
+		 * When XLogInsert stores the whole buffer, the array need not be
+		 * stored too.
 		 */
 		XLogRegisterBufData(0, (char *) deletable,
 							ndeletable * sizeof(OffsetNumber));
@@ -1046,21 +1041,19 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
 }
 
 /*
- * Delete item(s) from a btree page during single-page cleanup.
- *
- * As above, must only be used on leaf pages.
+ * Delete item(s) from a btree leaf page during single-page cleanup.
  *
  * This routine assumes that the caller has pinned and write locked the
- * buffer. Also, the given itemnos *must* appear in increasing order in the
- * array.
+ * buffer. Also, the given deletable array *must* be sorted in ascending
+ * order.
  *
  * This is nearly the same as _bt_delitems_vacuum as far as what it does to
- * the page, but it needs to generate its own recovery conflicts by accessing
- * the heap. See comments for _bt_delitems_vacuum.
+ * the page, but it needs to generate its own latestRemovedXid by accessing
+ * the heap. This is used by the REDO routine to generate recovery conflicts.
  */
 void
 _bt_delitems_delete(Relation rel, Buffer buf,
-					OffsetNumber *itemnos, int nitems,
+					OffsetNumber *deletable, int ndeletable,
 					Relation heapRel)
 {
 	Page page = BufferGetPage(buf);
@@ -1068,18 +1061,18 @@ _bt_delitems_delete(Relation rel, Buffer buf,
 	TransactionId latestRemovedXid = InvalidTransactionId;
 
 	/* Shouldn't be called unless there's something to do */
-	Assert(nitems > 0);
+	Assert(ndeletable > 0);
 
 	if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
 		latestRemovedXid =
 			index_compute_xid_horizon_for_tuples(rel, heapRel, buf,
-												 itemnos, nitems);
+												 deletable, ndeletable);
 
 	/* No ereport(ERROR) until changes are logged */
 	START_CRIT_SECTION();
 
 	/* Fix the page */
-	PageIndexMultiDelete(page, itemnos, nitems);
+	PageIndexMultiDelete(page, deletable, ndeletable);
 
 	/*
 	 * Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID,
@@ -1098,18 +1091,19 @@ _bt_delitems_delete(Relation rel, Buffer buf,
 		xl_btree_delete xlrec_delete;
 
 		xlrec_delete.latestRemovedXid = latestRemovedXid;
-		xlrec_delete.nitems = nitems;
+		xlrec_delete.ndeleted = ndeletable;
 
 		XLogBeginInsert();
 		XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
 		XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
 
 		/*
-		 * We need the target-offsets array whether or not we store the whole
-		 * buffer, to allow us to find the latestRemovedXid on a standby
-		 * server.
+		 * The deletable array is not in the buffer, but pretend that it is.
+		 * When XLogInsert stores the whole buffer, the array need not be
+		 * stored too.
 		 */
-		XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
+		XLogRegisterBufData(0, (char *) deletable,
+							ndeletable * sizeof(OffsetNumber));
 
 		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
 
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index e1c37491485..2e5202c2d6e 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -449,16 +449,11 @@ btree_xlog_delete(XLogReaderState *record)
 	 */
 	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
 	{
-		page = (Page) BufferGetPage(buffer);
-
-		if (XLogRecGetDataLen(record) > SizeOfBtreeDelete)
-		{
-			OffsetNumber *unused;
+		char *ptr = XLogRecGetBlockData(record, 0, NULL);
 
-			unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
+		page = (Page) BufferGetPage(buffer);
 
-			PageIndexMultiDelete(page, unused, xlrec->nitems);
-		}
+		PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
 
 		/* Mark the page as not containing any LP_DEAD items */
 		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c
index e0ec8a4b0b7..7d63a7124ed 100644
--- a/src/backend/access/rmgrdesc/nbtdesc.c
+++ b/src/backend/access/rmgrdesc/nbtdesc.c
@@ -53,8 +53,8 @@ btree_desc(StringInfo buf, XLogReaderState *record)
 			{
 				xl_btree_delete *xlrec = (xl_btree_delete *) rec;
 
-				appendStringInfo(buf, "%d items, latest removed xid %u",
-								 xlrec->nitems, xlrec->latestRemovedXid);
+				appendStringInfo(buf, "latestRemovedXid %u; ndeleted %u",
+								 xlrec->latestRemovedXid, xlrec->ndeleted);
 				break;
 			}
 		case XLOG_BTREE_MARK_PAGE_HALFDEAD:
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 4f84ca83dc4..f90ee3a0e00 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -779,7 +779,8 @@ extern bool _bt_page_recyclable(Page page);
 extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
 								OffsetNumber *deletable, int ndeletable);
 extern void _bt_delitems_delete(Relation rel, Buffer buf,
-								OffsetNumber *itemnos, int nitems, Relation heapRel);
+								OffsetNumber *deletable, int ndeletable,
+								Relation heapRel);
 extern int _bt_pagedel(Relation rel, Buffer buf);
 
 /*
diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h
index 3da55146555..776a9bd7233 100644
--- a/src/include/access/nbtxlog.h
+++ b/src/include/access/nbtxlog.h
@@ -126,12 +126,12 @@ typedef struct xl_btree_split
 typedef struct xl_btree_delete
 {
 	TransactionId latestRemovedXid;
-	int nitems;
+	uint32 ndeleted;
 
-	/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
+	/* DELETED TARGET OFFSET NUMBERS FOLLOW */
 } xl_btree_delete;
 
-#define SizeOfBtreeDelete (offsetof(xl_btree_delete, nitems) + sizeof(int))
+#define SizeOfBtreeDelete (offsetof(xl_btree_delete, ndeleted) + sizeof(uint32))
 
 /*
  * This is what we need to know about page reuse within btree. This record
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 0a836d1c928..087918d41dd 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -31,7 +31,7 @@
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD103 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD104 /* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {