Author:    Tom Lane <tgl@sss.pgh.pa.us>  2006-07-25 19:13:00 +0000
Committer: Tom Lane <tgl@sss.pgh.pa.us>  2006-07-25 19:13:00 +0000
commit e6284649b9e30372b3990107a082bc7520325676
tree   7c1075578244b4ce410c8185617dca4a7f7c44a5
parent edd49fcf69a8d3e044adc7f0794dc8d18e3f994b
Modify btree to delete known-dead index entries without an actual VACUUM.
When we are about to split an index page to do an insertion, first look to see if any entries marked LP_DELETE exist on the page, and if so remove them to try to make enough space for the desired insert. This should reduce index bloat in heavily-updated tables, although of course you still need VACUUM eventually to clean up the heap.

Junji Teramoto
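In outline, the patch makes a page split the last resort rather than the first response to a full leaf page. Below is a minimal sketch of that decision flow; all type and function names here (page_t, erase_dead_items, and so on) are illustrative stand-ins rather than the real PostgreSQL API, and the move-right logic that the actual _bt_insertonpg loop also performs is omitted for brevity. The real code follows in the diff.

#include <stdbool.h>
#include <stddef.h>

/* Illustrative stand-ins: the real code works with Page, BTPageOpaque,
 * PageGetFreeSpace(), P_ISLEAF(), P_HAS_GARBAGE and friends. */
typedef struct page_t page_t;

extern bool   page_is_leaf(const page_t *page);
extern bool   page_has_garbage_hint(const page_t *page); /* BTP_HAS_GARBAGE */
extern size_t page_free_space(const page_t *page);
extern void   erase_dead_items(page_t *page);   /* drop LP_DELETE items */
extern void   split_page_and_insert(page_t *page, size_t itemsz);
extern void   add_item(page_t *page, size_t itemsz);

/*
 * Insertion-time decision flow introduced by the patch: attempt a
 * one-page micro-vacuum before falling back to a page split.
 */
void
insert_on_page(page_t *page, size_t itemsz)
{
    if (page_free_space(page) < itemsz)
    {
        /*
         * Entries already marked LP_DELETE are known dead, so they can
         * be reclaimed without a full VACUUM; the BTP_HAS_GARBAGE hint
         * tells us whether the scan is worth attempting at all.
         */
        if (page_is_leaf(page) && page_has_garbage_hint(page))
            erase_dead_items(page);

        if (page_free_space(page) < itemsz)
        {
            split_page_and_insert(page, itemsz);
            return;
        }
    }
    add_item(page, itemsz);
}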
Diffstat (limited to 'src/backend/access/nbtree/nbtinsert.c')
-rw-r--r--  src/backend/access/nbtree/nbtinsert.c | 81
1 file changed, 73 insertions(+), 8 deletions(-)
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 974ef92d187..597949aa2d9 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.141 2006/07/13 16:49:12 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.142 2006/07/25 19:13:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -65,6 +65,7 @@ static void _bt_pgaddtup(Relation rel, Page page,
OffsetNumber itup_off, const char *where);
static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
int keysz, ScanKey scankey);
+static void _bt_vacuum_one_page(Relation rel, Buffer buffer);
/*
@@ -262,6 +263,8 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
hbuffer) == HEAPTUPLE_DEAD)
{
curitemid->lp_flags |= LP_DELETE;
+ opaque->btpo_flags |= BTP_HAS_GARBAGE;
+ /* be sure to mark the proper buffer dirty... */
if (nbuf != InvalidBuffer)
SetBufferCommitInfoNeedsSave(nbuf);
else
@@ -424,11 +427,29 @@ _bt_insertonpg(Relation rel,
*/
bool movedright = false;
- while (PageGetFreeSpace(page) < itemsz &&
- !P_RIGHTMOST(lpageop) &&
- _bt_compare(rel, keysz, scankey, page, P_HIKEY) == 0 &&
- random() > (MAX_RANDOM_VALUE / 100))
+ while (PageGetFreeSpace(page) < itemsz)
{
+ Buffer rbuf;
+
+ /*
+ * before considering moving right, see if we can obtain enough
+ * space by erasing LP_DELETE items
+ */
+ if (P_ISLEAF(lpageop) && P_HAS_GARBAGE(lpageop))
+ {
+ _bt_vacuum_one_page(rel, buf);
+ if (PageGetFreeSpace(page) >= itemsz)
+ break; /* OK, now we have enough space */
+ }
+
+ /*
+ * nope, so check conditions (b) and (c) enumerated above
+ */
+ if (P_RIGHTMOST(lpageop) ||
+ _bt_compare(rel, keysz, scankey, page, P_HIKEY) != 0 ||
+ random() <= (MAX_RANDOM_VALUE / 100))
+ break;
+
/*
* step right to next non-dead page
*
@@ -438,7 +459,7 @@ _bt_insertonpg(Relation rel,
* pages won't do because we don't know when they will get
* de-linked from the tree.
*/
- Buffer rbuf = InvalidBuffer;
+ rbuf = InvalidBuffer;
for (;;)
{
@@ -702,9 +723,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage);
/* if we're splitting this page, it won't be the root when we're done */
- /* also, clear the SPLIT_END flag in both pages */
+ /* also, clear the SPLIT_END and HAS_GARBAGE flags in both pages */
lopaque->btpo_flags = oopaque->btpo_flags;
- lopaque->btpo_flags &= ~(BTP_ROOT | BTP_SPLIT_END);
+ lopaque->btpo_flags &= ~(BTP_ROOT | BTP_SPLIT_END | BTP_HAS_GARBAGE);
ropaque->btpo_flags = lopaque->btpo_flags;
lopaque->btpo_prev = oopaque->btpo_prev;
lopaque->btpo_next = BufferGetBlockNumber(rbuf);
@@ -1651,3 +1672,47 @@ _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
/* if we get here, the keys are equal */
return true;
}
+
+/*
+ * _bt_vacuum_one_page - vacuum just one index page.
+ *
+ * Try to remove LP_DELETE items from the given page. The passed buffer
+ * must be exclusive-locked, but unlike a real VACUUM, we don't need a
+ * super-exclusive "cleanup" lock (see nbtree/README).
+ */
+static void
+_bt_vacuum_one_page(Relation rel, Buffer buffer)
+{
+ OffsetNumber deletable[MaxOffsetNumber];
+ int ndeletable = 0;
+ OffsetNumber offnum,
+ minoff,
+ maxoff;
+ Page page = BufferGetPage(buffer);
+ BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ /*
+ * Scan over all items to see which ones need to be deleted
+ * according to LP_DELETE flags.
+ */
+ minoff = P_FIRSTDATAKEY(opaque);
+ maxoff = PageGetMaxOffsetNumber(page);
+ for (offnum = minoff;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ ItemId itemId = PageGetItemId(page, offnum);
+
+ if (ItemIdDeleted(itemId))
+ deletable[ndeletable++] = offnum;
+ }
+
+ if (ndeletable > 0)
+ _bt_delitems(rel, buffer, deletable, ndeletable);
+ /*
+ * Note: if we didn't find any LP_DELETE items, then the page's
+ * BTP_HAS_GARBAGE hint bit is falsely set. We do not bother
+ * expending a separate write to clear it, however. We will clear
+ * it when we split the page.
+ */
+}
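
Taken together, the three hunks above implement a cheap hint-bit protocol around BTP_HAS_GARBAGE: set it when marking an entry LP_DELETE, test it before attempting the micro-vacuum, and clear it whenever the page is rewritten by a split. A compressed sketch of that lifecycle follows; the struct, flag value, and function names are illustrative only (the real flag definitions live in src/include/access/nbtree.h).

#include <stdbool.h>
#include <stdint.h>

/* Illustrative flag bit; not necessarily the real BTP_HAS_GARBAGE value. */
#define HAS_GARBAGE_HINT  ((uint16_t) 1 << 6)

typedef struct
{
    uint16_t btpo_flags;    /* mirrors BTPageOpaqueData.btpo_flags */
} page_opaque_t;

/* 1. _bt_check_unique: marking an index entry LP_DELETE also sets the
 *    page-level hint, so later inserters know a scan may pay off. */
void
mark_entry_dead(page_opaque_t *opaque)
{
    opaque->btpo_flags |= HAS_GARBAGE_HINT;
}

/* 2. _bt_insertonpg: the hint gates the micro-vacuum attempt; without
 *    it, every insert onto a full page would scan for LP_DELETE items
 *    in vain. */
bool
worth_micro_vacuum(const page_opaque_t *opaque)
{
    return (opaque->btpo_flags & HAS_GARBAGE_HINT) != 0;
}

/* 3. _bt_split: both halves are rewritten anyway, so the hint is
 *    cleared for free here; this is also where a falsely-set hint
 *    (see the note in _bt_vacuum_one_page) finally goes away. */
void
clear_hint_on_split(page_opaque_t *lopaque, page_opaque_t *ropaque)
{
    lopaque->btpo_flags &= ~HAS_GARBAGE_HINT;
    ropaque->btpo_flags = lopaque->btpo_flags;
}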