aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/gin/ginvacuum.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/gin/ginvacuum.c')
-rw-r--r-- src/backend/access/gin/ginvacuum.c 232
1 files changed, 124 insertions, 108 deletions
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 4212effbe46..6bf65c32935 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -20,8 +20,9 @@
#include "postmaster/autovacuum.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
+#include "utils/memutils.h"
-typedef struct
+typedef struct GinVacuumState
{
Relation index;
IndexBulkDeleteResult *result;
@@ -29,56 +30,58 @@ typedef struct
void *callback_state;
GinState ginstate;
BufferAccessStrategy strategy;
+ MemoryContext tmpCxt;
} GinVacuumState;
-
/*
- * Vacuums a list of item pointers. The original size of the list is 'nitem',
- * returns the number of items remaining afterwards.
+ * Vacuums an uncompressed posting list. The size of the list is specified
+ * in number of items (nitem).
*
- * If *cleaned == NULL on entry, the original array is left unmodified; if
- * any items are removed, a palloc'd copy of the result is stored in *cleaned.
- * Otherwise *cleaned should point to the original array, in which case it's
- * modified directly.
+ * If none of the items need to be removed, returns NULL. Otherwise returns
+ * a new palloc'd array with the remaining items. The number of remaining
+ * items is returned in *nremaining.
*/
-static int
-ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, int nitem,
- ItemPointerData **cleaned)
+ItemPointer
+ginVacuumItemPointers(GinVacuumState *gvs, ItemPointerData *items,
+ int nitem, int *nremaining)
{
int i,
- j = 0;
-
- Assert(*cleaned == NULL || *cleaned == items);
+ remaining = 0;
+ ItemPointer tmpitems = NULL;
/*
- * just scan over ItemPointer array
+ * Iterate over TIDs array
*/
for (i = 0; i < nitem; i++)
{
if (gvs->callback(items + i, gvs->callback_state))
{
gvs->result->tuples_removed += 1;
- if (!*cleaned)
+ if (!tmpitems)
{
- *cleaned = (ItemPointerData *) palloc(sizeof(ItemPointerData) * nitem);
- if (i != 0)
- memcpy(*cleaned, items, sizeof(ItemPointerData) * i);
+ /*
+ * First TID to be deleted: allocate memory to hold the
+ * remaining items.
+ */
+ tmpitems = palloc(sizeof(ItemPointerData) * nitem);
+ memcpy(tmpitems, items, sizeof(ItemPointerData) * i);
}
}
else
{
gvs->result->num_index_tuples += 1;
- if (i != j)
- (*cleaned)[j] = items[i];
- j++;
+ if (tmpitems)
+ tmpitems[remaining] = items[i];
+ remaining++;
}
}
- return j;
+ *nremaining = remaining;
+ return tmpitems;
}
/*
- * fills WAL record for vacuum leaf page
+ * Create a WAL record for vacuuming entry tree leaf page.
*/
static void
xlogVacuumPage(Relation index, Buffer buffer)
@@ -86,65 +89,64 @@ xlogVacuumPage(Relation index, Buffer buffer)
Page page = BufferGetPage(buffer);
XLogRecPtr recptr;
XLogRecData rdata[3];
- ginxlogVacuumPage data;
- char *backup;
- char itups[BLCKSZ];
- uint32 len = 0;
+ ginxlogVacuumPage xlrec;
+ uint16 lower;
+ uint16 upper;
+ /* This is only used for entry tree leaf pages. */
+ Assert(!GinPageIsData(page));
Assert(GinPageIsLeaf(page));
if (!RelationNeedsWAL(index))
return;
- data.node = index->rd_node;
- data.blkno = BufferGetBlockNumber(buffer);
+ xlrec.node = index->rd_node;
+ xlrec.blkno = BufferGetBlockNumber(buffer);
+
+ /* Assume we can omit data between pd_lower and pd_upper */
+ lower = ((PageHeader) page)->pd_lower;
+ upper = ((PageHeader) page)->pd_upper;
+
+ Assert(lower < BLCKSZ);
+ Assert(upper < BLCKSZ);
- if (GinPageIsData(page))
+ if (lower >= SizeOfPageHeaderData &&
+ upper > lower &&
+ upper <= BLCKSZ)
{
- backup = GinDataPageGetData(page);
- data.nitem = GinPageGetOpaque(page)->maxoff;
- if (data.nitem)
- len = MAXALIGN(sizeof(ItemPointerData) * data.nitem);
+ xlrec.hole_offset = lower;
+ xlrec.hole_length = upper - lower;
}
else
{
- char *ptr;
- OffsetNumber i;
-
- ptr = backup = itups;
- for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++)
- {
- IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
-
- memcpy(ptr, itup, IndexTupleSize(itup));
- ptr += MAXALIGN(IndexTupleSize(itup));
- }
-
- data.nitem = PageGetMaxOffsetNumber(page);
- len = ptr - backup;
+ /* No "hole" to compress out */
+ xlrec.hole_offset = 0;
+ xlrec.hole_length = 0;
}
- rdata[0].buffer = buffer;
- rdata[0].buffer_std = (GinPageIsData(page)) ? FALSE : TRUE;
- rdata[0].len = 0;
- rdata[0].data = NULL;
- rdata[0].next = rdata + 1;
+ rdata[0].data = (char *) &xlrec;
+ rdata[0].len = sizeof(ginxlogVacuumPage);
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].next = &rdata[1];
- rdata[1].buffer = InvalidBuffer;
- rdata[1].len = sizeof(ginxlogVacuumPage);
- rdata[1].data = (char *) &data;
-
- if (len == 0)
+ if (xlrec.hole_length == 0)
{
+ rdata[1].data = (char *) page;
+ rdata[1].len = BLCKSZ;
+ rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
}
else
{
- rdata[1].next = rdata + 2;
-
+ /* must skip the hole */
+ rdata[1].data = (char *) page;
+ rdata[1].len = xlrec.hole_offset;
+ rdata[1].buffer = InvalidBuffer;
+ rdata[1].next = &rdata[2];
+
+ rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
+ rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
rdata[2].buffer = InvalidBuffer;
- rdata[2].len = len;
- rdata[2].data = backup;
rdata[2].next = NULL;
}
@@ -158,6 +160,7 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
Buffer buffer;
Page page;
bool hasVoidPage = FALSE;
+ MemoryContext oldCxt;
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
RBM_NORMAL, gvs->strategy);
@@ -169,7 +172,6 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
* again). New scan can't start but previously started ones work
* concurrently.
*/
-
if (isRoot)
LockBufferForCleanup(buffer);
else
@@ -179,32 +181,14 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
if (GinPageIsLeaf(page))
{
- OffsetNumber newMaxOff,
- oldMaxOff = GinPageGetOpaque(page)->maxoff;
- ItemPointerData *cleaned = NULL;
-
- newMaxOff = ginVacuumPostingList(gvs,
- (ItemPointer) GinDataPageGetData(page), oldMaxOff, &cleaned);
-
- /* saves changes about deleted tuple ... */
- if (oldMaxOff != newMaxOff)
- {
- START_CRIT_SECTION();
-
- if (newMaxOff > 0)
- memcpy(GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff);
- pfree(cleaned);
- GinPageGetOpaque(page)->maxoff = newMaxOff;
+ oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
+ ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
+ MemoryContextSwitchTo(oldCxt);
+ MemoryContextReset(gvs->tmpCxt);
- MarkBufferDirty(buffer);
- xlogVacuumPage(gvs->index, buffer);
-
- END_CRIT_SECTION();
-
- /* if root is a leaf page, we don't desire further processing */
- if (!isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
- hasVoidPage = TRUE;
- }
+ /* if root is a leaf page, we don't desire further processing */
+ if (!isRoot && !hasVoidPage && GinDataLeafPageIsEmpty(page))
+ hasVoidPage = TRUE;
}
else
{
@@ -224,7 +208,7 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
}
/*
- * if we have root and theres void pages in tree, then we don't release
+ * if we have root and there are empty pages in tree, then we don't release
* lock to go further processing and guarantee that tree is unused
*/
if (!(isRoot && hasVoidPage))
@@ -391,6 +375,7 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
Buffer buffer;
Page page;
bool meDelete = FALSE;
+ bool isempty;
if (isRoot)
{
@@ -429,7 +414,12 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
}
}
- if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
+ if (GinPageIsLeaf(page))
+ isempty = GinDataLeafPageIsEmpty(page);
+ else
+ isempty = GinPageGetOpaque(page)->maxoff < FirstOffsetNumber;
+
+ if (isempty)
{
/* we never delete the left- or rightmost branch */
if (me->leftBlkno != InvalidBlockNumber && !GinPageRightMost(page))
@@ -513,22 +503,47 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
}
else if (GinGetNPosting(itup) > 0)
{
+ int nitems;
+ ItemPointer uncompressed;
+
/*
- * if we already created a temporary page, make changes in place
+ * Vacuum posting list with proper function for compressed and
+ * uncompressed format.
*/
- ItemPointerData *cleaned = (tmppage == origpage) ? NULL : GinGetPosting(itup);
- int newN;
-
- newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned);
+ if (GinItupIsCompressed(itup))
+ uncompressed = ginPostingListDecode((GinPostingList *) GinGetPosting(itup), &nitems);
+ else
+ {
+ uncompressed = (ItemPointer) GinGetPosting(itup);
+ nitems = GinGetNPosting(itup);
+ }
- if (GinGetNPosting(itup) != newN)
+ uncompressed = ginVacuumItemPointers(gvs, uncompressed, nitems,
+ &nitems);
+ if (uncompressed)
{
+ /*
+ * Some ItemPointers were deleted, recreate tuple.
+ */
OffsetNumber attnum;
Datum key;
GinNullCategory category;
+ GinPostingList *plist;
+ int plistsize;
+
+ if (nitems > 0)
+ {
+ plist = ginCompressPostingList(uncompressed, nitems, GinMaxItemSize, NULL);
+ plistsize = SizeOfGinPostingList(plist);
+ }
+ else
+ {
+ plist = NULL;
+ plistsize = 0;
+ }
/*
- * Some ItemPointers were deleted, recreate tuple.
+ * if we already created a temporary page, make changes in place
*/
if (tmppage == origpage)
{
@@ -538,15 +553,6 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
*/
tmppage = PageGetTempPageCopy(origpage);
- if (newN > 0)
- {
- Size pos = ((char *) GinGetPosting(itup)) - ((char *) origpage);
-
- memcpy(tmppage + pos, cleaned, sizeof(ItemPointerData) * newN);
- }
-
- pfree(cleaned);
-
/* set itup pointer to new page */
itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
}
@@ -554,7 +560,10 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
attnum = gintuple_get_attrnum(&gvs->ginstate, itup);
key = gintuple_get_key(&gvs->ginstate, itup, &category);
itup = GinFormTuple(&gvs->ginstate, attnum, key, category,
- GinGetPosting(itup), newN, true);
+ (char *) plist, plistsize,
+ nitems, true);
+ if (plist)
+ pfree(plist);
PageIndexTupleDelete(tmppage, i);
if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, false, false) != i)
@@ -583,6 +592,11 @@ ginbulkdelete(PG_FUNCTION_ARGS)
BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
uint32 nRoot;
+ gvs.tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
+ "Gin vacuum temporary context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
gvs.index = index;
gvs.callback = callback;
gvs.callback_state = callback_state;
@@ -683,6 +697,8 @@ ginbulkdelete(PG_FUNCTION_ARGS)
LockBuffer(buffer, GIN_EXCLUSIVE);
}
+ MemoryContextDelete(gvs.tmpCxt);
+
PG_RETURN_POINTER(gvs.result);
}