diff options
Diffstat (limited to 'src/backend/access/gin/ginvacuum.c')
-rw-r--r-- | src/backend/access/gin/ginvacuum.c | 232 |
1 files changed, 124 insertions, 108 deletions
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 4212effbe46..6bf65c32935 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -20,8 +20,9 @@ #include "postmaster/autovacuum.h" #include "storage/indexfsm.h" #include "storage/lmgr.h" +#include "utils/memutils.h" -typedef struct +typedef struct GinVacuumState { Relation index; IndexBulkDeleteResult *result; @@ -29,56 +30,58 @@ typedef struct void *callback_state; GinState ginstate; BufferAccessStrategy strategy; + MemoryContext tmpCxt; } GinVacuumState; - /* - * Vacuums a list of item pointers. The original size of the list is 'nitem', - * returns the number of items remaining afterwards. + * Vacuums an uncompressed posting list. The size of the must can be specified + * in number of items (nitems). * - * If *cleaned == NULL on entry, the original array is left unmodified; if - * any items are removed, a palloc'd copy of the result is stored in *cleaned. - * Otherwise *cleaned should point to the original array, in which case it's - * modified directly. + * If none of the items need to be removed, returns NULL. Otherwise returns + * a new palloc'd array with the remaining items. The number of remaining + * items is returned in *nremaining. */ -static int -ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, int nitem, - ItemPointerData **cleaned) +ItemPointer +ginVacuumItemPointers(GinVacuumState *gvs, ItemPointerData *items, + int nitem, int *nremaining) { int i, - j = 0; - - Assert(*cleaned == NULL || *cleaned == items); + remaining = 0; + ItemPointer tmpitems = NULL; /* - * just scan over ItemPointer array + * Iterate over TIDs array */ for (i = 0; i < nitem; i++) { if (gvs->callback(items + i, gvs->callback_state)) { gvs->result->tuples_removed += 1; - if (!*cleaned) + if (!tmpitems) { - *cleaned = (ItemPointerData *) palloc(sizeof(ItemPointerData) * nitem); - if (i != 0) - memcpy(*cleaned, items, sizeof(ItemPointerData) * i); + /* + * First TID to be deleted: allocate memory to hold the + * remaining items. + */ + tmpitems = palloc(sizeof(ItemPointerData) * nitem); + memcpy(tmpitems, items, sizeof(ItemPointerData) * i); } } else { gvs->result->num_index_tuples += 1; - if (i != j) - (*cleaned)[j] = items[i]; - j++; + if (tmpitems) + tmpitems[remaining] = items[i]; + remaining++; } } - return j; + *nremaining = remaining; + return tmpitems; } /* - * fills WAL record for vacuum leaf page + * Create a WAL record for vacuuming entry tree leaf page. */ static void xlogVacuumPage(Relation index, Buffer buffer) @@ -86,65 +89,64 @@ xlogVacuumPage(Relation index, Buffer buffer) Page page = BufferGetPage(buffer); XLogRecPtr recptr; XLogRecData rdata[3]; - ginxlogVacuumPage data; - char *backup; - char itups[BLCKSZ]; - uint32 len = 0; + ginxlogVacuumPage xlrec; + uint16 lower; + uint16 upper; + /* This is only used for entry tree leaf pages. */ + Assert(!GinPageIsData(page)); Assert(GinPageIsLeaf(page)); if (!RelationNeedsWAL(index)) return; - data.node = index->rd_node; - data.blkno = BufferGetBlockNumber(buffer); + xlrec.node = index->rd_node; + xlrec.blkno = BufferGetBlockNumber(buffer); + + /* Assume we can omit data between pd_lower and pd_upper */ + lower = ((PageHeader) page)->pd_lower; + upper = ((PageHeader) page)->pd_upper; + + Assert(lower < BLCKSZ); + Assert(upper < BLCKSZ); - if (GinPageIsData(page)) + if (lower >= SizeOfPageHeaderData && + upper > lower && + upper <= BLCKSZ) { - backup = GinDataPageGetData(page); - data.nitem = GinPageGetOpaque(page)->maxoff; - if (data.nitem) - len = MAXALIGN(sizeof(ItemPointerData) * data.nitem); + xlrec.hole_offset = lower; + xlrec.hole_length = upper - lower; } else { - char *ptr; - OffsetNumber i; - - ptr = backup = itups; - for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++) - { - IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); - - memcpy(ptr, itup, IndexTupleSize(itup)); - ptr += MAXALIGN(IndexTupleSize(itup)); - } - - data.nitem = PageGetMaxOffsetNumber(page); - len = ptr - backup; + /* No "hole" to compress out */ + xlrec.hole_offset = 0; + xlrec.hole_length = 0; } - rdata[0].buffer = buffer; - rdata[0].buffer_std = (GinPageIsData(page)) ? FALSE : TRUE; - rdata[0].len = 0; - rdata[0].data = NULL; - rdata[0].next = rdata + 1; + rdata[0].data = (char *) &xlrec; + rdata[0].len = sizeof(ginxlogVacuumPage); + rdata[0].buffer = InvalidBuffer; + rdata[0].next = &rdata[1]; - rdata[1].buffer = InvalidBuffer; - rdata[1].len = sizeof(ginxlogVacuumPage); - rdata[1].data = (char *) &data; - - if (len == 0) + if (xlrec.hole_length == 0) { + rdata[1].data = (char *) page; + rdata[1].len = BLCKSZ; + rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; } else { - rdata[1].next = rdata + 2; - + /* must skip the hole */ + rdata[1].data = (char *) page; + rdata[1].len = xlrec.hole_offset; + rdata[1].buffer = InvalidBuffer; + rdata[1].next = &rdata[2]; + + rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length); + rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length); rdata[2].buffer = InvalidBuffer; - rdata[2].len = len; - rdata[2].data = backup; rdata[2].next = NULL; } @@ -158,6 +160,7 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer buffer; Page page; bool hasVoidPage = FALSE; + MemoryContext oldCxt; buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); @@ -169,7 +172,6 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, * again). New scan can't start but previously started ones work * concurrently. */ - if (isRoot) LockBufferForCleanup(buffer); else @@ -179,32 +181,14 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, if (GinPageIsLeaf(page)) { - OffsetNumber newMaxOff, - oldMaxOff = GinPageGetOpaque(page)->maxoff; - ItemPointerData *cleaned = NULL; - - newMaxOff = ginVacuumPostingList(gvs, - (ItemPointer) GinDataPageGetData(page), oldMaxOff, &cleaned); - - /* saves changes about deleted tuple ... */ - if (oldMaxOff != newMaxOff) - { - START_CRIT_SECTION(); - - if (newMaxOff > 0) - memcpy(GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff); - pfree(cleaned); - GinPageGetOpaque(page)->maxoff = newMaxOff; + oldCxt = MemoryContextSwitchTo(gvs->tmpCxt); + ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs); + MemoryContextSwitchTo(oldCxt); + MemoryContextReset(gvs->tmpCxt); - MarkBufferDirty(buffer); - xlogVacuumPage(gvs->index, buffer); - - END_CRIT_SECTION(); - - /* if root is a leaf page, we don't desire further processing */ - if (!isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) - hasVoidPage = TRUE; - } + /* if root is a leaf page, we don't desire further processing */ + if (!isRoot && !hasVoidPage && GinDataLeafPageIsEmpty(page)) + hasVoidPage = TRUE; } else { @@ -224,7 +208,7 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, } /* - * if we have root and theres void pages in tree, then we don't release + * if we have root and there are empty pages in tree, then we don't release * lock to go further processing and guarantee that tree is unused */ if (!(isRoot && hasVoidPage)) @@ -391,6 +375,7 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer buffer; Page page; bool meDelete = FALSE; + bool isempty; if (isRoot) { @@ -429,7 +414,12 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, } } - if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) + if (GinPageIsLeaf(page)) + isempty = GinDataLeafPageIsEmpty(page); + else + isempty = GinPageGetOpaque(page)->maxoff < FirstOffsetNumber; + + if (isempty) { /* we never delete the left- or rightmost branch */ if (me->leftBlkno != InvalidBlockNumber && !GinPageRightMost(page)) @@ -513,22 +503,47 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3 } else if (GinGetNPosting(itup) > 0) { + int nitems; + ItemPointer uncompressed; + /* - * if we already created a temporary page, make changes in place + * Vacuum posting list with proper function for compressed and + * uncompressed format. */ - ItemPointerData *cleaned = (tmppage == origpage) ? NULL : GinGetPosting(itup); - int newN; - - newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned); + if (GinItupIsCompressed(itup)) + uncompressed = ginPostingListDecode((GinPostingList *) GinGetPosting(itup), &nitems); + else + { + uncompressed = (ItemPointer) GinGetPosting(itup); + nitems = GinGetNPosting(itup); + } - if (GinGetNPosting(itup) != newN) + uncompressed = ginVacuumItemPointers(gvs, uncompressed, nitems, + &nitems); + if (uncompressed) { + /* + * Some ItemPointers were deleted, recreate tuple. + */ OffsetNumber attnum; Datum key; GinNullCategory category; + GinPostingList *plist; + int plistsize; + + if (nitems > 0) + { + plist = ginCompressPostingList(uncompressed, nitems, GinMaxItemSize, NULL); + plistsize = SizeOfGinPostingList(plist); + } + else + { + plist = NULL; + plistsize = 0; + } /* - * Some ItemPointers were deleted, recreate tuple. + * if we already created a temporary page, make changes in place */ if (tmppage == origpage) { @@ -538,15 +553,6 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3 */ tmppage = PageGetTempPageCopy(origpage); - if (newN > 0) - { - Size pos = ((char *) GinGetPosting(itup)) - ((char *) origpage); - - memcpy(tmppage + pos, cleaned, sizeof(ItemPointerData) * newN); - } - - pfree(cleaned); - /* set itup pointer to new page */ itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); } @@ -554,7 +560,10 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3 attnum = gintuple_get_attrnum(&gvs->ginstate, itup); key = gintuple_get_key(&gvs->ginstate, itup, &category); itup = GinFormTuple(&gvs->ginstate, attnum, key, category, - GinGetPosting(itup), newN, true); + (char *) plist, plistsize, + nitems, true); + if (plist) + pfree(plist); PageIndexTupleDelete(tmppage, i); if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, false, false) != i) @@ -583,6 +592,11 @@ ginbulkdelete(PG_FUNCTION_ARGS) BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))]; uint32 nRoot; + gvs.tmpCxt = AllocSetContextCreate(CurrentMemoryContext, + "Gin vacuum temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); gvs.index = index; gvs.callback = callback; gvs.callback_state = callback_state; @@ -683,6 +697,8 @@ ginbulkdelete(PG_FUNCTION_ARGS) LockBuffer(buffer, GIN_EXCLUSIVE); } + MemoryContextDelete(gvs.tmpCxt); + PG_RETURN_POINTER(gvs.result); } |