diff options
Diffstat (limited to 'src/backend/access/gin/gindatapage.c')
-rw-r--r-- | src/backend/access/gin/gindatapage.c | 285 |
1 files changed, 183 insertions, 102 deletions
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index ec8c94bcbd1..feac59d9e0d 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -18,7 +18,6 @@ #include "access/xloginsert.h" #include "lib/ilist.h" #include "miscadmin.h" -#include "utils/memutils.h" #include "utils/rel.h" /* @@ -57,6 +56,13 @@ typedef struct int rsize; /* total size on right page */ bool oldformat; /* page is in pre-9.4 format on disk */ + + /* + * If we need WAL data representing the reconstructed leaf page, it's + * stored here by computeLeafRecompressWALData. + */ + char *walinfo; /* buffer start */ + int walinfolen; /* and length */ } disassembledLeaf; typedef struct @@ -105,10 +111,9 @@ static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining); static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems); -static void registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf); +static void computeLeafRecompressWALData(disassembledLeaf *leaf); static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf); -static void dataPlaceToPageLeafSplit(Buffer buf, - disassembledLeaf *leaf, +static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf, ItemPointerData lbound, ItemPointerData rbound, Page lpage, Page rpage); @@ -423,11 +428,22 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset) } /* - * Places keys to leaf data page and fills WAL record. + * Prepare to insert data on a leaf data page. + * + * If it will fit, return GPTP_INSERT after doing whatever setup is needed + * before we enter the insertion critical section. *ptp_workspace can be + * set to pass information along to the execPlaceToPage function. + * + * If it won't fit, perform a page split and return two temporary page + * images into *newlpage and *newrpage, with result GPTP_SPLIT. + * + * In neither case should the given page buffer be modified here. */ static GinPlaceToPageRC -dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, - void *insertdata, Page *newlpage, Page *newrpage) +dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, + void **ptp_workspace, + Page *newlpage, Page *newrpage) { GinBtreeDataLeafInsertData *items = insertdata; ItemPointer newItems = &items->items[items->curitem]; @@ -440,15 +456,11 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, bool append; int segsize; Size freespace; - MemoryContext tmpCxt; - MemoryContext oldCxt; disassembledLeaf *leaf; leafSegmentInfo *lastleftinfo; ItemPointerData maxOldItem; ItemPointerData remaining; - Assert(GinPageIsData(page)); - rbound = *GinDataPageGetRightBound(page); /* @@ -472,18 +484,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, maxitems = i; } - /* - * The following operations do quite a lot of small memory allocations, - * create a temporary memory context so that we don't need to keep track - * of them individually. - */ - tmpCxt = AllocSetContextCreate(CurrentMemoryContext, - "Gin split temporary context", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - oldCxt = MemoryContextSwitchTo(tmpCxt); - + /* Disassemble the data on the page */ leaf = disassembleLeaf(page); /* @@ -548,16 +549,13 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment); } - /* Add the new items to the segments */ + /* Add the new items to the segment list */ if (!addItemsToLeaf(leaf, newItems, maxitems)) { /* all items were duplicates, we have nothing to do */ items->curitem += maxitems; - MemoryContextSwitchTo(oldCxt); - MemoryContextDelete(tmpCxt); - - return UNMODIFIED; + return GPTP_NO_WORK; } /* @@ -590,22 +588,17 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, if (!needsplit) { /* - * Great, all the items fit on a single page. Construct a WAL record - * describing the changes we made, and write the segments back to the - * page. - * - * Once we start modifying the page, there's no turning back. The - * caller is responsible for calling END_CRIT_SECTION() after writing - * the WAL record. + * Great, all the items fit on a single page. If needed, prepare data + * for a WAL record describing the changes we'll make. */ - MemoryContextSwitchTo(oldCxt); if (RelationNeedsWAL(btree->index)) - { - XLogBeginInsert(); - registerLeafRecompressWALData(buf, leaf); - } - START_CRIT_SECTION(); - dataPlaceToPageLeafRecompress(buf, leaf); + computeLeafRecompressWALData(leaf); + + /* + * We're ready to enter the critical section, but + * dataExecPlaceToPageLeaf will need access to the "leaf" data. + */ + *ptp_workspace = leaf; if (append) elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)", @@ -619,7 +612,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, else { /* - * Had to split. + * Have to split. * * leafRepackItems already divided the segments between the left and * the right page. It filled the left page as full as possible, and @@ -631,7 +624,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, * until they're balanced. * * As a further heuristic, when appending items to the end of the - * page, try make the left page 75% full, one the assumption that + * page, try to make the left page 75% full, on the assumption that * subsequent insertions will probably also go to the end. This packs * the index somewhat tighter when appending to a table, which is very * common. @@ -680,10 +673,13 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, &lastleftinfo->nitems); lbound = lastleftinfo->items[lastleftinfo->nitems - 1]; - *newlpage = MemoryContextAlloc(oldCxt, BLCKSZ); - *newrpage = MemoryContextAlloc(oldCxt, BLCKSZ); + /* + * Now allocate a couple of temporary page images, and fill them. + */ + *newlpage = palloc(BLCKSZ); + *newrpage = palloc(BLCKSZ); - dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound, + dataPlaceToPageLeafSplit(leaf, lbound, rbound, *newlpage, *newrpage); Assert(GinPageRightMost(page) || @@ -700,12 +696,31 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, items->nitem - items->curitem - maxitems); } - MemoryContextSwitchTo(oldCxt); - MemoryContextDelete(tmpCxt); - items->curitem += maxitems; - return needsplit ? SPLIT : INSERTED; + return needsplit ? GPTP_SPLIT : GPTP_INSERT; +} + +/* + * Perform data insertion after beginPlaceToPage has decided it will fit. + * + * This is invoked within a critical section, and XLOG record creation (if + * needed) is already started. The target buffer is registered in slot 0. + */ +static void +dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, void *ptp_workspace) +{ + disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace; + + /* Apply changes to page */ + dataPlaceToPageLeafRecompress(buf, leaf); + + /* If needed, register WAL data built by computeLeafRecompressWALData */ + if (RelationNeedsWAL(btree->index)) + { + XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen); + } } /* @@ -816,11 +831,11 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs) } if (RelationNeedsWAL(indexrel)) - { - XLogBeginInsert(); - registerLeafRecompressWALData(buffer, leaf); - } + computeLeafRecompressWALData(leaf); + + /* Apply changes to page */ START_CRIT_SECTION(); + dataPlaceToPageLeafRecompress(buffer, leaf); MarkBufferDirty(buffer); @@ -829,6 +844,9 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs) { XLogRecPtr recptr; + XLogBeginInsert(); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE); PageSetLSN(page, recptr); } @@ -839,10 +857,11 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs) /* * Construct a ginxlogRecompressDataLeaf record representing the changes - * in *leaf. + * in *leaf. (Because this requires a palloc, we have to do it before + * we enter the critical section that actually updates the page.) */ static void -registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf) +computeLeafRecompressWALData(disassembledLeaf *leaf) { int nmodified = 0; char *walbufbegin; @@ -933,18 +952,15 @@ registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf) segno++; } - - XLogRegisterBuffer(0, buf, REGBUF_STANDARD); - XLogRegisterBufData(0, walbufbegin, walbufend - walbufbegin); - + /* Pass back the constructed info via *leaf */ + leaf->walinfo = walbufbegin; + leaf->walinfolen = walbufend - walbufbegin; } /* * Assemble a disassembled posting tree leaf page back to a buffer. * - * *prdata is filled with WAL information about this operation. The caller - * is responsible for inserting to the WAL, along with any other information - * about the operation that triggered this recompression. + * This just updates the target buffer; WAL stuff is caller's responsibility. * * NOTE: The segment pointers must not point directly to the same buffer, * except for segments that have not been modified and whose preceding @@ -1003,11 +1019,11 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf) * segments to two pages instead of one. * * This is different from the non-split cases in that this does not modify - * the original page directly, but to temporary in-memory copies of the new - * left and right pages. + * the original page directly, but writes to temporary in-memory copies of + * the new left and right pages. */ static void -dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf, +dataPlaceToPageLeafSplit(disassembledLeaf *leaf, ItemPointerData lbound, ItemPointerData rbound, Page lpage, Page rpage) { @@ -1076,39 +1092,55 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf, } /* - * Place a PostingItem to page, and fill a WAL record. + * Prepare to insert data on an internal data page. + * + * If it will fit, return GPTP_INSERT after doing whatever setup is needed + * before we enter the insertion critical section. *ptp_workspace can be + * set to pass information along to the execPlaceToPage function. * - * If the item doesn't fit, returns false without modifying the page. + * If it won't fit, perform a page split and return two temporary page + * images into *newlpage and *newrpage, with result GPTP_SPLIT. * - * In addition to inserting the given item, the downlink of the existing item - * at 'off' is updated to point to 'updateblkno'. + * In neither case should the given page buffer be modified here. * - * On INSERTED, registers the buffer as buffer ID 0, with data. - * On SPLIT, returns rdata that represents the split pages in *prdata. + * Note: on insertion to an internal node, in addition to inserting the given + * item, the downlink of the existing item at stack->off will be updated to + * point to updateblkno. */ static GinPlaceToPageRC -dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, - void *insertdata, BlockNumber updateblkno, - Page *newlpage, Page *newrpage) +dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + void **ptp_workspace, + Page *newlpage, Page *newrpage) { Page page = BufferGetPage(buf); - OffsetNumber off = stack->off; - PostingItem *pitem; - /* this must be static so it can be returned to caller */ - static ginxlogInsertDataInternal data; - - /* split if we have to */ + /* If it doesn't fit, deal with split case */ if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem)) { dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno, newlpage, newrpage); - return SPLIT; + return GPTP_SPLIT; } - Assert(GinPageIsData(page)); + /* Else, we're ready to proceed with insertion */ + return GPTP_INSERT; +} - START_CRIT_SECTION(); +/* + * Perform data insertion after beginPlaceToPage has decided it will fit. + * + * This is invoked within a critical section, and XLOG record creation (if + * needed) is already started. The target buffer is registered in slot 0. + */ +static void +dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + void *ptp_workspace) +{ + Page page = BufferGetPage(buf); + OffsetNumber off = stack->off; + PostingItem *pitem; /* Update existing downlink to point to next page (on internal page) */ pitem = GinDataPageGetPostingItem(page, off); @@ -1120,43 +1152,90 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, if (RelationNeedsWAL(btree->index)) { + /* + * This must be static, because it has to survive until XLogInsert, + * and we can't palloc here. Ugly, but the XLogInsert infrastructure + * isn't reentrant anyway. + */ + static ginxlogInsertDataInternal data; + data.offset = off; data.newitem = *pitem; - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) &data, sizeof(ginxlogInsertDataInternal)); } - - return INSERTED; } /* - * Places an item (or items) to a posting tree. Calls relevant function of - * internal of leaf page because they are handled very differently. + * Prepare to insert data on a posting-tree data page. + * + * If it will fit, return GPTP_INSERT after doing whatever setup is needed + * before we enter the insertion critical section. *ptp_workspace can be + * set to pass information along to the execPlaceToPage function. + * + * If it won't fit, perform a page split and return two temporary page + * images into *newlpage and *newrpage, with result GPTP_SPLIT. + * + * In neither case should the given page buffer be modified here. + * + * Note: on insertion to an internal node, in addition to inserting the given + * item, the downlink of the existing item at stack->off will be updated to + * point to updateblkno. + * + * Calls relevant function for internal or leaf page because they are handled + * very differently. */ static GinPlaceToPageRC -dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, - void *insertdata, BlockNumber updateblkno, - Page *newlpage, Page *newrpage) +dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + void **ptp_workspace, + Page *newlpage, Page *newrpage) { Page page = BufferGetPage(buf); Assert(GinPageIsData(page)); if (GinPageIsLeaf(page)) - return dataPlaceToPageLeaf(btree, buf, stack, insertdata, - newlpage, newrpage); + return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata, + ptp_workspace, + newlpage, newrpage); + else + return dataBeginPlaceToPageInternal(btree, buf, stack, + insertdata, updateblkno, + ptp_workspace, + newlpage, newrpage); +} + +/* + * Perform data insertion after beginPlaceToPage has decided it will fit. + * + * This is invoked within a critical section, and XLOG record creation (if + * needed) is already started. The target buffer is registered in slot 0. + * + * Calls relevant function for internal or leaf page because they are handled + * very differently. + */ +static void +dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, + void *ptp_workspace) +{ + Page page = BufferGetPage(buf); + + if (GinPageIsLeaf(page)) + dataExecPlaceToPageLeaf(btree, buf, stack, insertdata, + ptp_workspace); else - return dataPlaceToPageInternal(btree, buf, stack, - insertdata, updateblkno, - newlpage, newrpage); + dataExecPlaceToPageInternal(btree, buf, stack, insertdata, + updateblkno, ptp_workspace); } /* - * Split page and fill WAL record. Returns a new temp buffer filled with data - * that should go to the left page. The original buffer is left untouched. + * Split internal page and insert new data. + * + * Returns new temp pages to *newlpage and *newrpage. + * The original buffer is left untouched. */ static void dataSplitPageInternal(GinBtree btree, Buffer origbuf, @@ -1231,6 +1310,7 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf, /* set up right bound for right page */ *GinDataPageGetRightBound(rpage) = oldbound; + /* return temp pages to caller */ *newlpage = lpage; *newrpage = rpage; } @@ -1789,7 +1869,8 @@ ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno) btree->isMoveRight = dataIsMoveRight; btree->findItem = NULL; btree->findChildPtr = dataFindChildPtr; - btree->placeToPage = dataPlaceToPage; + btree->beginPlaceToPage = dataBeginPlaceToPage; + btree->execPlaceToPage = dataExecPlaceToPage; btree->fillRoot = ginDataFillRoot; btree->prepareDownlink = dataPrepareDownlink; |