diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2009-03-24 20:17:18 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2009-03-24 20:17:18 +0000 |
commit | ff301d6e690bb5581502ea3d8591a1600fd87acc (patch) | |
tree | 9fd8b2fa00cf35f8b2e66b0960e7e9ca90dfaa66 /src/backend/access | |
parent | 9987f66001ef7f59dd8f8c92295732dba5507c4f (diff) | |
download | postgresql-ff301d6e690bb5581502ea3d8591a1600fd87acc.tar.gz postgresql-ff301d6e690bb5581502ea3d8591a1600fd87acc.zip |
Implement "fastupdate" support for GIN indexes, in which we try to accumulate
multiple index entries in a holding area before adding them to the main index
structure. This helps because bulk insert is (usually) significantly faster
than retail insert for GIN.
This patch also removes GIN support for amgettuple-style index scans. The
API defined for amgettuple is difficult to support with fastupdate, and
the previously committed partial-match feature didn't really work with
it either. We might eventually figure out a way to put back amgettuple
support, but it won't happen for 8.4.
catversion bumped because of the change in GIN's pg_am entry, and because
the on-disk format of GIN indexes changed (there's a metapage now,
and possibly a pending list).
Teodor Sigaev
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/common/reloptions.c | 10 | ||||
-rw-r--r-- | src/backend/access/gin/Makefile | 4 | ||||
-rw-r--r-- | src/backend/access/gin/ginbulk.c | 4 | ||||
-rw-r--r-- | src/backend/access/gin/gindatapage.c | 27 | ||||
-rw-r--r-- | src/backend/access/gin/ginfast.c | 866 | ||||
-rw-r--r-- | src/backend/access/gin/ginget.c | 481 | ||||
-rw-r--r-- | src/backend/access/gin/gininsert.c | 58 | ||||
-rw-r--r-- | src/backend/access/gin/ginutil.c | 48 | ||||
-rw-r--r-- | src/backend/access/gin/ginvacuum.c | 46 | ||||
-rw-r--r-- | src/backend/access/gin/ginxlog.c | 215 | ||||
-rw-r--r-- | src/backend/access/gist/gistvacuum.c | 6 | ||||
-rw-r--r-- | src/backend/access/hash/hash.c | 3 | ||||
-rw-r--r-- | src/backend/access/index/indexam.c | 5 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtree.c | 6 |
14 files changed, 1671 insertions, 108 deletions
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index b926689c5cb..880f2db5266 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.23 2009/03/23 16:36:27 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.24 2009/03/24 20:17:09 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -56,6 +56,14 @@ static relopt_bool boolRelOpts[] = }, true }, + { + { + "fastupdate", + "Enables \"fast update\" feature for this GIN index", + RELOPT_KIND_GIN + }, + true + }, /* list terminator */ { { NULL } } }; diff --git a/src/backend/access/gin/Makefile b/src/backend/access/gin/Makefile index 08946c88a73..23b75fc1d80 100644 --- a/src/backend/access/gin/Makefile +++ b/src/backend/access/gin/Makefile @@ -4,7 +4,7 @@ # Makefile for access/gin # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/access/gin/Makefile,v 1.3 2008/02/19 10:30:06 petere Exp $ +# $PostgreSQL: pgsql/src/backend/access/gin/Makefile,v 1.4 2009/03/24 20:17:10 tgl Exp $ # #------------------------------------------------------------------------- @@ -14,6 +14,6 @@ include $(top_builddir)/src/Makefile.global OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \ ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \ - ginbulk.o + ginbulk.o ginfast.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c index 136f80d9977..a7258619aee 100644 --- a/src/backend/access/gin/ginbulk.c +++ b/src/backend/access/gin/ginbulk.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.14 2009/01/01 17:23:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.15 
2009/03/24 20:17:10 tgl Exp $ *------------------------------------------------------------------------- */ @@ -197,6 +197,8 @@ ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber att if (nentry <= 0) return; + Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber); + i = nentry - 1; for (; i > 0; i >>= 1) nbit++; diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index d0e426c6560..a872d44880c 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.13 2009/01/01 17:23:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.14 2009/03/24 20:17:10 tgl Exp $ *------------------------------------------------------------------------- */ @@ -43,8 +43,16 @@ MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPoint while (aptr - a < na && bptr - b < nb) { - if (compareItemPointers(aptr, bptr) > 0) + int cmp = compareItemPointers(aptr, bptr); + + if (cmp > 0) + *dptr++ = *bptr++; + else if (cmp == 0) + { + /* we want only one copy of the identical items */ *dptr++ = *bptr++; + aptr++; + } else *dptr++ = *aptr++; } @@ -630,11 +638,16 @@ insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem) gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack); if (gdi->btree.findItem(&(gdi->btree), gdi->stack)) - elog(ERROR, "item pointer (%u,%d) already exists", - ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem), - ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem)); - - ginInsertValue(&(gdi->btree), gdi->stack); + { + /* + * gdi->btree.items[gdi->btree.curitem] already exists in index + */ + gdi->btree.curitem++; + LockBuffer(gdi->stack->buffer, GIN_UNLOCK); + freeGinBtreeStack(gdi->stack); + } + else + 
ginInsertValue(&(gdi->btree), gdi->stack); gdi->stack = NULL; } diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c new file mode 100644 index 00000000000..d8624237ec1 --- /dev/null +++ b/src/backend/access/gin/ginfast.c @@ -0,0 +1,866 @@ +/*------------------------------------------------------------------------- + * + * ginfast.c + * Fast insert routines for the Postgres inverted index access method. + * Pending entries are stored in linear list of pages. Later on + * (typically during VACUUM), ginInsertCleanup() will be invoked to + * transfer pending entries into the regular index structure. This + * wins because bulk insertion is much more efficient than retail. + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.1 2009/03/24 20:17:10 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/genam.h" +#include "access/gin.h" +#include "access/tuptoaster.h" +#include "catalog/index.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" + + +#define GIN_PAGE_FREESIZE \ + ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) ) + +typedef struct DatumArray +{ + Datum *values; /* expansible array */ + int32 nvalues; /* current number of valid entries */ + int32 maxvalues; /* allocated size of array */ +} DatumArray; + + +/* + * Build a pending-list page from the given array of tuples, and write it out. 
+ */ +static int32 +writeListPage(Relation index, Buffer buffer, + IndexTuple *tuples, int32 ntuples, BlockNumber rightlink) +{ + Page page = BufferGetPage(buffer); + int i, freesize, size=0; + OffsetNumber l, off; + char *workspace; + char *ptr; + + /* workspace could be a local array; we use palloc for alignment */ + workspace = palloc(BLCKSZ); + + START_CRIT_SECTION(); + + GinInitBuffer(buffer, GIN_LIST); + + off = FirstOffsetNumber; + ptr = workspace; + + for(i=0; i<ntuples; i++) + { + int this_size = IndexTupleSize(tuples[i]); + + memcpy(ptr, tuples[i], this_size); + ptr += this_size; + size += this_size; + + l = PageAddItem(page, (Item)tuples[i], this_size, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page in \"%s\"", + RelationGetRelationName(index)); + + off++; + } + + Assert(size <= BLCKSZ); /* else we overran workspace */ + + GinPageGetOpaque(page)->rightlink = rightlink; + + /* + * tail page may contain only the whole row(s) or final + * part of row placed on previous pages + */ + if ( rightlink == InvalidBlockNumber ) + { + GinPageSetFullRow(page); + GinPageGetOpaque(page)->maxoff = 1; + } + else + { + GinPageGetOpaque(page)->maxoff = 0; + } + + freesize = PageGetFreeSpace(page); + + MarkBufferDirty(buffer); + + if (!index->rd_istemp) + { + XLogRecData rdata[2]; + ginxlogInsertListPage data; + XLogRecPtr recptr; + + rdata[0].buffer = buffer; + rdata[0].buffer_std = true; + rdata[0].data = (char*)&data; + rdata[0].len = sizeof(ginxlogInsertListPage); + rdata[0].next = rdata+1; + + rdata[1].buffer = InvalidBuffer; + rdata[1].data = workspace; + rdata[1].len = size; + rdata[1].next = NULL; + + data.blkno = BufferGetBlockNumber(buffer); + data.rightlink = rightlink; + data.ntuples = ntuples; + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, rdata); + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + + UnlockReleaseBuffer(buffer); + + END_CRIT_SECTION(); + + pfree(workspace); + 
+ return freesize; +} + +static void +makeSublist(Relation index, IndexTuple *tuples, int32 ntuples, + GinMetaPageData *res) +{ + Buffer curBuffer = InvalidBuffer; + Buffer prevBuffer = InvalidBuffer; + int i, size = 0, tupsize; + int startTuple = 0; + + Assert(ntuples > 0); + + /* + * Split tuples into pages + */ + for(i=0;i<ntuples;i++) + { + if ( curBuffer == InvalidBuffer ) + { + curBuffer = GinNewBuffer(index); + + if ( prevBuffer != InvalidBuffer ) + { + res->nPendingPages++; + writeListPage(index, prevBuffer, + tuples+startTuple, i-startTuple, + BufferGetBlockNumber(curBuffer)); + } + else + { + res->head = BufferGetBlockNumber(curBuffer); + } + + prevBuffer = curBuffer; + startTuple = i; + size = 0; + } + + tupsize = MAXALIGN(IndexTupleSize(tuples[i])) + sizeof(ItemIdData); + + if ( size + tupsize >= GinListPageSize ) + { + /* won't fit, force a new page and reprocess */ + i--; + curBuffer = InvalidBuffer; + } + else + { + size += tupsize; + } + } + + /* + * Write last page + */ + res->tail = BufferGetBlockNumber(curBuffer); + res->tailFreeSize = writeListPage(index, curBuffer, + tuples+startTuple, ntuples-startTuple, + InvalidBlockNumber); + res->nPendingPages++; + /* that was only one heap tuple */ + res->nPendingHeapTuples = 1; +} + +/* + * Inserts collected values during normal insertion. 
Function guarantees + * that all values of heap will be stored sequentially, preserving order + */ +void +ginHeapTupleFastInsert(Relation index, GinState *ginstate, + GinTupleCollector *collector) +{ + Buffer metabuffer; + Page metapage; + GinMetaPageData *metadata = NULL; + XLogRecData rdata[2]; + Buffer buffer = InvalidBuffer; + Page page = NULL; + ginxlogUpdateMeta data; + bool separateList = false; + bool needCleanup = false; + + if ( collector->ntuples == 0 ) + return; + + data.node = index->rd_node; + data.ntuples = 0; + data.newRightlink = data.prevTail = InvalidBlockNumber; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *) &data; + rdata[0].len = sizeof(ginxlogUpdateMeta); + rdata[0].next = NULL; + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + metapage = BufferGetPage(metabuffer); + + if ( collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GIN_PAGE_FREESIZE ) + { + /* + * Total size is greater than one page => make sublist + */ + separateList = true; + } + else + { + LockBuffer(metabuffer, GIN_EXCLUSIVE); + metadata = GinPageGetMeta(metapage); + + if ( metadata->head == InvalidBlockNumber || + collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize ) + { + /* + * Pending list is empty or total size is greater than freespace + * on tail page => make sublist + * + * We unlock metabuffer to keep high concurrency + */ + separateList = true; + LockBuffer(metabuffer, GIN_UNLOCK); + } + } + + if ( separateList ) + { + GinMetaPageData sublist; + + /* + * We should make sublist separately and append it to the tail + */ + memset( &sublist, 0, sizeof(GinMetaPageData) ); + + makeSublist(index, collector->tuples, collector->ntuples, &sublist); + + /* + * metapage was unlocked, see above + */ + LockBuffer(metabuffer, GIN_EXCLUSIVE); + metadata = GinPageGetMeta(metapage); + + if ( metadata->head == InvalidBlockNumber ) + { + /* + * Sublist becomes main list + */ + START_CRIT_SECTION(); + memcpy(metadata, 
&sublist, sizeof(GinMetaPageData) ); + memcpy(&data.metadata, &sublist, sizeof(GinMetaPageData) ); + } + else + { + /* + * merge lists + */ + + data.prevTail = metadata->tail; + buffer = ReadBuffer(index, metadata->tail); + LockBuffer(buffer, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); + Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber); + + START_CRIT_SECTION(); + + GinPageGetOpaque(page)->rightlink = sublist.head; + metadata->tail = sublist.tail; + metadata->tailFreeSize = sublist.tailFreeSize; + + metadata->nPendingPages += sublist.nPendingPages; + metadata->nPendingHeapTuples += sublist.nPendingHeapTuples; + + memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) ); + data.newRightlink = sublist.head; + + MarkBufferDirty(buffer); + } + } + else + { + /* + * Insert into tail page, metapage is already locked + */ + + OffsetNumber l, off; + int i, tupsize; + char *ptr; + + buffer = ReadBuffer(index, metadata->tail); + LockBuffer(buffer, GIN_EXCLUSIVE); + page = BufferGetPage(buffer); + off = (PageIsEmpty(page)) ? 
FirstOffsetNumber : + OffsetNumberNext(PageGetMaxOffsetNumber(page)); + + rdata[0].next = rdata + 1; + + rdata[1].buffer = buffer; + rdata[1].buffer_std = true; + ptr = rdata[1].data = (char *) palloc( collector->sumsize ); + rdata[1].len = collector->sumsize; + rdata[1].next = NULL; + + data.ntuples = collector->ntuples; + + START_CRIT_SECTION(); + + /* + * Increase counter of heap tuples + */ + Assert( GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples ); + GinPageGetOpaque(page)->maxoff++; + metadata->nPendingHeapTuples++; + + for(i=0; i<collector->ntuples; i++) + { + tupsize = IndexTupleSize(collector->tuples[i]); + l = PageAddItem(page, (Item)collector->tuples[i], tupsize, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page in \"%s\"", + RelationGetRelationName(index)); + + memcpy(ptr, collector->tuples[i], tupsize); + ptr+=tupsize; + + off++; + } + + metadata->tailFreeSize -= collector->sumsize + collector->ntuples * sizeof(ItemIdData); + memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) ); + MarkBufferDirty(buffer); + } + + /* + * Make real write + */ + + MarkBufferDirty(metabuffer); + if ( !index->rd_istemp ) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata); + PageSetLSN(metapage, recptr); + PageSetTLI(metapage, ThisTimeLineID); + + if ( buffer != InvalidBuffer ) + { + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + } + + if (buffer != InvalidBuffer) + UnlockReleaseBuffer(buffer); + + /* + * Force pending list cleanup when it becomes too long. + * And, ginInsertCleanup could take significant amount of + * time, so we prefer to call it when it can do all the work in a + * single collection cycle. In non-vacuum mode, it shouldn't + * require maintenance_work_mem, so fire it while pending list is + * still small enough to fit into work_mem. + * + * ginInsertCleanup() should not be called inside our CRIT_SECTION. 
+ */ + if ( metadata->nPendingPages * GIN_PAGE_FREESIZE > work_mem * 1024L ) + needCleanup = true; + + UnlockReleaseBuffer(metabuffer); + + END_CRIT_SECTION(); + + if ( needCleanup ) + ginInsertCleanup(index, ginstate, false, NULL); +} + +/* + * Collect values from one tuples to be indexed. All values for + * one tuples should be written at once - to guarantee consistent state + */ +uint32 +ginHeapTupleFastCollect(Relation index, GinState *ginstate, + GinTupleCollector *collector, + OffsetNumber attnum, Datum value, ItemPointer item) +{ + Datum *entries; + int32 i, + nentries; + + entries = extractEntriesSU(ginstate, attnum, value, &nentries); + + if (nentries == 0) + /* nothing to insert */ + return 0; + + /* + * Allocate/reallocate memory for storing collected tuples + */ + if ( collector->tuples == NULL ) + { + collector->lentuples = nentries * index->rd_att->natts; + collector->tuples = (IndexTuple*)palloc(sizeof(IndexTuple) * collector->lentuples); + } + + while ( collector->ntuples + nentries > collector->lentuples ) + { + collector->lentuples *= 2; + collector->tuples = (IndexTuple*)repalloc( collector->tuples, + sizeof(IndexTuple) * collector->lentuples); + } + + /* + * Creates tuple's array + */ + for (i = 0; i < nentries; i++) + { + int32 tupsize; + + collector->tuples[collector->ntuples + i] = GinFormTuple(ginstate, attnum, entries[i], NULL, 0); + collector->tuples[collector->ntuples + i]->t_tid = *item; + tupsize = IndexTupleSize(collector->tuples[collector->ntuples + i]); + + if ( tupsize > TOAST_INDEX_TARGET || tupsize >= GinMaxItemSize) + elog(ERROR, "huge tuple"); + + collector->sumsize += tupsize; + } + + collector->ntuples += nentries; + + return nentries; +} + +/* + * Deletes pending list pages up to (not including) newHead page. + * If newHead == InvalidBlockNumber then function drops the whole list. + * + * metapage is pinned and exclusive-locked throughout this function. 
+ * + * Returns true if another cleanup process is running concurrently + * (if so, we can just abandon our own efforts) + */ +static bool +shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, + IndexBulkDeleteResult *stats) +{ + Page metapage; + GinMetaPageData *metadata; + BlockNumber blknoToDelete; + + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + blknoToDelete = metadata->head; + + do + { + Page page; + int i; + int64 nDeletedHeapTuples = 0; + ginxlogDeleteListPages data; + XLogRecData rdata[1]; + Buffer buffers[GIN_NDELETE_AT_ONCE]; + + data.node = index->rd_node; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *) &data; + rdata[0].len = sizeof(ginxlogDeleteListPages); + rdata[0].next = NULL; + + data.ndeleted = 0; + while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead) + { + data.toDelete[ data.ndeleted ] = blknoToDelete; + buffers[ data.ndeleted ] = ReadBuffer(index, blknoToDelete); + LockBuffer( buffers[ data.ndeleted ], GIN_EXCLUSIVE ); + page = BufferGetPage( buffers[ data.ndeleted ] ); + + data.ndeleted++; + + if ( GinPageIsDeleted(page) ) + { + /* concurrent cleanup process is detected */ + for(i=0;i<data.ndeleted;i++) + UnlockReleaseBuffer( buffers[i] ); + + return true; + } + + nDeletedHeapTuples += GinPageGetOpaque(page)->maxoff; + blknoToDelete = GinPageGetOpaque( page )->rightlink; + } + + if (stats) + stats->pages_deleted += data.ndeleted; + + START_CRIT_SECTION(); + + metadata->head = blknoToDelete; + + Assert( metadata->nPendingPages >= data.ndeleted ); + metadata->nPendingPages -= data.ndeleted; + Assert( metadata->nPendingHeapTuples >= nDeletedHeapTuples ); + metadata->nPendingHeapTuples -= nDeletedHeapTuples; + + if ( blknoToDelete == InvalidBlockNumber ) + { + metadata->tail = InvalidBlockNumber; + metadata->tailFreeSize = 0; + metadata->nPendingPages = 0; + metadata->nPendingHeapTuples = 0; + } + memcpy( &data.metadata, metadata, sizeof(GinMetaPageData)); + + 
MarkBufferDirty( metabuffer ); + + for(i=0; i<data.ndeleted; i++) + { + page = BufferGetPage( buffers[ i ] ); + GinPageGetOpaque( page )->flags = GIN_DELETED; + MarkBufferDirty( buffers[ i ] ); + } + + if ( !index->rd_istemp ) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata); + PageSetLSN(metapage, recptr); + PageSetTLI(metapage, ThisTimeLineID); + + for(i=0; i<data.ndeleted; i++) + { + page = BufferGetPage( buffers[ i ] ); + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + } + + for(i=0; i<data.ndeleted; i++) + UnlockReleaseBuffer( buffers[ i ] ); + + END_CRIT_SECTION(); + } while( blknoToDelete != newHead ); + + return false; +} + +/* Add datum to DatumArray, resizing if needed */ +static void +addDatum(DatumArray *datums, Datum datum) +{ + if ( datums->nvalues >= datums->maxvalues) + { + datums->maxvalues *= 2; + datums->values = (Datum*)repalloc(datums->values, + sizeof(Datum)*datums->maxvalues); + } + + datums->values[ datums->nvalues++ ] = datum; +} + +/* + * Go through all tuples >= startoff on page and collect values in memory + * + * Note that da is just workspace --- it does not carry any state across + * calls. 
+ */ +static void +processPendingPage(BuildAccumulator *accum, DatumArray *da, + Page page, OffsetNumber startoff) +{ + ItemPointerData heapptr; + OffsetNumber i,maxoff; + OffsetNumber attrnum, curattnum; + + /* reset *da to empty */ + da->nvalues = 0; + + maxoff = PageGetMaxOffsetNumber(page); + Assert( maxoff >= FirstOffsetNumber ); + ItemPointerSetInvalid(&heapptr); + attrnum = 0; + + for (i = startoff; i <= maxoff; i = OffsetNumberNext(i)) + { + IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + + curattnum = gintuple_get_attrnum(accum->ginstate, itup); + + if ( !ItemPointerIsValid(&heapptr) ) + { + heapptr = itup->t_tid; + attrnum = curattnum; + } + else if ( !(ItemPointerEquals(&heapptr, &itup->t_tid) && + curattnum == attrnum) ) + { + /* + * We can insert several datums per call, but only for one heap + * tuple and one column. + */ + ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues); + da->nvalues = 0; + heapptr = itup->t_tid; + attrnum = curattnum; + } + addDatum(da, gin_index_getattr(accum->ginstate, itup)); + } + + ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues); +} + +/* + * Move tuples from pending pages into regular GIN structure. + * + * This can be called concurrently by multiple backends, so it must cope. + * On first glance it looks completely not concurrent-safe and not crash-safe + * either. The reason it's okay is that multiple insertion of the same entry + * is detected and treated as a no-op by gininsert.c. If we crash after + * posting entries to the main index and before removing them from the + * pending list, it's okay because when we redo the posting later on, nothing + * bad will happen. Likewise, if two backends simultaneously try to post + * a pending entry into the main index, one will succeed and one will do + * nothing. We try to notice when someone else is a little bit ahead of + * us in the process, but that's just to avoid wasting cycles. 
Only the + * action of removing a page from the pending list really needs exclusive + * lock. + * + * vac_delay indicates that ginInsertCleanup is called from vacuum process, + * so call vacuum_delay_point() periodically. + * If stats isn't null, we count deleted pending pages into the counts. + */ +void +ginInsertCleanup(Relation index, GinState *ginstate, + bool vac_delay, IndexBulkDeleteResult *stats) +{ + Buffer metabuffer, buffer; + Page metapage, page; + GinMetaPageData *metadata; + MemoryContext opCtx, oldCtx; + BuildAccumulator accum; + DatumArray datums; + BlockNumber blkno; + + metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + LockBuffer(metabuffer, GIN_SHARE); + metapage = BufferGetPage(metabuffer); + metadata = GinPageGetMeta(metapage); + + if ( metadata->head == InvalidBlockNumber ) + { + /* Nothing to do */ + UnlockReleaseBuffer(metabuffer); + return; + } + + /* + * Read and lock head of pending list + */ + blkno = metadata->head; + buffer = ReadBuffer(index, blkno); + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); + + LockBuffer(metabuffer, GIN_UNLOCK); + + /* + * Initialize. All temporary space will be in opCtx + */ + opCtx = AllocSetContextCreate(CurrentMemoryContext, + "GIN insert cleanup temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + oldCtx = MemoryContextSwitchTo(opCtx); + + datums.maxvalues=128; + datums.nvalues = 0; + datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues); + + ginInitBA(&accum); + accum.ginstate = ginstate; + + /* + * At the top of this loop, we have pin and lock on the current page + * of the pending list. However, we'll release that before exiting + * the loop. Note we also have pin but not lock on the metapage. 
+ */ + for(;;) + { + if ( GinPageIsDeleted(page) ) + { + /* another cleanup process is running concurrently */ + UnlockReleaseBuffer( buffer ); + break; + } + + /* + * read page's datums into memory + */ + processPendingPage(&accum, &datums, page, FirstOffsetNumber); + + if (vac_delay) + vacuum_delay_point(); + + /* + * Is it time to flush memory to disk? Flush if we are at the end + * of the pending list, or if we have a full row and memory is + * getting full. + * + * XXX using up maintenance_work_mem here is probably unreasonably + * much, since vacuum might already be using that much. + */ + if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber || + ( GinPageHasFullRow(page) && + accum.allocatedMemory > maintenance_work_mem * 1024L ) ) + { + ItemPointerData *list; + uint32 nlist; + Datum entry; + OffsetNumber maxoff, attnum; + + /* + * Unlock current page to increase performance. + * Changes of page will be checked later by comparing + * maxoff after completion of memory flush. + */ + maxoff = PageGetMaxOffsetNumber(page); + LockBuffer(buffer, GIN_UNLOCK); + + /* + * Moving collected data into regular structure can take + * significant amount of time - so, run it without locking pending + * list. + */ + while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) + { + ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE); + if (vac_delay) + vacuum_delay_point(); + } + + /* + * Lock the whole list to remove pages + */ + LockBuffer(metabuffer, GIN_EXCLUSIVE); + LockBuffer(buffer, GIN_SHARE); + + if ( GinPageIsDeleted(page) ) + { + /* another cleanup process is running concurrently */ + UnlockReleaseBuffer(buffer); + LockBuffer(metabuffer, GIN_UNLOCK); + break; + } + + /* + * While we left the page unlocked, more stuff might have gotten + * added to it. If so, process those entries immediately. There + * shouldn't be very many, so we don't worry about the fact that + * we're doing this with exclusive lock. 
Insertion algorithm + * gurantees that inserted row(s) will not continue on next page. + * NOTE: intentionally no vacuum_delay_point in this loop. + */ + if ( PageGetMaxOffsetNumber(page) != maxoff ) + { + ginInitBA(&accum); + processPendingPage(&accum, &datums, page, maxoff+1); + + while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) + ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE); + } + + /* + * Remember next page - it will become the new list head + */ + blkno = GinPageGetOpaque(page)->rightlink; + UnlockReleaseBuffer(buffer); /* shiftList will do exclusive locking */ + + /* + * remove readed pages from pending list, at this point all + * content of readed pages is in regular structure + */ + if ( shiftList(index, metabuffer, blkno, stats) ) + { + /* another cleanup process is running concurrently */ + LockBuffer(metabuffer, GIN_UNLOCK); + break; + } + + Assert( blkno == metadata->head ); + LockBuffer(metabuffer, GIN_UNLOCK); + + /* + * if we removed the whole pending list just exit + */ + if ( blkno == InvalidBlockNumber ) + break; + + /* + * release memory used so far and reinit state + */ + MemoryContextReset(opCtx); + ginInitBA(&accum); + datums.nvalues = 0; + datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues); + } + else + { + blkno = GinPageGetOpaque(page)->rightlink; + UnlockReleaseBuffer(buffer); + } + + /* + * Read next page in pending list + */ + CHECK_FOR_INTERRUPTS(); + buffer = ReadBuffer(index, blkno); + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); + } + + ReleaseBuffer(metabuffer); + + /* Clean up temporary space */ + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(opCtx); +} diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 182981498c1..7f9f1236605 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * 
$PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.22 2009/01/10 21:08:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.23 2009/03/24 20:17:10 tgl Exp $ *------------------------------------------------------------------------- */ @@ -23,6 +23,15 @@ #include "utils/memutils.h" +typedef struct pendingPosition +{ + Buffer pendingBuffer; + OffsetNumber firstOffset; + OffsetNumber lastOffset; + ItemPointerData item; +} pendingPosition; + + /* * Tries to refind previously taken ItemPointer on page. */ @@ -258,7 +267,7 @@ computePartialMatchList( GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry } /* - * Start* functions setup begining state of searches: finds correct buffer and pins it. + * Start* functions setup beginning state of searches: finds correct buffer and pins it. */ static void startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) @@ -268,6 +277,15 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) Page page; bool needUnlock = TRUE; + entry->buffer = InvalidBuffer; + entry->offset = InvalidOffsetNumber; + entry->list = NULL; + entry->nlist = 0; + entry->partialMatch = NULL; + entry->partialMatchResult = NULL; + entry->reduceResult = FALSE; + entry->predictNumberResult = 0; + if (entry->master != NULL) { entry->isFinished = entry->master->isFinished; @@ -285,15 +303,6 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) page = BufferGetPage(stackEntry->buffer); entry->isFinished = TRUE; - entry->buffer = InvalidBuffer; - entry->offset = InvalidOffsetNumber; - entry->list = NULL; - entry->nlist = 0; - entry->partialMatch = NULL; - entry->partialMatchIterator = NULL; - entry->partialMatchResult = NULL; - entry->reduceResult = FALSE; - entry->predictNumberResult = 0; if ( entry->isPartialMatch ) { @@ -354,9 +363,10 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) entry->buffer = scanBeginPostingTree(gdi); /* - * We keep buffer pinned because we 
need to prevent deletition + * We keep buffer pinned because we need to prevent deletion of * page during scan. See GIN's vacuum implementation. RefCount - * is increased to keep buffer pinned after freeGinBtreeStack() call. + * is increased to keep buffer pinned after freeGinBtreeStack() + * call. */ IncrBufferRefCount(entry->buffer); @@ -536,9 +546,10 @@ entryGetItem(Relation index, GinScanEntry entry) { do { - if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples ) + if (entry->partialMatchResult == NULL || + entry->offset >= entry->partialMatchResult->ntuples) { - entry->partialMatchResult = tbm_iterate( entry->partialMatchIterator ); + entry->partialMatchResult = tbm_iterate(entry->partialMatchIterator); if ( entry->partialMatchResult == NULL ) { @@ -548,23 +559,37 @@ entryGetItem(Relation index, GinScanEntry entry) entry->isFinished = TRUE; break; } - else if ( entry->partialMatchResult->ntuples < 0 ) - { - /* bitmap became lossy */ - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("not enough memory to store result of partial match operator" ), - errhint("Increase the \"work_mem\" parameter."))); - } + + /* + * reset counter to the beginning of entry->partialMatchResult. + * Note: entry->offset is still greater than + * partialMatchResult->ntuples if partialMatchResult is + * lossy. So, on next call we will get next result from + * TIDBitmap. 
+ */ entry->offset = 0; } - ItemPointerSet(&entry->curItem, - entry->partialMatchResult->blockno, - entry->partialMatchResult->offsets[ entry->offset ]); - entry->offset ++; + if ( entry->partialMatchResult->ntuples < 0 ) + { + /* + * lossy result, so we need to check the whole page + */ + ItemPointerSetLossyPage(&entry->curItem, + entry->partialMatchResult->blockno); + /* + * We might as well fall out of the loop; we could not + * estimate number of results on this page to support correct + * reducing of result even if it's enabled + */ + break; + } - } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry)); + ItemPointerSet(&entry->curItem, + entry->partialMatchResult->blockno, + entry->partialMatchResult->offsets[entry->offset]); + entry->offset++; + } while (entry->reduceResult == TRUE && dropItem(entry)); } else if (!BufferIsValid(entry->buffer)) { @@ -618,6 +643,10 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, if (key->entryRes[i]) { + /* + * Move forward only entries which was the least + * on previous call + */ if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE) { if (compareItemPointers(&entry->curItem, &key->curItem) < 0) @@ -664,6 +693,13 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, */ *keyrecheck = true; + /* + * If one of the entry's scans returns lossy result, return it without + * checking - we can't suggest anything helpful to consistentFn. + */ + if (ItemPointerIsLossyPage(&key->curItem)) + return FALSE; + oldCtx = MemoryContextSwitchTo(tempCtx); res = DatumGetBool(FunctionCall4(&ginstate->consistentFn[key->attnum-1], PointerGetDatum(key->entryRes), @@ -677,6 +713,337 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, return FALSE; } + +/* + * Get ItemPointer of next heap row to be checked from pending list. + * Returns false if there are no more. 
+ * + * The pendingBuffer is presumed pinned and share-locked on entry, and is + * pinned and share-locked on success exit. On failure exit it's released. + */ +static bool +scanGetCandidate(IndexScanDesc scan, pendingPosition *pos) +{ + OffsetNumber maxoff; + Page page; + IndexTuple itup; + + ItemPointerSetInvalid( &pos->item ); + for(;;) + { + page = BufferGetPage(pos->pendingBuffer); + + maxoff = PageGetMaxOffsetNumber(page); + if ( pos->firstOffset > maxoff ) + { + BlockNumber blkno = GinPageGetOpaque(page)->rightlink; + if ( blkno == InvalidBlockNumber ) + { + UnlockReleaseBuffer(pos->pendingBuffer); + pos->pendingBuffer=InvalidBuffer; + + return false; + } + else + { + /* + * Here we must prevent deletion of next page by + * insertcleanup process, which may be trying to obtain + * exclusive lock on current page. So, we lock next + * page before releasing the current one + */ + Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno); + + LockBuffer(tmpbuf, GIN_SHARE); + UnlockReleaseBuffer(pos->pendingBuffer); + + pos->pendingBuffer = tmpbuf; + pos->firstOffset = FirstOffsetNumber; + } + } + else + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset)); + pos->item = itup->t_tid; + if ( GinPageHasFullRow(page) ) + { + /* + * find itempointer to the next row + */ + for(pos->lastOffset = pos->firstOffset+1; pos->lastOffset<=maxoff; pos->lastOffset++) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset)); + if (!ItemPointerEquals(&pos->item, &itup->t_tid)) + break; + } + } + else + { + /* + * All itempointers are the same on this page + */ + pos->lastOffset = maxoff + 1; + } + break; + } + } + + return true; +} + +static bool +matchPartialInPendingList(GinState *ginstate, Page page, + OffsetNumber off, OffsetNumber maxoff, + Datum value, OffsetNumber attrnum, + Datum *datum, bool *datumExtracted, + StrategyNumber strategy) +{ + IndexTuple itup; + int res; + + while ( off < maxoff ) + { + itup = (IndexTuple) 
PageGetItem(page, PageGetItemId(page, off)); + if ( attrnum != gintuple_get_attrnum(ginstate, itup) ) + return false; + + if (datumExtracted[ off-1 ] == false) + { + datum[ off-1 ] = gin_index_getattr(ginstate, itup); + datumExtracted[ off-1 ] = true; + } + + res = DatumGetInt32(FunctionCall3(&ginstate->comparePartialFn[attrnum], + value, + datum[ off-1 ], + UInt16GetDatum(strategy))); + if ( res == 0 ) + return true; + else if (res>0) + return false; + } + + return false; +} + +/* + * Sets entryRes array for each key by looking at + * every entry per indexed value (row) in pending list. + * returns true if at least one of datum was matched by key's entry + * + * The pendingBuffer is presumed pinned and share-locked on entry. + */ +static bool +collectDatumForItem(IndexScanDesc scan, pendingPosition *pos) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + OffsetNumber attrnum; + Page page; + IndexTuple itup; + int i, j; + bool hasMatch = false; + + /* + * Resets entryRes + */ + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + memset( key->entryRes, FALSE, key->nentries ); + } + + for(;;) + { + Datum datum[ BLCKSZ/sizeof(IndexTupleData) ]; + bool datumExtracted[ BLCKSZ/sizeof(IndexTupleData) ]; + + Assert( pos->lastOffset > pos->firstOffset ); + memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset )); + + page = BufferGetPage(pos->pendingBuffer); + + for(i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + for(j=0; j<key->nentries; j++) + { + OffsetNumber StopLow = pos->firstOffset, + StopHigh = pos->lastOffset, + StopMiddle; + GinScanEntry entry = key->scanEntry + j; + + if ( key->entryRes[j] ) + continue; + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle)); + attrnum = gintuple_get_attrnum(&so->ginstate, itup); + + if (key->attnum < attrnum) + StopHigh = StopMiddle; 
+ else if (key->attnum > attrnum) + StopLow = StopMiddle + 1; + else + { + int res; + + if (datumExtracted[ StopMiddle-1 ] == false) + { + datum[ StopMiddle-1 ] = gin_index_getattr(&so->ginstate, itup); + datumExtracted[ StopMiddle-1 ] = true; + } + res = compareEntries(&so->ginstate, + entry->attnum, + entry->entry, + datum[ StopMiddle-1 ]); + + if ( res == 0 ) + { + if ( entry->isPartialMatch ) + key->entryRes[j] = + matchPartialInPendingList(&so->ginstate, + page, StopMiddle, + pos->lastOffset, + entry->entry, + entry->attnum, + datum, + datumExtracted, + entry->strategy); + else + key->entryRes[j] = true; + break; + } + else if ( res < 0 ) + StopHigh = StopMiddle; + else + StopLow = StopMiddle + 1; + } + } + + if ( StopLow>=StopHigh && entry->isPartialMatch ) + key->entryRes[j] = + matchPartialInPendingList(&so->ginstate, + page, StopHigh, + pos->lastOffset, + entry->entry, + entry->attnum, + datum, + datumExtracted, + entry->strategy); + + hasMatch |= key->entryRes[j]; + } + } + + pos->firstOffset = pos->lastOffset; + + if ( GinPageHasFullRow(page) ) + { + /* + * We scan all values from one tuple, go to next one + */ + + return hasMatch; + } + else + { + ItemPointerData item = pos->item; + + if ( scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item) ) + elog(ERROR,"Could not process tuple"); /* XXX should not be here ! */ + } + } + + return hasMatch; +} + +/* + * Collect all matched rows from pending list in bitmap + */ +static void +scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + MemoryContext oldCtx; + bool recheck, keyrecheck, match; + int i; + pendingPosition pos; + Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO); + BlockNumber blkno; + + *ntids = 0; + + LockBuffer(metabuffer, GIN_SHARE); + blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head; + + /* + * fetch head of list before unlocking metapage. 
+ * head page must be pinned to prevent deletion by vacuum process + */ + if ( blkno == InvalidBlockNumber ) + { + /* No pending list, so proceed with normal scan */ + UnlockReleaseBuffer( metabuffer ); + return; + } + + pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno); + LockBuffer(pos.pendingBuffer, GIN_SHARE); + pos.firstOffset = FirstOffsetNumber; + UnlockReleaseBuffer( metabuffer ); + + /* + * loop for each heap row + */ + while( scanGetCandidate(scan, &pos) ) + { + + /* + * Check entries in rows and setup entryRes array + */ + if (!collectDatumForItem(scan, &pos)) + continue; + + /* + * check for consistent + */ + oldCtx = MemoryContextSwitchTo(so->tempCtx); + recheck = false; + match = true; + + for (i = 0; match && i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + keyrecheck = true; + + if ( DatumGetBool(FunctionCall4(&so->ginstate.consistentFn[ key->attnum-1 ], + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + PointerGetDatum(&keyrecheck))) == false ) + { + match = false; + } + + recheck |= keyrecheck; + } + + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(so->tempCtx); + + if ( match ) + { + tbm_add_tuples(tbm, &pos.item, 1, recheck); + (*ntids)++; + } + } +} + /* * Get heap item pointer from scan * returns true if found @@ -720,6 +1087,18 @@ scanGetItem(IndexScanDesc scan, ItemPointerData *item, bool *recheck) { int cmp = compareItemPointers(item, &key->curItem); + if ( cmp != 0 && (ItemPointerIsLossyPage(item) || ItemPointerIsLossyPage(&key->curItem)) ) + { + /* + * if one of ItemPointers points to the whole page then + * compare only page's number + */ + if ( ItemPointerGetBlockNumber(item) == ItemPointerGetBlockNumber(&key->curItem) ) + cmp = 0; + else + cmp = (ItemPointerGetBlockNumber(item) > ItemPointerGetBlockNumber(&key->curItem)) ? 
1 : -1; + } + if (cmp == 0) break; else if (cmp > 0) @@ -757,9 +1136,26 @@ gingetbitmap(PG_FUNCTION_ARGS) if (GinIsVoidRes(scan)) PG_RETURN_INT64(0); + ntids = 0; + + /* + * First, scan the pending list and collect any matching entries into + * the bitmap. After we scan a pending item, some other backend could + * post it into the main index, and so we might visit it a second time + * during the main scan. This is okay because we'll just re-set the + * same bit in the bitmap. (The possibility of duplicate visits is a + * major reason why GIN can't support the amgettuple API, however.) + * Note that it would not do to scan the main index before the pending + * list, since concurrent cleanup could then make us miss entries + * entirely. + */ + scanPendingInsert(scan, tbm, &ntids); + + /* + * Now scan the main index. + */ startScan(scan); - ntids = 0; for (;;) { ItemPointerData iptr; @@ -770,31 +1166,12 @@ gingetbitmap(PG_FUNCTION_ARGS) if (!scanGetItem(scan, &iptr, &recheck)) break; - tbm_add_tuples(tbm, &iptr, 1, recheck); + if ( ItemPointerIsLossyPage(&iptr) ) + tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr)); + else + tbm_add_tuples(tbm, &iptr, 1, recheck); ntids++; } PG_RETURN_INT64(ntids); } - -Datum -gingettuple(PG_FUNCTION_ARGS) -{ - IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); - bool res; - - if (dir != ForwardScanDirection) - elog(ERROR, "GIN doesn't support other scan directions than forward"); - - if (GinIsNewKey(scan)) - newScanKey(scan); - - if (GinIsVoidRes(scan)) - PG_RETURN_BOOL(false); - - startScan(scan); - res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck); - - PG_RETURN_BOOL(res); -} diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 2ab1105423c..d05882cdb94 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University 
of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.18 2009/01/01 17:23:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.19 2009/03/24 20:17:11 tgl Exp $ *------------------------------------------------------------------------- */ @@ -138,9 +138,11 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack, /* * Inserts only one entry to the index, but it can add more than 1 ItemPointer. */ -static void -ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, - ItemPointerData *items, uint32 nitem, bool isBuild) +void +ginEntryInsert(Relation index, GinState *ginstate, + OffsetNumber attnum, Datum value, + ItemPointerData *items, uint32 nitem, + bool isBuild) { GinBtreeData btree; GinBtreeStack *stack; @@ -273,7 +275,7 @@ ginbuild(PG_FUNCTION_ARGS) IndexBuildResult *result; double reltuples; GinBuildState buildstate; - Buffer buffer; + Buffer RootBuffer, MetaBuffer; ItemPointerData *list; Datum entry; uint32 nlist; @@ -286,11 +288,17 @@ ginbuild(PG_FUNCTION_ARGS) initGinState(&buildstate.ginstate, index); + /* initialize the meta page */ + MetaBuffer = GinNewBuffer(index); + /* initialize the root page */ - buffer = GinNewBuffer(index); + RootBuffer = GinNewBuffer(index); + START_CRIT_SECTION(); - GinInitBuffer(buffer, GIN_LEAF); - MarkBufferDirty(buffer); + GinInitMetabuffer(MetaBuffer); + MarkBufferDirty(MetaBuffer); + GinInitBuffer(RootBuffer, GIN_LEAF); + MarkBufferDirty(RootBuffer); if (!index->rd_istemp) { @@ -303,16 +311,19 @@ ginbuild(PG_FUNCTION_ARGS) rdata.len = sizeof(RelFileNode); rdata.next = NULL; - page = BufferGetPage(buffer); - - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata); + + page = BufferGetPage(RootBuffer); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); + page = BufferGetPage(MetaBuffer); + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); } - UnlockReleaseBuffer(buffer); + 
UnlockReleaseBuffer(MetaBuffer); + UnlockReleaseBuffer(RootBuffer); END_CRIT_SECTION(); /* build the index */ @@ -417,9 +428,26 @@ gininsert(PG_FUNCTION_ARGS) initGinState(&ginstate, index); - for(i=0; i<ginstate.origTupdesc->natts;i++) - if ( !isnull[i] ) - res += ginHeapTupleInsert(index, &ginstate, (OffsetNumber)(i+1), values[i], ht_ctid); + if ( GinGetUseFastUpdate(index) ) + { + GinTupleCollector collector; + + memset(&collector, 0, sizeof(GinTupleCollector)); + for(i=0; i<ginstate.origTupdesc->natts;i++) + if ( !isnull[i] ) + res += ginHeapTupleFastCollect(index, &ginstate, &collector, + (OffsetNumber)(i+1), values[i], ht_ctid); + + ginHeapTupleFastInsert(index, &ginstate, &collector); + } + else + { + for(i=0; i<ginstate.origTupdesc->natts;i++) + if ( !isnull[i] ) + res += ginHeapTupleInsert(index, &ginstate, + (OffsetNumber)(i+1), values[i], ht_ctid); + + } MemoryContextSwitchTo(oldCtx); MemoryContextDelete(insertCtx); diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 222ea677883..e0951a6a4f8 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.20 2009/01/05 17:14:28 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.21 2009/03/24 20:17:11 tgl Exp $ *------------------------------------------------------------------------- */ @@ -57,7 +57,7 @@ initGinState(GinState *state, Relation index) CurrentMemoryContext); /* - * Check opclass capability to do partial match. + * Check opclass capability to do partial match. 
*/ if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid ) { @@ -88,7 +88,7 @@ gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple) bool isnull; /* - * First attribute is always int16, so we can safely use any + * First attribute is always int16, so we can safely use any * tuple descriptor to obtain first attribute of tuple */ res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0], @@ -213,6 +213,22 @@ GinInitBuffer(Buffer b, uint32 f) GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b)); } +void +GinInitMetabuffer(Buffer b) +{ + GinMetaPageData *metadata; + Page page = BufferGetPage(b); + + GinInitPage(page, GIN_META, BufferGetPageSize(b)); + + metadata = GinPageGetMeta(page); + + metadata->head = metadata->tail = InvalidBlockNumber; + metadata->tailFreeSize = 0; + metadata->nPendingPages = 0; + metadata->nPendingHeapTuples = 0; +} + int compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b) { @@ -315,10 +331,26 @@ ginoptions(PG_FUNCTION_ARGS) { Datum reloptions = PG_GETARG_DATUM(0); bool validate = PG_GETARG_BOOL(1); - bytea *result; + relopt_value *options; + GinOptions *rdopts; + int numoptions; + static const relopt_parse_elt tab[] = { + {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)} + }; + + options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN, + &numoptions); + + /* if none set, we're done */ + if (numoptions == 0) + PG_RETURN_NULL(); + + rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions); + + fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions, + validate, tab, lengthof(tab)); + + pfree(options); - result = default_reloptions(reloptions, validate, RELOPT_KIND_GIN); - if (result) - PG_RETURN_BYTEA_P(result); - PG_RETURN_NULL(); + PG_RETURN_BYTEA_P(rdopts); } diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index be614a3c9c8..dd98b9fd284 100644 --- a/src/backend/access/gin/ginvacuum.c +++ 
b/src/backend/access/gin/ginvacuum.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.27 2009/01/01 17:23:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.28 2009/03/24 20:17:11 tgl Exp $ *------------------------------------------------------------------------- */ @@ -19,8 +19,8 @@ #include "catalog/storage.h" #include "commands/vacuum.h" #include "miscadmin.h" +#include "postmaster/autovacuum.h" #include "storage/bufmgr.h" -#include "storage/freespace.h" #include "storage/indexfsm.h" #include "storage/lmgr.h" @@ -593,18 +593,24 @@ ginbulkdelete(PG_FUNCTION_ARGS) BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))]; uint32 nRoot; + gvs.index = index; + gvs.callback = callback; + gvs.callback_state = callback_state; + gvs.strategy = info->strategy; + initGinState(&gvs.ginstate, index); + /* first time through? */ if (stats == NULL) + { + /* Yes, so initialize stats to zeroes */ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + /* and cleanup any pending inserts */ + ginInsertCleanup(index, &gvs.ginstate, true, stats); + } + /* we'll re-count the tuples each time */ stats->num_index_tuples = 0; - - gvs.index = index; gvs.result = stats; - gvs.callback = callback; - gvs.callback_state = callback_state; - gvs.strategy = info->strategy; - initGinState(&gvs.ginstate, index); buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); @@ -702,10 +708,32 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) BlockNumber totFreePages; BlockNumber lastBlock = GIN_ROOT_BLKNO, lastFilledBlock = GIN_ROOT_BLKNO; + GinState ginstate; - /* Set up all-zero stats if ginbulkdelete wasn't called */ + /* + * In an autovacuum analyze, we want to clean up pending insertions. + * Otherwise, an ANALYZE-only call is a no-op. 
+ */ + if (info->analyze_only) + { + if (IsAutoVacuumWorkerProcess()) + { + initGinState(&ginstate, index); + ginInsertCleanup(index, &ginstate, true, stats); + } + PG_RETURN_POINTER(stats); + } + + /* + * Set up all-zero stats and cleanup pending inserts + * if ginbulkdelete wasn't called + */ if (stats == NULL) + { stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + initGinState(&ginstate, index); + ginInsertCleanup(index, &ginstate, true, stats); + } /* * XXX we always report the heap tuple count as the number of index diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index 362709de330..03cdc1129cf 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.17 2009/01/20 18:59:36 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.18 2009/03/24 20:17:11 tgl Exp $ *------------------------------------------------------------------------- */ #include "postgres.h" @@ -71,20 +71,30 @@ static void ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) { RelFileNode *node = (RelFileNode *) XLogRecGetData(record); - Buffer buffer; + Buffer RootBuffer, MetaBuffer; Page page; - buffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true); - Assert(BufferIsValid(buffer)); - page = (Page) BufferGetPage(buffer); + MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true); + Assert(BufferIsValid(MetaBuffer)); + GinInitMetabuffer(MetaBuffer); + + page = (Page) BufferGetPage(MetaBuffer); + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); - GinInitBuffer(buffer, GIN_LEAF); + RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true); + Assert(BufferIsValid(RootBuffer)); + page = (Page) BufferGetPage(RootBuffer); + + GinInitBuffer(RootBuffer, GIN_LEAF); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); - 
MarkBufferDirty(buffer); - UnlockReleaseBuffer(buffer); + MarkBufferDirty(MetaBuffer); + UnlockReleaseBuffer(MetaBuffer); + MarkBufferDirty(RootBuffer); + UnlockReleaseBuffer(RootBuffer); } static void @@ -433,6 +443,174 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) } } +static void +ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record) +{ + ginxlogUpdateMeta *data = (ginxlogUpdateMeta*) XLogRecGetData(record); + Buffer metabuffer; + Page metapage; + + metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false); + metapage = BufferGetPage(metabuffer); + + if (!XLByteLE(lsn, PageGetLSN(metapage))) + { + memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); + PageSetLSN(metapage, lsn); + PageSetTLI(metapage, ThisTimeLineID); + MarkBufferDirty(metabuffer); + } + + if ( data->ntuples > 0 ) + { + /* + * insert into tail page + */ + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + Buffer buffer = XLogReadBuffer(data->node, data->metadata.tail, false); + Page page = BufferGetPage(buffer); + + if ( !XLByteLE(lsn, PageGetLSN(page))) + { + OffsetNumber l, off = (PageIsEmpty(page)) ? 
FirstOffsetNumber : + OffsetNumberNext(PageGetMaxOffsetNumber(page)); + int i, tupsize; + IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta)); + + for(i=0; i<data->ntuples; i++) + { + tupsize = IndexTupleSize(tuples); + + l = PageAddItem(page, (Item)tuples, tupsize, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page"); + + tuples = (IndexTuple)( ((char*)tuples) + tupsize ); + } + + /* + * Increase counter of heap tuples + */ + GinPageGetOpaque(page)->maxoff++; + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + else if ( data->prevTail != InvalidBlockNumber ) + { + /* + * New tail + */ + + Buffer buffer = XLogReadBuffer(data->node, data->prevTail, false); + Page page = BufferGetPage(buffer); + + if ( !XLByteLE(lsn, PageGetLSN(page))) + { + GinPageGetOpaque(page)->rightlink = data->newRightlink; + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + + UnlockReleaseBuffer(metabuffer); +} + +static void +ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record) +{ + ginxlogInsertListPage *data = (ginxlogInsertListPage*) XLogRecGetData(record); + Buffer buffer; + Page page; + OffsetNumber l, off = FirstOffsetNumber; + int i, tupsize; + IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage)); + + if (record->xl_info & XLR_BKP_BLOCK_1) + return; + + buffer = XLogReadBuffer(data->node, data->blkno, true); + page = BufferGetPage(buffer); + + GinInitBuffer(buffer, GIN_LIST); + GinPageGetOpaque(page)->rightlink = data->rightlink; + if ( data->rightlink == InvalidBlockNumber ) + { + /* tail of sublist */ + GinPageSetFullRow(page); + GinPageGetOpaque(page)->maxoff = 1; + } + else + { + GinPageGetOpaque(page)->maxoff = 0; + } + + for(i=0; i<data->ntuples; i++) + { + tupsize = IndexTupleSize(tuples); + + l 
= PageAddItem(page, (Item)tuples, tupsize, off, false, false); + + if (l == InvalidOffsetNumber) + elog(ERROR, "failed to add item to index page"); + + tuples = (IndexTuple)( ((char*)tuples) + tupsize ); + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + + UnlockReleaseBuffer(buffer); +} + +static void +ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record) +{ + ginxlogDeleteListPages *data = (ginxlogDeleteListPages*) XLogRecGetData(record); + Buffer metabuffer; + Page metapage; + int i; + + metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false); + metapage = BufferGetPage(metabuffer); + + if (!XLByteLE(lsn, PageGetLSN(metapage))) + { + memcpy( GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); + PageSetLSN(metapage, lsn); + PageSetTLI(metapage, ThisTimeLineID); + MarkBufferDirty(metabuffer); + } + + for(i=0; i<data->ndeleted; i++) + { + Buffer buffer = XLogReadBuffer(data->node,data->toDelete[i],false); + Page page = BufferGetPage(buffer); + + if ( !XLByteLE(lsn, PageGetLSN(page))) + { + GinPageGetOpaque(page)->flags = GIN_DELETED; + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + + UnlockReleaseBuffer(buffer); + } + UnlockReleaseBuffer(metabuffer); +} + void gin_redo(XLogRecPtr lsn, XLogRecord *record) { @@ -461,6 +639,15 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record) case XLOG_GIN_DELETE_PAGE: ginRedoDeletePage(lsn, record); break; + case XLOG_GIN_UPDATE_META_PAGE: + ginRedoUpdateMetapage(lsn, record); + break; + case XLOG_GIN_INSERT_LISTPAGE: + ginRedoInsertListPage(lsn, record); + break; + case XLOG_GIN_DELETE_LISTPAGE: + ginRedoDeleteListPages(lsn, record); + break; default: elog(PANIC, "gin_redo: unknown op code %u", info); } @@ -516,6 +703,18 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec) appendStringInfo(buf, "Delete page, "); desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno); break; + case 
XLOG_GIN_UPDATE_META_PAGE: + appendStringInfo(buf, "Update metapage, "); + desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, ((ginxlogUpdateMeta *) rec)->metadata.tail); + break; + case XLOG_GIN_INSERT_LISTPAGE: + appendStringInfo(buf, "Insert new list page, "); + desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno); + break; + case XLOG_GIN_DELETE_LISTPAGE: + appendStringInfo(buf, "Delete list pages (%d), ", ((ginxlogDeleteListPages *) rec)->ndeleted); + desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, ((ginxlogDeleteListPages *) rec)->metadata.head); + break; default: elog(PANIC, "gin_desc: unknown op code %u", info); } diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index fcf471cf2e9..01b8512d070 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.42 2009/01/01 17:23:35 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.43 2009/03/24 20:17:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -515,6 +515,10 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) lastFilledBlock = GIST_ROOT_BLKNO; bool needLock; + /* No-op in ANALYZE ONLY mode */ + if (info->analyze_only) + PG_RETURN_POINTER(stats); + /* Set up all-zero stats if gistbulkdelete wasn't called */ if (stats == NULL) { diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index ab2f67c6385..42fe9554f0f 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.108 2009/01/01 17:23:35 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.109 2009/03/24 20:17:11 tgl Exp $ * * NOTES * This file contains 
only the public interface routines. @@ -647,6 +647,7 @@ hashvacuumcleanup(PG_FUNCTION_ARGS) BlockNumber num_pages; /* If hashbulkdelete wasn't called, return NULL signifying no change */ + /* Note: this covers the analyze_only case too */ if (stats == NULL) PG_RETURN_POINTER(NULL); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index a03b4595ba1..197fa3b041d 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.112 2009/01/01 17:23:35 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.113 2009/03/24 20:17:12 tgl Exp $ * * INTERFACE ROUTINES * index_open - open an index relation by relation OID @@ -647,7 +647,8 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) * item slot could have been replaced by a newer tuple by the time we get * to it. * - * Returns the number of matching tuples found. + * Returns the number of matching tuples found. (Note: this might be only + * approximate, so it should only be used for statistical purposes.) 
* ---------------- */ int64 diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 84f409e1aca..b8bb1ad4906 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.167 2009/01/01 17:23:35 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.168 2009/03/24 20:17:12 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -557,6 +557,10 @@ btvacuumcleanup(PG_FUNCTION_ARGS) IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); + /* No-op in ANALYZE ONLY mode */ + if (info->analyze_only) + PG_RETURN_POINTER(stats); + /* * If btbulkdelete was called, we need not do anything, just return the * stats from the latest btbulkdelete call. If it wasn't called, we must |