diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-01-07 19:16:24 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-01-07 19:16:24 -0500 |
commit | 73912e7fbd1b52c51d914214abbec1cda64595f2 (patch) | |
tree | f6ae2849198dd7a17ae6a5ec174796848ec07cdb /src/backend/access/gin/ginfast.c | |
parent | 9b4271deb97270d336c9d34ac911748faa5a4892 (diff) | |
download | postgresql-73912e7fbd1b52c51d914214abbec1cda64595f2.tar.gz postgresql-73912e7fbd1b52c51d914214abbec1cda64595f2.zip |
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
Diffstat (limited to 'src/backend/access/gin/ginfast.c')
-rw-r--r-- | src/backend/access/gin/ginfast.c | 181 |
1 files changed, 113 insertions, 68 deletions
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 3941f7eb007..9960c786c94 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -18,8 +18,7 @@ #include "postgres.h" -#include "access/genam.h" -#include "access/gin.h" +#include "access/gin_private.h" #include "catalog/index.h" #include "commands/vacuum.h" #include "miscadmin.h" @@ -30,12 +29,13 @@ #define GIN_PAGE_FREESIZE \ ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) ) -typedef struct DatumArray +typedef struct KeyArray { - Datum *values; /* expansible array */ + Datum *keys; /* expansible array */ + GinNullCategory *categories; /* another expansible array */ int32 nvalues; /* current number of valid entries */ - int32 maxvalues; /* allocated size of array */ -} DatumArray; + int32 maxvalues; /* allocated size of arrays */ +} KeyArray; /* @@ -88,8 +88,9 @@ writeListPage(Relation index, Buffer buffer, GinPageGetOpaque(page)->rightlink = rightlink; /* - * tail page may contain only the whole row(s) or final part of row placed - * on previous pages + * tail page may contain only whole row(s) or final part of row placed + * on previous pages (a "row" here meaning all the index tuples generated + * for one heap tuple) */ if (rightlink == InvalidBlockNumber) { @@ -210,13 +211,16 @@ makeSublist(Relation index, IndexTuple *tuples, int32 ntuples, } /* - * Inserts collected values during normal insertion. Function guarantees - * that all values of heap will be stored sequentially, preserving order + * Write the index tuples contained in *collector into the index's + * pending list. + * + * Function guarantees that all these tuples will be inserted consecutively, + * preserving order */ void -ginHeapTupleFastInsert(Relation index, GinState *ginstate, - GinTupleCollector *collector) +ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) { + Relation index = ginstate->index; Buffer metabuffer; Page metapage; GinMetaPageData *metadata = NULL; @@ -291,7 +295,12 @@ ginHeapTupleFastInsert(Relation index, GinState *ginstate, */ START_CRIT_SECTION(); - memcpy(metadata, &sublist, sizeof(GinMetaPageData)); + metadata->head = sublist.head; + metadata->tail = sublist.tail; + metadata->tailFreeSize = sublist.tailFreeSize; + + metadata->nPendingPages = sublist.nPendingPages; + metadata->nPendingHeapTuples = sublist.nPendingHeapTuples; } else { @@ -421,34 +430,40 @@ ginHeapTupleFastInsert(Relation index, GinState *ginstate, END_CRIT_SECTION(); if (needCleanup) - ginInsertCleanup(index, ginstate, false, NULL); + ginInsertCleanup(ginstate, false, NULL); } /* - * Collect values from one tuples to be indexed. All values for - * one tuples should be written at once - to guarantee consistent state + * Create temporary index tuples for a single indexable item (one index column + * for the heap tuple specified by ht_ctid), and append them to the array + * in *collector. They will subsequently be written out using + * ginHeapTupleFastInsert. Note that to guarantee consistent state, all + * temp tuples for a given heap tuple must be written in one call to + * ginHeapTupleFastInsert. */ -uint32 -ginHeapTupleFastCollect(Relation index, GinState *ginstate, +void +ginHeapTupleFastCollect(GinState *ginstate, GinTupleCollector *collector, - OffsetNumber attnum, Datum value, ItemPointer item) + OffsetNumber attnum, Datum value, bool isNull, + ItemPointer ht_ctid) { Datum *entries; + GinNullCategory *categories; int32 i, nentries; - entries = ginExtractEntriesSU(ginstate, attnum, value, &nentries); - - if (nentries == 0) - /* nothing to insert */ - return 0; + /* + * Extract the key values that need to be inserted in the index + */ + entries = ginExtractEntries(ginstate, attnum, value, isNull, + &nentries, &categories); /* * Allocate/reallocate memory for storing collected tuples */ if (collector->tuples == NULL) { - collector->lentuples = nentries * index->rd_att->natts; + collector->lentuples = nentries * ginstate->origTupdesc->natts; collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) * collector->lentuples); } @@ -460,19 +475,19 @@ ginHeapTupleFastCollect(Relation index, GinState *ginstate, } /* - * Creates tuple's array + * Build an index tuple for each key value, and add to array. In + * pending tuples we just stick the heap TID into t_tid. */ for (i = 0; i < nentries; i++) { - collector->tuples[collector->ntuples + i] = - GinFormTuple(index, ginstate, attnum, entries[i], NULL, 0, true); - collector->tuples[collector->ntuples + i]->t_tid = *item; - collector->sumsize += IndexTupleSize(collector->tuples[collector->ntuples + i]); - } + IndexTuple itup; - collector->ntuples += nentries; - - return nentries; + itup = GinFormTuple(ginstate, attnum, entries[i], categories[i], + NULL, 0, true); + itup->t_tid = *ht_ctid; + collector->tuples[collector->ntuples++] = itup; + collector->sumsize += IndexTupleSize(itup); + } } /* @@ -591,38 +606,55 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, return false; } -/* Add datum to DatumArray, resizing if needed */ +/* Initialize empty KeyArray */ static void -addDatum(DatumArray *datums, Datum datum) +initKeyArray(KeyArray *keys, int32 maxvalues) { - if (datums->nvalues >= datums->maxvalues) + keys->keys = (Datum *) palloc(sizeof(Datum) * maxvalues); + keys->categories = (GinNullCategory *) + palloc(sizeof(GinNullCategory) * maxvalues); + keys->nvalues = 0; + keys->maxvalues = maxvalues; +} + +/* Add datum to KeyArray, resizing if needed */ +static void +addDatum(KeyArray *keys, Datum datum, GinNullCategory category) +{ + if (keys->nvalues >= keys->maxvalues) { - datums->maxvalues *= 2; - datums->values = (Datum *) repalloc(datums->values, - sizeof(Datum) * datums->maxvalues); + keys->maxvalues *= 2; + keys->keys = (Datum *) + repalloc(keys->keys, sizeof(Datum) * keys->maxvalues); + keys->categories = (GinNullCategory *) + repalloc(keys->categories, sizeof(GinNullCategory) * keys->maxvalues); } - datums->values[datums->nvalues++] = datum; + keys->keys[keys->nvalues] = datum; + keys->categories[keys->nvalues] = category; + keys->nvalues++; } /* - * Go through all tuples >= startoff on page and collect values in memory + * Collect data from a pending-list page in preparation for insertion into + * the main index. + * + * Go through all tuples >= startoff on page and collect values in accum * - * Note that da is just workspace --- it does not carry any state across + * Note that ka is just workspace --- it does not carry any state across * calls. */ static void -processPendingPage(BuildAccumulator *accum, DatumArray *da, +processPendingPage(BuildAccumulator *accum, KeyArray *ka, Page page, OffsetNumber startoff) { ItemPointerData heapptr; OffsetNumber i, maxoff; - OffsetNumber attrnum, - curattnum; + OffsetNumber attrnum; - /* reset *da to empty */ - da->nvalues = 0; + /* reset *ka to empty */ + ka->nvalues = 0; maxoff = PageGetMaxOffsetNumber(page); Assert(maxoff >= FirstOffsetNumber); @@ -632,7 +664,11 @@ processPendingPage(BuildAccumulator *accum, DatumArray *da, for (i = startoff; i <= maxoff; i = OffsetNumberNext(i)) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + OffsetNumber curattnum; + Datum curkey; + GinNullCategory curcategory; + /* Check for change of heap TID or attnum */ curattnum = gintuple_get_attrnum(accum->ginstate, itup); if (!ItemPointerIsValid(&heapptr)) @@ -644,18 +680,25 @@ processPendingPage(BuildAccumulator *accum, DatumArray *da, curattnum == attrnum)) { /* - * We can insert several datums per call, but only for one heap - * tuple and one column. + * ginInsertBAEntries can insert several datums per call, but only + * for one heap tuple and one column. So call it at a boundary, + * and reset ka. */ - ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues); - da->nvalues = 0; + ginInsertBAEntries(accum, &heapptr, attrnum, + ka->keys, ka->categories, ka->nvalues); + ka->nvalues = 0; heapptr = itup->t_tid; attrnum = curattnum; } - addDatum(da, gin_index_getattr(accum->ginstate, itup)); + + /* Add key to KeyArray */ + curkey = gintuple_get_key(accum->ginstate, itup, &curcategory); + addDatum(ka, curkey, curcategory); } - ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues); + /* Dump out all remaining keys */ + ginInsertBAEntries(accum, &heapptr, attrnum, + ka->keys, ka->categories, ka->nvalues); } /* @@ -679,9 +722,10 @@ processPendingPage(BuildAccumulator *accum, DatumArray *da, * If stats isn't null, we count deleted pending pages into the counts. */ void -ginInsertCleanup(Relation index, GinState *ginstate, +ginInsertCleanup(GinState *ginstate, bool vac_delay, IndexBulkDeleteResult *stats) { + Relation index = ginstate->index; Buffer metabuffer, buffer; Page metapage, @@ -690,7 +734,7 @@ ginInsertCleanup(Relation index, GinState *ginstate, MemoryContext opCtx, oldCtx; BuildAccumulator accum; - DatumArray datums; + KeyArray datums; BlockNumber blkno; metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); @@ -726,10 +770,7 @@ ginInsertCleanup(Relation index, GinState *ginstate, oldCtx = MemoryContextSwitchTo(opCtx); - datums.maxvalues = 128; - datums.nvalues = 0; - datums.values = (Datum *) palloc(sizeof(Datum) * datums.maxvalues); - + initKeyArray(&datums, 128); ginInitBA(&accum); accum.ginstate = ginstate; @@ -748,7 +789,7 @@ ginInsertCleanup(Relation index, GinState *ginstate, } /* - * read page's datums into memory + * read page's datums into accum */ processPendingPage(&accum, &datums, page, FirstOffsetNumber); @@ -769,7 +810,8 @@ ginInsertCleanup(Relation index, GinState *ginstate, { ItemPointerData *list; uint32 nlist; - Datum entry; + Datum key; + GinNullCategory category; OffsetNumber maxoff, attnum; @@ -787,9 +829,11 @@ ginInsertCleanup(Relation index, GinState *ginstate, * list. */ ginBeginBAScan(&accum); - while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) + while ((list = ginGetBAEntry(&accum, + &attnum, &key, &category, &nlist)) != NULL) { - ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL); + ginEntryInsert(ginstate, attnum, key, category, + list, nlist, NULL); if (vac_delay) vacuum_delay_point(); } @@ -822,8 +866,10 @@ ginInsertCleanup(Relation index, GinState *ginstate, processPendingPage(&accum, &datums, page, maxoff + 1); ginBeginBAScan(&accum); - while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) - ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL); + while ((list = ginGetBAEntry(&accum, + &attnum, &key, &category, &nlist)) != NULL) + ginEntryInsert(ginstate, attnum, key, category, + list, nlist, NULL); } /* @@ -857,9 +903,8 @@ ginInsertCleanup(Relation index, GinState *ginstate, * release memory used so far and reinit state */ MemoryContextReset(opCtx); + initKeyArray(&datums, datums.maxvalues); ginInitBA(&accum); - datums.nvalues = 0; - datums.values = (Datum *) palloc(sizeof(Datum) * datums.maxvalues); } else { |