diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2014-01-22 18:51:48 +0200 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2014-01-22 19:20:58 +0200 |
commit | 36a35c550ac114caa423bcbe339d3515db0cd957 (patch) | |
tree | 3bd40801d0bc4ee3ac6ff668f9f2ae221aaada49 /src/backend/access/gin/ginget.c | |
parent | 243ee266339bd4a049ff92e101010242169b7287 (diff) | |
download | postgresql-36a35c550ac114caa423bcbe339d3515db0cd957.tar.gz postgresql-36a35c550ac114caa423bcbe339d3515db0cd957.zip |
Compress GIN posting lists, for smaller index size.
GIN posting lists are now encoded using varbyte-encoding, which allows them
to fit in much smaller space than the straight ItemPointer array format used
before. The new encoding is used for both the lists stored in-line in entry
tree items, and in posting tree leaf pages.
To maintain backwards-compatibility and keep pg_upgrade working, the code
can still read old-style pages and tuples. Posting tree leaf pages in the
new format are flagged with GIN_COMPRESSED flag, to distinguish old and new
format pages. Likewise, entry tree tuples in the new format have a
GIN_ITUP_COMPRESSED flag set in a bit that was previously unused.
This patch bumps GIN_CURRENT_VERSION from 1 to 2. New indexes created with
version 9.4 will therefore have version number 2 in the metapage, while old
pg_upgraded indexes will have version 1. The code treats them the same, but
it might be come handy in the future, if we want to drop support for the
uncompressed format.
Alexander Korotkov and me. Reviewed by Tomas Vondra and Amit Langote.
Diffstat (limited to 'src/backend/access/gin/ginget.c')
-rw-r--r-- | src/backend/access/gin/ginget.c | 117 |
1 files changed, 62 insertions, 55 deletions
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 3d99bef76a3..4bdbd4525d5 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -71,24 +71,20 @@ callConsistentFn(GinState *ginstate, GinScanKey key) * Tries to refind previously taken ItemPointer on a posting page. */ static bool -findItemInPostingPage(Page page, ItemPointer item, OffsetNumber *off) +needToStepRight(Page page, ItemPointer item) { - OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; - int res; - if (GinPageGetOpaque(page)->flags & GIN_DELETED) /* page was deleted by concurrent vacuum */ - return false; + return true; - /* - * scan page to find equal or first greater value - */ - for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++) + if (ginCompareItemPointers(item, GinDataPageGetRightBound(page)) > 0 + && !GinPageRightMost(page)) { - res = ginCompareItemPointers(item, GinDataPageGetItemPointer(page, *off)); - - if (res <= 0) - return true; + /* + * the item we're looking is > the right bound of the page, so it + * can't be on this page. + */ + return true; } return false; @@ -143,14 +139,10 @@ scanPostingTree(Relation index, GinScanEntry scanEntry, for (;;) { page = BufferGetPage(buffer); - - if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && - GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber) + if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0) { - tbm_add_tuples(scanEntry->matchBitmap, - GinDataPageGetItemPointer(page, FirstOffsetNumber), - GinPageGetOpaque(page)->maxoff, false); - scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff; + int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap); + scanEntry->predictNumberResult += n; } if (GinPageRightMost(page)) @@ -335,8 +327,11 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, } else { - tbm_add_tuples(scanEntry->matchBitmap, - GinGetPosting(itup), GinGetNPosting(itup), false); + ItemPointer ipd; + int nipd; + + ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd); + tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false); scanEntry->predictNumberResult += GinGetNPosting(itup); } @@ -450,16 +445,14 @@ restartScanEntry: IncrBufferRefCount(entry->buffer); page = BufferGetPage(entry->buffer); - entry->predictNumberResult = stack->predictNumber * GinPageGetOpaque(page)->maxoff; /* - * Keep page content in memory to prevent durable page locking + * Copy page content to memory to avoid keeping it locked for + * a long time. */ - entry->list = (ItemPointerData *) palloc(BLCKSZ); - entry->nlist = GinPageGetOpaque(page)->maxoff; - memcpy(entry->list, - GinDataPageGetItemPointer(page, FirstOffsetNumber), - GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData)); + entry->list = GinDataLeafPageGetItems(page, &entry->nlist); + + entry->predictNumberResult = stack->predictNumber * entry->nlist; LockBuffer(entry->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); @@ -467,9 +460,10 @@ restartScanEntry: } else if (GinGetNPosting(itup) > 0) { - entry->nlist = GinGetNPosting(itup); - entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist); - memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist); + entry->list = ginReadTuple(ginstate, entry->attnum, itup, + &entry->nlist); + entry->predictNumberResult = entry->nlist; + entry->isFinished = FALSE; } } @@ -532,6 +526,7 @@ static void entryGetNextItem(GinState *ginstate, GinScanEntry entry) { Page page; + int i; for (;;) { @@ -564,35 +559,47 @@ entryGetNextItem(GinState *ginstate, GinScanEntry entry) page = BufferGetPage(entry->buffer); entry->offset = InvalidOffsetNumber; - if (!ItemPointerIsValid(&entry->curItem) || - findItemInPostingPage(page, &entry->curItem, &entry->offset)) + if (entry->list) { - /* - * Found position equal to or greater than stored - */ - entry->nlist = GinPageGetOpaque(page)->maxoff; - memcpy(entry->list, - GinDataPageGetItemPointer(page, FirstOffsetNumber), - GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData)); + pfree(entry->list); + entry->list = NULL; + } - LockBuffer(entry->buffer, GIN_UNLOCK); + /* + * If the page was concurrently split, we have to re-find the + * item we were stopped on. If the page was split more than once, + * the item might not be on this page, but somewhere to the right. + * Keep following the right-links until we re-find the correct + * page. + */ + if (ItemPointerIsValid(&entry->curItem) && + needToStepRight(page, &entry->curItem)) + { + continue; + } + + entry->list = GinDataLeafPageGetItems(page, &entry->nlist); - if (!ItemPointerIsValid(&entry->curItem) || - ginCompareItemPointers(&entry->curItem, - entry->list + entry->offset - 1) == 0) + /* re-find the item we were stopped on. */ + if (ItemPointerIsValid(&entry->curItem)) + { + for (i = 0; i < entry->nlist; i++) { - /* - * First pages are deleted or empty, or we found exact - * position, so break inner loop and continue outer one. - */ - break; + if (ginCompareItemPointers(&entry->curItem, + &entry->list[i]) < 0) + { + LockBuffer(entry->buffer, GIN_UNLOCK); + entry->offset = i + 1; + entry->curItem = entry->list[entry->offset - 1]; + return; + } } - - /* - * Find greater than entry->curItem position, store it. - */ + } + else + { + LockBuffer(entry->buffer, GIN_UNLOCK); + entry->offset = 1; /* scan all items on the page. */ entry->curItem = entry->list[entry->offset - 1]; - return; } } |