aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTeodor Sigaev <teodor@sigaev.ru>2008-04-22 17:54:19 +0000
committerTeodor Sigaev <teodor@sigaev.ru>2008-04-22 17:54:19 +0000
commitdcd4075379458783e035fd0a5f6bb758ba4700a2 (patch)
treef4236ead0385473712362054fa195c4faad419f3
parentcebc3c42bb279649edd0330f52ff1236491a9dbd (diff)
downloadpostgresql-dcd4075379458783e035fd0a5f6bb758ba4700a2.tar.gz
postgresql-dcd4075379458783e035fd0a5f6bb758ba4700a2.zip
Fix using too many LWLocks bug, reported by Craig Ringer
<craig@postnewspapers.com.au>. It was my mistake, I missed limitation of number of held locks, now GIN doesn't use continiuous locks, but still hold buffers pinned to prevent interference with vacuum's deletion algorithm.
-rw-r--r--src/backend/access/gin/ginget.c366
-rw-r--r--src/include/access/gin.h14
2 files changed, 167 insertions, 213 deletions
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 31464ab2bc3..3791b25eb60 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.10 2008/01/01 19:45:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.10.2.1 2008/04/22 17:54:19 teodor Exp $
*-------------------------------------------------------------------------
*/
@@ -27,58 +27,16 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
/* page was deleted by concurrent vacuum */
return false;
- if (*off > maxoff || *off == InvalidOffsetNumber)
- res = -1;
- else
- res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
+ /*
+ * scan page to find equal or first greater value
+ */
- if (res == 0)
+ for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
{
- /* page isn't changed */
- return true;
- }
- else if (res > 0)
- {
- /*
- * some items was added before our position, look further to find it
- * or first greater
- */
-
- (*off)++;
- for (; *off <= maxoff; (*off)++)
- {
- res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
-
- if (res == 0)
- return true;
-
- if (res < 0)
- {
- (*off)--;
- return true;
- }
- }
- }
- else
- {
- /*
- * some items was deleted before our position, look from begining to
- * find it or first greater
- */
-
- for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
- {
- res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
-
- if (res == 0)
- return true;
+ res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
- if (res < 0)
- {
- (*off)--;
- return true;
- }
- }
+ if (res <= 0)
+ return true;
}
return false;
@@ -89,119 +47,87 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
* Stop* functions unlock buffer (but don't release!)
*/
static void
-startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall)
+startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
{
+ GinBtreeData btreeEntry;
+ GinBtreeStack *stackEntry;
+ Page page;
+ bool needUnlock = TRUE;
+
if (entry->master != NULL)
{
entry->isFinished = entry->master->isFinished;
return;
}
- if (firstCall)
+ /*
+ * we should find entry, and begin scan of posting tree
+ * or just store posting list in memory
+ */
+
+ prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
+ btreeEntry.searchMode = TRUE;
+ stackEntry = ginFindLeafPage(&btreeEntry, NULL);
+ page = BufferGetPage(stackEntry->buffer);
+
+ entry->isFinished = TRUE;
+ entry->buffer = InvalidBuffer;
+ entry->offset = InvalidOffsetNumber;
+ entry->list = NULL;
+ entry->nlist = 0;
+ entry->reduceResult = FALSE;
+ entry->predictNumberResult = 0;
+
+ if (btreeEntry.findItem(&btreeEntry, stackEntry))
{
- /*
- * at first call we should find entry, and begin scan of posting tree
- * or just store posting list in memory
- */
- GinBtreeData btreeEntry;
- GinBtreeStack *stackEntry;
- Page page;
- bool needUnlock = TRUE;
-
- prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
- btreeEntry.searchMode = TRUE;
- stackEntry = ginFindLeafPage(&btreeEntry, NULL);
- page = BufferGetPage(stackEntry->buffer);
-
- entry->isFinished = TRUE;
- entry->buffer = InvalidBuffer;
- entry->offset = InvalidOffsetNumber;
- entry->list = NULL;
- entry->nlist = 0;
- entry->reduceResult = FALSE;
- entry->predictNumberResult = 0;
-
- if (btreeEntry.findItem(&btreeEntry, stackEntry))
- {
- IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
-
- if (GinIsPostingTree(itup))
- {
- BlockNumber rootPostingTree = GinGetPostingTree(itup);
- GinPostingTreeScan *gdi;
- Page page;
-
- LockBuffer(stackEntry->buffer, GIN_UNLOCK);
- needUnlock = FALSE;
- gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
-
- entry->buffer = scanBeginPostingTree(gdi);
- IncrBufferRefCount(entry->buffer);
-
- page = BufferGetPage(entry->buffer);
- entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
+ IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
- freeGinBtreeStack(gdi->stack);
- pfree(gdi);
- entry->isFinished = FALSE;
- }
- else if (GinGetNPosting(itup) > 0)
- {
- entry->nlist = GinGetNPosting(itup);
- entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
- memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
- entry->isFinished = FALSE;
- }
- }
+ if (GinIsPostingTree(itup))
+ {
+ BlockNumber rootPostingTree = GinGetPostingTree(itup);
+ GinPostingTreeScan *gdi;
+ Page page;
- if (needUnlock)
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
- freeGinBtreeStack(stackEntry);
- }
- else if (entry->buffer != InvalidBuffer)
- {
- /* we should find place where we was stopped */
- BlockNumber blkno;
- Page page;
-
- LockBuffer(entry->buffer, GIN_SHARE);
+ needUnlock = FALSE;
+ gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
- if (!ItemPointerIsValid(&entry->curItem))
- /* start position */
- return;
- Assert(entry->offset != InvalidOffsetNumber);
+ entry->buffer = scanBeginPostingTree(gdi);
+ /*
+ * We keep buffer pinned because we need to prevent deletition
+ * page during scan. See GIN's vacuum implementation. RefCount
+ * is increased to keep buffer pinned after freeGinBtreeStack() call.
+ */
+ IncrBufferRefCount(entry->buffer);
- page = BufferGetPage(entry->buffer);
+ page = BufferGetPage(entry->buffer);
+ entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
- /* try to find curItem in current buffer */
- if (findItemInPage(page, &entry->curItem, &entry->offset))
- return;
+ /*
+ * Keep page content in memory to prevent durable page locking
+ */
+ entry->list = (ItemPointerData *) palloc( BLCKSZ );
+ entry->nlist = GinPageGetOpaque(page)->maxoff;
+ memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
+ GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
- /* walk to right */
- while ((blkno = GinPageGetOpaque(page)->rightlink) != InvalidBlockNumber)
- {
LockBuffer(entry->buffer, GIN_UNLOCK);
- entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
- LockBuffer(entry->buffer, GIN_SHARE);
- page = BufferGetPage(entry->buffer);
-
- entry->offset = InvalidOffsetNumber;
- if (findItemInPage(page, &entry->curItem, &entry->offset))
- return;
+ freeGinBtreeStack(gdi->stack);
+ pfree(gdi);
+ entry->isFinished = FALSE;
+ }
+ else if (GinGetNPosting(itup) > 0)
+ {
+ entry->nlist = GinGetNPosting(itup);
+ entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
+ memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
+ entry->isFinished = FALSE;
}
-
- /*
- * curItem and any greated items was deleted by concurrent vacuum, so
- * we finished scan with currrent entry
- */
}
-}
-static void
-stopScanEntry(GinScanEntry entry)
-{
- if (entry->buffer != InvalidBuffer)
- LockBuffer(entry->buffer, GIN_UNLOCK);
+ if (needUnlock)
+ LockBuffer(stackEntry->buffer, GIN_UNLOCK);
+ freeGinBtreeStack(stackEntry);
}
static void
@@ -209,48 +135,39 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
{
uint32 i;
+ if (!key->firstCall)
+ return;
+
for (i = 0; i < key->nentries; i++)
- startScanEntry(index, ginstate, key->scanEntry + i, key->firstCall);
+ startScanEntry(index, ginstate, key->scanEntry + i);
- if (key->firstCall)
- {
- memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
- key->isFinished = FALSE;
- key->firstCall = FALSE;
+ memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
+ key->isFinished = FALSE;
+ key->firstCall = FALSE;
- if (GinFuzzySearchLimit > 0)
- {
- /*
- * If all of keys more than threshold we will try to reduce
- * result, we hope (and only hope, for intersection operation of
- * array our supposition isn't true), that total result will not
- * more than minimal predictNumberResult.
- */
+ if (GinFuzzySearchLimit > 0)
+ {
+ /*
+ * If all of keys more than threshold we will try to reduce
+ * result, we hope (and only hope, for intersection operation of
+ * array our supposition isn't true), that total result will not
+ * more than minimal predictNumberResult.
+ */
- for (i = 0; i < key->nentries; i++)
- if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit)
- return;
+ for (i = 0; i < key->nentries; i++)
+ if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit)
+ return;
- for (i = 0; i < key->nentries; i++)
- if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit)
- {
- key->scanEntry[i].predictNumberResult /= key->nentries;
- key->scanEntry[i].reduceResult = TRUE;
- }
- }
+ for (i = 0; i < key->nentries; i++)
+ if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit)
+ {
+ key->scanEntry[i].predictNumberResult /= key->nentries;
+ key->scanEntry[i].reduceResult = TRUE;
+ }
}
}
static void
-stopScanKey(GinScanKey key)
-{
- uint32 i;
-
- for (i = 0; i < key->nentries; i++)
- stopScanEntry(key->scanEntry + i);
-}
-
-static void
startScan(IndexScanDesc scan)
{
uint32 i;
@@ -260,44 +177,82 @@ startScan(IndexScanDesc scan)
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
}
-static void
-stopScan(IndexScanDesc scan)
-{
- uint32 i;
- GinScanOpaque so = (GinScanOpaque) scan->opaque;
-
- for (i = 0; i < so->nkeys; i++)
- stopScanKey(so->keys + i);
-}
-
-
+/*
+ * Gets next ItemPointer from PostingTree. Note, that we copy
+ * page into GinScanEntry->list array and unlock page, but keep it pinned
+ * to prevent interference with vacuum
+ */
static void
entryGetNextItem(Relation index, GinScanEntry entry)
{
- Page page = BufferGetPage(entry->buffer);
+ Page page;
+ BlockNumber blkno;
- entry->offset++;
- if (entry->offset <= GinPageGetOpaque(page)->maxoff && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
+ for(;;)
{
- entry->curItem = *(ItemPointerData *) GinDataPageGetItem(page, entry->offset);
- }
- else
- {
- BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
+ entry->offset++;
- LockBuffer(entry->buffer, GIN_UNLOCK);
- if (blkno == InvalidBlockNumber)
+ if (entry->offset <= entry->nlist)
{
- ReleaseBuffer(entry->buffer);
- entry->buffer = InvalidBuffer;
- entry->isFinished = TRUE;
+ entry->curItem = entry->list[entry->offset - 1];
+ return;
}
- else
+
+ LockBuffer(entry->buffer, GIN_SHARE);
+ page = BufferGetPage(entry->buffer);
+ for(;;)
{
+ /*
+ * It's needed to go by right link. During that we should refind
+ * first ItemPointer greater that stored
+ */
+
+ blkno = GinPageGetOpaque(page)->rightlink;
+
+ LockBuffer(entry->buffer, GIN_UNLOCK);
+ if (blkno == InvalidBlockNumber)
+ {
+ ReleaseBuffer(entry->buffer);
+ ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
+ entry->buffer = InvalidBuffer;
+ entry->isFinished = TRUE;
+ return;
+ }
+
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
LockBuffer(entry->buffer, GIN_SHARE);
+ page = BufferGetPage(entry->buffer);
+
entry->offset = InvalidOffsetNumber;
- entryGetNextItem(index, entry);
+ if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset))
+ {
+ /*
+ * Found position equal to or greater than stored
+ */
+ entry->nlist = GinPageGetOpaque(page)->maxoff;
+ memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
+ GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
+
+ LockBuffer(entry->buffer, GIN_UNLOCK);
+
+ if ( !ItemPointerIsValid(&entry->curItem) ||
+ compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 )
+ {
+ /*
+ * First pages are deleted or empty, or we found exact position,
+ * so break inner loop and continue outer one.
+ */
+
+ break;
+ }
+
+ /*
+ * Find greater than entry->curItem position, store it.
+ */
+ entry->curItem = entry->list[entry->offset - 1];
+
+ return;
+ }
}
}
}
@@ -317,7 +272,7 @@ entryGetItem(Relation index, GinScanEntry entry)
entry->isFinished = entry->master->isFinished;
entry->curItem = entry->master->curItem;
}
- else if (entry->list)
+ else if (!BufferIsValid(entry->buffer))
{
entry->offset++;
if (entry->offset <= entry->nlist)
@@ -501,8 +456,6 @@ gingetmulti(PG_FUNCTION_ARGS)
break;
} while (*returned_tids < max_tids);
- stopScan(scan);
-
PG_RETURN_BOOL(*returned_tids == max_tids);
}
@@ -524,7 +477,6 @@ gingettuple(PG_FUNCTION_ARGS)
startScan(scan);
res = scanGetItem(scan, &scan->xs_ctup.t_self);
- stopScan(scan);
PG_RETURN_BOOL(res);
}
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index d27ba66a597..8e3a25e9d43 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -4,7 +4,7 @@
*
* Copyright (c) 2006-2008, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.16 2008/01/01 19:45:56 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.16.2.1 2008/04/22 17:54:19 teodor Exp $
*--------------------------------------------------------------------------
*/
@@ -356,14 +356,16 @@ typedef struct GinScanEntryData
/* entry, got from extractQueryFn */
Datum entry;
- /* current ItemPointer to heap, its offset in buffer and buffer */
- ItemPointerData curItem;
- OffsetNumber offset;
+ /* Current page in posting tree */
Buffer buffer;
- /* in case of Posing list */
+ /* current ItemPointer to heap */
+ ItemPointerData curItem;
+
+ /* used for Posting list and one page in Posting tree */
ItemPointerData *list;
- uint32 nlist;
+ uint32 nlist;
+ OffsetNumber offset;
bool isFinished;
bool reduceResult;