Diffstat (limited to 'src/backend/access/hash')
-rw-r--r--  src/backend/access/hash/hash.c        767
-rw-r--r--  src/backend/access/hash/hashfunc.c    411
-rw-r--r--  src/backend/access/hash/hashinsert.c  386
-rw-r--r--  src/backend/access/hash/hashovfl.c   1065
-rw-r--r--  src/backend/access/hash/hashpage.c   1107
-rw-r--r--  src/backend/access/hash/hashscan.c    229
-rw-r--r--  src/backend/access/hash/hashsearch.c  758
-rw-r--r--  src/backend/access/hash/hashstrat.c    69
-rw-r--r--  src/backend/access/hash/hashutil.c    161
9 files changed, 2570 insertions, 2383 deletions
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 89f81fc56a5..e13539c4ad9 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -1,16 +1,16 @@
/*-------------------------------------------------------------------------
*
* hash.c--
- * Implementation of Margo Seltzer's Hashing package for postgres.
+ * Implementation of Margo Seltzer's Hashing package for postgres.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.12 1997/01/10 09:46:13 vadim Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.13 1997/09/07 04:37:49 momjian Exp $
*
* NOTES
- * This file contains only the public interface routines.
+ * This file contains only the public interface routines.
*
*-------------------------------------------------------------------------
*/
@@ -26,452 +26,483 @@
#include <miscadmin.h>
#ifndef HAVE_MEMMOVE
-# include <regex/utils.h>
+#include <regex/utils.h>
#else
-# include <string.h>
+#include <string.h>
#endif
-bool BuildingHash = false;
+bool BuildingHash = false;
/*
- * hashbuild() -- build a new hash index.
+ * hashbuild() -- build a new hash index.
*
- * We use a global variable to record the fact that we're creating
- * a new index. This is used to avoid high-concurrency locking,
- * since the index won't be visible until this transaction commits
- * and since building is guaranteed to be single-threaded.
+ * We use a global variable to record the fact that we're creating
+ * a new index. This is used to avoid high-concurrency locking,
+ * since the index won't be visible until this transaction commits
+ * and since building is guaranteed to be single-threaded.
*/
void
hashbuild(Relation heap,
- Relation index,
- int natts,
- AttrNumber *attnum,
- IndexStrategy istrat,
- uint16 pcount,
- Datum *params,
- FuncIndexInfo *finfo,
- PredInfo *predInfo)
+ Relation index,
+ int natts,
+ AttrNumber * attnum,
+ IndexStrategy istrat,
+ uint16 pcount,
+ Datum * params,
+ FuncIndexInfo * finfo,
+ PredInfo * predInfo)
{
- HeapScanDesc hscan;
- Buffer buffer;
- HeapTuple htup;
- IndexTuple itup;
- TupleDesc htupdesc, itupdesc;
- Datum *attdata;
- bool *nulls;
- InsertIndexResult res;
- int nhtups, nitups;
- int i;
- HashItem hitem;
+ HeapScanDesc hscan;
+ Buffer buffer;
+ HeapTuple htup;
+ IndexTuple itup;
+ TupleDesc htupdesc,
+ itupdesc;
+ Datum *attdata;
+ bool *nulls;
+ InsertIndexResult res;
+ int nhtups,
+ nitups;
+ int i;
+ HashItem hitem;
+
#ifndef OMIT_PARTIAL_INDEX
- ExprContext *econtext;
- TupleTable tupleTable;
- TupleTableSlot *slot;
+ ExprContext *econtext;
+ TupleTable tupleTable;
+ TupleTableSlot *slot;
+
#endif
- Oid hrelid, irelid;
- Node *pred, *oldPred;
-
- /* note that this is a new btree */
- BuildingHash = true;
-
- pred = predInfo->pred;
- oldPred = predInfo->oldPred;
-
- /* initialize the hash index metadata page (if this is a new index) */
- if (oldPred == NULL)
- _hash_metapinit(index);
-
- /* get tuple descriptors for heap and index relations */
- htupdesc = RelationGetTupleDescriptor(heap);
- itupdesc = RelationGetTupleDescriptor(index);
-
- /* get space for data items that'll appear in the index tuple */
- attdata = (Datum *) palloc(natts * sizeof(Datum));
- nulls = (bool *) palloc(natts * sizeof(bool));
-
- /*
- * If this is a predicate (partial) index, we will need to evaluate the
- * predicate using ExecQual, which requires the current tuple to be in a
- * slot of a TupleTable. In addition, ExecQual must have an ExprContext
- * referring to that slot. Here, we initialize dummy TupleTable and
- * ExprContext objects for this purpose. --Nels, Feb '92
- */
+ Oid hrelid,
+ irelid;
+ Node *pred,
+ *oldPred;
+
+ /* note that this is a new btree */
+ BuildingHash = true;
+
+ pred = predInfo->pred;
+ oldPred = predInfo->oldPred;
+
+ /* initialize the hash index metadata page (if this is a new index) */
+ if (oldPred == NULL)
+ _hash_metapinit(index);
+
+ /* get tuple descriptors for heap and index relations */
+ htupdesc = RelationGetTupleDescriptor(heap);
+ itupdesc = RelationGetTupleDescriptor(index);
+
+ /* get space for data items that'll appear in the index tuple */
+ attdata = (Datum *) palloc(natts * sizeof(Datum));
+ nulls = (bool *) palloc(natts * sizeof(bool));
+
+ /*
+ * If this is a predicate (partial) index, we will need to evaluate
+ * the predicate using ExecQual, which requires the current tuple to
+ * be in a slot of a TupleTable. In addition, ExecQual must have an
+ * ExprContext referring to that slot. Here, we initialize dummy
+ * TupleTable and ExprContext objects for this purpose. --Nels, Feb
+ * '92
+ */
#ifndef OMIT_PARTIAL_INDEX
- if (pred != NULL || oldPred != NULL) {
- tupleTable = ExecCreateTupleTable(1);
- slot = ExecAllocTableSlot(tupleTable);
- econtext = makeNode(ExprContext);
- FillDummyExprContext(econtext, slot, htupdesc, buffer);
- }
- else /* quiet the compiler */
+ if (pred != NULL || oldPred != NULL)
+ {
+ tupleTable = ExecCreateTupleTable(1);
+ slot = ExecAllocTableSlot(tupleTable);
+ econtext = makeNode(ExprContext);
+ FillDummyExprContext(econtext, slot, htupdesc, buffer);
+ }
+ else
+/* quiet the compiler */
{
econtext = NULL;
tupleTable = 0;
slot = 0;
}
-#endif /* OMIT_PARTIAL_INDEX */
-
- /* start a heap scan */
- hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
- htup = heap_getnext(hscan, 0, &buffer);
-
- /* build the index */
- nhtups = nitups = 0;
-
- for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
-
- nhtups++;
-
- /*
- * If oldPred != NULL, this is an EXTEND INDEX command, so skip
- * this tuple if it was already in the existing partial index
- */
- if (oldPred != NULL) {
- /*SetSlotContents(slot, htup); */
+#endif /* OMIT_PARTIAL_INDEX */
+
+ /* start a heap scan */
+ hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
+ htup = heap_getnext(hscan, 0, &buffer);
+
+ /* build the index */
+ nhtups = nitups = 0;
+
+ for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer))
+ {
+
+ nhtups++;
+
+ /*
+ * If oldPred != NULL, this is an EXTEND INDEX command, so skip
+ * this tuple if it was already in the existing partial index
+ */
+ if (oldPred != NULL)
+ {
+ /* SetSlotContents(slot, htup); */
#ifndef OMIT_PARTIAL_INDEX
- slot->val = htup;
- if (ExecQual((List*)oldPred, econtext) == true) {
+ slot->val = htup;
+ if (ExecQual((List *) oldPred, econtext) == true)
+ {
+ nitups++;
+ continue;
+ }
+#endif /* OMIT_PARTIAL_INDEX */
+ }
+
+ /*
+ * Skip this tuple if it doesn't satisfy the partial-index
+ * predicate
+ */
+ if (pred != NULL)
+ {
+#ifndef OMIT_PARTIAL_INDEX
+ /* SetSlotContents(slot, htup); */
+ slot->val = htup;
+ if (ExecQual((List *) pred, econtext) == false)
+ continue;
+#endif /* OMIT_PARTIAL_INDEX */
+ }
+
nitups++;
- continue;
- }
-#endif /* OMIT_PARTIAL_INDEX */
+
+ /*
+ * For the current heap tuple, extract all the attributes we use
+ * in this index, and note which are null.
+ */
+ for (i = 1; i <= natts; i++)
+ {
+ int attoff;
+ bool attnull;
+
+ /*
+ * Offsets are from the start of the tuple, and are
+ * zero-based; indices are one-based. The next call returns i
+ * - 1. That's data hiding for you.
+ */
+
+ /* attoff = i - 1 */
+ attoff = AttrNumberGetAttrOffset(i);
+
+ /*
+ * below, attdata[attoff] set to equal some datum & attnull is
+ * changed to indicate whether or not the attribute is null
+ * for this tuple
+ */
+ attdata[attoff] = GetIndexValue(htup,
+ htupdesc,
+ attoff,
+ attnum,
+ finfo,
+ &attnull,
+ buffer);
+ nulls[attoff] = (attnull ? 'n' : ' ');
+ }
+
+ /* form an index tuple and point it at the heap tuple */
+ itup = index_formtuple(itupdesc, attdata, nulls);
+
+ /*
+ * If the single index key is null, we don't insert it into the
+ * index. Hash tables support scans on '='. Relational algebra
+ * says that A = B returns null if either A or B is null. This
+ * means that no qualification used in an index scan could ever
+ * return true on a null attribute. It also means that indices
+ * can't be used by ISNULL or NOTNULL scans, but that's an
+ * artifact of the strategy map architecture chosen in 1986, not
+ * of the way nulls are handled here.
+ */
+
+ if (itup->t_info & INDEX_NULL_MASK)
+ {
+ pfree(itup);
+ continue;
+ }
+
+ itup->t_tid = htup->t_ctid;
+ hitem = _hash_formitem(itup);
+ res = _hash_doinsert(index, hitem);
+ pfree(hitem);
+ pfree(itup);
+ pfree(res);
}
-
- /* Skip this tuple if it doesn't satisfy the partial-index predicate */
- if (pred != NULL) {
+
+ /* okay, all heap tuples are indexed */
+ heap_endscan(hscan);
+
+ if (pred != NULL || oldPred != NULL)
+ {
#ifndef OMIT_PARTIAL_INDEX
- /*SetSlotContents(slot, htup); */
- slot->val = htup;
- if (ExecQual((List*)pred, econtext) == false)
- continue;
-#endif /* OMIT_PARTIAL_INDEX */
-}
-
- nitups++;
-
- /*
- * For the current heap tuple, extract all the attributes
- * we use in this index, and note which are null.
- */
- for (i = 1; i <= natts; i++) {
- int attoff;
- bool attnull;
-
- /*
- * Offsets are from the start of the tuple, and are
- * zero-based; indices are one-based. The next call
- * returns i - 1. That's data hiding for you.
- */
-
- /* attoff = i - 1 */
- attoff = AttrNumberGetAttrOffset(i);
-
- /* below, attdata[attoff] set to equal some datum &
- * attnull is changed to indicate whether or not the attribute
- * is null for this tuple
- */
- attdata[attoff] = GetIndexValue(htup,
- htupdesc,
- attoff,
- attnum,
- finfo,
- &attnull,
- buffer);
- nulls[attoff] = (attnull ? 'n' : ' ');
+ ExecDestroyTupleTable(tupleTable, true);
+ pfree(econtext);
+#endif /* OMIT_PARTIAL_INDEX */
}
-
- /* form an index tuple and point it at the heap tuple */
- itup = index_formtuple(itupdesc, attdata, nulls);
-
+
/*
- * If the single index key is null, we don't insert it into
- * the index. Hash tables support scans on '='.
- * Relational algebra says that A = B
- * returns null if either A or B is null. This
- * means that no qualification used in an index scan could ever
- * return true on a null attribute. It also means that indices
- * can't be used by ISNULL or NOTNULL scans, but that's an
- * artifact of the strategy map architecture chosen in 1986, not
- * of the way nulls are handled here.
+ * Since we just counted the tuples in the heap, we update its stats
+ * in pg_class to guarantee that the planner takes advantage of the
+ * index we just created. Finally, only update statistics during
+ * normal index definitions, not for indices on system catalogs
+ * created during bootstrap processing. We must close the relations
+ * before updating statistics to guarantee that the relcache entries
+ * are flushed when we increment the command counter in UpdateStats().
*/
-
- if (itup->t_info & INDEX_NULL_MASK) {
- pfree(itup);
- continue;
- }
-
- itup->t_tid = htup->t_ctid;
- hitem = _hash_formitem(itup);
- res = _hash_doinsert(index, hitem);
- pfree(hitem);
- pfree(itup);
- pfree(res);
- }
-
- /* okay, all heap tuples are indexed */
- heap_endscan(hscan);
-
- if (pred != NULL || oldPred != NULL) {
-#ifndef OMIT_PARTIAL_INDEX
- ExecDestroyTupleTable(tupleTable, true);
- pfree(econtext);
-#endif /* OMIT_PARTIAL_INDEX */
- }
-
- /*
- * Since we just counted the tuples in the heap, we update its
- * stats in pg_class to guarantee that the planner takes advantage
- * of the index we just created. Finally, only update statistics
- * during normal index definitions, not for indices on system catalogs
- * created during bootstrap processing. We must close the relations
- * before updatings statistics to guarantee that the relcache entries
- * are flushed when we increment the command counter in UpdateStats().
- */
- if (IsNormalProcessingMode())
+ if (IsNormalProcessingMode())
{
- hrelid = heap->rd_id;
- irelid = index->rd_id;
- heap_close(heap);
- index_close(index);
- UpdateStats(hrelid, nhtups, true);
- UpdateStats(irelid, nitups, false);
- if (oldPred != NULL) {
- if (nitups == nhtups) pred = NULL;
- UpdateIndexPredicate(irelid, oldPred, pred);
- }
+ hrelid = heap->rd_id;
+ irelid = index->rd_id;
+ heap_close(heap);
+ index_close(index);
+ UpdateStats(hrelid, nhtups, true);
+ UpdateStats(irelid, nitups, false);
+ if (oldPred != NULL)
+ {
+ if (nitups == nhtups)
+ pred = NULL;
+ UpdateIndexPredicate(irelid, oldPred, pred);
+ }
}
-
- /* be tidy */
- pfree(nulls);
- pfree(attdata);
-
- /* all done */
- BuildingHash = false;
+
+ /* be tidy */
+ pfree(nulls);
+ pfree(attdata);
+
+ /* all done */
+ BuildingHash = false;
}
/*
- * hashinsert() -- insert an index tuple into a hash table.
+ * hashinsert() -- insert an index tuple into a hash table.
*
- * Hash on the index tuple's key, find the appropriate location
- * for the new tuple, put it there, and return an InsertIndexResult
- * to the caller.
+ * Hash on the index tuple's key, find the appropriate location
+ * for the new tuple, put it there, and return an InsertIndexResult
+ * to the caller.
*/
InsertIndexResult
-hashinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation heapRel)
+hashinsert(Relation rel, Datum * datum, char *nulls, ItemPointer ht_ctid, Relation heapRel)
{
- HashItem hitem;
- IndexTuple itup;
- InsertIndexResult res;
-
-
- /* generate an index tuple */
- itup = index_formtuple(RelationGetTupleDescriptor(rel), datum, nulls);
- itup->t_tid = *ht_ctid;
-
- if (itup->t_info & INDEX_NULL_MASK)
- return ((InsertIndexResult) NULL);
-
- hitem = _hash_formitem(itup);
-
- res = _hash_doinsert(rel, hitem);
-
- pfree(hitem);
- pfree(itup);
-
- return (res);
+ HashItem hitem;
+ IndexTuple itup;
+ InsertIndexResult res;
+
+
+ /* generate an index tuple */
+ itup = index_formtuple(RelationGetTupleDescriptor(rel), datum, nulls);
+ itup->t_tid = *ht_ctid;
+
+ if (itup->t_info & INDEX_NULL_MASK)
+ return ((InsertIndexResult) NULL);
+
+ hitem = _hash_formitem(itup);
+
+ res = _hash_doinsert(rel, hitem);
+
+ pfree(hitem);
+ pfree(itup);
+
+ return (res);
}
/*
- * hashgettuple() -- Get the next tuple in the scan.
+ * hashgettuple() -- Get the next tuple in the scan.
*/
-char *
+char *
hashgettuple(IndexScanDesc scan, ScanDirection dir)
{
- RetrieveIndexResult res;
-
- /*
- * If we've already initialized this scan, we can just advance it
- * in the appropriate direction. If we haven't done so yet, we
- * call a routine to get the first item in the scan.
- */
-
- if (ItemPointerIsValid(&(scan->currentItemData)))
- res = _hash_next(scan, dir);
- else
- res = _hash_first(scan, dir);
-
- return ((char *) res);
+ RetrieveIndexResult res;
+
+ /*
+ * If we've already initialized this scan, we can just advance it in
+ * the appropriate direction. If we haven't done so yet, we call a
+ * routine to get the first item in the scan.
+ */
+
+ if (ItemPointerIsValid(&(scan->currentItemData)))
+ res = _hash_next(scan, dir);
+ else
+ res = _hash_first(scan, dir);
+
+ return ((char *) res);
}
/*
- * hashbeginscan() -- start a scan on a hash index
+ * hashbeginscan() -- start a scan on a hash index
*/
-char *
+char *
hashbeginscan(Relation rel,
- bool fromEnd,
- uint16 keysz,
- ScanKey scankey)
+ bool fromEnd,
+ uint16 keysz,
+ ScanKey scankey)
{
- IndexScanDesc scan;
- HashScanOpaque so;
-
- scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
- so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
- so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer;
- scan->opaque = so;
- scan->flags = 0x0;
-
- /* register scan in case we change pages it's using */
- _hash_regscan(scan);
-
- return ((char *) scan);
+ IndexScanDesc scan;
+ HashScanOpaque so;
+
+ scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
+ so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
+ so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer;
+ scan->opaque = so;
+ scan->flags = 0x0;
+
+ /* register scan in case we change pages it's using */
+ _hash_regscan(scan);
+
+ return ((char *) scan);
}
/*
- * hashrescan() -- rescan an index relation
+ * hashrescan() -- rescan an index relation
*/
void
hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
{
- ItemPointer iptr;
- HashScanOpaque so;
-
- so = (HashScanOpaque) scan->opaque;
-
- /* we hold a read lock on the current page in the scan */
- if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
- _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
- so->hashso_curbuf = InvalidBuffer;
- ItemPointerSetInvalid(iptr);
- }
- if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
- _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
- so->hashso_mrkbuf = InvalidBuffer;
- ItemPointerSetInvalid(iptr);
- }
-
- /* reset the scan key */
- if (scan->numberOfKeys > 0) {
- memmove(scan->keyData,
- scankey,
- scan->numberOfKeys * sizeof(ScanKeyData));
- }
+ ItemPointer iptr;
+ HashScanOpaque so;
+
+ so = (HashScanOpaque) scan->opaque;
+
+ /* we hold a read lock on the current page in the scan */
+ if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
+ {
+ _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
+ so->hashso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+ if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
+ {
+ _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
+ so->hashso_mrkbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ /* reset the scan key */
+ if (scan->numberOfKeys > 0)
+ {
+ memmove(scan->keyData,
+ scankey,
+ scan->numberOfKeys * sizeof(ScanKeyData));
+ }
}
/*
- * hashendscan() -- close down a scan
+ * hashendscan() -- close down a scan
*/
void
hashendscan(IndexScanDesc scan)
{
-
- ItemPointer iptr;
- HashScanOpaque so;
-
- so = (HashScanOpaque) scan->opaque;
-
- /* release any locks we still hold */
- if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
- _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
- so->hashso_curbuf = InvalidBuffer;
- ItemPointerSetInvalid(iptr);
- }
-
- if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
- if (BufferIsValid(so->hashso_mrkbuf))
- _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
- so->hashso_mrkbuf = InvalidBuffer;
- ItemPointerSetInvalid(iptr);
- }
-
- /* don't need scan registered anymore */
- _hash_dropscan(scan);
-
- /* be tidy */
- pfree (scan->opaque);
+
+ ItemPointer iptr;
+ HashScanOpaque so;
+
+ so = (HashScanOpaque) scan->opaque;
+
+ /* release any locks we still hold */
+ if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
+ {
+ _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
+ so->hashso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
+ {
+ if (BufferIsValid(so->hashso_mrkbuf))
+ _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
+ so->hashso_mrkbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ /* don't need scan registered anymore */
+ _hash_dropscan(scan);
+
+ /* be tidy */
+ pfree(scan->opaque);
}
/*
- * hashmarkpos() -- save current scan position
+ * hashmarkpos() -- save current scan position
*
*/
void
hashmarkpos(IndexScanDesc scan)
{
- ItemPointer iptr;
- HashScanOpaque so;
-
- /* see if we ever call this code. if we do, then so_mrkbuf a
- * useful element in the scan->opaque structure. if this procedure
- * is never called, so_mrkbuf should be removed from the scan->opaque
- * structure.
- */
- elog(NOTICE, "Hashmarkpos() called.");
-
- so = (HashScanOpaque) scan->opaque;
-
- /* release lock on old marked data, if any */
- if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
- _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
- so->hashso_mrkbuf = InvalidBuffer;
- ItemPointerSetInvalid(iptr);
- }
-
- /* bump lock on currentItemData and copy to currentMarkData */
- if (ItemPointerIsValid(&(scan->currentItemData))) {
- so->hashso_mrkbuf = _hash_getbuf(scan->relation,
- BufferGetBlockNumber(so->hashso_curbuf),
- HASH_READ);
- scan->currentMarkData = scan->currentItemData;
- }
+ ItemPointer iptr;
+ HashScanOpaque so;
+
+ /*
+ * see if we ever call this code. if we do, then so_mrkbuf is a useful
+ * element in the scan->opaque structure. if this procedure is never
+ * called, so_mrkbuf should be removed from the scan->opaque
+ * structure.
+ */
+ elog(NOTICE, "Hashmarkpos() called.");
+
+ so = (HashScanOpaque) scan->opaque;
+
+ /* release lock on old marked data, if any */
+ if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
+ {
+ _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
+ so->hashso_mrkbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ /* bump lock on currentItemData and copy to currentMarkData */
+ if (ItemPointerIsValid(&(scan->currentItemData)))
+ {
+ so->hashso_mrkbuf = _hash_getbuf(scan->relation,
+ BufferGetBlockNumber(so->hashso_curbuf),
+ HASH_READ);
+ scan->currentMarkData = scan->currentItemData;
+ }
}
/*
- * hashrestrpos() -- restore scan to last saved position
+ * hashrestrpos() -- restore scan to last saved position
*/
void
hashrestrpos(IndexScanDesc scan)
{
- ItemPointer iptr;
- HashScanOpaque so;
-
- /* see if we ever call this code. if we do, then so_mrkbuf a
- * useful element in the scan->opaque structure. if this procedure
- * is never called, so_mrkbuf should be removed from the scan->opaque
- * structure.
- */
- elog(NOTICE, "Hashrestrpos() called.");
-
- so = (HashScanOpaque) scan->opaque;
-
- /* release lock on current data, if any */
- if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
- _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
- so->hashso_curbuf = InvalidBuffer;
- ItemPointerSetInvalid(iptr);
- }
-
- /* bump lock on currentMarkData and copy to currentItemData */
- if (ItemPointerIsValid(&(scan->currentMarkData))) {
- so->hashso_curbuf =
- _hash_getbuf(scan->relation,
- BufferGetBlockNumber(so->hashso_mrkbuf),
- HASH_READ);
-
- scan->currentItemData = scan->currentMarkData;
- }
+ ItemPointer iptr;
+ HashScanOpaque so;
+
+ /*
+ * see if we ever call this code. if we do, then so_mrkbuf is a useful
+ * element in the scan->opaque structure. if this procedure is never
+ * called, so_mrkbuf should be removed from the scan->opaque
+ * structure.
+ */
+ elog(NOTICE, "Hashrestrpos() called.");
+
+ so = (HashScanOpaque) scan->opaque;
+
+ /* release lock on current data, if any */
+ if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
+ {
+ _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
+ so->hashso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ /* bump lock on currentMarkData and copy to currentItemData */
+ if (ItemPointerIsValid(&(scan->currentMarkData)))
+ {
+ so->hashso_curbuf =
+ _hash_getbuf(scan->relation,
+ BufferGetBlockNumber(so->hashso_mrkbuf),
+ HASH_READ);
+
+ scan->currentItemData = scan->currentMarkData;
+ }
}
/* stubs */
void
hashdelete(Relation rel, ItemPointer tid)
{
- /* adjust any active scans that will be affected by this deletion */
- _hash_adjscans(rel, tid);
-
- /* delete the data from the page */
- _hash_pagedel(rel, tid);
-}
+ /* adjust any active scans that will be affected by this deletion */
+ _hash_adjscans(rel, tid);
+ /* delete the data from the page */
+ _hash_pagedel(rel, tid);
+}
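
(Aside for readers of the hashbuild() hunk above: the per-tuple filtering it performs for partial indexes reduces to two tests, sketched below. This is an illustration only, assuming stand-ins for the real machinery: the Tuple struct and the threshold predicates play the role of heap tuples and ExecQual(), and keep_for_index() is not a Postgres function.)

#include <stdbool.h>
#include <stdio.h>

typedef struct { int key; } Tuple;
typedef bool (*Pred)(const Tuple *);

/* toy predicates standing in for the old and new partial-index quals */
static bool old_pred(const Tuple *t) { return t->key < 10; }
static bool new_pred(const Tuple *t) { return t->key < 20; }

/* Mirror of the two skips at the top of hashbuild()'s scan loop:
 * EXTEND INDEX skips (but counts) tuples the old predicate already
 * covers, and any tuple failing the new predicate is not indexed. */
static bool
keep_for_index(Pred pred, Pred oldPred, const Tuple *t, int *nitups)
{
    if (oldPred && oldPred(t))
    {
        (*nitups)++;            /* already present in the partial index */
        return false;
    }
    if (pred && !pred(t))
        return false;           /* fails the partial-index predicate */
    (*nitups)++;
    return true;                /* caller forms and inserts the index tuple */
}

int
main(void)
{
    Tuple tups[] = {{5}, {15}, {25}};
    int nitups = 0;

    for (int i = 0; i < 3; i++)
        if (keep_for_index(new_pred, old_pred, &tups[i], &nitups))
            printf("index tuple with key %d\n", tups[i].key);
    printf("nitups = %d\n", nitups);    /* 2: one counted-but-skipped, one kept */
    return 0;
}

Counting nitups inside the skip matters because hashbuild() later feeds nitups to UpdateStats(), so tuples already covered by the old predicate still contribute to the index's reported size.
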
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index 5862800b21d..a3cbaa1a94c 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -1,17 +1,17 @@
/*-------------------------------------------------------------------------
*
* hashfunc.c--
- * Comparison functions for hash access method.
+ * Comparison functions for hash access method.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.3 1996/11/10 02:57:40 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.4 1997/09/07 04:37:53 momjian Exp $
*
* NOTES
- * These functions are stored in pg_amproc. For each operator class
- * defined on hash tables, they compute the hash value of the argument.
+ * These functions are stored in pg_amproc. For each operator class
+ * defined on hash tables, they compute the hash value of the argument.
*
*-------------------------------------------------------------------------
*/
@@ -20,206 +20,223 @@
#include "access/hash.h"
-uint32 hashint2(int16 key)
+uint32
+hashint2(int16 key)
{
- return ((uint32) ~key);
+ return ((uint32) ~ key);
}
-uint32 hashint4(uint32 key)
+uint32
+hashint4(uint32 key)
{
- return (~key);
+ return (~key);
}
/* Hash function from Chris Torek. */
-uint32 hashfloat4(float32 keyp)
+uint32
+hashfloat4(float32 keyp)
{
- int len;
- int loop;
- uint32 h;
- char *kp = (char *) keyp;
+ int len;
+ int loop;
+ uint32 h;
+ char *kp = (char *) keyp;
- len = sizeof(float32data);
+ len = sizeof(float32data);
-#define HASH4a h = (h << 5) - h + *kp++;
-#define HASH4b h = (h << 5) + h + *kp++;
+#define HASH4a h = (h << 5) - h + *kp++;
+#define HASH4b h = (h << 5) + h + *kp++;
#define HASH4 HASH4b
- h = 0;
- if (len > 0) {
- loop = (len + 8 - 1) >> 3;
-
- switch (len & (8 - 1)) {
- case 0:
- do { /* All fall throughs */
- HASH4;
- case 7:
- HASH4;
- case 6:
- HASH4;
- case 5:
- HASH4;
- case 4:
- HASH4;
- case 3:
- HASH4;
- case 2:
- HASH4;
- case 1:
- HASH4;
- } while (--loop);
+ h = 0;
+ if (len > 0)
+ {
+ loop = (len + 8 - 1) >> 3;
+
+ switch (len & (8 - 1))
+ {
+ case 0:
+ do
+ { /* All fall throughs */
+ HASH4;
+ case 7:
+ HASH4;
+ case 6:
+ HASH4;
+ case 5:
+ HASH4;
+ case 4:
+ HASH4;
+ case 3:
+ HASH4;
+ case 2:
+ HASH4;
+ case 1:
+ HASH4;
+ } while (--loop);
+ }
}
- }
- return (h);
-}
+ return (h);
+}
-uint32 hashfloat8(float64 keyp)
+uint32
+hashfloat8(float64 keyp)
{
- int len;
- int loop;
- uint32 h;
- char *kp = (char *) keyp;
+ int len;
+ int loop;
+ uint32 h;
+ char *kp = (char *) keyp;
- len = sizeof(float64data);
+ len = sizeof(float64data);
-#define HASH4a h = (h << 5) - h + *kp++;
-#define HASH4b h = (h << 5) + h + *kp++;
+#define HASH4a h = (h << 5) - h + *kp++;
+#define HASH4b h = (h << 5) + h + *kp++;
#define HASH4 HASH4b
- h = 0;
- if (len > 0) {
- loop = (len + 8 - 1) >> 3;
-
- switch (len & (8 - 1)) {
- case 0:
- do { /* All fall throughs */
- HASH4;
- case 7:
- HASH4;
- case 6:
- HASH4;
- case 5:
- HASH4;
- case 4:
- HASH4;
- case 3:
- HASH4;
- case 2:
- HASH4;
- case 1:
- HASH4;
- } while (--loop);
+ h = 0;
+ if (len > 0)
+ {
+ loop = (len + 8 - 1) >> 3;
+
+ switch (len & (8 - 1))
+ {
+ case 0:
+ do
+ { /* All fall throughs */
+ HASH4;
+ case 7:
+ HASH4;
+ case 6:
+ HASH4;
+ case 5:
+ HASH4;
+ case 4:
+ HASH4;
+ case 3:
+ HASH4;
+ case 2:
+ HASH4;
+ case 1:
+ HASH4;
+ } while (--loop);
+ }
}
- }
- return (h);
-}
+ return (h);
+}
-uint32 hashoid(Oid key)
+uint32
+hashoid(Oid key)
{
- return ((uint32) ~key);
+ return ((uint32) ~ key);
}
-uint32 hashchar(char key)
+uint32
+hashchar(char key)
{
- int len;
- uint32 h;
+ int len;
+ uint32 h;
+
+ len = sizeof(char);
- len = sizeof(char);
+#define PRIME1 37
+#define PRIME2 1048583
-#define PRIME1 37
-#define PRIME2 1048583
+ h = 0;
+ /* Convert char to integer */
+ h = h * PRIME1 ^ (key - ' ');
+ h %= PRIME2;
- h = 0;
- /* Convert char to integer */
- h = h * PRIME1 ^ (key - ' ');
- h %= PRIME2;
-
- return (h);
+ return (h);
}
-uint32 hashchar2(uint16 intkey)
+uint32
+hashchar2(uint16 intkey)
{
- uint32 h;
- int len;
- char *key = (char *) &intkey;
-
- h = 0;
- len = sizeof(uint16);
- /* Convert string to integer */
- while (len--)
- h = h * PRIME1 ^ (*key++ - ' ');
- h %= PRIME2;
-
- return (h);
+ uint32 h;
+ int len;
+ char *key = (char *) &intkey;
+
+ h = 0;
+ len = sizeof(uint16);
+ /* Convert string to integer */
+ while (len--)
+ h = h * PRIME1 ^ (*key++ - ' ');
+ h %= PRIME2;
+
+ return (h);
}
-uint32 hashchar4(uint32 intkey)
+uint32
+hashchar4(uint32 intkey)
{
- uint32 h;
- int len;
- char *key = (char *) &intkey;
-
- h = 0;
- len = sizeof(uint32);
- /* Convert string to integer */
- while (len--)
- h = h * PRIME1 ^ (*key++ - ' ');
- h %= PRIME2;
-
- return (h);
+ uint32 h;
+ int len;
+ char *key = (char *) &intkey;
+
+ h = 0;
+ len = sizeof(uint32);
+ /* Convert string to integer */
+ while (len--)
+ h = h * PRIME1 ^ (*key++ - ' ');
+ h %= PRIME2;
+
+ return (h);
}
-uint32 hashchar8(char *key)
+uint32
+hashchar8(char *key)
{
- uint32 h;
- int len;
-
- h = 0;
- len = sizeof(char8);
- /* Convert string to integer */
- while (len--)
- h = h * PRIME1 ^ (*key++ - ' ');
- h %= PRIME2;
-
- return (h);
+ uint32 h;
+ int len;
+
+ h = 0;
+ len = sizeof(char8);
+ /* Convert string to integer */
+ while (len--)
+ h = h * PRIME1 ^ (*key++ - ' ');
+ h %= PRIME2;
+
+ return (h);
}
-uint32 hashname(NameData *n)
+uint32
+hashname(NameData * n)
{
- uint32 h;
- int len;
- char *key;
-
- key = n->data;
-
- h = 0;
- len = NAMEDATALEN;
- /* Convert string to integer */
- while (len--)
- h = h * PRIME1 ^ (*key++ - ' ');
- h %= PRIME2;
-
- return (h);
+ uint32 h;
+ int len;
+ char *key;
+
+ key = n->data;
+
+ h = 0;
+ len = NAMEDATALEN;
+ /* Convert string to integer */
+ while (len--)
+ h = h * PRIME1 ^ (*key++ - ' ');
+ h %= PRIME2;
+
+ return (h);
}
-uint32 hashchar16(char *key)
+uint32
+hashchar16(char *key)
{
- uint32 h;
- int len;
-
- h = 0;
- len = sizeof(char16);
- /* Convert string to integer */
- while (len--)
- h = h * PRIME1 ^ (*key++ - ' ');
- h %= PRIME2;
-
- return (h);
+ uint32 h;
+ int len;
+
+ h = 0;
+ len = sizeof(char16);
+ /* Convert string to integer */
+ while (len--)
+ h = h * PRIME1 ^ (*key++ - ' ');
+ h %= PRIME2;
+
+ return (h);
}
@@ -234,45 +251,49 @@ uint32 hashchar16(char *key)
*
* "OZ's original sdbm hash"
*/
-uint32 hashtext(struct varlena *key)
+uint32
+hashtext(struct varlena * key)
{
- int keylen;
- char *keydata;
- uint32 n;
- int loop;
-
- keydata = VARDATA(key);
- keylen = VARSIZE(key);
-
- /* keylen includes the four bytes in which string keylength is stored */
- keylen -= sizeof(VARSIZE(key));
-
-#define HASHC n = *keydata++ + 65599 * n
-
- n = 0;
- if (keylen > 0) {
- loop = (keylen + 8 - 1) >> 3;
-
- switch (keylen & (8 - 1)) {
- case 0:
- do { /* All fall throughs */
- HASHC;
- case 7:
- HASHC;
- case 6:
- HASHC;
- case 5:
- HASHC;
- case 4:
- HASHC;
- case 3:
- HASHC;
- case 2:
- HASHC;
- case 1:
- HASHC;
- } while (--loop);
+ int keylen;
+ char *keydata;
+ uint32 n;
+ int loop;
+
+ keydata = VARDATA(key);
+ keylen = VARSIZE(key);
+
+ /* keylen includes the four bytes in which the string's length is stored */
+ keylen -= sizeof(VARSIZE(key));
+
+#define HASHC n = *keydata++ + 65599 * n
+
+ n = 0;
+ if (keylen > 0)
+ {
+ loop = (keylen + 8 - 1) >> 3;
+
+ switch (keylen & (8 - 1))
+ {
+ case 0:
+ do
+ { /* All fall throughs */
+ HASHC;
+ case 7:
+ HASHC;
+ case 6:
+ HASHC;
+ case 5:
+ HASHC;
+ case 4:
+ HASHC;
+ case 3:
+ HASHC;
+ case 2:
+ HASHC;
+ case 1:
+ HASHC;
+ } while (--loop);
+ }
}
- }
- return (n);
-}
+ return (n);
+}
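
(Aside: hashfloat4(), hashfloat8(), and hashtext() above all share the same shape -- a byte loop unrolled eight ways with Duff's device around a multiplicative step. A self-contained sketch of the hashtext() variant follows; the HASHC step, n = *p++ + 65599 * n, is taken verbatim from the hunk above, while sdbm_hash() and the driver are invented for illustration.)

#include <stdio.h>
#include <string.h>

typedef unsigned int uint32;

static uint32
sdbm_hash(const char *p, int len)
{
    uint32 n = 0;
    int loop;

#define HASHC n = *p++ + 65599 * n
    if (len > 0)
    {
        loop = (len + 8 - 1) >> 3;  /* number of 8-byte chunks, rounded up */
        switch (len & (8 - 1))      /* enter the loop body at the remainder */
        {
            case 0:
                do
                {                   /* all cases fall through */
                    HASHC;
            case 7:
                    HASHC;
            case 6:
                    HASHC;
            case 5:
                    HASHC;
            case 4:
                    HASHC;
            case 3:
                    HASHC;
            case 2:
                    HASHC;
            case 1:
                    HASHC;
                } while (--loop);
        }
    }
#undef HASHC
    return n;
}

int
main(void)
{
    const char *s = "postgres";
    printf("%u\n", sdbm_hash(s, (int) strlen(s)));
    return 0;
}

The unrolling buys one loop test per eight bytes; the switch handles the len % 8 leftover bytes by jumping into the middle of the first pass.
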
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
index f1233c68b2d..4829093589a 100644
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -1,19 +1,19 @@
/*-------------------------------------------------------------------------
*
* hashinsert.c--
- * Item insertion in hash tables for Postgres.
+ * Item insertion in hash tables for Postgres.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.8 1997/08/12 22:51:30 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.9 1997/09/07 04:37:56 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include <postgres.h>
-
+
#include <access/hash.h>
#include <storage/bufmgr.h>
#include <utils/memutils.h>
@@ -22,211 +22,221 @@ static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, S
static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem);
/*
- * _hash_doinsert() -- Handle insertion of a single HashItem in the table.
+ * _hash_doinsert() -- Handle insertion of a single HashItem in the table.
*
- * This routine is called by the public interface routines, hashbuild
- * and hashinsert. By here, hashitem is filled in, and has a unique
- * (xid, seqno) pair. The datum to be used as a "key" is in the
- * hashitem.
+ * This routine is called by the public interface routines, hashbuild
+ * and hashinsert. By here, hashitem is filled in, and has a unique
+ * (xid, seqno) pair. The datum to be used as a "key" is in the
+ * hashitem.
*/
InsertIndexResult
_hash_doinsert(Relation rel, HashItem hitem)
{
- Buffer buf;
- Buffer metabuf;
- BlockNumber blkno;
- HashMetaPage metap;
- IndexTuple itup;
- InsertIndexResult res;
- ScanKey itup_scankey;
- int natts;
- Page page;
-
- metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
-
- /* we need a scan key to do our search, so build one */
- itup = &(hitem->hash_itup);
- if ((natts = rel->rd_rel->relnatts) != 1)
- elog(WARN, "Hash indices valid for only one index key.");
- itup_scankey = _hash_mkscankey(rel, itup, metap);
-
- /*
- * find the first page in the bucket chain containing this key and
- * place it in buf. _hash_search obtains a read lock for us.
- */
- _hash_search(rel, natts, itup_scankey, &buf, metap);
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE);
-
- /*
- * trade in our read lock for a write lock so that we can do the
- * insertion.
- */
- blkno = BufferGetBlockNumber(buf);
- _hash_relbuf(rel, buf, HASH_READ);
- buf = _hash_getbuf(rel, blkno, HASH_WRITE);
-
-
- /*
- * XXX btree comment (haven't decided what to do in hash): don't
- * think the bucket can be split while we're reading the metapage.
- *
- * If the page was split between the time that we surrendered our
- * read lock and acquired our write lock, then this page may no
- * longer be the right place for the key we want to insert.
- */
-
- /* do the insertion */
- res = _hash_insertonpg(rel, buf, natts, itup_scankey,
- hitem, metabuf);
-
- /* be tidy */
- _hash_freeskey(itup_scankey);
-
- return (res);
+ Buffer buf;
+ Buffer metabuf;
+ BlockNumber blkno;
+ HashMetaPage metap;
+ IndexTuple itup;
+ InsertIndexResult res;
+ ScanKey itup_scankey;
+ int natts;
+ Page page;
+
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ /* we need a scan key to do our search, so build one */
+ itup = &(hitem->hash_itup);
+ if ((natts = rel->rd_rel->relnatts) != 1)
+ elog(WARN, "Hash indices valid for only one index key.");
+ itup_scankey = _hash_mkscankey(rel, itup, metap);
+
+ /*
+ * find the first page in the bucket chain containing this key and
+ * place it in buf. _hash_search obtains a read lock for us.
+ */
+ _hash_search(rel, natts, itup_scankey, &buf, metap);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE);
+
+ /*
+ * trade in our read lock for a write lock so that we can do the
+ * insertion.
+ */
+ blkno = BufferGetBlockNumber(buf);
+ _hash_relbuf(rel, buf, HASH_READ);
+ buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+
+
+ /*
+ * XXX btree comment (haven't decided what to do in hash): don't think
+ * the bucket can be split while we're reading the metapage.
+ *
+ * If the page was split between the time that we surrendered our read
+ * lock and acquired our write lock, then this page may no longer be
+ * the right place for the key we want to insert.
+ */
+
+ /* do the insertion */
+ res = _hash_insertonpg(rel, buf, natts, itup_scankey,
+ hitem, metabuf);
+
+ /* be tidy */
+ _hash_freeskey(itup_scankey);
+
+ return (res);
}
/*
- * _hash_insertonpg() -- Insert a tuple on a particular page in the table.
+ * _hash_insertonpg() -- Insert a tuple on a particular page in the table.
*
- * This recursive procedure does the following things:
+ * This recursive procedure does the following things:
*
- * + if necessary, splits the target page.
- * + inserts the tuple.
+ * + if necessary, splits the target page.
+ * + inserts the tuple.
*
- * On entry, we must have the right buffer on which to do the
- * insertion, and the buffer must be pinned and locked. On return,
- * we will have dropped both the pin and the write lock on the buffer.
+ * On entry, we must have the right buffer on which to do the
+ * insertion, and the buffer must be pinned and locked. On return,
+ * we will have dropped both the pin and the write lock on the buffer.
*
*/
-static InsertIndexResult
+static InsertIndexResult
_hash_insertonpg(Relation rel,
- Buffer buf,
- int keysz,
- ScanKey scankey,
- HashItem hitem,
- Buffer metabuf)
+ Buffer buf,
+ int keysz,
+ ScanKey scankey,
+ HashItem hitem,
+ Buffer metabuf)
{
- InsertIndexResult res;
- Page page;
- BlockNumber itup_blkno;
- OffsetNumber itup_off;
- int itemsz;
- HashPageOpaque pageopaque;
- bool do_expand = false;
- Buffer ovflbuf;
- HashMetaPage metap;
- Bucket bucket;
-
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
-
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
- pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
- bucket = pageopaque->hasho_bucket;
-
- itemsz = IndexTupleDSize(hitem->hash_itup)
- + (sizeof(HashItemData) - sizeof(IndexTupleData));
- itemsz = DOUBLEALIGN(itemsz);
-
- while (PageGetFreeSpace(page) < itemsz) {
- /*
- * no space on this page; check for an overflow page
- */
- if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) {
- /*
- * ovfl page exists; go get it. if it doesn't have room,
- * we'll find out next pass through the loop test above.
- */
- ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno,
- HASH_WRITE);
- _hash_relbuf(rel, buf, HASH_WRITE);
- buf = ovflbuf;
- page = BufferGetPage(buf);
- } else {
- /*
- * we're at the end of the bucket chain and we haven't
- * found a page with enough room. allocate a new overflow
- * page.
- */
- do_expand = true;
- ovflbuf = _hash_addovflpage(rel, &metabuf, buf);
- _hash_relbuf(rel, buf, HASH_WRITE);
- buf = ovflbuf;
- page = BufferGetPage(buf);
-
- if (PageGetFreeSpace(page) < itemsz) {
- /* it doesn't fit on an empty page -- give up */
- elog(WARN, "hash item too large");
- }
- }
- _hash_checkpage(page, LH_OVERFLOW_PAGE);
+ InsertIndexResult res;
+ Page page;
+ BlockNumber itup_blkno;
+ OffsetNumber itup_off;
+ int itemsz;
+ HashPageOpaque pageopaque;
+ bool do_expand = false;
+ Buffer ovflbuf;
+ HashMetaPage metap;
+ Bucket bucket;
+
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
- Assert(pageopaque->hasho_bucket == bucket);
- }
-
- itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem);
- itup_blkno = BufferGetBlockNumber(buf);
-
- /* by here, the new tuple is inserted */
- res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
-
- ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
-
- if (res != NULL) {
- /*
- * Increment the number of keys in the table.
- * We switch lock access type just for a moment
- * to allow greater accessibility to the metapage.
- */
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
- HASH_READ, HASH_WRITE);
- metap->hashm_nkeys += 1;
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
- HASH_WRITE, HASH_READ);
-
- }
-
- _hash_wrtbuf(rel, buf);
-
- if (do_expand ||
- (metap->hashm_nkeys / (metap->hashm_maxbucket + 1))
- > metap->hashm_ffactor) {
- _hash_expandtable(rel, metabuf);
- }
- _hash_relbuf(rel, metabuf, HASH_READ);
- return (res);
-}
+ bucket = pageopaque->hasho_bucket;
+
+ itemsz = IndexTupleDSize(hitem->hash_itup)
+ + (sizeof(HashItemData) - sizeof(IndexTupleData));
+ itemsz = DOUBLEALIGN(itemsz);
+
+ while (PageGetFreeSpace(page) < itemsz)
+ {
+
+ /*
+ * no space on this page; check for an overflow page
+ */
+ if (BlockNumberIsValid(pageopaque->hasho_nextblkno))
+ {
+
+ /*
+ * ovfl page exists; go get it. if it doesn't have room,
+ * we'll find out next pass through the loop test above.
+ */
+ ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno,
+ HASH_WRITE);
+ _hash_relbuf(rel, buf, HASH_WRITE);
+ buf = ovflbuf;
+ page = BufferGetPage(buf);
+ }
+ else
+ {
+
+ /*
+ * we're at the end of the bucket chain and we haven't found a
+ * page with enough room. allocate a new overflow page.
+ */
+ do_expand = true;
+ ovflbuf = _hash_addovflpage(rel, &metabuf, buf);
+ _hash_relbuf(rel, buf, HASH_WRITE);
+ buf = ovflbuf;
+ page = BufferGetPage(buf);
+
+ if (PageGetFreeSpace(page) < itemsz)
+ {
+ /* it doesn't fit on an empty page -- give up */
+ elog(WARN, "hash item too large");
+ }
+ }
+ _hash_checkpage(page, LH_OVERFLOW_PAGE);
+ pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ Assert(pageopaque->hasho_bucket == bucket);
+ }
+
+ itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem);
+ itup_blkno = BufferGetBlockNumber(buf);
+
+ /* by here, the new tuple is inserted */
+ res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
+
+ ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
+
+ if (res != NULL)
+ {
+
+ /*
+ * Increment the number of keys in the table. We switch lock
+ * access type just for a moment to allow greater accessibility to
+ * the metapage.
+ */
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
+ HASH_READ, HASH_WRITE);
+ metap->hashm_nkeys += 1;
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
+ HASH_WRITE, HASH_READ);
+
+ }
+
+ _hash_wrtbuf(rel, buf);
+
+ if (do_expand ||
+ (metap->hashm_nkeys / (metap->hashm_maxbucket + 1))
+ > metap->hashm_ffactor)
+ {
+ _hash_expandtable(rel, metabuf);
+ }
+ _hash_relbuf(rel, metabuf, HASH_READ);
+ return (res);
+}
/*
- * _hash_pgaddtup() -- add a tuple to a particular page in the index.
+ * _hash_pgaddtup() -- add a tuple to a particular page in the index.
*
- * This routine adds the tuple to the page as requested, and keeps the
- * write lock and reference associated with the page's buffer. It is
- * an error to call pgaddtup() without a write lock and reference.
+ * This routine adds the tuple to the page as requested, and keeps the
+ * write lock and reference associated with the page's buffer. It is
+ * an error to call pgaddtup() without a write lock and reference.
*/
-static OffsetNumber
+static OffsetNumber
_hash_pgaddtup(Relation rel,
- Buffer buf,
- int keysz,
- ScanKey itup_scankey,
- Size itemsize,
- HashItem hitem)
+ Buffer buf,
+ int keysz,
+ ScanKey itup_scankey,
+ Size itemsize,
+ HashItem hitem)
{
- OffsetNumber itup_off;
- Page page;
-
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
-
- itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
- PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED);
-
- /* write the buffer, but hold our lock */
- _hash_wrtnorelbuf(rel, buf);
-
- return (itup_off);
+ OffsetNumber itup_off;
+ Page page;
+
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+
+ itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
+ PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED);
+
+ /* write the buffer, but hold our lock */
+ _hash_wrtnorelbuf(rel, buf);
+
+ return (itup_off);
}
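
(Aside: the heart of _hash_insertonpg() above is its while loop, which walks the bucket's overflow chain looking for room and extends the chain when none is found. A minimal sketch of that control flow follows, assuming toy stand-ins: OvflPage, find_insert_page(), and the 8192-byte blank page are illustrative, not the real buffer-manager API.)

#include <stdio.h>
#include <stdlib.h>

typedef struct OvflPage
{
    int freespace;               /* bytes left on this page */
    struct OvflPage *next;       /* analogue of hasho_nextblkno */
} OvflPage;

static OvflPage *
find_insert_page(OvflPage *page, int itemsz)
{
    while (page->freespace < itemsz)
    {
        if (page->next != NULL)
        {
            /* an overflow page exists; if it lacks room too, the loop
             * test above sends us further down the chain */
            page = page->next;
        }
        else
        {
            /* end of the bucket chain: allocate a new overflow page */
            OvflPage *ovfl = malloc(sizeof(OvflPage));

            ovfl->freespace = 8192;  /* assumption: one blank block */
            ovfl->next = NULL;
            page->next = ovfl;
            page = ovfl;
            if (page->freespace < itemsz)
            {
                /* doesn't fit even on an empty page -- give up */
                fprintf(stderr, "hash item too large\n");
                exit(1);
            }
        }
    }
    return page;
}

int
main(void)
{
    OvflPage bucket = {100, NULL};  /* a nearly full bucket page */
    OvflPage *where = find_insert_page(&bucket, 500);

    printf("inserted on the %s page\n", where == &bucket ? "bucket" : "overflow");
    return 0;
}

As in the real routine, the "hash item too large" failure is only reachable when even a freshly allocated empty page cannot hold the item.
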
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index d976c4818c8..b6882d4d3e1 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -1,400 +1,423 @@
/*-------------------------------------------------------------------------
*
* hashovfl.c--
- * Overflow page management code for the Postgres hash access method
+ * Overflow page management code for the Postgres hash access method
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.9 1997/08/12 22:51:34 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.10 1997/09/07 04:37:57 momjian Exp $
*
* NOTES
- * Overflow pages look like ordinary relation pages.
+ * Overflow pages look like ordinary relation pages.
*
*-------------------------------------------------------------------------
*/
#include <postgres.h>
-
+
#include <access/hash.h>
#include <storage/bufmgr.h>
#include <utils/memutils.h>
#ifndef HAVE_MEMMOVE
-# include <regex/utils.h>
+#include <regex/utils.h>
#else
-# include <string.h>
+#include <string.h>
#endif
-static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer *metabufp);
-static uint32 _hash_firstfreebit(uint32 map);
+static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer * metabufp);
+static uint32 _hash_firstfreebit(uint32 map);
/*
- * _hash_addovflpage
+ * _hash_addovflpage
+ *
+ * Add an overflow page to the page currently pointed to by the buffer
+ * argument 'buf'.
*
- * Add an overflow page to the page currently pointed to by the buffer
- * argument 'buf'.
+ * *Metabufp has a read lock upon entering the function; buf has a
+ * write lock.
*
- * *Metabufp has a read lock upon entering the function; buf has a
- * write lock.
- *
*/
Buffer
-_hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf)
+_hash_addovflpage(Relation rel, Buffer * metabufp, Buffer buf)
{
-
- OverflowPageAddress oaddr;
- BlockNumber ovflblkno;
- Buffer ovflbuf;
- HashMetaPage metap;
- HashPageOpaque ovflopaque;
- HashPageOpaque pageopaque;
- Page page;
- Page ovflpage;
-
- /* this had better be the last page in a bucket chain */
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
- pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
- Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno));
-
- metap = (HashMetaPage) BufferGetPage(*metabufp);
- _hash_checkpage((Page) metap, LH_META_PAGE);
-
- /* allocate an empty overflow page */
- oaddr = _hash_getovfladdr(rel, metabufp);
- if (oaddr == InvalidOvflAddress) {
- elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr.");
- }
- ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr)));
- Assert(BlockNumberIsValid(ovflblkno));
- ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
- Assert(BufferIsValid(ovflbuf));
- ovflpage = BufferGetPage(ovflbuf);
-
- /* initialize the new overflow page */
- _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
- ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
- ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
- ovflopaque->hasho_nextblkno = InvalidBlockNumber;
- ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
- ovflopaque->hasho_oaddr = oaddr;
- ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
- _hash_wrtnorelbuf(rel, ovflbuf);
-
- /* logically chain overflow page to previous page */
- pageopaque->hasho_nextblkno = ovflblkno;
- _hash_wrtnorelbuf(rel, buf);
- return (ovflbuf);
+
+ OverflowPageAddress oaddr;
+ BlockNumber ovflblkno;
+ Buffer ovflbuf;
+ HashMetaPage metap;
+ HashPageOpaque ovflopaque;
+ HashPageOpaque pageopaque;
+ Page page;
+ Page ovflpage;
+
+ /* this had better be the last page in a bucket chain */
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno));
+
+ metap = (HashMetaPage) BufferGetPage(*metabufp);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ /* allocate an empty overflow page */
+ oaddr = _hash_getovfladdr(rel, metabufp);
+ if (oaddr == InvalidOvflAddress)
+ {
+ elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr.");
+ }
+ ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr)));
+ Assert(BlockNumberIsValid(ovflblkno));
+ ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
+ Assert(BufferIsValid(ovflbuf));
+ ovflpage = BufferGetPage(ovflbuf);
+
+ /* initialize the new overflow page */
+ _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
+ ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
+ ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
+ ovflopaque->hasho_nextblkno = InvalidBlockNumber;
+ ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
+ ovflopaque->hasho_oaddr = oaddr;
+ ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
+ _hash_wrtnorelbuf(rel, ovflbuf);
+
+ /* logically chain overflow page to previous page */
+ pageopaque->hasho_nextblkno = ovflblkno;
+ _hash_wrtnorelbuf(rel, buf);
+ return (ovflbuf);
}
/*
- * _hash_getovfladdr()
+ * _hash_getovfladdr()
*
- * Find an available overflow page and return its address.
+ * Find an available overflow page and return its address.
*
- * When we enter this function, we have a read lock on *metabufp which
- * we change to a write lock immediately. Before exiting, the write lock
- * is exchanged for a read lock.
+ * When we enter this function, we have a read lock on *metabufp which
+ * we change to a write lock immediately. Before exiting, the write lock
+ * is exchanged for a read lock.
*
*/
-static OverflowPageAddress
-_hash_getovfladdr(Relation rel, Buffer *metabufp)
+static OverflowPageAddress
+_hash_getovfladdr(Relation rel, Buffer * metabufp)
{
- HashMetaPage metap;
- Buffer mapbuf = 0;
- BlockNumber blkno;
- PageOffset offset;
- OverflowPageAddress oaddr;
- SplitNumber splitnum;
- uint32 *freep = NULL;
- uint32 max_free;
- uint32 bit;
- uint32 first_page;
- uint32 free_bit;
- uint32 free_page;
- uint32 in_use_bits;
- uint32 i, j;
-
- metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE);
-
- splitnum = metap->OVFL_POINT;
- max_free = metap->SPARES[splitnum];
-
- free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT);
- free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1);
-
- /* Look through all the free maps to find the first free block */
- first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT);
- for ( i = first_page; i <= free_page; i++ ) {
- Page mappage;
-
- blkno = metap->hashm_mapp[i];
- mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
- mappage = BufferGetPage(mapbuf);
- _hash_checkpage(mappage, LH_BITMAP_PAGE);
- freep = HashPageGetBitmap(mappage);
- Assert(freep);
-
- if (i == free_page)
- in_use_bits = free_bit;
- else
- in_use_bits = BMPGSZ_BIT(metap) - 1;
-
- if (i == first_page) {
- bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1);
- j = bit / BITS_PER_MAP;
- bit = bit & ~(BITS_PER_MAP - 1);
- } else {
- bit = 0;
- j = 0;
+ HashMetaPage metap;
+ Buffer mapbuf = 0;
+ BlockNumber blkno;
+ PageOffset offset;
+ OverflowPageAddress oaddr;
+ SplitNumber splitnum;
+ uint32 *freep = NULL;
+ uint32 max_free;
+ uint32 bit;
+ uint32 first_page;
+ uint32 free_bit;
+ uint32 free_page;
+ uint32 in_use_bits;
+ uint32 i,
+ j;
+
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE);
+
+ splitnum = metap->OVFL_POINT;
+ max_free = metap->SPARES[splitnum];
+
+ free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT);
+ free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1);
+
+ /* Look through all the free maps to find the first free block */
+ first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT);
+ for (i = first_page; i <= free_page; i++)
+ {
+ Page mappage;
+
+ blkno = metap->hashm_mapp[i];
+ mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ mappage = BufferGetPage(mapbuf);
+ _hash_checkpage(mappage, LH_BITMAP_PAGE);
+ freep = HashPageGetBitmap(mappage);
+ Assert(freep);
+
+ if (i == free_page)
+ in_use_bits = free_bit;
+ else
+ in_use_bits = BMPGSZ_BIT(metap) - 1;
+
+ if (i == first_page)
+ {
+ bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1);
+ j = bit / BITS_PER_MAP;
+ bit = bit & ~(BITS_PER_MAP - 1);
+ }
+ else
+ {
+ bit = 0;
+ j = 0;
+ }
+ for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
+ if (freep[j] != ALL_SET)
+ goto found;
+ }
+
+ /* No Free Page Found - have to allocate a new page */
+ metap->LAST_FREED = metap->SPARES[splitnum];
+ metap->SPARES[splitnum]++;
+ offset = metap->SPARES[splitnum] -
+ (splitnum ? metap->SPARES[splitnum - 1] : 0);
+
+#define OVMSG "HASH: Out of overflow pages. Out of luck.\n"
+
+ if (offset > SPLITMASK)
+ {
+ if (++splitnum >= NCACHED)
+ {
+ elog(WARN, OVMSG);
+ }
+ metap->OVFL_POINT = splitnum;
+ metap->SPARES[splitnum] = metap->SPARES[splitnum - 1];
+ metap->SPARES[splitnum - 1]--;
+ offset = 0;
}
- for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
- if (freep[j] != ALL_SET)
- goto found;
- }
-
- /* No Free Page Found - have to allocate a new page */
- metap->LAST_FREED = metap->SPARES[splitnum];
- metap->SPARES[splitnum]++;
- offset = metap->SPARES[splitnum] -
- (splitnum ? metap->SPARES[splitnum - 1] : 0);
-
-#define OVMSG "HASH: Out of overflow pages. Out of luck.\n"
-
- if (offset > SPLITMASK) {
- if (++splitnum >= NCACHED) {
- elog(WARN, OVMSG);
+
+ /* Check if we need to allocate a new bitmap page */
+ if (free_bit == BMPGSZ_BIT(metap) - 1)
+ {
+ /* won't be needing old map page */
+
+ _hash_relbuf(rel, mapbuf, HASH_WRITE);
+
+ free_page++;
+ if (free_page >= NCACHED)
+ {
+ elog(WARN, OVMSG);
+ }
+
+ /*
+ * This is tricky. The 1 indicates that you want the new page
+ * allocated with 1 clear bit. Actually, you are going to
+ * allocate 2 pages from this map. The first is going to be the
+ * map page, the second is the overflow page we were looking for.
+ * itself to indicate that the bitmap itself is in use. We would
+ * itself to indicate that the bitmap itself is in use. We would
+ * explicitly set the second bit, but don't have to if we tell
+ * init_bitmap not to leave it clear in the first place.
+ */
+ if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset),
+ 1, free_page))
+ {
+ elog(WARN, "overflow_page: problem with _hash_initbitmap.");
+ }
+ metap->SPARES[splitnum]++;
+ offset++;
+ if (offset > SPLITMASK)
+ {
+ if (++splitnum >= NCACHED)
+ {
+ elog(WARN, OVMSG);
+ }
+ metap->OVFL_POINT = splitnum;
+ metap->SPARES[splitnum] = metap->SPARES[splitnum - 1];
+ metap->SPARES[splitnum - 1]--;
+ offset = 0;
+ }
}
- metap->OVFL_POINT = splitnum;
- metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
- metap->SPARES[splitnum-1]--;
- offset = 0;
- }
-
- /* Check if we need to allocate a new bitmap page */
- if (free_bit == BMPGSZ_BIT(metap) - 1) {
- /* won't be needing old map page */
-
- _hash_relbuf(rel, mapbuf, HASH_WRITE);
-
- free_page++;
- if (free_page >= NCACHED) {
- elog(WARN, OVMSG);
+ else
+ {
+
+ /*
+ * Free_bit addresses the last used bit. Bump it to address the
+ * first available bit.
+ */
+ free_bit++;
+ SETBIT(freep, free_bit);
+ _hash_wrtbuf(rel, mapbuf);
}
-
+
+ /* Calculate address of the new overflow page */
+ oaddr = OADDR_OF(splitnum, offset);
+ _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
+ return (oaddr);
+
+found:
+ bit = bit + _hash_firstfreebit(freep[j]);
+ SETBIT(freep, bit);
+ _hash_wrtbuf(rel, mapbuf);
+
/*
- * This is tricky. The 1 indicates that you want the new page
- * allocated with 1 clear bit. Actually, you are going to
- * allocate 2 pages from this map. The first is going to be
- * the map page, the second is the overflow page we were
- * looking for. The init_bitmap routine automatically, sets
- * the first bit of itself to indicate that the bitmap itself
- * is in use. We would explicitly set the second bit, but
- * don't have to if we tell init_bitmap not to leave it clear
- * in the first place.
+ * Bits are addressed starting with 0, but overflow pages are
+ * addressed beginning at 1. Bit is a bit address number, so we need to
+ * increment it to convert it to a page number.
*/
- if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset),
- 1, free_page)) {
- elog(WARN, "overflow_page: problem with _hash_initbitmap.");
+
+ bit = 1 + bit + (i * BMPGSZ_BIT(metap));
+ if (bit >= metap->LAST_FREED)
+ {
+ metap->LAST_FREED = bit - 1;
}
- metap->SPARES[splitnum]++;
- offset++;
- if (offset > SPLITMASK) {
- if (++splitnum >= NCACHED) {
+
+ /* Calculate the split number for this page */
+ for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++)
+ ;
+ offset = (i ? bit - metap->SPARES[i - 1] : bit);
+ if (offset >= SPLITMASK)
+ {
elog(WARN, OVMSG);
- }
- metap->OVFL_POINT = splitnum;
- metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
- metap->SPARES[splitnum-1]--;
- offset = 0;
}
- } else {
-
- /*
- * Free_bit addresses the last used bit. Bump it to address
- * the first available bit.
- */
- free_bit++;
- SETBIT(freep, free_bit);
- _hash_wrtbuf(rel, mapbuf);
- }
-
- /* Calculate address of the new overflow page */
- oaddr = OADDR_OF(splitnum, offset);
- _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
- return (oaddr);
-
- found:
- bit = bit + _hash_firstfreebit(freep[j]);
- SETBIT(freep, bit);
- _hash_wrtbuf(rel, mapbuf);
-
- /*
- * Bits are addressed starting with 0, but overflow pages are addressed
- * beginning at 1. Bit is a bit addressnumber, so we need to increment
- * it to convert it to a page number.
- */
-
- bit = 1 + bit + (i * BMPGSZ_BIT(metap));
- if (bit >= metap->LAST_FREED) {
- metap->LAST_FREED = bit - 1;
- }
-
- /* Calculate the split number for this page */
- for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++)
- ;
- offset = (i ? bit - metap->SPARES[i - 1] : bit);
- if (offset >= SPLITMASK) {
- elog(WARN, OVMSG);
- }
-
- /* initialize this page */
- oaddr = OADDR_OF(i, offset);
- _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
- return (oaddr);
+
+ /* initialize this page */
+ oaddr = OADDR_OF(i, offset);
+ _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
+ return (oaddr);
}
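
For readers tracing the address arithmetic: a minimal standalone sketch of
the packing that OADDR_OF performs and that _hash_freeovflpage undoes
further down.  The shift width and mask here are assumptions for the
sketch, not the definitions from access/hash.h.

	#include <assert.h>

	#define SK_SPLITSHIFT	11						/* assumed width */
	#define SK_SPLITMASK	((1 << SK_SPLITSHIFT) - 1)

	int
	main(void)
	{
		unsigned	splitnum = 3, offset = 42;
		unsigned	addr = (splitnum << SK_SPLITSHIFT) | offset;

		/* decoding mirrors _hash_freeovflpage below */
		assert((addr >> SK_SPLITSHIFT) == splitnum);
		assert((addr & SK_SPLITMASK) == offset);
		return 0;
	}
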
/*
- * _hash_firstfreebit()
+ * _hash_firstfreebit()
+ *
+ * Return the first bit that is not set in the argument 'map'. This
+ * function is used to find an available overflow page within a
+ * splitnumber.
*
- * Return the first bit that is not set in the argument 'map'. This
- * function is used to find an available overflow page within a
- * splitnumber.
- *
*/
-static uint32
+static uint32
_hash_firstfreebit(uint32 map)
{
- uint32 i, mask;
-
- mask = 0x1;
- for (i = 0; i < BITS_PER_MAP; i++) {
- if (!(mask & map))
- return (i);
- mask = mask << 1;
- }
- return (i);
+ uint32 i,
+ mask;
+
+ mask = 0x1;
+ for (i = 0; i < BITS_PER_MAP; i++)
+ {
+ if (!(mask & map))
+ return (i);
+ mask = mask << 1;
+ }
+ return (i);
}
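
A quick check of the scan above, runnable in isolation: with map = 0x17
(binary 10111, bits 0-2 and 4 in use), the first clear bit is bit 3.  The
32-bit word width stands in for BITS_PER_MAP, which is an assumption here.

	#include <stdio.h>

	int
	main(void)
	{
		unsigned	map = 0x17;
		unsigned	i, mask;

		/* same loop shape as _hash_firstfreebit */
		for (i = 0, mask = 0x1; i < 32; i++, mask <<= 1)
			if (!(mask & map))
				break;
		printf("first free bit = %u\n", i);		/* prints 3 */
		return 0;
	}
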
/*
- * _hash_freeovflpage() -
+ * _hash_freeovflpage() -
*
- * Mark this overflow page as free and return a buffer with
- * the page that follows it (which may be defined as
- * InvalidBuffer).
+ * Mark this overflow page as free and return a buffer with
+ * the page that follows it (which may be defined as
+ * InvalidBuffer).
*
*/
Buffer
_hash_freeovflpage(Relation rel, Buffer ovflbuf)
{
- HashMetaPage metap;
- Buffer metabuf;
- Buffer mapbuf;
- BlockNumber prevblkno;
- BlockNumber blkno;
- BlockNumber nextblkno;
- HashPageOpaque ovflopaque;
- Page ovflpage;
- Page mappage;
- OverflowPageAddress addr;
- SplitNumber splitnum;
- uint32 *freep;
- uint32 ovflpgno;
- int32 bitmappage, bitmapbit;
- Bucket bucket;
-
- metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
-
- ovflpage = BufferGetPage(ovflbuf);
- _hash_checkpage(ovflpage, LH_OVERFLOW_PAGE);
- ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
- addr = ovflopaque->hasho_oaddr;
- nextblkno = ovflopaque->hasho_nextblkno;
- prevblkno = ovflopaque->hasho_prevblkno;
- bucket = ovflopaque->hasho_bucket;
- memset(ovflpage, 0, BufferGetPageSize(ovflbuf));
- _hash_wrtbuf(rel, ovflbuf);
-
- /*
- * fix up the bucket chain. this is a doubly-linked list, so we
- * must fix up the bucket chain members behind and ahead of the
- * overflow page being deleted.
- *
- * XXX this should look like:
- * - lock prev/next
- * - modify/write prev/next (how to do write ordering with a
- * doubly-linked list?)
- * - unlock prev/next
- */
- if (BlockNumberIsValid(prevblkno)) {
- Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE);
- Page prevpage = BufferGetPage(prevbuf);
- HashPageOpaque prevopaque =
- (HashPageOpaque) PageGetSpecialPointer(prevpage);
-
- _hash_checkpage(prevpage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
- Assert(prevopaque->hasho_bucket == bucket);
- prevopaque->hasho_nextblkno = nextblkno;
- _hash_wrtbuf(rel, prevbuf);
- }
- if (BlockNumberIsValid(nextblkno)) {
- Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE);
- Page nextpage = BufferGetPage(nextbuf);
- HashPageOpaque nextopaque =
- (HashPageOpaque) PageGetSpecialPointer(nextpage);
-
- _hash_checkpage(nextpage, LH_OVERFLOW_PAGE);
- Assert(nextopaque->hasho_bucket == bucket);
- nextopaque->hasho_prevblkno = prevblkno;
- _hash_wrtbuf(rel, nextbuf);
- }
-
- /*
- * Fix up the overflow page bitmap that tracks this particular
- * overflow page. The bitmap can be found in the MetaPageData
- * array element hashm_mapp[bitmappage].
- */
- splitnum = (addr >> SPLITSHIFT);
- ovflpgno =
- (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;
-
- if (ovflpgno < metap->LAST_FREED) {
- metap->LAST_FREED = ovflpgno;
- }
-
- bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT));
- bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1);
-
- blkno = metap->hashm_mapp[bitmappage];
- mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
- mappage = BufferGetPage(mapbuf);
- _hash_checkpage(mappage, LH_BITMAP_PAGE);
- freep = HashPageGetBitmap(mappage);
- CLRBIT(freep, bitmapbit);
- _hash_wrtbuf(rel, mapbuf);
-
- _hash_relbuf(rel, metabuf, HASH_WRITE);
-
- /*
- * now instantiate the page that replaced this one,
- * if it exists, and return that buffer with a write lock.
- */
- if (BlockNumberIsValid(nextblkno)) {
- return (_hash_getbuf(rel, nextblkno, HASH_WRITE));
- } else {
- return (InvalidBuffer);
- }
+ HashMetaPage metap;
+ Buffer metabuf;
+ Buffer mapbuf;
+ BlockNumber prevblkno;
+ BlockNumber blkno;
+ BlockNumber nextblkno;
+ HashPageOpaque ovflopaque;
+ Page ovflpage;
+ Page mappage;
+ OverflowPageAddress addr;
+ SplitNumber splitnum;
+ uint32 *freep;
+ uint32 ovflpgno;
+ int32 bitmappage,
+ bitmapbit;
+ Bucket bucket;
+
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ ovflpage = BufferGetPage(ovflbuf);
+ _hash_checkpage(ovflpage, LH_OVERFLOW_PAGE);
+ ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
+ addr = ovflopaque->hasho_oaddr;
+ nextblkno = ovflopaque->hasho_nextblkno;
+ prevblkno = ovflopaque->hasho_prevblkno;
+ bucket = ovflopaque->hasho_bucket;
+ memset(ovflpage, 0, BufferGetPageSize(ovflbuf));
+ _hash_wrtbuf(rel, ovflbuf);
+
+ /*
+ * fix up the bucket chain. this is a doubly-linked list, so we must
+ * fix up the bucket chain members behind and ahead of the overflow
+ * page being deleted.
+ *
+	 * XXX this should look like:
+	 *	- lock prev/next
+	 *	- modify/write prev/next (how to do write ordering with a
+	 *	  doubly-linked list?)
+	 *	- unlock prev/next
+ */
+ if (BlockNumberIsValid(prevblkno))
+ {
+ Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE);
+ Page prevpage = BufferGetPage(prevbuf);
+ HashPageOpaque prevopaque =
+ (HashPageOpaque) PageGetSpecialPointer(prevpage);
+
+ _hash_checkpage(prevpage, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ Assert(prevopaque->hasho_bucket == bucket);
+ prevopaque->hasho_nextblkno = nextblkno;
+ _hash_wrtbuf(rel, prevbuf);
+ }
+ if (BlockNumberIsValid(nextblkno))
+ {
+ Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE);
+ Page nextpage = BufferGetPage(nextbuf);
+ HashPageOpaque nextopaque =
+ (HashPageOpaque) PageGetSpecialPointer(nextpage);
+
+ _hash_checkpage(nextpage, LH_OVERFLOW_PAGE);
+ Assert(nextopaque->hasho_bucket == bucket);
+ nextopaque->hasho_prevblkno = prevblkno;
+ _hash_wrtbuf(rel, nextbuf);
+ }
+
+ /*
+ * Fix up the overflow page bitmap that tracks this particular
+ * overflow page. The bitmap can be found in the MetaPageData array
+ * element hashm_mapp[bitmappage].
+ */
+ splitnum = (addr >> SPLITSHIFT);
+ ovflpgno =
+ (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;
+
+ if (ovflpgno < metap->LAST_FREED)
+ {
+ metap->LAST_FREED = ovflpgno;
+ }
+
+ bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT));
+ bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1);
+
+ blkno = metap->hashm_mapp[bitmappage];
+ mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ mappage = BufferGetPage(mapbuf);
+ _hash_checkpage(mappage, LH_BITMAP_PAGE);
+ freep = HashPageGetBitmap(mappage);
+ CLRBIT(freep, bitmapbit);
+ _hash_wrtbuf(rel, mapbuf);
+
+ _hash_relbuf(rel, metabuf, HASH_WRITE);
+
+ /*
+ * now instantiate the page that replaced this one, if it exists, and
+ * return that buffer with a write lock.
+ */
+ if (BlockNumberIsValid(nextblkno))
+ {
+ return (_hash_getbuf(rel, nextblkno, HASH_WRITE));
+ }
+ else
+ {
+ return (InvalidBuffer);
+ }
}
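
The bitmap coordinates computed above reduce to a divide and a modulo by
the number of bits one bitmap page can hold.  A sketch, assuming a full
8192-byte bitmap page (65536 trackable pages per map -- the real size
comes from BMPGSZ_BIT(metap)):

	#include <stdio.h>

	int
	main(void)
	{
		unsigned	bmpgsz_bit = 8192 * 8;	/* assumed bits per map page */
		unsigned	ovflpgno = 70000;		/* linear overflow page number */

		/* which map page, and which bit on it, track this overflow page */
		printf("bitmappage = %u\n", ovflpgno / bmpgsz_bit);	/* 1 */
		printf("bitmapbit  = %u\n", ovflpgno % bmpgsz_bit);	/* 4464 */
		return 0;
	}
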
/*
- * _hash_initbitmap()
- *
- * Initialize a new bitmap page. The metapage has a write-lock upon
- * entering the function.
+ * _hash_initbitmap()
+ *
+ * Initialize a new bitmap page. The metapage has a write-lock upon
+ * entering the function.
*
* 'pnum' is the OverflowPageAddress of the new bitmap page.
* 'nbits' is how many bits to clear (i.e., make available) in the new
@@ -404,211 +427,219 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
* metapage's array of bitmap page OverflowPageAddresses.
*/
-#define INT_MASK ((1 << INT_TO_BIT) -1)
+#define INT_MASK ((1 << INT_TO_BIT) -1)
int32
_hash_initbitmap(Relation rel,
- HashMetaPage metap,
- int32 pnum,
- int32 nbits,
- int32 ndx)
+ HashMetaPage metap,
+ int32 pnum,
+ int32 nbits,
+ int32 ndx)
{
- Buffer buf;
- BlockNumber blkno;
- Page pg;
- HashPageOpaque op;
- uint32 *freep;
- int clearbytes, clearints;
-
- blkno = OADDR_TO_BLKNO(pnum);
- buf = _hash_getbuf(rel, blkno, HASH_WRITE);
- pg = BufferGetPage(buf);
- _hash_pageinit(pg, BufferGetPageSize(buf));
- op = (HashPageOpaque) PageGetSpecialPointer(pg);
- op->hasho_oaddr = InvalidOvflAddress;
- op->hasho_prevblkno = InvalidBlockNumber;
- op->hasho_nextblkno = InvalidBlockNumber;
- op->hasho_flag = LH_BITMAP_PAGE;
- op->hasho_bucket = -1;
-
- freep = HashPageGetBitmap(pg);
-
- /* set all of the bits above 'nbits' to 1 */
- clearints = ((nbits - 1) >> INT_TO_BIT) + 1;
- clearbytes = clearints << INT_TO_BYTE;
- memset((char *) freep, 0, clearbytes);
- memset(((char *) freep) + clearbytes, 0xFF,
- BMPGSZ_BYTE(metap) - clearbytes);
- freep[clearints - 1] = ALL_SET << (nbits & INT_MASK);
-
- /* bit 0 represents the new bitmap page */
- SETBIT(freep, 0);
-
- /* metapage already has a write lock */
- metap->hashm_nmaps++;
- metap->hashm_mapp[ndx] = blkno;
-
- /* write out the new bitmap page (releasing its locks) */
- _hash_wrtbuf(rel, buf);
-
- return (0);
+ Buffer buf;
+ BlockNumber blkno;
+ Page pg;
+ HashPageOpaque op;
+ uint32 *freep;
+ int clearbytes,
+ clearints;
+
+ blkno = OADDR_TO_BLKNO(pnum);
+ buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ pg = BufferGetPage(buf);
+ _hash_pageinit(pg, BufferGetPageSize(buf));
+ op = (HashPageOpaque) PageGetSpecialPointer(pg);
+ op->hasho_oaddr = InvalidOvflAddress;
+ op->hasho_prevblkno = InvalidBlockNumber;
+ op->hasho_nextblkno = InvalidBlockNumber;
+ op->hasho_flag = LH_BITMAP_PAGE;
+ op->hasho_bucket = -1;
+
+ freep = HashPageGetBitmap(pg);
+
+ /* set all of the bits above 'nbits' to 1 */
+ clearints = ((nbits - 1) >> INT_TO_BIT) + 1;
+ clearbytes = clearints << INT_TO_BYTE;
+ memset((char *) freep, 0, clearbytes);
+ memset(((char *) freep) + clearbytes, 0xFF,
+ BMPGSZ_BYTE(metap) - clearbytes);
+ freep[clearints - 1] = ALL_SET << (nbits & INT_MASK);
+
+ /* bit 0 represents the new bitmap page */
+ SETBIT(freep, 0);
+
+ /* metapage already has a write lock */
+ metap->hashm_nmaps++;
+ metap->hashm_mapp[ndx] = blkno;
+
+ /* write out the new bitmap page (releasing its locks) */
+ _hash_wrtbuf(rel, buf);
+
+ return (0);
}
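
The partial-word arithmetic in the middle of _hash_initbitmap is easy to
check by hand.  A sketch with 32-bit map words (INT_TO_BIT = 5 and
ALL_SET = 0xFFFFFFFF are assumptions here): with nbits = 5, one word is
zeroed, its bits above bit 4 are re-set, and bit 0 is then claimed for the
bitmap page itself, leaving pages 1 through 4 free.

	#include <stdio.h>

	int
	main(void)
	{
		unsigned	freep[1];
		int			nbits = 5;
		int			clearints = ((nbits - 1) >> 5) + 1;	/* 1 word */

		freep[0] = 0;								/* clear low words */
		freep[clearints - 1] = 0xFFFFFFFFu << (nbits & 31);
		freep[0] |= 0x1;							/* bitmap page in use */
		printf("freep[0] = 0x%08x\n", freep[0]);	/* 0xffffffe1 */
		return 0;
	}
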
/*
- * _hash_squeezebucket(rel, bucket)
+ * _hash_squeezebucket(rel, bucket)
*
- * Try to squeeze the tuples onto pages occuring earlier in the
- * bucket chain in an attempt to free overflow pages. When we start
- * the "squeezing", the page from which we start taking tuples (the
- * "read" page) is the last bucket in the bucket chain and the page
- * onto which we start squeezing tuples (the "write" page) is the
- * first page in the bucket chain. The read page works backward and
- * the write page works forward; the procedure terminates when the
- * read page and write page are the same page.
+ *	Try to squeeze the tuples onto pages occurring earlier in the
+ * bucket chain in an attempt to free overflow pages. When we start
+ * the "squeezing", the page from which we start taking tuples (the
+ * "read" page) is the last bucket in the bucket chain and the page
+ * onto which we start squeezing tuples (the "write" page) is the
+ * first page in the bucket chain. The read page works backward and
+ * the write page works forward; the procedure terminates when the
+ * read page and write page are the same page.
*/
void
_hash_squeezebucket(Relation rel,
- HashMetaPage metap,
- Bucket bucket)
+ HashMetaPage metap,
+ Bucket bucket)
{
- Buffer wbuf;
- Buffer rbuf = 0;
- BlockNumber wblkno;
- BlockNumber rblkno;
- Page wpage;
- Page rpage;
- HashPageOpaque wopaque;
- HashPageOpaque ropaque;
- OffsetNumber woffnum;
- OffsetNumber roffnum;
- HashItem hitem;
- int itemsz;
-
-/* elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */
-
- /*
- * start squeezing into the base bucket page.
- */
- wblkno = BUCKET_TO_BLKNO(bucket);
- wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
- wpage = BufferGetPage(wbuf);
- _hash_checkpage(wpage, LH_BUCKET_PAGE);
- wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
-
- /*
- * if there aren't any overflow pages, there's nothing to squeeze.
- */
- if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) {
- _hash_relbuf(rel, wbuf, HASH_WRITE);
- return;
- }
-
- /*
- * find the last page in the bucket chain by starting at the base
- * bucket page and working forward.
- *
- * XXX if chains tend to be long, we should probably move forward
- * using HASH_READ and then _hash_chgbufaccess to HASH_WRITE when
- * we reach the end. if they are short we probably don't care
- * very much. if the hash function is working at all, they had
- * better be short..
- */
- ropaque = wopaque;
- do {
- rblkno = ropaque->hasho_nextblkno;
- if (ropaque != wopaque) {
- _hash_relbuf(rel, rbuf, HASH_WRITE);
- }
- rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
- rpage = BufferGetPage(rbuf);
- _hash_checkpage(rpage, LH_OVERFLOW_PAGE);
- Assert(!PageIsEmpty(rpage));
- ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
- Assert(ropaque->hasho_bucket == bucket);
- } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
-
- /*
- * squeeze the tuples.
- */
- roffnum = FirstOffsetNumber;
- for(;;) {
- hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum));
- itemsz = IndexTupleDSize(hitem->hash_itup)
- + (sizeof(HashItemData) - sizeof(IndexTupleData));
- itemsz = DOUBLEALIGN(itemsz);
-
+ Buffer wbuf;
+ Buffer rbuf = 0;
+ BlockNumber wblkno;
+ BlockNumber rblkno;
+ Page wpage;
+ Page rpage;
+ HashPageOpaque wopaque;
+ HashPageOpaque ropaque;
+ OffsetNumber woffnum;
+ OffsetNumber roffnum;
+ HashItem hitem;
+ int itemsz;
+
+/* elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */
+
/*
- * walk up the bucket chain, looking for a page big enough for
- * this item.
+ * start squeezing into the base bucket page.
*/
- while (PageGetFreeSpace(wpage) < itemsz) {
- wblkno = wopaque->hasho_nextblkno;
+ wblkno = BUCKET_TO_BLKNO(bucket);
+ wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
+ wpage = BufferGetPage(wbuf);
+ _hash_checkpage(wpage, LH_BUCKET_PAGE);
+ wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
- _hash_wrtbuf(rel, wbuf);
-
- if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) {
- _hash_wrtbuf(rel, rbuf);
- /* wbuf is already released */
+ /*
+ * if there aren't any overflow pages, there's nothing to squeeze.
+ */
+ if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
+ {
+ _hash_relbuf(rel, wbuf, HASH_WRITE);
return;
- }
-
- wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
- wpage = BufferGetPage(wbuf);
- _hash_checkpage(wpage, LH_OVERFLOW_PAGE);
- Assert(!PageIsEmpty(wpage));
- wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
- Assert(wopaque->hasho_bucket == bucket);
}
-
- /*
- * if we're here, we have found room so insert on the "write"
- * page.
- */
- woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage));
- PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED);
-
- /*
- * delete the tuple from the "read" page.
- * PageIndexTupleDelete repacks the ItemId array, so 'roffnum'
- * will be "advanced" to the "next" ItemId.
+
+ /*
+ * find the last page in the bucket chain by starting at the base
+ * bucket page and working forward.
+ *
+ * XXX if chains tend to be long, we should probably move forward using
+ * HASH_READ and then _hash_chgbufaccess to HASH_WRITE when we reach
+ * the end. if they are short we probably don't care very much. if
+ * the hash function is working at all, they had better be short..
*/
- PageIndexTupleDelete(rpage, roffnum);
- _hash_wrtnorelbuf(rel, rbuf);
-
+ ropaque = wopaque;
+ do
+ {
+ rblkno = ropaque->hasho_nextblkno;
+ if (ropaque != wopaque)
+ {
+ _hash_relbuf(rel, rbuf, HASH_WRITE);
+ }
+ rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
+ rpage = BufferGetPage(rbuf);
+ _hash_checkpage(rpage, LH_OVERFLOW_PAGE);
+ Assert(!PageIsEmpty(rpage));
+ ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
+ Assert(ropaque->hasho_bucket == bucket);
+ } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
+
/*
- * if the "read" page is now empty because of the deletion,
- * free it.
+ * squeeze the tuples.
*/
- if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) {
- rblkno = ropaque->hasho_prevblkno;
- Assert(BlockNumberIsValid(rblkno));
-
- /*
- * free this overflow page. the extra _hash_relbuf is
- * because _hash_freeovflpage gratuitously returns the
- * next page (we want the previous page and will get it
- * ourselves later).
- */
- rbuf = _hash_freeovflpage(rel, rbuf);
- if (BufferIsValid(rbuf)) {
- _hash_relbuf(rel, rbuf, HASH_WRITE);
- }
-
- if (rblkno == wblkno) {
- /* rbuf is already released */
- _hash_wrtbuf(rel, wbuf);
- return;
- }
-
- rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
- rpage = BufferGetPage(rbuf);
- _hash_checkpage(rpage, LH_OVERFLOW_PAGE);
- Assert(!PageIsEmpty(rpage));
- ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
- Assert(ropaque->hasho_bucket == bucket);
-
- roffnum = FirstOffsetNumber;
+ roffnum = FirstOffsetNumber;
+ for (;;)
+ {
+ hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum));
+ itemsz = IndexTupleDSize(hitem->hash_itup)
+ + (sizeof(HashItemData) - sizeof(IndexTupleData));
+ itemsz = DOUBLEALIGN(itemsz);
+
+ /*
+ * walk up the bucket chain, looking for a page big enough for
+ * this item.
+ */
+ while (PageGetFreeSpace(wpage) < itemsz)
+ {
+ wblkno = wopaque->hasho_nextblkno;
+
+ _hash_wrtbuf(rel, wbuf);
+
+ if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno))
+ {
+ _hash_wrtbuf(rel, rbuf);
+ /* wbuf is already released */
+ return;
+ }
+
+ wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
+ wpage = BufferGetPage(wbuf);
+ _hash_checkpage(wpage, LH_OVERFLOW_PAGE);
+ Assert(!PageIsEmpty(wpage));
+ wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
+ Assert(wopaque->hasho_bucket == bucket);
+ }
+
+ /*
+ * if we're here, we have found room so insert on the "write"
+ * page.
+ */
+ woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage));
+ PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED);
+
+ /*
+ * delete the tuple from the "read" page. PageIndexTupleDelete
+ * repacks the ItemId array, so 'roffnum' will be "advanced" to
+ * the "next" ItemId.
+ */
+ PageIndexTupleDelete(rpage, roffnum);
+ _hash_wrtnorelbuf(rel, rbuf);
+
+ /*
+ * if the "read" page is now empty because of the deletion, free
+ * it.
+ */
+ if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE))
+ {
+ rblkno = ropaque->hasho_prevblkno;
+ Assert(BlockNumberIsValid(rblkno));
+
+ /*
+ * free this overflow page. the extra _hash_relbuf is because
+ * _hash_freeovflpage gratuitously returns the next page (we
+ * want the previous page and will get it ourselves later).
+ */
+ rbuf = _hash_freeovflpage(rel, rbuf);
+ if (BufferIsValid(rbuf))
+ {
+ _hash_relbuf(rel, rbuf, HASH_WRITE);
+ }
+
+ if (rblkno == wblkno)
+ {
+ /* rbuf is already released */
+ _hash_wrtbuf(rel, wbuf);
+ return;
+ }
+
+ rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
+ rpage = BufferGetPage(rbuf);
+ _hash_checkpage(rpage, LH_OVERFLOW_PAGE);
+ Assert(!PageIsEmpty(rpage));
+ ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
+ Assert(ropaque->hasho_bucket == bucket);
+
+ roffnum = FirstOffsetNumber;
+ }
}
- }
}
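
Stripped of buffer locking, the squeeze is a two-cursor compaction: a
write cursor walks forward from the bucket page while a read cursor drains
the tail, and the pass ends when they meet.  A toy model of just that
movement (pages reduced to item counts; everything here is illustrative,
not the on-disk layout):

	#include <stdio.h>

	#define NPAGES	4
	#define CAP		4

	int
	main(void)
	{
		int			used[NPAGES] = {4, 1, 2, 3};	/* items per chain page */
		int			w = 0, r = NPAGES - 1;

		while (w < r)
		{
			if (used[w] == CAP)
				w++;				/* write page full: advance */
			else if (used[r] == 0)
				r--;				/* read page drained: free it, step back */
			else
			{
				used[w]++;			/* move one item tail -> front */
				used[r]--;
			}
		}
		for (w = 0; w < NPAGES; w++)
			printf("page %d: %d items\n", w, used[w]);	/* 4 4 2 0 */
		return 0;
	}
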
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 49c8f03f524..6c819b652d2 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -1,30 +1,30 @@
/*-------------------------------------------------------------------------
*
* hashpage.c--
- * Hash table page management code for the Postgres hash access method
+ * Hash table page management code for the Postgres hash access method
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.9 1997/08/18 20:51:34 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.10 1997/09/07 04:38:00 momjian Exp $
*
* NOTES
- * Postgres hash pages look like ordinary relation pages. The opaque
- * data at high addresses includes information about the page including
- * whether a page is an overflow page or a true bucket, the block
- * numbers of the preceding and following pages, and the overflow
- * address of the page if it is an overflow page.
+ * Postgres hash pages look like ordinary relation pages. The opaque
+ * data at high addresses includes information about the page including
+ * whether a page is an overflow page or a true bucket, the block
+ * numbers of the preceding and following pages, and the overflow
+ * address of the page if it is an overflow page.
*
- * The first page in a hash relation, page zero, is special -- it stores
- * information describing the hash table; it is referred to as teh
- * "meta page." Pages one and higher store the actual data.
+ * The first page in a hash relation, page zero, is special -- it stores
+ *	information describing the hash table; it is referred to as the
+ * "meta page." Pages one and higher store the actual data.
*
*-------------------------------------------------------------------------
*/
#include <postgres.h>
-
+
#include <access/hash.h>
#include <storage/bufmgr.h>
#include <miscadmin.h>
@@ -33,411 +33,429 @@
#include <access/genam.h>
#ifndef HAVE_MEMMOVE
-# include <regex/utils.h>
+#include <regex/utils.h>
#else
-# include <string.h>
+#include <string.h>
#endif
-static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access);
-static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access);
-static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket);
-
-/*
- * We use high-concurrency locking on hash indices. There are two cases in
- * which we don't do locking. One is when we're building the index.
- * Since the creating transaction has not committed, no one can see
- * the index, and there's no reason to share locks. The second case
- * is when we're just starting up the database system. We use some
- * special-purpose initialization code in the relation cache manager
- * (see utils/cache/relcache.c) to allow us to do indexed scans on
- * the system catalogs before we'd normally be able to. This happens
- * before the lock table is fully initialized, so we can't use it.
- * Strictly speaking, this violates 2pl, but we don't do 2pl on the
- * system catalogs anyway.
+static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access);
+static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access);
+static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket);
+
+/*
+ * We use high-concurrency locking on hash indices. There are two cases in
+ * which we don't do locking. One is when we're building the index.
+ * Since the creating transaction has not committed, no one can see
+ * the index, and there's no reason to share locks. The second case
+ * is when we're just starting up the database system. We use some
+ * special-purpose initialization code in the relation cache manager
+ * (see utils/cache/relcache.c) to allow us to do indexed scans on
+ * the system catalogs before we'd normally be able to. This happens
+ * before the lock table is fully initialized, so we can't use it.
+ * Strictly speaking, this violates 2pl, but we don't do 2pl on the
+ * system catalogs anyway.
*/
-#define USELOCKING (!BuildingHash && !IsInitProcessingMode())
+#define USELOCKING (!BuildingHash && !IsInitProcessingMode())
/*
- * _hash_metapinit() -- Initialize the metadata page of a hash index,
- * the two buckets that we begin with and the initial
- * bitmap page.
+ * _hash_metapinit() -- Initialize the metadata page of a hash index,
+ * the two buckets that we begin with and the initial
+ * bitmap page.
*/
void
_hash_metapinit(Relation rel)
{
- HashMetaPage metap;
- HashPageOpaque pageopaque;
- Buffer metabuf;
- Buffer buf;
- Page pg;
- int nbuckets;
- uint32 nelem; /* number elements */
- uint32 lg2nelem; /* _hash_log2(nelem) */
- uint32 nblocks;
- uint16 i;
-
- /* can't be sharing this with anyone, now... */
- if (USELOCKING)
- RelationSetLockForWrite(rel);
-
- if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) {
- elog(WARN, "Cannot initialize non-empty hash table %s",
- RelationGetRelationName(rel));
- }
-
- metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
- pg = BufferGetPage(metabuf);
- metap = (HashMetaPage) pg;
- _hash_pageinit(pg, BufferGetPageSize(metabuf));
-
- metap->hashm_magic = HASH_MAGIC;
- metap->hashm_version = HASH_VERSION;
- metap->hashm_nkeys = 0;
- metap->hashm_nmaps = 0;
- metap->hashm_ffactor = DEFAULT_FFACTOR;
- metap->hashm_bsize = BufferGetPageSize(metabuf);
- metap->hashm_bshift = _hash_log2(metap->hashm_bsize);
- for (i = metap->hashm_bshift; i > 0; --i) {
- if ((1 << i) < (metap->hashm_bsize -
- (DOUBLEALIGN(sizeof(PageHeaderData)) +
- DOUBLEALIGN(sizeof(HashPageOpaqueData))))) {
- break;
+ HashMetaPage metap;
+ HashPageOpaque pageopaque;
+ Buffer metabuf;
+ Buffer buf;
+ Page pg;
+ int nbuckets;
+	uint32		nelem;			/* number of elements */
+ uint32 lg2nelem; /* _hash_log2(nelem) */
+ uint32 nblocks;
+ uint16 i;
+
+ /* can't be sharing this with anyone, now... */
+ if (USELOCKING)
+ RelationSetLockForWrite(rel);
+
+ if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0)
+ {
+ elog(WARN, "Cannot initialize non-empty hash table %s",
+ RelationGetRelationName(rel));
+ }
+
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+ pg = BufferGetPage(metabuf);
+ metap = (HashMetaPage) pg;
+ _hash_pageinit(pg, BufferGetPageSize(metabuf));
+
+ metap->hashm_magic = HASH_MAGIC;
+ metap->hashm_version = HASH_VERSION;
+ metap->hashm_nkeys = 0;
+ metap->hashm_nmaps = 0;
+ metap->hashm_ffactor = DEFAULT_FFACTOR;
+ metap->hashm_bsize = BufferGetPageSize(metabuf);
+ metap->hashm_bshift = _hash_log2(metap->hashm_bsize);
+ for (i = metap->hashm_bshift; i > 0; --i)
+ {
+ if ((1 << i) < (metap->hashm_bsize -
+ (DOUBLEALIGN(sizeof(PageHeaderData)) +
+ DOUBLEALIGN(sizeof(HashPageOpaqueData)))))
+ {
+ break;
+ }
}
- }
- Assert(i);
- metap->hashm_bmsize = 1 << i;
- metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
-
- /*
- * Make nelem = 2 rather than 0 so that we end up allocating space
- * for the next greater power of two number of buckets.
- */
- nelem = 2;
- lg2nelem = 1; /*_hash_log2(MAX(nelem, 2)) */
- nbuckets = 2; /*1 << lg2nelem */
-
- memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares));
- memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
-
- metap->hashm_spares[lg2nelem] = 2; /* lg2nelem + 1 */
- metap->hashm_spares[lg2nelem + 1] = 2; /* lg2nelem + 1 */
- metap->hashm_ovflpoint = 1; /* lg2nelem */
- metap->hashm_lastfreed = 2;
-
- metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */
- metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */
-
- pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
- pageopaque->hasho_oaddr = InvalidOvflAddress;
- pageopaque->hasho_prevblkno = InvalidBlockNumber;
- pageopaque->hasho_nextblkno = InvalidBlockNumber;
- pageopaque->hasho_flag = LH_META_PAGE;
- pageopaque->hasho_bucket = -1;
-
- /*
- * First bitmap page is at: splitpoint lg2nelem page offset 1 which
- * turns out to be page 3. Couldn't initialize page 3 until we created
- * the first two buckets above.
- */
- if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0))
- elog(WARN, "Problem with _hash_initbitmap.");
-
- /* all done */
- _hash_wrtnorelbuf(rel, metabuf);
-
- /*
- * initialize the first two buckets
- */
- for (i = 0; i <= 1; i++) {
- buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE);
- pg = BufferGetPage(buf);
- _hash_pageinit(pg, BufferGetPageSize(buf));
+ Assert(i);
+ metap->hashm_bmsize = 1 << i;
+ metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
+
+ /*
+ * Make nelem = 2 rather than 0 so that we end up allocating space for
+ * the next greater power of two number of buckets.
+ */
+ nelem = 2;
+ lg2nelem = 1; /* _hash_log2(MAX(nelem, 2)) */
+ nbuckets = 2; /* 1 << lg2nelem */
+
+ memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares));
+ memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
+
+ metap->hashm_spares[lg2nelem] = 2; /* lg2nelem + 1 */
+ metap->hashm_spares[lg2nelem + 1] = 2; /* lg2nelem + 1 */
+ metap->hashm_ovflpoint = 1; /* lg2nelem */
+ metap->hashm_lastfreed = 2;
+
+ metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */
+ metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */
+
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
pageopaque->hasho_oaddr = InvalidOvflAddress;
pageopaque->hasho_prevblkno = InvalidBlockNumber;
pageopaque->hasho_nextblkno = InvalidBlockNumber;
- pageopaque->hasho_flag = LH_BUCKET_PAGE;
- pageopaque->hasho_bucket = i;
- _hash_wrtbuf(rel, buf);
- }
-
- _hash_relbuf(rel, metabuf, HASH_WRITE);
-
- if (USELOCKING)
- RelationUnsetLockForWrite(rel);
+ pageopaque->hasho_flag = LH_META_PAGE;
+ pageopaque->hasho_bucket = -1;
+
+ /*
+ * First bitmap page is at: splitpoint lg2nelem page offset 1 which
+ * turns out to be page 3. Couldn't initialize page 3 until we
+ * created the first two buckets above.
+ */
+ if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0))
+ elog(WARN, "Problem with _hash_initbitmap.");
+
+ /* all done */
+ _hash_wrtnorelbuf(rel, metabuf);
+
+ /*
+ * initialize the first two buckets
+ */
+ for (i = 0; i <= 1; i++)
+ {
+ buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE);
+ pg = BufferGetPage(buf);
+ _hash_pageinit(pg, BufferGetPageSize(buf));
+ pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
+ pageopaque->hasho_oaddr = InvalidOvflAddress;
+ pageopaque->hasho_prevblkno = InvalidBlockNumber;
+ pageopaque->hasho_nextblkno = InvalidBlockNumber;
+ pageopaque->hasho_flag = LH_BUCKET_PAGE;
+ pageopaque->hasho_bucket = i;
+ _hash_wrtbuf(rel, buf);
+ }
+
+ _hash_relbuf(rel, metabuf, HASH_WRITE);
+
+ if (USELOCKING)
+ RelationUnsetLockForWrite(rel);
}
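
The loop that sizes the bitmap in _hash_metapinit picks the largest power
of two that still fits on a page beside the header and opaque data.  In
isolation, with sizes that are assumptions for the sketch:

	#include <stdio.h>

	int
	main(void)
	{
		int			bsize = 8192;	/* assumed page size */
		int			overhead = 32;	/* assumed header + opaque bytes */
		int			i;

		for (i = 13 /* log2(bsize) */; i > 0; --i)
			if ((1 << i) < bsize - overhead)
				break;
		printf("bmsize = %d\n", 1 << i);	/* 4096 */
		return 0;
	}
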
/*
- * _hash_getbuf() -- Get a buffer by block number for read or write.
+ * _hash_getbuf() -- Get a buffer by block number for read or write.
*
- * When this routine returns, the appropriate lock is set on the
- * requested buffer its reference count is correct.
+ * When this routine returns, the appropriate lock is set on the
+ *		requested buffer and its reference count is correct.
*
- * XXX P_NEW is not used because, unlike the tree structures, we
- * need the bucket blocks to be at certain block numbers. we must
- * depend on the caller to call _hash_pageinit on the block if it
- * knows that this is a new block.
+ * XXX P_NEW is not used because, unlike the tree structures, we
+ * need the bucket blocks to be at certain block numbers. we must
+ * depend on the caller to call _hash_pageinit on the block if it
+ * knows that this is a new block.
*/
Buffer
_hash_getbuf(Relation rel, BlockNumber blkno, int access)
{
- Buffer buf;
-
- if (blkno == P_NEW) {
- elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW");
- }
- switch (access) {
- case HASH_WRITE:
- case HASH_READ:
- _hash_setpagelock(rel, blkno, access);
- break;
- default:
- elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %s",
- access, RelationGetRelationName(rel));
- break;
- }
- buf = ReadBuffer(rel, blkno);
-
- /* ref count and lock type are correct */
- return (buf);
+ Buffer buf;
+
+ if (blkno == P_NEW)
+ {
+ elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW");
+ }
+ switch (access)
+ {
+ case HASH_WRITE:
+ case HASH_READ:
+ _hash_setpagelock(rel, blkno, access);
+ break;
+ default:
+ elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %s",
+ access, RelationGetRelationName(rel));
+ break;
+ }
+ buf = ReadBuffer(rel, blkno);
+
+ /* ref count and lock type are correct */
+ return (buf);
}
/*
- * _hash_relbuf() -- release a locked buffer.
+ * _hash_relbuf() -- release a locked buffer.
*/
void
_hash_relbuf(Relation rel, Buffer buf, int access)
{
- BlockNumber blkno;
-
- blkno = BufferGetBlockNumber(buf);
-
- switch (access) {
- case HASH_WRITE:
- case HASH_READ:
- _hash_unsetpagelock(rel, blkno, access);
- break;
- default:
- elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %s",
- access, blkno, RelationGetRelationName(rel));
- }
-
- ReleaseBuffer(buf);
+ BlockNumber blkno;
+
+ blkno = BufferGetBlockNumber(buf);
+
+ switch (access)
+ {
+ case HASH_WRITE:
+ case HASH_READ:
+ _hash_unsetpagelock(rel, blkno, access);
+ break;
+ default:
+ elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %s",
+ access, blkno, RelationGetRelationName(rel));
+ }
+
+ ReleaseBuffer(buf);
}
/*
- * _hash_wrtbuf() -- write a hash page to disk.
+ * _hash_wrtbuf() -- write a hash page to disk.
*
- * This routine releases the lock held on the buffer and our reference
- * to it. It is an error to call _hash_wrtbuf() without a write lock
- * or a reference to the buffer.
+ * This routine releases the lock held on the buffer and our reference
+ * to it. It is an error to call _hash_wrtbuf() without a write lock
+ * or a reference to the buffer.
*/
void
_hash_wrtbuf(Relation rel, Buffer buf)
{
- BlockNumber blkno;
-
- blkno = BufferGetBlockNumber(buf);
- WriteBuffer(buf);
- _hash_unsetpagelock(rel, blkno, HASH_WRITE);
+ BlockNumber blkno;
+
+ blkno = BufferGetBlockNumber(buf);
+ WriteBuffer(buf);
+ _hash_unsetpagelock(rel, blkno, HASH_WRITE);
}
/*
- * _hash_wrtnorelbuf() -- write a hash page to disk, but do not release
- * our reference or lock.
+ * _hash_wrtnorelbuf() -- write a hash page to disk, but do not release
+ * our reference or lock.
*
- * It is an error to call _hash_wrtnorelbuf() without a write lock
- * or a reference to the buffer.
+ * It is an error to call _hash_wrtnorelbuf() without a write lock
+ * or a reference to the buffer.
*/
void
_hash_wrtnorelbuf(Relation rel, Buffer buf)
{
- BlockNumber blkno;
-
- blkno = BufferGetBlockNumber(buf);
- WriteNoReleaseBuffer(buf);
+ BlockNumber blkno;
+
+ blkno = BufferGetBlockNumber(buf);
+ WriteNoReleaseBuffer(buf);
}
Page
_hash_chgbufaccess(Relation rel,
- Buffer *bufp,
- int from_access,
- int to_access)
+ Buffer * bufp,
+ int from_access,
+ int to_access)
{
- BlockNumber blkno;
-
- blkno = BufferGetBlockNumber(*bufp);
-
- switch (from_access) {
- case HASH_WRITE:
- _hash_wrtbuf(rel, *bufp);
- break;
- case HASH_READ:
- _hash_relbuf(rel, *bufp, from_access);
- break;
- default:
- elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %s",
- from_access, blkno, RelationGetRelationName(rel));
- break;
- }
- *bufp = _hash_getbuf(rel, blkno, to_access);
- return (BufferGetPage(*bufp));
+ BlockNumber blkno;
+
+ blkno = BufferGetBlockNumber(*bufp);
+
+ switch (from_access)
+ {
+ case HASH_WRITE:
+ _hash_wrtbuf(rel, *bufp);
+ break;
+ case HASH_READ:
+ _hash_relbuf(rel, *bufp, from_access);
+ break;
+ default:
+ elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %s",
+ from_access, blkno, RelationGetRelationName(rel));
+ break;
+ }
+ *bufp = _hash_getbuf(rel, blkno, to_access);
+ return (BufferGetPage(*bufp));
}
/*
- * _hash_pageinit() -- Initialize a new page.
+ * _hash_pageinit() -- Initialize a new page.
*/
void
_hash_pageinit(Page page, Size size)
{
- Assert(((PageHeader) page)->pd_lower == 0);
- Assert(((PageHeader) page)->pd_upper == 0);
- Assert(((PageHeader) page)->pd_special == 0);
-
- /*
- * Cargo-cult programming -- don't really need this to be zero, but
- * creating new pages is an infrequent occurrence and it makes me feel
- * good when I know they're empty.
- */
- memset(page, 0, size);
-
- PageInit(page, size, sizeof(HashPageOpaqueData));
+ Assert(((PageHeader) page)->pd_lower == 0);
+ Assert(((PageHeader) page)->pd_upper == 0);
+ Assert(((PageHeader) page)->pd_special == 0);
+
+ /*
+ * Cargo-cult programming -- don't really need this to be zero, but
+ * creating new pages is an infrequent occurrence and it makes me feel
+ * good when I know they're empty.
+ */
+ memset(page, 0, size);
+
+ PageInit(page, size, sizeof(HashPageOpaqueData));
}
static void
_hash_setpagelock(Relation rel,
- BlockNumber blkno,
- int access)
+ BlockNumber blkno,
+ int access)
{
- ItemPointerData iptr;
-
- if (USELOCKING) {
- ItemPointerSet(&iptr, blkno, 1);
-
- switch (access) {
- case HASH_WRITE:
- RelationSetSingleWLockPage(rel, &iptr);
- break;
- case HASH_READ:
- RelationSetSingleRLockPage(rel, &iptr);
- break;
- default:
- elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %s",
- access, blkno, RelationGetRelationName(rel));
- break;
+ ItemPointerData iptr;
+
+ if (USELOCKING)
+ {
+ ItemPointerSet(&iptr, blkno, 1);
+
+ switch (access)
+ {
+ case HASH_WRITE:
+ RelationSetSingleWLockPage(rel, &iptr);
+ break;
+ case HASH_READ:
+ RelationSetSingleRLockPage(rel, &iptr);
+ break;
+ default:
+ elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %s",
+ access, blkno, RelationGetRelationName(rel));
+ break;
+ }
}
- }
}
static void
_hash_unsetpagelock(Relation rel,
- BlockNumber blkno,
- int access)
+ BlockNumber blkno,
+ int access)
{
- ItemPointerData iptr;
-
- if (USELOCKING) {
- ItemPointerSet(&iptr, blkno, 1);
-
- switch (access) {
- case HASH_WRITE:
- RelationUnsetSingleWLockPage(rel, &iptr);
- break;
- case HASH_READ:
- RelationUnsetSingleRLockPage(rel, &iptr);
- break;
- default:
- elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %s",
- access, blkno, RelationGetRelationName(rel));
- break;
+ ItemPointerData iptr;
+
+ if (USELOCKING)
+ {
+ ItemPointerSet(&iptr, blkno, 1);
+
+ switch (access)
+ {
+ case HASH_WRITE:
+ RelationUnsetSingleWLockPage(rel, &iptr);
+ break;
+ case HASH_READ:
+ RelationUnsetSingleRLockPage(rel, &iptr);
+ break;
+ default:
+ elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %s",
+ access, blkno, RelationGetRelationName(rel));
+ break;
+ }
}
- }
}
void
_hash_pagedel(Relation rel, ItemPointer tid)
{
- Buffer buf;
- Buffer metabuf;
- Page page;
- BlockNumber blkno;
- OffsetNumber offno;
- HashMetaPage metap;
- HashPageOpaque opaque;
-
- blkno = ItemPointerGetBlockNumber(tid);
- offno = ItemPointerGetOffsetNumber(tid);
-
- buf = _hash_getbuf(rel, blkno, HASH_WRITE);
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
- opaque = (HashPageOpaque) PageGetSpecialPointer(page);
-
- PageIndexTupleDelete(page, offno);
- _hash_wrtnorelbuf(rel, buf);
-
- if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) {
- buf = _hash_freeovflpage(rel, buf);
- if (BufferIsValid(buf)) {
- _hash_relbuf(rel, buf, HASH_WRITE);
+ Buffer buf;
+ Buffer metabuf;
+ Page page;
+ BlockNumber blkno;
+ OffsetNumber offno;
+ HashMetaPage metap;
+ HashPageOpaque opaque;
+
+ blkno = ItemPointerGetBlockNumber(tid);
+ offno = ItemPointerGetOffsetNumber(tid);
+
+ buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+ PageIndexTupleDelete(page, offno);
+ _hash_wrtnorelbuf(rel, buf);
+
+ if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE))
+ {
+ buf = _hash_freeovflpage(rel, buf);
+ if (BufferIsValid(buf))
+ {
+ _hash_relbuf(rel, buf, HASH_WRITE);
+ }
}
- } else {
- _hash_relbuf(rel, buf, HASH_WRITE);
- }
-
- metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
- ++metap->hashm_nkeys;
- _hash_wrtbuf(rel, metabuf);
+ else
+ {
+ _hash_relbuf(rel, buf, HASH_WRITE);
+ }
+
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+ ++metap->hashm_nkeys;
+ _hash_wrtbuf(rel, metabuf);
}
void
_hash_expandtable(Relation rel, Buffer metabuf)
{
- HashMetaPage metap;
- Bucket old_bucket;
- Bucket new_bucket;
- uint32 spare_ndx;
-
-/* elog(DEBUG, "_hash_expandtable: expanding..."); */
-
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
-
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
- new_bucket = ++metap->MAX_BUCKET;
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
- old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK);
-
- /*
- * If the split point is increasing (MAX_BUCKET's log base 2
- * * increases), we need to copy the current contents of the spare
- * split bucket to the next bucket.
- */
- spare_ndx = _hash_log2(metap->MAX_BUCKET + 1);
- if (spare_ndx > metap->OVFL_POINT) {
-
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
- metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT];
- metap->OVFL_POINT = spare_ndx;
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
- }
-
- if (new_bucket > metap->HIGH_MASK) {
-
- /* Starting a new doubling */
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
- metap->LOW_MASK = metap->HIGH_MASK;
- metap->HIGH_MASK = new_bucket | metap->LOW_MASK;
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
-
- }
- /* Relocate records to the new bucket */
- _hash_splitpage(rel, metabuf, old_bucket, new_bucket);
+ HashMetaPage metap;
+ Bucket old_bucket;
+ Bucket new_bucket;
+ uint32 spare_ndx;
+
+/* elog(DEBUG, "_hash_expandtable: expanding..."); */
+
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
+ new_bucket = ++metap->MAX_BUCKET;
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
+ old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK);
+
+ /*
+	 * If the split point is increasing (MAX_BUCKET's log base 2
+	 * increases), we need to copy the current contents of the spare split
+ * bucket to the next bucket.
+ */
+ spare_ndx = _hash_log2(metap->MAX_BUCKET + 1);
+ if (spare_ndx > metap->OVFL_POINT)
+ {
+
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
+ metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT];
+ metap->OVFL_POINT = spare_ndx;
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
+ }
+
+ if (new_bucket > metap->HIGH_MASK)
+ {
+
+ /* Starting a new doubling */
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
+ metap->LOW_MASK = metap->HIGH_MASK;
+ metap->HIGH_MASK = new_bucket | metap->LOW_MASK;
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
+
+ }
+ /* Relocate records to the new bucket */
+ _hash_splitpage(rel, metabuf, old_bucket, new_bucket);
}
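
The mask bookkeeping above is standard linear hashing: a key is first
mapped with the wide mask, and if that lands past the last bucket actually
created, it is remapped with the narrow one.  A toy version of the rule
(the real lookup lives in _hash_call over in hashutil.c; the values below
are just an example state after one split):

	#include <stdio.h>

	static unsigned
	toy_bucket(unsigned hash, unsigned maxbucket,
			   unsigned highmask, unsigned lowmask)
	{
		unsigned	bucket = hash & highmask;

		if (bucket > maxbucket)
			bucket &= lowmask;		/* not yet split: use the old image */
		return bucket;
	}

	int
	main(void)
	{
		unsigned	h;

		/* buckets 0..2 exist; highmask = 3, lowmask = 1 */
		for (h = 0; h < 8; h++)
			printf("hash %u -> bucket %u\n", h, toy_bucket(h, 2, 3, 1));
		return 0;
	}
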
@@ -450,224 +468,243 @@ _hash_expandtable(Relation rel, Buffer metabuf)
*/
static void
_hash_splitpage(Relation rel,
- Buffer metabuf,
- Bucket obucket,
- Bucket nbucket)
+ Buffer metabuf,
+ Bucket obucket,
+ Bucket nbucket)
{
- Bucket bucket;
- Buffer obuf;
- Buffer nbuf;
- Buffer ovflbuf;
- BlockNumber oblkno;
- BlockNumber nblkno;
- bool null;
- Datum datum;
- HashItem hitem;
- HashPageOpaque oopaque;
- HashPageOpaque nopaque;
- HashMetaPage metap;
- IndexTuple itup;
- int itemsz;
- OffsetNumber ooffnum;
- OffsetNumber noffnum;
- OffsetNumber omaxoffnum;
- Page opage;
- Page npage;
- TupleDesc itupdesc;
-
-/* elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d",
- obucket, obucket, nbucket);
+ Bucket bucket;
+ Buffer obuf;
+ Buffer nbuf;
+ Buffer ovflbuf;
+ BlockNumber oblkno;
+ BlockNumber nblkno;
+ bool null;
+ Datum datum;
+ HashItem hitem;
+ HashPageOpaque oopaque;
+ HashPageOpaque nopaque;
+ HashMetaPage metap;
+ IndexTuple itup;
+ int itemsz;
+ OffsetNumber ooffnum;
+ OffsetNumber noffnum;
+ OffsetNumber omaxoffnum;
+ Page opage;
+ Page npage;
+ TupleDesc itupdesc;
+
+/* elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d",
+ obucket, obucket, nbucket);
*/
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
-
- /* get the buffers & pages */
- oblkno = BUCKET_TO_BLKNO(obucket);
- nblkno = BUCKET_TO_BLKNO(nbucket);
- obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
- nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
- opage = BufferGetPage(obuf);
- npage = BufferGetPage(nbuf);
-
- /* initialize the new bucket */
- _hash_pageinit(npage, BufferGetPageSize(nbuf));
- nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
- nopaque->hasho_prevblkno = InvalidBlockNumber;
- nopaque->hasho_nextblkno = InvalidBlockNumber;
- nopaque->hasho_flag = LH_BUCKET_PAGE;
- nopaque->hasho_oaddr = InvalidOvflAddress;
- nopaque->hasho_bucket = nbucket;
- _hash_wrtnorelbuf(rel, nbuf);
-
- /*
- * make sure the old bucket isn't empty. advance 'opage' and
- * friends through the overflow bucket chain until we find a
- * non-empty page.
- *
- * XXX we should only need this once, if we are careful to
- * preserve the invariant that overflow pages are never empty.
- */
- _hash_checkpage(opage, LH_BUCKET_PAGE);
- oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
- if (PageIsEmpty(opage)) {
- oblkno = oopaque->hasho_nextblkno;
- _hash_relbuf(rel, obuf, HASH_WRITE);
- if (!BlockNumberIsValid(oblkno)) {
- /*
- * the old bucket is completely empty; of course, the new
- * bucket will be as well, but since it's a base bucket
- * page we don't care.
- */
- _hash_relbuf(rel, nbuf, HASH_WRITE);
- return;
- }
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ /* get the buffers & pages */
+ oblkno = BUCKET_TO_BLKNO(obucket);
+ nblkno = BUCKET_TO_BLKNO(nbucket);
obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
+ nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
opage = BufferGetPage(obuf);
- _hash_checkpage(opage, LH_OVERFLOW_PAGE);
- if (PageIsEmpty(opage)) {
- elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno);
- }
- oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
- }
-
- /*
- * we are now guaranteed that 'opage' is not empty. partition the
- * tuples in the old bucket between the old bucket and the new
- * bucket, advancing along their respective overflow bucket chains
- * and adding overflow pages as needed.
- */
- ooffnum = FirstOffsetNumber;
- omaxoffnum = PageGetMaxOffsetNumber(opage);
- for (;;) {
+ npage = BufferGetPage(nbuf);
+
+ /* initialize the new bucket */
+ _hash_pageinit(npage, BufferGetPageSize(nbuf));
+ nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
+ nopaque->hasho_prevblkno = InvalidBlockNumber;
+ nopaque->hasho_nextblkno = InvalidBlockNumber;
+ nopaque->hasho_flag = LH_BUCKET_PAGE;
+ nopaque->hasho_oaddr = InvalidOvflAddress;
+ nopaque->hasho_bucket = nbucket;
+ _hash_wrtnorelbuf(rel, nbuf);
+
/*
- * at each iteration through this loop, each of these variables
- * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum
+ * make sure the old bucket isn't empty. advance 'opage' and friends
+ * through the overflow bucket chain until we find a non-empty page.
+ *
+ * XXX we should only need this once, if we are careful to preserve the
+ * invariant that overflow pages are never empty.
*/
+ _hash_checkpage(opage, LH_BUCKET_PAGE);
+ oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
+ if (PageIsEmpty(opage))
+ {
+ oblkno = oopaque->hasho_nextblkno;
+ _hash_relbuf(rel, obuf, HASH_WRITE);
+ if (!BlockNumberIsValid(oblkno))
+ {
- /* check if we're at the end of the page */
- if (ooffnum > omaxoffnum) {
- /* at end of page, but check for overflow page */
- oblkno = oopaque->hasho_nextblkno;
- if (BlockNumberIsValid(oblkno)) {
- /*
- * we ran out of tuples on this particular page, but
- * we have more overflow pages; re-init values.
- */
- _hash_wrtbuf(rel, obuf);
+ /*
+ * the old bucket is completely empty; of course, the new
+ * bucket will be as well, but since it's a base bucket page
+ * we don't care.
+ */
+ _hash_relbuf(rel, nbuf, HASH_WRITE);
+ return;
+ }
obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
opage = BufferGetPage(obuf);
_hash_checkpage(opage, LH_OVERFLOW_PAGE);
- oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
-
- /* we're guaranteed that an ovfl page has at least 1 tuple */
- if (PageIsEmpty(opage)) {
- elog(WARN, "_hash_splitpage: empty ovfl page %d!",
- oblkno);
+ if (PageIsEmpty(opage))
+ {
+ elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno);
}
- ooffnum = FirstOffsetNumber;
- omaxoffnum = PageGetMaxOffsetNumber(opage);
- } else {
+ oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
+ }
+
+ /*
+ * we are now guaranteed that 'opage' is not empty. partition the
+ * tuples in the old bucket between the old bucket and the new bucket,
+ * advancing along their respective overflow bucket chains and adding
+ * overflow pages as needed.
+ */
+ ooffnum = FirstOffsetNumber;
+ omaxoffnum = PageGetMaxOffsetNumber(opage);
+ for (;;)
+ {
+
/*
- * we're at the end of the bucket chain, so now we're
- * really done with everything. before quitting, call
- * _hash_squeezebucket to ensure the tuples in the
- * bucket (including the overflow pages) are packed as
- * tightly as possible.
+ * at each iteration through this loop, each of these variables
+ * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum
*/
- _hash_wrtbuf(rel, obuf);
- _hash_wrtbuf(rel, nbuf);
- _hash_squeezebucket(rel, metap, obucket);
- return;
- }
- }
-
- /* hash on the tuple */
- hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));
- itup = &(hitem->hash_itup);
- itupdesc = RelationGetTupleDescriptor(rel);
- datum = index_getattr(itup, 1, itupdesc, &null);
- bucket = _hash_call(rel, metap, datum);
-
- if (bucket == nbucket) {
- /*
- * insert the tuple into the new bucket. if it doesn't
- * fit on the current page in the new bucket, we must
- * allocate a new overflow page and place the tuple on
- * that page instead.
- */
- itemsz = IndexTupleDSize(hitem->hash_itup)
- + (sizeof(HashItemData) - sizeof(IndexTupleData));
-
- itemsz = DOUBLEALIGN(itemsz);
-
- if (PageGetFreeSpace(npage) < itemsz) {
- ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf);
- _hash_wrtbuf(rel, nbuf);
- nbuf = ovflbuf;
- npage = BufferGetPage(nbuf);
- _hash_checkpage(npage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
- }
-
- noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
- PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED);
- _hash_wrtnorelbuf(rel, nbuf);
-
- /*
- * now delete the tuple from the old bucket. after this
- * section of code, 'ooffnum' will actually point to the
- * ItemId to which we would point if we had advanced it
- * before the deletion (PageIndexTupleDelete repacks the
- * ItemId array). this also means that 'omaxoffnum' is
- * exactly one less than it used to be, so we really can
- * just decrement it instead of calling
- * PageGetMaxOffsetNumber.
- */
- PageIndexTupleDelete(opage, ooffnum);
- _hash_wrtnorelbuf(rel, obuf);
- omaxoffnum = OffsetNumberPrev(omaxoffnum);
-
- /*
- * tidy up. if the old page was an overflow page and it
- * is now empty, we must free it (we want to preserve the
- * invariant that overflow pages cannot be empty).
- */
- if (PageIsEmpty(opage) &&
- (oopaque->hasho_flag & LH_OVERFLOW_PAGE)) {
- obuf = _hash_freeovflpage(rel, obuf);
-
- /* check that we're not through the bucket chain */
- if (BufferIsInvalid(obuf)) {
- _hash_wrtbuf(rel, nbuf);
- _hash_squeezebucket(rel, metap, obucket);
- return;
+
+ /* check if we're at the end of the page */
+ if (ooffnum > omaxoffnum)
+ {
+ /* at end of page, but check for overflow page */
+ oblkno = oopaque->hasho_nextblkno;
+ if (BlockNumberIsValid(oblkno))
+ {
+
+ /*
+ * we ran out of tuples on this particular page, but we
+ * have more overflow pages; re-init values.
+ */
+ _hash_wrtbuf(rel, obuf);
+ obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
+ opage = BufferGetPage(obuf);
+ _hash_checkpage(opage, LH_OVERFLOW_PAGE);
+ oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
+
+ /* we're guaranteed that an ovfl page has at least 1 tuple */
+ if (PageIsEmpty(opage))
+ {
+ elog(WARN, "_hash_splitpage: empty ovfl page %d!",
+ oblkno);
+ }
+ ooffnum = FirstOffsetNumber;
+ omaxoffnum = PageGetMaxOffsetNumber(opage);
+ }
+ else
+ {
+
+ /*
+ * we're at the end of the bucket chain, so now we're
+ * really done with everything. before quitting, call
+ * _hash_squeezebucket to ensure the tuples in the bucket
+ * (including the overflow pages) are packed as tightly as
+ * possible.
+ */
+ _hash_wrtbuf(rel, obuf);
+ _hash_wrtbuf(rel, nbuf);
+ _hash_squeezebucket(rel, metap, obucket);
+ return;
+ }
}
-
- /*
- * re-init. again, we're guaranteed that an ovfl page
- * has at least one tuple.
- */
- opage = BufferGetPage(obuf);
- _hash_checkpage(opage, LH_OVERFLOW_PAGE);
- oblkno = BufferGetBlockNumber(obuf);
- oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
- if (PageIsEmpty(opage)) {
- elog(WARN, "_hash_splitpage: empty overflow page %d",
- oblkno);
+
+ /* hash on the tuple */
+ hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));
+ itup = &(hitem->hash_itup);
+ itupdesc = RelationGetTupleDescriptor(rel);
+ datum = index_getattr(itup, 1, itupdesc, &null);
+ bucket = _hash_call(rel, metap, datum);
+
+ if (bucket == nbucket)
+ {
+
+ /*
+ * insert the tuple into the new bucket. if it doesn't fit on
+ * the current page in the new bucket, we must allocate a new
+ * overflow page and place the tuple on that page instead.
+ */
+ itemsz = IndexTupleDSize(hitem->hash_itup)
+ + (sizeof(HashItemData) - sizeof(IndexTupleData));
+
+ itemsz = DOUBLEALIGN(itemsz);
+
+ if (PageGetFreeSpace(npage) < itemsz)
+ {
+ ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf);
+ _hash_wrtbuf(rel, nbuf);
+ nbuf = ovflbuf;
+ npage = BufferGetPage(nbuf);
+ _hash_checkpage(npage, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ }
+
+ noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
+ PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED);
+ _hash_wrtnorelbuf(rel, nbuf);
+
+ /*
+ * now delete the tuple from the old bucket. after this
+ * section of code, 'ooffnum' will actually point to the
+ * ItemId to which we would point if we had advanced it before
+ * the deletion (PageIndexTupleDelete repacks the ItemId
+ * array). this also means that 'omaxoffnum' is exactly one
+ * less than it used to be, so we really can just decrement it
+ * instead of calling PageGetMaxOffsetNumber.
+ */
+ PageIndexTupleDelete(opage, ooffnum);
+ _hash_wrtnorelbuf(rel, obuf);
+ omaxoffnum = OffsetNumberPrev(omaxoffnum);
+
+ /*
+ * tidy up. if the old page was an overflow page and it is
+ * now empty, we must free it (we want to preserve the
+ * invariant that overflow pages cannot be empty).
+ */
+ if (PageIsEmpty(opage) &&
+ (oopaque->hasho_flag & LH_OVERFLOW_PAGE))
+ {
+ obuf = _hash_freeovflpage(rel, obuf);
+
+ /* check that we're not through the bucket chain */
+ if (BufferIsInvalid(obuf))
+ {
+ _hash_wrtbuf(rel, nbuf);
+ _hash_squeezebucket(rel, metap, obucket);
+ return;
+ }
+
+ /*
+ * re-init. again, we're guaranteed that an ovfl page has
+ * at least one tuple.
+ */
+ opage = BufferGetPage(obuf);
+ _hash_checkpage(opage, LH_OVERFLOW_PAGE);
+ oblkno = BufferGetBlockNumber(obuf);
+ oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
+ if (PageIsEmpty(opage))
+ {
+ elog(WARN, "_hash_splitpage: empty overflow page %d",
+ oblkno);
+ }
+ ooffnum = FirstOffsetNumber;
+ omaxoffnum = PageGetMaxOffsetNumber(opage);
+ }
+ }
+ else
+ {
+
+ /*
+ * the tuple stays on this page. we didn't move anything, so
+ * we didn't delete anything and therefore we don't have to
+ * change 'omaxoffnum'.
+ *
+ * XXX any hash value from [0, nbucket-1] will map to this
+ * bucket, which doesn't make sense to me.
+ */
+ ooffnum = OffsetNumberNext(ooffnum);
}
- ooffnum = FirstOffsetNumber;
- omaxoffnum = PageGetMaxOffsetNumber(opage);
- }
- } else {
- /*
- * the tuple stays on this page. we didn't move anything,
- * so we didn't delete anything and therefore we don't
- * have to change 'omaxoffnum'.
- *
- * XXX any hash value from [0, nbucket-1] will map to this
- * bucket, which doesn't make sense to me.
- */
- ooffnum = OffsetNumberNext(ooffnum);
}
- }
- /*NOTREACHED*/
+ /* NOTREACHED */
}
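
The deletion bookkeeping in the split loop above is subtle: PageIndexTupleDelete repacks the ItemId array, so once the tuple at 'ooffnum' is gone the following tuple has already slid into that slot, and the cursor must not be advanced; only 'omaxoffnum' shrinks by one. A toy sketch of that invariant, using a plain C array in place of the real Page API (all names below are illustrative, not PostgreSQL's):

/* toy model of PageIndexTupleDelete's repacking -- not the real Page API */
#include <stdio.h>

int
main(void)
{
    int         items[] = {10, 20, 30, 40};
    int         maxoff = 4;     /* omaxoffnum analogue (1-based) */
    int         off = 2;        /* cursor on the 2nd item (20) */
    int         i;

    /* delete items[off]: later items slide down, as on a repacked page */
    for (i = off; i < maxoff; i++)
        items[i - 1] = items[i];
    maxoff--;                   /* omaxoffnum = OffsetNumberPrev(omaxoffnum) */

    /* without advancing 'off', the cursor already sees the next item */
    printf("cursor -> %d, %d items left\n", items[off - 1], maxoff);
    return 0;
}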
diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c
index bd776d68c0d..79fa33f747c 100644
--- a/src/backend/access/hash/hashscan.c
+++ b/src/backend/access/hash/hashscan.c
@@ -1,160 +1,167 @@
/*-------------------------------------------------------------------------
*
* hashscan.c--
- * manage scans on hash tables
+ * manage scans on hash tables
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.8 1996/11/15 18:36:31 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.9 1997/09/07 04:38:01 momjian Exp $
*
* NOTES
- * Because we can be doing an index scan on a relation while we
- * update it, we need to avoid missing data that moves around in
- * the index. The routines and global variables in this file
- * guarantee that all scans in the local address space stay
- * correctly positioned. This is all we need to worry about, since
- * write locking guarantees that no one else will be on the same
- * page at the same time as we are.
+ * Because we can be doing an index scan on a relation while we
+ * update it, we need to avoid missing data that moves around in
+ * the index. The routines and global variables in this file
+ * guarantee that all scans in the local address space stay
+ * correctly positioned. This is all we need to worry about, since
+ * write locking guarantees that no one else will be on the same
+ * page at the same time as we are.
*
- * The scheme is to manage a list of active scans in the current
- * backend. Whenever we add or remove records from an index, we
- * check the list of active scans to see if any has been affected.
- * A scan is affected only if it is on the same relation, and the
- * same page, as the update.
+ * The scheme is to manage a list of active scans in the current
+ * backend. Whenever we add or remove records from an index, we
+ * check the list of active scans to see if any has been affected.
+ * A scan is affected only if it is on the same relation, and the
+ * same page, as the update.
*
*-------------------------------------------------------------------------
*/
#include <postgres.h>
-
+
#include <access/hash.h>
-static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
-static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
+static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
+static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
-typedef struct HashScanListData {
- IndexScanDesc hashsl_scan;
- struct HashScanListData *hashsl_next;
-} HashScanListData;
+typedef struct HashScanListData
+{
+ IndexScanDesc hashsl_scan;
+ struct HashScanListData *hashsl_next;
+} HashScanListData;
-typedef HashScanListData *HashScanList;
+typedef HashScanListData *HashScanList;
-static HashScanList HashScans = (HashScanList) NULL;
+static HashScanList HashScans = (HashScanList) NULL;
/*
- * _Hash_regscan() -- register a new scan.
+ * _Hash_regscan() -- register a new scan.
*/
void
_hash_regscan(IndexScanDesc scan)
{
- HashScanList new_el;
-
- new_el = (HashScanList) palloc(sizeof(HashScanListData));
- new_el->hashsl_scan = scan;
- new_el->hashsl_next = HashScans;
- HashScans = new_el;
+ HashScanList new_el;
+
+ new_el = (HashScanList) palloc(sizeof(HashScanListData));
+ new_el->hashsl_scan = scan;
+ new_el->hashsl_next = HashScans;
+ HashScans = new_el;
}
/*
- * _hash_dropscan() -- drop a scan from the scan list
+ * _hash_dropscan() -- drop a scan from the scan list
*/
void
_hash_dropscan(IndexScanDesc scan)
{
- HashScanList chk, last;
-
- last = (HashScanList) NULL;
- for (chk = HashScans;
- chk != (HashScanList) NULL && chk->hashsl_scan != scan;
- chk = chk->hashsl_next) {
- last = chk;
- }
-
- if (chk == (HashScanList) NULL)
- elog(WARN, "hash scan list trashed; can't find 0x%lx", scan);
-
- if (last == (HashScanList) NULL)
- HashScans = chk->hashsl_next;
- else
- last->hashsl_next = chk->hashsl_next;
-
- pfree (chk);
+ HashScanList chk,
+ last;
+
+ last = (HashScanList) NULL;
+ for (chk = HashScans;
+ chk != (HashScanList) NULL && chk->hashsl_scan != scan;
+ chk = chk->hashsl_next)
+ {
+ last = chk;
+ }
+
+ if (chk == (HashScanList) NULL)
+ elog(WARN, "hash scan list trashed; can't find 0x%lx", (long) scan);
+
+ if (last == (HashScanList) NULL)
+ HashScans = chk->hashsl_next;
+ else
+ last->hashsl_next = chk->hashsl_next;
+
+ pfree(chk);
}
void
_hash_adjscans(Relation rel, ItemPointer tid)
{
- HashScanList l;
- Oid relid;
-
- relid = rel->rd_id;
- for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next) {
- if (relid == l->hashsl_scan->relation->rd_id)
- _hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid),
- ItemPointerGetOffsetNumber(tid));
- }
+ HashScanList l;
+ Oid relid;
+
+ relid = rel->rd_id;
+ for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next)
+ {
+ if (relid == l->hashsl_scan->relation->rd_id)
+ _hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid),
+ ItemPointerGetOffsetNumber(tid));
+ }
}
static void
_hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{
- ItemPointer current;
- Buffer buf;
- Buffer metabuf;
- HashScanOpaque so;
-
- if (!_hash_scantouched(scan, blkno, offno))
- return;
-
- metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ);
-
- so = (HashScanOpaque) scan->opaque;
- buf = so->hashso_curbuf;
-
- current = &(scan->currentItemData);
- if (ItemPointerIsValid(current)
- && ItemPointerGetBlockNumber(current) == blkno
- && ItemPointerGetOffsetNumber(current) >= offno) {
- _hash_step(scan, &buf, BackwardScanDirection, metabuf);
- so->hashso_curbuf = buf;
- }
-
- current = &(scan->currentMarkData);
- if (ItemPointerIsValid(current)
- && ItemPointerGetBlockNumber(current) == blkno
- && ItemPointerGetOffsetNumber(current) >= offno) {
- ItemPointerData tmp;
- tmp = *current;
- *current = scan->currentItemData;
- scan->currentItemData = tmp;
- _hash_step(scan, &buf, BackwardScanDirection, metabuf);
- so->hashso_mrkbuf = buf;
- tmp = *current;
- *current = scan->currentItemData;
- scan->currentItemData = tmp;
- }
+ ItemPointer current;
+ Buffer buf;
+ Buffer metabuf;
+ HashScanOpaque so;
+
+ if (!_hash_scantouched(scan, blkno, offno))
+ return;
+
+ metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ);
+
+ so = (HashScanOpaque) scan->opaque;
+ buf = so->hashso_curbuf;
+
+ current = &(scan->currentItemData);
+ if (ItemPointerIsValid(current)
+ && ItemPointerGetBlockNumber(current) == blkno
+ && ItemPointerGetOffsetNumber(current) >= offno)
+ {
+ _hash_step(scan, &buf, BackwardScanDirection, metabuf);
+ so->hashso_curbuf = buf;
+ }
+
+ current = &(scan->currentMarkData);
+ if (ItemPointerIsValid(current)
+ && ItemPointerGetBlockNumber(current) == blkno
+ && ItemPointerGetOffsetNumber(current) >= offno)
+ {
+ ItemPointerData tmp;
+
+ tmp = *current;
+ *current = scan->currentItemData;
+ scan->currentItemData = tmp;
+ _hash_step(scan, &buf, BackwardScanDirection, metabuf);
+ so->hashso_mrkbuf = buf;
+ tmp = *current;
+ *current = scan->currentItemData;
+ scan->currentItemData = tmp;
+ }
}
-static bool
+static bool
_hash_scantouched(IndexScanDesc scan,
- BlockNumber blkno,
- OffsetNumber offno)
+ BlockNumber blkno,
+ OffsetNumber offno)
{
- ItemPointer current;
-
- current = &(scan->currentItemData);
- if (ItemPointerIsValid(current)
- && ItemPointerGetBlockNumber(current) == blkno
- && ItemPointerGetOffsetNumber(current) >= offno)
- return (true);
-
- current = &(scan->currentMarkData);
- if (ItemPointerIsValid(current)
- && ItemPointerGetBlockNumber(current) == blkno
- && ItemPointerGetOffsetNumber(current) >= offno)
- return (true);
-
- return (false);
+ ItemPointer current;
+
+ current = &(scan->currentItemData);
+ if (ItemPointerIsValid(current)
+ && ItemPointerGetBlockNumber(current) == blkno
+ && ItemPointerGetOffsetNumber(current) >= offno)
+ return (true);
+
+ current = &(scan->currentMarkData);
+ if (ItemPointerIsValid(current)
+ && ItemPointerGetBlockNumber(current) == blkno
+ && ItemPointerGetOffsetNumber(current) >= offno)
+ return (true);
+
+ return (false);
}
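
The scan list managed in this file is a plain LIFO linked list: _hash_regscan pushes a new element onto the head, and _hash_dropscan walks to the matching element and unlinks it. A self-contained sketch of the same pattern, with malloc/free standing in for palloc/pfree and an int id standing in for the IndexScanDesc (names are illustrative; _hash_dropscan itself tracks a 'last' pointer rather than a pointer-to-pointer, but the effect is the same):

#include <stdio.h>
#include <stdlib.h>

typedef struct ToyScanList
{
    int         scan_id;
    struct ToyScanList *next;
} ToyScanList;

static ToyScanList *scans = NULL;

static void
toy_regscan(int id)
{
    ToyScanList *new_el = malloc(sizeof(ToyScanList));

    new_el->scan_id = id;
    new_el->next = scans;       /* push onto the front of the list */
    scans = new_el;
}

static void
toy_dropscan(int id)
{
    ToyScanList **p = &scans;

    while (*p != NULL && (*p)->scan_id != id)
        p = &(*p)->next;
    if (*p != NULL)
    {
        ToyScanList *chk = *p;

        *p = chk->next;         /* unlink, then free the element */
        free(chk);
    }
}

int
main(void)
{
    toy_regscan(1);
    toy_regscan(2);
    toy_dropscan(2);
    printf("remaining head: %d\n", scans->scan_id);     /* prints 1 */
    return 0;
}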
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index bc67b7f5aac..0a42ad05065 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -1,423 +1,467 @@
/*-------------------------------------------------------------------------
*
* hashsearch.c--
- * search code for postgres hash tables
+ * search code for postgres hash tables
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.10 1997/06/28 05:45:40 vadim Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.11 1997/09/07 04:38:02 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include <postgres.h>
-
+
#include <access/hash.h>
#include <storage/bufmgr.h>
#ifndef HAVE_MEMMOVE
-# include "regex/utils.h"
+#include "regex/utils.h"
#else
-# include <string.h>
-#endif
+#include <string.h>
+#endif
/*
- * _hash_search() -- Finds the page/bucket that the contains the
- * scankey and loads it into *bufP. the buffer has a read lock.
+ * _hash_search() -- Finds the page/bucket that contains the
+ * scankey and loads it into *bufP. the buffer has a read lock.
*/
void
_hash_search(Relation rel,
- int keysz,
- ScanKey scankey,
- Buffer *bufP,
- HashMetaPage metap)
+ int keysz,
+ ScanKey scankey,
+ Buffer * bufP,
+ HashMetaPage metap)
{
- BlockNumber blkno;
- Datum keyDatum;
- Bucket bucket;
-
- if (scankey == (ScanKey) NULL ||
- (keyDatum = scankey[0].sk_argument) == (Datum) NULL) {
- /*
- * If the scankey argument is NULL, all tuples will satisfy
- * the scan so we start the scan at the first bucket (bucket
- * 0).
- */
- bucket = 0;
- } else {
- bucket = _hash_call(rel, metap, keyDatum);
- }
-
- blkno = BUCKET_TO_BLKNO(bucket);
-
- *bufP = _hash_getbuf(rel, blkno, HASH_READ);
+ BlockNumber blkno;
+ Datum keyDatum;
+ Bucket bucket;
+
+ if (scankey == (ScanKey) NULL ||
+ (keyDatum = scankey[0].sk_argument) == (Datum) NULL)
+ {
+
+ /*
+ * If the scankey argument is NULL, all tuples will satisfy the
+ * scan so we start the scan at the first bucket (bucket 0).
+ */
+ bucket = 0;
+ }
+ else
+ {
+ bucket = _hash_call(rel, metap, keyDatum);
+ }
+
+ blkno = BUCKET_TO_BLKNO(bucket);
+
+ *bufP = _hash_getbuf(rel, blkno, HASH_READ);
}
/*
- * _hash_next() -- Get the next item in a scan.
+ * _hash_next() -- Get the next item in a scan.
*
- * On entry, we have a valid currentItemData in the scan, and a
- * read lock on the page that contains that item. We do not have
- * the page pinned. We return the next item in the scan. On
- * exit, we have the page containing the next item locked but not
- * pinned.
+ * On entry, we have a valid currentItemData in the scan, and a
+ * read lock on the page that contains that item. We do not have
+ * the page pinned. We return the next item in the scan. On
+ * exit, we have the page containing the next item locked but not
+ * pinned.
*/
RetrieveIndexResult
_hash_next(IndexScanDesc scan, ScanDirection dir)
{
- Relation rel;
- Buffer buf;
- Buffer metabuf;
- Page page;
- OffsetNumber offnum;
- RetrieveIndexResult res;
- ItemPointer current;
- HashItem hitem;
- IndexTuple itup;
- HashScanOpaque so;
-
- rel = scan->relation;
- so = (HashScanOpaque) scan->opaque;
- current = &(scan->currentItemData);
-
- metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
-
- /*
- * XXX 10 may 91: somewhere there's a bug in our management of the
- * cached buffer for this scan. wei discovered it. the following
- * is a workaround so he can work until i figure out what's going on.
- */
-
- if (!BufferIsValid(so->hashso_curbuf)) {
- so->hashso_curbuf = _hash_getbuf(rel,
- ItemPointerGetBlockNumber(current),
- HASH_READ);
- }
-
- /* we still have the buffer pinned and locked */
- buf = so->hashso_curbuf;
-
- /*
- * step to next valid tuple. note that _hash_step releases our
- * lock on 'metabuf'; if we switch to a new 'buf' while looking
- * for the next tuple, we come back with a lock on that buffer.
- */
- if (!_hash_step(scan, &buf, dir, metabuf)) {
- return ((RetrieveIndexResult) NULL);
- }
-
- /* if we're here, _hash_step found a valid tuple */
- current = &(scan->currentItemData);
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
- hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &hitem->hash_itup;
- res = FormRetrieveIndexResult(current, &(itup->t_tid));
-
- return (res);
+ Relation rel;
+ Buffer buf;
+ Buffer metabuf;
+ Page page;
+ OffsetNumber offnum;
+ RetrieveIndexResult res;
+ ItemPointer current;
+ HashItem hitem;
+ IndexTuple itup;
+ HashScanOpaque so;
+
+ rel = scan->relation;
+ so = (HashScanOpaque) scan->opaque;
+ current = &(scan->currentItemData);
+
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+
+ /*
+ * XXX 10 may 91: somewhere there's a bug in our management of the
+ * cached buffer for this scan. wei discovered it. the following is
+ * a workaround so he can work until i figure out what's going on.
+ */
+
+ if (!BufferIsValid(so->hashso_curbuf))
+ {
+ so->hashso_curbuf = _hash_getbuf(rel,
+ ItemPointerGetBlockNumber(current),
+ HASH_READ);
+ }
+
+ /* we still have the buffer pinned and locked */
+ buf = so->hashso_curbuf;
+
+ /*
+ * step to next valid tuple. note that _hash_step releases our lock
+ * on 'metabuf'; if we switch to a new 'buf' while looking for the
+ * next tuple, we come back with a lock on that buffer.
+ */
+ if (!_hash_step(scan, &buf, dir, metabuf))
+ {
+ return ((RetrieveIndexResult) NULL);
+ }
+
+ /* if we're here, _hash_step found a valid tuple */
+ current = &(scan->currentItemData);
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
+ itup = &hitem->hash_itup;
+ res = FormRetrieveIndexResult(current, &(itup->t_tid));
+
+ return (res);
}
static void
_hash_readnext(Relation rel,
- Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
+ Buffer * bufp, Page * pagep, HashPageOpaque * opaquep)
{
- BlockNumber blkno;
-
- blkno = (*opaquep)->hasho_nextblkno;
- _hash_relbuf(rel, *bufp, HASH_READ);
- *bufp = InvalidBuffer;
- if (BlockNumberIsValid(blkno)) {
- *bufp = _hash_getbuf(rel, blkno, HASH_READ);
- *pagep = BufferGetPage(*bufp);
- _hash_checkpage(*pagep, LH_OVERFLOW_PAGE);
- *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
- Assert(!PageIsEmpty(*pagep));
- }
+ BlockNumber blkno;
+
+ blkno = (*opaquep)->hasho_nextblkno;
+ _hash_relbuf(rel, *bufp, HASH_READ);
+ *bufp = InvalidBuffer;
+ if (BlockNumberIsValid(blkno))
+ {
+ *bufp = _hash_getbuf(rel, blkno, HASH_READ);
+ *pagep = BufferGetPage(*bufp);
+ _hash_checkpage(*pagep, LH_OVERFLOW_PAGE);
+ *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
+ Assert(!PageIsEmpty(*pagep));
+ }
}
static void
_hash_readprev(Relation rel,
- Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
+ Buffer * bufp, Page * pagep, HashPageOpaque * opaquep)
{
- BlockNumber blkno;
-
- blkno = (*opaquep)->hasho_prevblkno;
- _hash_relbuf(rel, *bufp, HASH_READ);
- *bufp = InvalidBuffer;
- if (BlockNumberIsValid(blkno)) {
- *bufp = _hash_getbuf(rel, blkno, HASH_READ);
- *pagep = BufferGetPage(*bufp);
- _hash_checkpage(*pagep, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
- *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
- if (PageIsEmpty(*pagep)) {
- Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE);
- _hash_relbuf(rel, *bufp, HASH_READ);
- *bufp = InvalidBuffer;
+ BlockNumber blkno;
+
+ blkno = (*opaquep)->hasho_prevblkno;
+ _hash_relbuf(rel, *bufp, HASH_READ);
+ *bufp = InvalidBuffer;
+ if (BlockNumberIsValid(blkno))
+ {
+ *bufp = _hash_getbuf(rel, blkno, HASH_READ);
+ *pagep = BufferGetPage(*bufp);
+ _hash_checkpage(*pagep, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
+ if (PageIsEmpty(*pagep))
+ {
+ Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE);
+ _hash_relbuf(rel, *bufp, HASH_READ);
+ *bufp = InvalidBuffer;
+ }
}
- }
}
/*
- * _hash_first() -- Find the first item in a scan.
+ * _hash_first() -- Find the first item in a scan.
*
- * Return the RetrieveIndexResult of the first item in the tree that
- * satisfies the qualificatin associated with the scan descriptor. On
- * exit, the page containing the current index tuple is read locked
- * and pinned, and the scan's opaque data entry is updated to
- * include the buffer.
+ * Return the RetrieveIndexResult of the first item in the tree that
+ * satisfies the qualification associated with the scan descriptor. On
+ * exit, the page containing the current index tuple is read locked
+ * and pinned, and the scan's opaque data entry is updated to
+ * include the buffer.
*/
RetrieveIndexResult
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
- Relation rel;
- Buffer buf;
- Buffer metabuf;
- Page page;
- HashPageOpaque opaque;
- HashMetaPage metap;
- HashItem hitem;
- IndexTuple itup;
- ItemPointer current;
- OffsetNumber offnum;
- RetrieveIndexResult res;
- HashScanOpaque so;
-
- rel = scan->relation;
- so = (HashScanOpaque) scan->opaque;
- current = &(scan->currentItemData);
-
- metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
-
- /*
- * XXX -- The attribute number stored in the scan key is the attno
- * in the heap relation. We need to transmogrify this into
- * the index relation attno here. For the moment, we have
- * hardwired attno == 1.
- */
-
- /* find the correct bucket page and load it into buf */
- _hash_search(rel, 1, scan->keyData, &buf, metap);
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE);
- opaque = (HashPageOpaque) PageGetSpecialPointer(page);
-
- /*
- * if we are scanning forward, we need to find the first non-empty
- * page (if any) in the bucket chain. since overflow pages are
- * never empty, this had better be either the bucket page or the
- * first overflow page.
- *
- * if we are scanning backward, we always go all the way to the
- * end of the bucket chain.
- */
- if (PageIsEmpty(page)) {
- if (BlockNumberIsValid(opaque->hasho_nextblkno)) {
- _hash_readnext(rel, &buf, &page, &opaque);
- } else {
- ItemPointerSetInvalid(current);
- so->hashso_curbuf = InvalidBuffer;
- /*
- * If there is no scankeys, all tuples will satisfy
- * the scan - so we continue in _hash_step to get
- * tuples from all buckets. - vadim 04/29/97
- */
- if ( scan->numberOfKeys >= 1 )
- {
- _hash_relbuf(rel, buf, HASH_READ);
- _hash_relbuf(rel, metabuf, HASH_READ);
- return ((RetrieveIndexResult) NULL);
- }
+ Relation rel;
+ Buffer buf;
+ Buffer metabuf;
+ Page page;
+ HashPageOpaque opaque;
+ HashMetaPage metap;
+ HashItem hitem;
+ IndexTuple itup;
+ ItemPointer current;
+ OffsetNumber offnum;
+ RetrieveIndexResult res;
+ HashScanOpaque so;
+
+ rel = scan->relation;
+ so = (HashScanOpaque) scan->opaque;
+ current = &(scan->currentItemData);
+
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ /*
+ * XXX -- The attribute number stored in the scan key is the attno in
+ * the heap relation. We need to transmogrify this into the index
+ * relation attno here. For the moment, we have hardwired attno == 1.
+ */
+
+ /* find the correct bucket page and load it into buf */
+ _hash_search(rel, 1, scan->keyData, &buf, metap);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+ /*
+ * if we are scanning forward, we need to find the first non-empty
+ * page (if any) in the bucket chain. since overflow pages are never
+ * empty, this had better be either the bucket page or the first
+ * overflow page.
+ *
+ * if we are scanning backward, we always go all the way to the end of
+ * the bucket chain.
+ */
+ if (PageIsEmpty(page))
+ {
+ if (BlockNumberIsValid(opaque->hasho_nextblkno))
+ {
+ _hash_readnext(rel, &buf, &page, &opaque);
+ }
+ else
+ {
+ ItemPointerSetInvalid(current);
+ so->hashso_curbuf = InvalidBuffer;
+
+ /*
+ * If there are no scankeys, all tuples will satisfy the scan -
+ * so we continue in _hash_step to get tuples from all
+ * buckets. - vadim 04/29/97
+ */
+ if (scan->numberOfKeys >= 1)
+ {
+ _hash_relbuf(rel, buf, HASH_READ);
+ _hash_relbuf(rel, metabuf, HASH_READ);
+ return ((RetrieveIndexResult) NULL);
+ }
+ }
}
- }
- if (ScanDirectionIsBackward(dir)) {
- while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
- _hash_readnext(rel, &buf, &page, &opaque);
+ if (ScanDirectionIsBackward(dir))
+ {
+ while (BlockNumberIsValid(opaque->hasho_nextblkno))
+ {
+ _hash_readnext(rel, &buf, &page, &opaque);
+ }
+ }
+
+ if (!_hash_step(scan, &buf, dir, metabuf))
+ {
+ return ((RetrieveIndexResult) NULL);
}
- }
-
- if (!_hash_step(scan, &buf, dir, metabuf)) {
- return ((RetrieveIndexResult) NULL);
- }
-
- /* if we're here, _hash_step found a valid tuple */
- current = &(scan->currentItemData);
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
- hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &hitem->hash_itup;
- res = FormRetrieveIndexResult(current, &(itup->t_tid));
-
- return (res);
+
+ /* if we're here, _hash_step found a valid tuple */
+ current = &(scan->currentItemData);
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
+ itup = &hitem->hash_itup;
+ res = FormRetrieveIndexResult(current, &(itup->t_tid));
+
+ return (res);
}
/*
- * _hash_step() -- step to the next valid item in a scan in the bucket.
+ * _hash_step() -- step to the next valid item in a scan in the bucket.
*
- * If no valid record exists in the requested direction, return
- * false. Else, return true and set the CurrentItemData for the
- * scan to the right thing.
- *
- * 'bufP' points to the buffer which contains the current page
- * that we'll step through.
+ * If no valid record exists in the requested direction, return
+ * false. Else, return true and set the CurrentItemData for the
+ * scan to the right thing.
*
- * 'metabuf' is released when this returns.
+ * 'bufP' points to the buffer which contains the current page
+ * that we'll step through.
+ *
+ * 'metabuf' is released when this returns.
*/
bool
-_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
+_hash_step(IndexScanDesc scan, Buffer * bufP, ScanDirection dir, Buffer metabuf)
{
- Relation rel;
- ItemPointer current;
- HashScanOpaque so;
- int allbuckets;
- HashMetaPage metap;
- Buffer buf;
- Page page;
- HashPageOpaque opaque;
- OffsetNumber maxoff;
- OffsetNumber offnum;
- Bucket bucket;
- BlockNumber blkno;
- HashItem hitem;
- IndexTuple itup;
-
- rel = scan->relation;
- current = &(scan->currentItemData);
- so = (HashScanOpaque) scan->opaque;
- allbuckets = (scan->numberOfKeys < 1);
-
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
-
- buf = *bufP;
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
- opaque = (HashPageOpaque) PageGetSpecialPointer(page);
-
- /*
- * If _hash_step is called from _hash_first, current will not be
- * valid, so we can't dereference it. However, in that case, we
- * presumably want to start at the beginning/end of the page...
- */
- maxoff = PageGetMaxOffsetNumber(page);
- if (ItemPointerIsValid(current)) {
- offnum = ItemPointerGetOffsetNumber(current);
- } else {
- offnum = InvalidOffsetNumber;
- }
-
- /*
- * 'offnum' now points to the last tuple we have seen (if any).
- *
- * continue to step through tuples until:
- * 1) we get to the end of the bucket chain or
- * 2) we find a valid tuple.
- */
- do {
- bucket = opaque->hasho_bucket;
-
- switch (dir) {
- case ForwardScanDirection:
- if (offnum != InvalidOffsetNumber) {
- offnum = OffsetNumberNext(offnum); /* move forward */
- } else {
- offnum = FirstOffsetNumber; /* new page */
- }
- while (offnum > maxoff) {
- /*
- * either this page is empty (maxoff ==
- * InvalidOffsetNumber) or we ran off the end.
- */
- _hash_readnext(rel, &buf, &page, &opaque);
- if (BufferIsInvalid(buf)) { /* end of chain */
- if (allbuckets && bucket < metap->hashm_maxbucket) {
- ++bucket;
- blkno = BUCKET_TO_BLKNO(bucket);
- buf = _hash_getbuf(rel, blkno, HASH_READ);
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE);
- opaque = (HashPageOpaque) PageGetSpecialPointer(page);
- Assert(opaque->hasho_bucket == bucket);
- while (PageIsEmpty(page) &&
- BlockNumberIsValid(opaque->hasho_nextblkno)) {
- _hash_readnext(rel, &buf, &page, &opaque);
+ Relation rel;
+ ItemPointer current;
+ HashScanOpaque so;
+ int allbuckets;
+ HashMetaPage metap;
+ Buffer buf;
+ Page page;
+ HashPageOpaque opaque;
+ OffsetNumber maxoff;
+ OffsetNumber offnum;
+ Bucket bucket;
+ BlockNumber blkno;
+ HashItem hitem;
+ IndexTuple itup;
+
+ rel = scan->relation;
+ current = &(scan->currentItemData);
+ so = (HashScanOpaque) scan->opaque;
+ allbuckets = (scan->numberOfKeys < 1);
+
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ buf = *bufP;
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+ /*
+ * If _hash_step is called from _hash_first, current will not be
+ * valid, so we can't dereference it. However, in that case, we
+ * presumably want to start at the beginning/end of the page...
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+ if (ItemPointerIsValid(current))
+ {
+ offnum = ItemPointerGetOffsetNumber(current);
+ }
+ else
+ {
+ offnum = InvalidOffsetNumber;
+ }
+
+ /*
+ * 'offnum' now points to the last tuple we have seen (if any).
+ *
+ * continue to step through tuples until: 1) we get to the end of the
+ * bucket chain or 2) we find a valid tuple.
+ */
+ do
+ {
+ bucket = opaque->hasho_bucket;
+
+ switch (dir)
+ {
+ case ForwardScanDirection:
+ if (offnum != InvalidOffsetNumber)
+ {
+ offnum = OffsetNumberNext(offnum); /* move forward */
}
- maxoff = PageGetMaxOffsetNumber(page);
- offnum = FirstOffsetNumber;
- } else {
- maxoff = offnum = InvalidOffsetNumber;
- break; /* while */
- }
- } else {
- /* _hash_readnext never returns an empty page */
- maxoff = PageGetMaxOffsetNumber(page);
- offnum = FirstOffsetNumber;
- }
- }
- break;
- case BackwardScanDirection:
- if (offnum != InvalidOffsetNumber) {
- offnum = OffsetNumberPrev(offnum); /* move back */
- } else {
- offnum = maxoff; /* new page */
- }
- while (offnum < FirstOffsetNumber) {
- /*
- * either this page is empty (offnum ==
- * InvalidOffsetNumber) or we ran off the end.
- */
- _hash_readprev(rel, &buf, &page, &opaque);
- if (BufferIsInvalid(buf)) { /* end of chain */
- if (allbuckets && bucket > 0) {
- --bucket;
- blkno = BUCKET_TO_BLKNO(bucket);
- buf = _hash_getbuf(rel, blkno, HASH_READ);
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE);
- opaque = (HashPageOpaque) PageGetSpecialPointer(page);
- Assert(opaque->hasho_bucket == bucket);
- while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
- _hash_readnext(rel, &buf, &page, &opaque);
+ else
+ {
+ offnum = FirstOffsetNumber; /* new page */
+ }
+ while (offnum > maxoff)
+ {
+
+ /*
+ * either this page is empty (maxoff ==
+ * InvalidOffsetNumber) or we ran off the end.
+ */
+ _hash_readnext(rel, &buf, &page, &opaque);
+ if (BufferIsInvalid(buf))
+ { /* end of chain */
+ if (allbuckets && bucket < metap->hashm_maxbucket)
+ {
+ ++bucket;
+ blkno = BUCKET_TO_BLKNO(bucket);
+ buf = _hash_getbuf(rel, blkno, HASH_READ);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ Assert(opaque->hasho_bucket == bucket);
+ while (PageIsEmpty(page) &&
+ BlockNumberIsValid(opaque->hasho_nextblkno))
+ {
+ _hash_readnext(rel, &buf, &page, &opaque);
+ }
+ maxoff = PageGetMaxOffsetNumber(page);
+ offnum = FirstOffsetNumber;
+ }
+ else
+ {
+ maxoff = offnum = InvalidOffsetNumber;
+ break; /* while */
+ }
+ }
+ else
+ {
+ /* _hash_readnext never returns an empty page */
+ maxoff = PageGetMaxOffsetNumber(page);
+ offnum = FirstOffsetNumber;
+ }
+ }
+ break;
+ case BackwardScanDirection:
+ if (offnum != InvalidOffsetNumber)
+ {
+ offnum = OffsetNumberPrev(offnum); /* move back */
+ }
+ else
+ {
+ offnum = maxoff; /* new page */
}
- maxoff = offnum = PageGetMaxOffsetNumber(page);
- } else {
- maxoff = offnum = InvalidOffsetNumber;
- break; /* while */
- }
- } else {
- /* _hash_readprev never returns an empty page */
- maxoff = offnum = PageGetMaxOffsetNumber(page);
+ while (offnum < FirstOffsetNumber)
+ {
+
+ /*
+ * either this page is empty (offnum ==
+ * InvalidOffsetNumber) or we ran off the end.
+ */
+ _hash_readprev(rel, &buf, &page, &opaque);
+ if (BufferIsInvalid(buf))
+ { /* end of chain */
+ if (allbuckets && bucket > 0)
+ {
+ --bucket;
+ blkno = BUCKET_TO_BLKNO(bucket);
+ buf = _hash_getbuf(rel, blkno, HASH_READ);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ Assert(opaque->hasho_bucket == bucket);
+ while (BlockNumberIsValid(opaque->hasho_nextblkno))
+ {
+ _hash_readnext(rel, &buf, &page, &opaque);
+ }
+ maxoff = offnum = PageGetMaxOffsetNumber(page);
+ }
+ else
+ {
+ maxoff = offnum = InvalidOffsetNumber;
+ break; /* while */
+ }
+ }
+ else
+ {
+ /* _hash_readprev never returns an empty page */
+ maxoff = offnum = PageGetMaxOffsetNumber(page);
+ }
+ }
+ break;
+ default:
+ /* NoMovementScanDirection */
+ /* this should not be reached */
+ break;
}
- }
- break;
- default:
- /* NoMovementScanDirection */
- /* this should not be reached */
- break;
- }
- /* we ran off the end of the world without finding a match */
- if (offnum == InvalidOffsetNumber) {
- _hash_relbuf(rel, metabuf, HASH_READ);
- *bufP = so->hashso_curbuf = InvalidBuffer;
- ItemPointerSetInvalid(current);
- return(false);
- }
-
- /* get ready to check this tuple */
- hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
- itup = &hitem->hash_itup;
- } while (!_hash_checkqual(scan, itup));
-
- /* if we made it to here, we've found a valid tuple */
- _hash_relbuf(rel, metabuf, HASH_READ);
- blkno = BufferGetBlockNumber(buf);
- *bufP = so->hashso_curbuf = buf;
- ItemPointerSet(current, blkno, offnum);
- return(true);
+ /* we ran off the end of the world without finding a match */
+ if (offnum == InvalidOffsetNumber)
+ {
+ _hash_relbuf(rel, metabuf, HASH_READ);
+ *bufP = so->hashso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(current);
+ return (false);
+ }
+
+ /* get ready to check this tuple */
+ hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
+ itup = &hitem->hash_itup;
+ } while (!_hash_checkqual(scan, itup));
+
+ /* if we made it to here, we've found a valid tuple */
+ _hash_relbuf(rel, metabuf, HASH_READ);
+ blkno = BufferGetBlockNumber(buf);
+ *bufP = so->hashso_curbuf = buf;
+ ItemPointerSet(current, blkno, offnum);
+ return (true);
}
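
The heart of _hash_step is its stepping rule: advance the offset, and whenever it runs off the page, follow hasho_nextblkno to the next page in the bucket chain (and, if scanning all buckets, on to the next bucket's chain). A compact sketch of the forward case over a toy two-page chain; ToyPage is a stand-in, not the real Page type:

#include <stdio.h>

typedef struct ToyPage
{
    int         ntuples;
    struct ToyPage *next;       /* hasho_nextblkno analogue */
} ToyPage;

/* advance (page, off) one tuple forward; return 0 at end of chain */
static int
step_forward(ToyPage **page, int *off)
{
    (*off)++;                   /* OffsetNumberNext */
    while (*off > (*page)->ntuples)
    {
        *page = (*page)->next;  /* _hash_readnext analogue */
        if (*page == NULL)
            return 0;           /* ran off the end of the chain */
        *off = 1;               /* FirstOffsetNumber */
    }
    return 1;
}

int
main(void)
{
    ToyPage     ovfl = {1, NULL};
    ToyPage     bucket = {2, &ovfl};
    ToyPage    *page = &bucket;
    int         off = 0;        /* "invalid" start, as in _hash_first */
    int         seen = 0;

    while (step_forward(&page, &off))
        seen++;
    printf("visited %d tuples\n", seen);    /* prints 3 */
    return 0;
}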
diff --git a/src/backend/access/hash/hashstrat.c b/src/backend/access/hash/hashstrat.c
index d2f1e513c38..f1bdbdb8a3a 100644
--- a/src/backend/access/hash/hashstrat.c
+++ b/src/backend/access/hash/hashstrat.c
@@ -1,80 +1,83 @@
/*-------------------------------------------------------------------------
*
* btstrat.c--
- * Srategy map entries for the btree indexed access method
+ * Strategy map entries for the hash indexed access method
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/hashstrat.c,v 1.9 1997/08/20 02:01:42 vadim Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/hashstrat.c,v 1.10 1997/09/07 04:38:03 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include <postgres.h>
-
+
#include <access/hash.h>
#include <access/istrat.h>
-/*
- * only one valid strategy for hash tables: equality.
+/*
+ * only one valid strategy for hash tables: equality.
*/
#ifdef NOT_USED
-static StrategyNumber HTNegate[1] = {
- InvalidStrategy
+static StrategyNumber HTNegate[1] = {
+ InvalidStrategy
};
-static StrategyNumber HTCommute[1] = {
- HTEqualStrategyNumber
+static StrategyNumber HTCommute[1] = {
+ HTEqualStrategyNumber
};
-static StrategyNumber HTNegateCommute[1] = {
- InvalidStrategy
+static StrategyNumber HTNegateCommute[1] = {
+ InvalidStrategy
};
-static StrategyEvaluationData HTEvaluationData = {
- /* XXX static for simplicity */
+static StrategyEvaluationData HTEvaluationData = {
+ /* XXX static for simplicity */
- HTMaxStrategyNumber,
- (StrategyTransformMap)HTNegate,
- (StrategyTransformMap)HTCommute,
- (StrategyTransformMap)HTNegateCommute,
- {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
+ HTMaxStrategyNumber,
+ (StrategyTransformMap) HTNegate,
+ (StrategyTransformMap) HTCommute,
+ (StrategyTransformMap) HTNegateCommute,
+ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
};
+
#endif
/* ----------------------------------------------------------------
- * RelationGetHashStrategy
+ * RelationGetHashStrategy
* ----------------------------------------------------------------
*/
#ifdef NOT_USED
-static StrategyNumber
+static StrategyNumber
_hash_getstrat(Relation rel,
- AttrNumber attno,
- RegProcedure proc)
+ AttrNumber attno,
+ RegProcedure proc)
{
- StrategyNumber strat;
+ StrategyNumber strat;
- strat = RelationGetStrategy(rel, attno, &HTEvaluationData, proc);
+ strat = RelationGetStrategy(rel, attno, &HTEvaluationData, proc);
- Assert(StrategyNumberIsValid(strat));
+ Assert(StrategyNumberIsValid(strat));
- return (strat);
+ return (strat);
}
+
#endif
#ifdef NOT_USED
-static bool
+static bool
_hash_invokestrat(Relation rel,
- AttrNumber attno,
- StrategyNumber strat,
- Datum left,
- Datum right)
+ AttrNumber attno,
+ StrategyNumber strat,
+ Datum left,
+ Datum right)
{
- return (RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat,
- left, right));
+ return (RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat,
+ left, right));
}
+
#endif
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index dd0b4737454..f9fbe0e2d17 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -1,109 +1,110 @@
/*-------------------------------------------------------------------------
*
* btutils.c--
- * Utility code for Postgres btree implementation.
+ * Utility code for Postgres hash implementation.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.9 1997/08/14 05:01:32 vadim Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.10 1997/09/07 04:38:04 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include <postgres.h>
-
+
#include <access/hash.h>
#include <fmgr.h>
#include <utils/memutils.h>
#include <access/iqual.h>
#ifndef HAVE_MEMMOVE
-# include <regex/utils.h>
+#include <regex/utils.h>
#else
-# include <string.h>
+#include <string.h>
#endif
ScanKey
_hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap)
{
- ScanKey skey;
- TupleDesc itupdesc;
- int natts;
- AttrNumber i;
- Datum arg;
- RegProcedure proc;
- bool null;
-
- natts = rel->rd_rel->relnatts;
- itupdesc = RelationGetTupleDescriptor(rel);
-
- skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
-
- for (i = 0; i < natts; i++) {
- arg = index_getattr(itup, i + 1, itupdesc, &null);
- proc = metap->hashm_procid;
- ScanKeyEntryInitialize(&skey[i],
- 0x0, (AttrNumber) (i + 1), proc, arg);
- }
-
- return (skey);
-}
+ ScanKey skey;
+ TupleDesc itupdesc;
+ int natts;
+ AttrNumber i;
+ Datum arg;
+ RegProcedure proc;
+ bool null;
+
+ natts = rel->rd_rel->relnatts;
+ itupdesc = RelationGetTupleDescriptor(rel);
+
+ skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
+
+ for (i = 0; i < natts; i++)
+ {
+ arg = index_getattr(itup, i + 1, itupdesc, &null);
+ proc = metap->hashm_procid;
+ ScanKeyEntryInitialize(&skey[i],
+ 0x0, (AttrNumber) (i + 1), proc, arg);
+ }
+
+ return (skey);
+}
void
_hash_freeskey(ScanKey skey)
{
- pfree(skey);
+ pfree(skey);
}
bool
_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
{
- if (scan->numberOfKeys > 0)
- return (index_keytest(itup,
- RelationGetTupleDescriptor(scan->relation),
- scan->numberOfKeys, scan->keyData));
- else
- return (true);
+ if (scan->numberOfKeys > 0)
+ return (index_keytest(itup,
+ RelationGetTupleDescriptor(scan->relation),
+ scan->numberOfKeys, scan->keyData));
+ else
+ return (true);
}
HashItem
_hash_formitem(IndexTuple itup)
{
- int nbytes_hitem;
- HashItem hitem;
- Size tuplen;
-
- /* disallow nulls in hash keys */
- if (itup->t_info & INDEX_NULL_MASK)
- elog(WARN, "hash indices cannot include null keys");
-
- /* make a copy of the index tuple with room for the sequence number */
- tuplen = IndexTupleSize(itup);
- nbytes_hitem = tuplen +
- (sizeof(HashItemData) - sizeof(IndexTupleData));
-
- hitem = (HashItem) palloc(nbytes_hitem);
- memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen);
-
- return (hitem);
+ int nbytes_hitem;
+ HashItem hitem;
+ Size tuplen;
+
+ /* disallow nulls in hash keys */
+ if (itup->t_info & INDEX_NULL_MASK)
+ elog(WARN, "hash indices cannot include null keys");
+
+ /* make a copy of the index tuple with room for the sequence number */
+ tuplen = IndexTupleSize(itup);
+ nbytes_hitem = tuplen +
+ (sizeof(HashItemData) - sizeof(IndexTupleData));
+
+ hitem = (HashItem) palloc(nbytes_hitem);
+ memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen);
+
+ return (hitem);
}
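
Both _hash_formitem above and the split code in hash.c size a HashItem as the index tuple length plus the difference of the two struct sizes, and the split code then rounds the result with DOUBLEALIGN. A sketch of that arithmetic with stand-in struct layouts and an assumed 8-byte double alignment (the real macro and types come from the PostgreSQL headers):

#include <stdio.h>

/* stand-in layouts, for illustration only */
typedef struct ToyIndexTupleData
{
    unsigned short t_info;
} ToyIndexTupleData;

typedef struct ToyHashItemData
{
    ToyIndexTupleData hash_itup;
    unsigned    hash_seqno;     /* pretend extra per-item field */
} ToyHashItemData;

/* round up to a multiple of 8, as DOUBLEALIGN does on typical ports */
#define TOY_DOUBLEALIGN(LEN)    (((LEN) + 7) & ~7)

int
main(void)
{
    unsigned    tuplen = 21;    /* pretend IndexTupleSize(itup) */
    unsigned    nbytes = tuplen +
    (sizeof(ToyHashItemData) - sizeof(ToyIndexTupleData));

    printf("alloc %u bytes, %u after alignment\n",
           nbytes, (unsigned) TOY_DOUBLEALIGN(nbytes));
    return 0;
}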
Bucket
_hash_call(Relation rel, HashMetaPage metap, Datum key)
{
- uint32 n;
- Bucket bucket;
- RegProcedure proc;
-
- proc = metap->hashm_procid;
- n = (uint32) fmgr(proc, key);
- bucket = n & metap->hashm_highmask;
- if (bucket > metap->hashm_maxbucket)
- bucket = bucket & metap->hashm_lowmask;
- return (bucket);
+ uint32 n;
+ Bucket bucket;
+ RegProcedure proc;
+
+ proc = metap->hashm_procid;
+ n = (uint32) fmgr(proc, key);
+ bucket = n & metap->hashm_highmask;
+ if (bucket > metap->hashm_maxbucket)
+ bucket = bucket & metap->hashm_lowmask;
+ return (bucket);
}
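
_hash_call above is the linear-hashing address calculation: mask the hash value with the high mask, and if that addresses a bucket that has not been created yet (greater than hashm_maxbucket), fall back to the low mask. A worked sketch with made-up mask values; the real ones come from the metapage:

#include <stdio.h>

static unsigned
toy_call(unsigned n, unsigned highmask, unsigned lowmask, unsigned maxbucket)
{
    unsigned    bucket = n & highmask;

    if (bucket > maxbucket)
        bucket = bucket & lowmask;      /* bucket not split yet */
    return bucket;
}

int
main(void)
{
    /* assume buckets 0..5 exist: lowmask 0x3, highmask 0x7, maxbucket 5 */
    printf("%u\n", toy_call(13, 0x7, 0x3, 5));  /* 13 & 7 = 5 <= 5 -> 5 */
    printf("%u\n", toy_call(6, 0x7, 0x3, 5));   /* 6 & 7 = 6 > 5 -> 6 & 3 = 2 */
    return 0;
}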
/*
@@ -112,12 +113,13 @@ _hash_call(Relation rel, HashMetaPage metap, Datum key)
uint32
_hash_log2(uint32 num)
{
- uint32 i, limit;
-
- limit = 1;
- for (i = 0; limit < num; limit = limit << 1, i++)
- ;
- return (i);
+ uint32 i,
+ limit;
+
+ limit = 1;
+ for (i = 0; limit < num; limit = limit << 1, i++)
+ ;
+ return (i);
}
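
_hash_log2(num) returns the smallest i such that 2^i >= num, i.e. ceil(log2(num)) for num >= 1. A quick self-check of the loop (illustrative only):

#include <stdio.h>

static unsigned
toy_log2(unsigned num)
{
    unsigned    i,
                limit;

    for (i = 0, limit = 1; limit < num; limit <<= 1, i++)
        ;
    return i;
}

int
main(void)
{
    /* 2^0 = 1 >= 1; 2^3 = 8 >= 6; 2^3 = 8 >= 8 */
    printf("%u %u %u\n", toy_log2(1), toy_log2(6), toy_log2(8));    /* 0 3 3 */
    return 0;
}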
/*
@@ -126,19 +128,20 @@ _hash_log2(uint32 num)
void
_hash_checkpage(Page page, int flags)
{
- HashPageOpaque opaque;
+ HashPageOpaque opaque;
- Assert(page);
- Assert(((PageHeader)(page))->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData)));
+ Assert(page);
+ Assert(((PageHeader) (page))->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData)));
#if 1
- Assert(((PageHeader)(page))->pd_upper <=
- (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
- Assert(((PageHeader)(page))->pd_special ==
- (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
- Assert(((PageHeader)(page))->pd_opaque.od_pagesize == BLCKSZ);
+ Assert(((PageHeader) (page))->pd_upper <=
+ (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
+ Assert(((PageHeader) (page))->pd_special ==
+ (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
+ Assert(((PageHeader) (page))->pd_opaque.od_pagesize == BLCKSZ);
#endif
- if (flags) {
- opaque = (HashPageOpaque) PageGetSpecialPointer(page);
- Assert(opaque->hasho_flag & flags);
- }
+ if (flags)
+ {
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ Assert(opaque->hasho_flag & flags);
+ }
}
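
The 'flags' argument to _hash_checkpage is a bitmask, which is why callers elsewhere in this patch can accept several page types in one call (e.g. LH_BUCKET_PAGE | LH_OVERFLOW_PAGE in the scan code). A toy illustration with made-up flag values; the real LH_* constants live in the hash headers:

#include <stdio.h>

#define TOY_LH_OVERFLOW_PAGE    (1 << 0)
#define TOY_LH_BUCKET_PAGE      (1 << 1)
#define TOY_LH_META_PAGE        (1 << 2)

int
main(void)
{
    int         hasho_flag = TOY_LH_OVERFLOW_PAGE;  /* an overflow page */
    int         accept = TOY_LH_BUCKET_PAGE | TOY_LH_OVERFLOW_PAGE;

    /* mirrors Assert(opaque->hasho_flag & flags) in _hash_checkpage */
    printf("accepted: %s\n", (hasho_flag & accept) ? "yes" : "no");
    return 0;
}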