Diffstat (limited to 'src/backend/access/hash')
-rw-r--r-- | src/backend/access/hash/hash.c | 767
-rw-r--r-- | src/backend/access/hash/hashfunc.c | 411
-rw-r--r-- | src/backend/access/hash/hashinsert.c | 386
-rw-r--r-- | src/backend/access/hash/hashovfl.c | 1065
-rw-r--r-- | src/backend/access/hash/hashpage.c | 1107
-rw-r--r-- | src/backend/access/hash/hashscan.c | 229
-rw-r--r-- | src/backend/access/hash/hashsearch.c | 758
-rw-r--r-- | src/backend/access/hash/hashstrat.c | 69
-rw-r--r-- | src/backend/access/hash/hashutil.c | 161
9 files changed, 2570 insertions, 2383 deletions
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 89f81fc56a5..e13539c4ad9 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -1,16 +1,16 @@ /*------------------------------------------------------------------------- * * hash.c-- - * Implementation of Margo Seltzer's Hashing package for postgres. + * Implementation of Margo Seltzer's Hashing package for postgres. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.12 1997/01/10 09:46:13 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.13 1997/09/07 04:37:49 momjian Exp $ * * NOTES - * This file contains only the public interface routines. + * This file contains only the public interface routines. * *------------------------------------------------------------------------- */ @@ -26,452 +26,483 @@ #include <miscadmin.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -bool BuildingHash = false; +bool BuildingHash = false; /* - * hashbuild() -- build a new hash index. + * hashbuild() -- build a new hash index. * - * We use a global variable to record the fact that we're creating - * a new index. This is used to avoid high-concurrency locking, - * since the index won't be visible until this transaction commits - * and since building is guaranteed to be single-threaded. + * We use a global variable to record the fact that we're creating + * a new index. This is used to avoid high-concurrency locking, + * since the index won't be visible until this transaction commits + * and since building is guaranteed to be single-threaded. */ void hashbuild(Relation heap, - Relation index, - int natts, - AttrNumber *attnum, - IndexStrategy istrat, - uint16 pcount, - Datum *params, - FuncIndexInfo *finfo, - PredInfo *predInfo) + Relation index, + int natts, + AttrNumber * attnum, + IndexStrategy istrat, + uint16 pcount, + Datum * params, + FuncIndexInfo * finfo, + PredInfo * predInfo) { - HeapScanDesc hscan; - Buffer buffer; - HeapTuple htup; - IndexTuple itup; - TupleDesc htupdesc, itupdesc; - Datum *attdata; - bool *nulls; - InsertIndexResult res; - int nhtups, nitups; - int i; - HashItem hitem; + HeapScanDesc hscan; + Buffer buffer; + HeapTuple htup; + IndexTuple itup; + TupleDesc htupdesc, + itupdesc; + Datum *attdata; + bool *nulls; + InsertIndexResult res; + int nhtups, + nitups; + int i; + HashItem hitem; + #ifndef OMIT_PARTIAL_INDEX - ExprContext *econtext; - TupleTable tupleTable; - TupleTableSlot *slot; + ExprContext *econtext; + TupleTable tupleTable; + TupleTableSlot *slot; + #endif - Oid hrelid, irelid; - Node *pred, *oldPred; - - /* note that this is a new btree */ - BuildingHash = true; - - pred = predInfo->pred; - oldPred = predInfo->oldPred; - - /* initialize the hash index metadata page (if this is a new index) */ - if (oldPred == NULL) - _hash_metapinit(index); - - /* get tuple descriptors for heap and index relations */ - htupdesc = RelationGetTupleDescriptor(heap); - itupdesc = RelationGetTupleDescriptor(index); - - /* get space for data items that'll appear in the index tuple */ - attdata = (Datum *) palloc(natts * sizeof(Datum)); - nulls = (bool *) palloc(natts * sizeof(bool)); - - /* - * If this is a predicate (partial) index, we will need to evaluate the - * predicate using ExecQual, which requires the current tuple to be in a - * slot of a TupleTable. 
In addition, ExecQual must have an ExprContext - * referring to that slot. Here, we initialize dummy TupleTable and - * ExprContext objects for this purpose. --Nels, Feb '92 - */ + Oid hrelid, + irelid; + Node *pred, + *oldPred; + + /* note that this is a new btree */ + BuildingHash = true; + + pred = predInfo->pred; + oldPred = predInfo->oldPred; + + /* initialize the hash index metadata page (if this is a new index) */ + if (oldPred == NULL) + _hash_metapinit(index); + + /* get tuple descriptors for heap and index relations */ + htupdesc = RelationGetTupleDescriptor(heap); + itupdesc = RelationGetTupleDescriptor(index); + + /* get space for data items that'll appear in the index tuple */ + attdata = (Datum *) palloc(natts * sizeof(Datum)); + nulls = (bool *) palloc(natts * sizeof(bool)); + + /* + * If this is a predicate (partial) index, we will need to evaluate + * the predicate using ExecQual, which requires the current tuple to + * be in a slot of a TupleTable. In addition, ExecQual must have an + * ExprContext referring to that slot. Here, we initialize dummy + * TupleTable and ExprContext objects for this purpose. --Nels, Feb + * '92 + */ #ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) { - tupleTable = ExecCreateTupleTable(1); - slot = ExecAllocTableSlot(tupleTable); - econtext = makeNode(ExprContext); - FillDummyExprContext(econtext, slot, htupdesc, buffer); - } - else /* quiet the compiler */ + if (pred != NULL || oldPred != NULL) + { + tupleTable = ExecCreateTupleTable(1); + slot = ExecAllocTableSlot(tupleTable); + econtext = makeNode(ExprContext); + FillDummyExprContext(econtext, slot, htupdesc, buffer); + } + else +/* quiet the compiler */ { econtext = NULL; tupleTable = 0; slot = 0; } -#endif /* OMIT_PARTIAL_INDEX */ - - /* start a heap scan */ - hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); - htup = heap_getnext(hscan, 0, &buffer); - - /* build the index */ - nhtups = nitups = 0; - - for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) { - - nhtups++; - - /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was already in the existing partial index - */ - if (oldPred != NULL) { - /*SetSlotContents(slot, htup); */ +#endif /* OMIT_PARTIAL_INDEX */ + + /* start a heap scan */ + hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); + htup = heap_getnext(hscan, 0, &buffer); + + /* build the index */ + nhtups = nitups = 0; + + for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) + { + + nhtups++; + + /* + * If oldPred != NULL, this is an EXTEND INDEX command, so skip + * this tuple if it was already in the existing partial index + */ + if (oldPred != NULL) + { + /* SetSlotContents(slot, htup); */ #ifndef OMIT_PARTIAL_INDEX - slot->val = htup; - if (ExecQual((List*)oldPred, econtext) == true) { + slot->val = htup; + if (ExecQual((List *) oldPred, econtext) == true) + { + nitups++; + continue; + } +#endif /* OMIT_PARTIAL_INDEX */ + } + + /* + * Skip this tuple if it doesn't satisfy the partial-index + * predicate + */ + if (pred != NULL) + { +#ifndef OMIT_PARTIAL_INDEX + /* SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List *) pred, econtext) == false) + continue; +#endif /* OMIT_PARTIAL_INDEX */ + } + nitups++; - continue; - } -#endif /* OMIT_PARTIAL_INDEX */ + + /* + * For the current heap tuple, extract all the attributes we use + * in this index, and note which are null. 
+ */ + for (i = 1; i <= natts; i++) + { + int attoff; + bool attnull; + + /* + * Offsets are from the start of the tuple, and are + * zero-based; indices are one-based. The next call returns i + * - 1. That's data hiding for you. + */ + + /* attoff = i - 1 */ + attoff = AttrNumberGetAttrOffset(i); + + /* + * below, attdata[attoff] set to equal some datum & attnull is + * changed to indicate whether or not the attribute is null + * for this tuple + */ + attdata[attoff] = GetIndexValue(htup, + htupdesc, + attoff, + attnum, + finfo, + &attnull, + buffer); + nulls[attoff] = (attnull ? 'n' : ' '); + } + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(itupdesc, attdata, nulls); + + /* + * If the single index key is null, we don't insert it into the + * index. Hash tables support scans on '='. Relational algebra + * says that A = B returns null if either A or B is null. This + * means that no qualification used in an index scan could ever + * return true on a null attribute. It also means that indices + * can't be used by ISNULL or NOTNULL scans, but that's an + * artifact of the strategy map architecture chosen in 1986, not + * of the way nulls are handled here. + */ + + if (itup->t_info & INDEX_NULL_MASK) + { + pfree(itup); + continue; + } + + itup->t_tid = htup->t_ctid; + hitem = _hash_formitem(itup); + res = _hash_doinsert(index, hitem); + pfree(hitem); + pfree(itup); + pfree(res); } - - /* Skip this tuple if it doesn't satisfy the partial-index predicate */ - if (pred != NULL) { + + /* okay, all heap tuples are indexed */ + heap_endscan(hscan); + + if (pred != NULL || oldPred != NULL) + { #ifndef OMIT_PARTIAL_INDEX - /*SetSlotContents(slot, htup); */ - slot->val = htup; - if (ExecQual((List*)pred, econtext) == false) - continue; -#endif /* OMIT_PARTIAL_INDEX */ -} - - nitups++; - - /* - * For the current heap tuple, extract all the attributes - * we use in this index, and note which are null. - */ - for (i = 1; i <= natts; i++) { - int attoff; - bool attnull; - - /* - * Offsets are from the start of the tuple, and are - * zero-based; indices are one-based. The next call - * returns i - 1. That's data hiding for you. - */ - - /* attoff = i - 1 */ - attoff = AttrNumberGetAttrOffset(i); - - /* below, attdata[attoff] set to equal some datum & - * attnull is changed to indicate whether or not the attribute - * is null for this tuple - */ - attdata[attoff] = GetIndexValue(htup, - htupdesc, - attoff, - attnum, - finfo, - &attnull, - buffer); - nulls[attoff] = (attnull ? 'n' : ' '); + ExecDestroyTupleTable(tupleTable, true); + pfree(econtext); +#endif /* OMIT_PARTIAL_INDEX */ } - - /* form an index tuple and point it at the heap tuple */ - itup = index_formtuple(itupdesc, attdata, nulls); - + /* - * If the single index key is null, we don't insert it into - * the index. Hash tables support scans on '='. - * Relational algebra says that A = B - * returns null if either A or B is null. This - * means that no qualification used in an index scan could ever - * return true on a null attribute. It also means that indices - * can't be used by ISNULL or NOTNULL scans, but that's an - * artifact of the strategy map architecture chosen in 1986, not - * of the way nulls are handled here. + * Since we just counted the tuples in the heap, we update its stats + * in pg_class to guarantee that the planner takes advantage of the + * index we just created. 
Finally, only update statistics during + * normal index definitions, not for indices on system catalogs + * created during bootstrap processing. We must close the relations + * before updatings statistics to guarantee that the relcache entries + * are flushed when we increment the command counter in UpdateStats(). */ - - if (itup->t_info & INDEX_NULL_MASK) { - pfree(itup); - continue; - } - - itup->t_tid = htup->t_ctid; - hitem = _hash_formitem(itup); - res = _hash_doinsert(index, hitem); - pfree(hitem); - pfree(itup); - pfree(res); - } - - /* okay, all heap tuples are indexed */ - heap_endscan(hscan); - - if (pred != NULL || oldPred != NULL) { -#ifndef OMIT_PARTIAL_INDEX - ExecDestroyTupleTable(tupleTable, true); - pfree(econtext); -#endif /* OMIT_PARTIAL_INDEX */ - } - - /* - * Since we just counted the tuples in the heap, we update its - * stats in pg_class to guarantee that the planner takes advantage - * of the index we just created. Finally, only update statistics - * during normal index definitions, not for indices on system catalogs - * created during bootstrap processing. We must close the relations - * before updatings statistics to guarantee that the relcache entries - * are flushed when we increment the command counter in UpdateStats(). - */ - if (IsNormalProcessingMode()) + if (IsNormalProcessingMode()) { - hrelid = heap->rd_id; - irelid = index->rd_id; - heap_close(heap); - index_close(index); - UpdateStats(hrelid, nhtups, true); - UpdateStats(irelid, nitups, false); - if (oldPred != NULL) { - if (nitups == nhtups) pred = NULL; - UpdateIndexPredicate(irelid, oldPred, pred); - } + hrelid = heap->rd_id; + irelid = index->rd_id; + heap_close(heap); + index_close(index); + UpdateStats(hrelid, nhtups, true); + UpdateStats(irelid, nitups, false); + if (oldPred != NULL) + { + if (nitups == nhtups) + pred = NULL; + UpdateIndexPredicate(irelid, oldPred, pred); + } } - - /* be tidy */ - pfree(nulls); - pfree(attdata); - - /* all done */ - BuildingHash = false; + + /* be tidy */ + pfree(nulls); + pfree(attdata); + + /* all done */ + BuildingHash = false; } /* - * hashinsert() -- insert an index tuple into a hash table. + * hashinsert() -- insert an index tuple into a hash table. * - * Hash on the index tuple's key, find the appropriate location - * for the new tuple, put it there, and return an InsertIndexResult - * to the caller. + * Hash on the index tuple's key, find the appropriate location + * for the new tuple, put it there, and return an InsertIndexResult + * to the caller. 
*/ InsertIndexResult -hashinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) +hashinsert(Relation rel, Datum * datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) { - HashItem hitem; - IndexTuple itup; - InsertIndexResult res; - - - /* generate an index tuple */ - itup = index_formtuple(RelationGetTupleDescriptor(rel), datum, nulls); - itup->t_tid = *ht_ctid; - - if (itup->t_info & INDEX_NULL_MASK) - return ((InsertIndexResult) NULL); - - hitem = _hash_formitem(itup); - - res = _hash_doinsert(rel, hitem); - - pfree(hitem); - pfree(itup); - - return (res); + HashItem hitem; + IndexTuple itup; + InsertIndexResult res; + + + /* generate an index tuple */ + itup = index_formtuple(RelationGetTupleDescriptor(rel), datum, nulls); + itup->t_tid = *ht_ctid; + + if (itup->t_info & INDEX_NULL_MASK) + return ((InsertIndexResult) NULL); + + hitem = _hash_formitem(itup); + + res = _hash_doinsert(rel, hitem); + + pfree(hitem); + pfree(itup); + + return (res); } /* - * hashgettuple() -- Get the next tuple in the scan. + * hashgettuple() -- Get the next tuple in the scan. */ -char * +char * hashgettuple(IndexScanDesc scan, ScanDirection dir) { - RetrieveIndexResult res; - - /* - * If we've already initialized this scan, we can just advance it - * in the appropriate direction. If we haven't done so yet, we - * call a routine to get the first item in the scan. - */ - - if (ItemPointerIsValid(&(scan->currentItemData))) - res = _hash_next(scan, dir); - else - res = _hash_first(scan, dir); - - return ((char *) res); + RetrieveIndexResult res; + + /* + * If we've already initialized this scan, we can just advance it in + * the appropriate direction. If we haven't done so yet, we call a + * routine to get the first item in the scan. 
+ */ + + if (ItemPointerIsValid(&(scan->currentItemData))) + res = _hash_next(scan, dir); + else + res = _hash_first(scan, dir); + + return ((char *) res); } /* - * hashbeginscan() -- start a scan on a hash index + * hashbeginscan() -- start a scan on a hash index */ -char * +char * hashbeginscan(Relation rel, - bool fromEnd, - uint16 keysz, - ScanKey scankey) + bool fromEnd, + uint16 keysz, + ScanKey scankey) { - IndexScanDesc scan; - HashScanOpaque so; - - scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey); - so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData)); - so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer; - scan->opaque = so; - scan->flags = 0x0; - - /* register scan in case we change pages it's using */ - _hash_regscan(scan); - - return ((char *) scan); + IndexScanDesc scan; + HashScanOpaque so; + + scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey); + so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData)); + so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer; + scan->opaque = so; + scan->flags = 0x0; + + /* register scan in case we change pages it's using */ + _hash_regscan(scan); + + return ((char *) scan); } /* - * hashrescan() -- rescan an index relation + * hashrescan() -- rescan an index relation */ void hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey) { - ItemPointer iptr; - HashScanOpaque so; - - so = (HashScanOpaque) scan->opaque; - - /* we hold a read lock on the current page in the scan */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); - so->hashso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { - _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); - so->hashso_mrkbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* reset the scan key */ - if (scan->numberOfKeys > 0) { - memmove(scan->keyData, - scankey, - scan->numberOfKeys * sizeof(ScanKeyData)); - } + ItemPointer iptr; + HashScanOpaque so; + + so = (HashScanOpaque) scan->opaque; + + /* we hold a read lock on the current page in the scan */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); + so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) + { + _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); + so->hashso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* reset the scan key */ + if (scan->numberOfKeys > 0) + { + memmove(scan->keyData, + scankey, + scan->numberOfKeys * sizeof(ScanKeyData)); + } } /* - * hashendscan() -- close down a scan + * hashendscan() -- close down a scan */ void hashendscan(IndexScanDesc scan) { - - ItemPointer iptr; - HashScanOpaque so; - - so = (HashScanOpaque) scan->opaque; - - /* release any locks we still hold */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); - so->hashso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { - if (BufferIsValid(so->hashso_mrkbuf)) - _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); - so->hashso_mrkbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* don't need scan registered anymore */ - _hash_dropscan(scan); - - /* be tidy */ - pfree (scan->opaque); + + ItemPointer iptr; + HashScanOpaque so; + + so = 
(HashScanOpaque) scan->opaque; + + /* release any locks we still hold */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); + so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) + { + if (BufferIsValid(so->hashso_mrkbuf)) + _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); + so->hashso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* don't need scan registered anymore */ + _hash_dropscan(scan); + + /* be tidy */ + pfree(scan->opaque); } /* - * hashmarkpos() -- save current scan position + * hashmarkpos() -- save current scan position * */ void hashmarkpos(IndexScanDesc scan) { - ItemPointer iptr; - HashScanOpaque so; - - /* see if we ever call this code. if we do, then so_mrkbuf a - * useful element in the scan->opaque structure. if this procedure - * is never called, so_mrkbuf should be removed from the scan->opaque - * structure. - */ - elog(NOTICE, "Hashmarkpos() called."); - - so = (HashScanOpaque) scan->opaque; - - /* release lock on old marked data, if any */ - if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { - _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); - so->hashso_mrkbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* bump lock on currentItemData and copy to currentMarkData */ - if (ItemPointerIsValid(&(scan->currentItemData))) { - so->hashso_mrkbuf = _hash_getbuf(scan->relation, - BufferGetBlockNumber(so->hashso_curbuf), - HASH_READ); - scan->currentMarkData = scan->currentItemData; - } + ItemPointer iptr; + HashScanOpaque so; + + /* + * see if we ever call this code. if we do, then so_mrkbuf a useful + * element in the scan->opaque structure. if this procedure is never + * called, so_mrkbuf should be removed from the scan->opaque + * structure. + */ + elog(NOTICE, "Hashmarkpos() called."); + + so = (HashScanOpaque) scan->opaque; + + /* release lock on old marked data, if any */ + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) + { + _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); + so->hashso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* bump lock on currentItemData and copy to currentMarkData */ + if (ItemPointerIsValid(&(scan->currentItemData))) + { + so->hashso_mrkbuf = _hash_getbuf(scan->relation, + BufferGetBlockNumber(so->hashso_curbuf), + HASH_READ); + scan->currentMarkData = scan->currentItemData; + } } /* - * hashrestrpos() -- restore scan to last saved position + * hashrestrpos() -- restore scan to last saved position */ void hashrestrpos(IndexScanDesc scan) { - ItemPointer iptr; - HashScanOpaque so; - - /* see if we ever call this code. if we do, then so_mrkbuf a - * useful element in the scan->opaque structure. if this procedure - * is never called, so_mrkbuf should be removed from the scan->opaque - * structure. 
- */ - elog(NOTICE, "Hashrestrpos() called."); - - so = (HashScanOpaque) scan->opaque; - - /* release lock on current data, if any */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); - so->hashso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* bump lock on currentMarkData and copy to currentItemData */ - if (ItemPointerIsValid(&(scan->currentMarkData))) { - so->hashso_curbuf = - _hash_getbuf(scan->relation, - BufferGetBlockNumber(so->hashso_mrkbuf), - HASH_READ); - - scan->currentItemData = scan->currentMarkData; - } + ItemPointer iptr; + HashScanOpaque so; + + /* + * see if we ever call this code. if we do, then so_mrkbuf a useful + * element in the scan->opaque structure. if this procedure is never + * called, so_mrkbuf should be removed from the scan->opaque + * structure. + */ + elog(NOTICE, "Hashrestrpos() called."); + + so = (HashScanOpaque) scan->opaque; + + /* release lock on current data, if any */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); + so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* bump lock on currentMarkData and copy to currentItemData */ + if (ItemPointerIsValid(&(scan->currentMarkData))) + { + so->hashso_curbuf = + _hash_getbuf(scan->relation, + BufferGetBlockNumber(so->hashso_mrkbuf), + HASH_READ); + + scan->currentItemData = scan->currentMarkData; + } } /* stubs */ void hashdelete(Relation rel, ItemPointer tid) { - /* adjust any active scans that will be affected by this deletion */ - _hash_adjscans(rel, tid); - - /* delete the data from the page */ - _hash_pagedel(rel, tid); -} + /* adjust any active scans that will be affected by this deletion */ + _hash_adjscans(rel, tid); + /* delete the data from the page */ + _hash_pagedel(rel, tid); +} diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 5862800b21d..a3cbaa1a94c 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -1,17 +1,17 @@ /*------------------------------------------------------------------------- * * hashfunc.c-- - * Comparison functions for hash access method. + * Comparison functions for hash access method. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.3 1996/11/10 02:57:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.4 1997/09/07 04:37:53 momjian Exp $ * * NOTES - * These functions are stored in pg_amproc. For each operator class - * defined on hash tables, they compute the hash value of the argument. + * These functions are stored in pg_amproc. For each operator class + * defined on hash tables, they compute the hash value of the argument. * *------------------------------------------------------------------------- */ @@ -20,206 +20,223 @@ #include "access/hash.h" -uint32 hashint2(int16 key) +uint32 +hashint2(int16 key) { - return ((uint32) ~key); + return ((uint32) ~ key); } -uint32 hashint4(uint32 key) +uint32 +hashint4(uint32 key) { - return (~key); + return (~key); } /* Hash function from Chris Torek. 
*/ -uint32 hashfloat4(float32 keyp) +uint32 +hashfloat4(float32 keyp) { - int len; - int loop; - uint32 h; - char *kp = (char *) keyp; + int len; + int loop; + uint32 h; + char *kp = (char *) keyp; - len = sizeof(float32data); + len = sizeof(float32data); -#define HASH4a h = (h << 5) - h + *kp++; -#define HASH4b h = (h << 5) + h + *kp++; +#define HASH4a h = (h << 5) - h + *kp++; +#define HASH4b h = (h << 5) + h + *kp++; #define HASH4 HASH4b - h = 0; - if (len > 0) { - loop = (len + 8 - 1) >> 3; - - switch (len & (8 - 1)) { - case 0: - do { /* All fall throughs */ - HASH4; - case 7: - HASH4; - case 6: - HASH4; - case 5: - HASH4; - case 4: - HASH4; - case 3: - HASH4; - case 2: - HASH4; - case 1: - HASH4; - } while (--loop); + h = 0; + if (len > 0) + { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) + { + case 0: + do + { /* All fall throughs */ + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } } - } - return (h); -} + return (h); +} -uint32 hashfloat8(float64 keyp) +uint32 +hashfloat8(float64 keyp) { - int len; - int loop; - uint32 h; - char *kp = (char *) keyp; + int len; + int loop; + uint32 h; + char *kp = (char *) keyp; - len = sizeof(float64data); + len = sizeof(float64data); -#define HASH4a h = (h << 5) - h + *kp++; -#define HASH4b h = (h << 5) + h + *kp++; +#define HASH4a h = (h << 5) - h + *kp++; +#define HASH4b h = (h << 5) + h + *kp++; #define HASH4 HASH4b - h = 0; - if (len > 0) { - loop = (len + 8 - 1) >> 3; - - switch (len & (8 - 1)) { - case 0: - do { /* All fall throughs */ - HASH4; - case 7: - HASH4; - case 6: - HASH4; - case 5: - HASH4; - case 4: - HASH4; - case 3: - HASH4; - case 2: - HASH4; - case 1: - HASH4; - } while (--loop); + h = 0; + if (len > 0) + { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) + { + case 0: + do + { /* All fall throughs */ + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } } - } - return (h); -} + return (h); +} -uint32 hashoid(Oid key) +uint32 +hashoid(Oid key) { - return ((uint32) ~key); + return ((uint32) ~ key); } -uint32 hashchar(char key) +uint32 +hashchar(char key) { - int len; - uint32 h; + int len; + uint32 h; + + len = sizeof(char); - len = sizeof(char); +#define PRIME1 37 +#define PRIME2 1048583 -#define PRIME1 37 -#define PRIME2 1048583 + h = 0; + /* Convert char to integer */ + h = h * PRIME1 ^ (key - ' '); + h %= PRIME2; - h = 0; - /* Convert char to integer */ - h = h * PRIME1 ^ (key - ' '); - h %= PRIME2; - - return (h); + return (h); } -uint32 hashchar2(uint16 intkey) +uint32 +hashchar2(uint16 intkey) { - uint32 h; - int len; - char *key = (char *) &intkey; - - h = 0; - len = sizeof(uint16); - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + char *key = (char *) &intkey; + + h = 0; + len = sizeof(uint16); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } -uint32 hashchar4(uint32 intkey) +uint32 +hashchar4(uint32 intkey) { - uint32 h; - int len; - char *key = (char *) &intkey; - - h = 0; - len = sizeof(uint32); - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + char *key = (char *) &intkey; + + h = 0; + len = sizeof(uint32); + /* Convert string 
to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } -uint32 hashchar8(char *key) +uint32 +hashchar8(char *key) { - uint32 h; - int len; - - h = 0; - len = sizeof(char8); - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + + h = 0; + len = sizeof(char8); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } -uint32 hashname(NameData *n) +uint32 +hashname(NameData * n) { - uint32 h; - int len; - char *key; - - key = n->data; - - h = 0; - len = NAMEDATALEN; - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + char *key; + + key = n->data; + + h = 0; + len = NAMEDATALEN; + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } -uint32 hashchar16(char *key) +uint32 +hashchar16(char *key) { - uint32 h; - int len; - - h = 0; - len = sizeof(char16); - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + + h = 0; + len = sizeof(char16); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } @@ -234,45 +251,49 @@ uint32 hashchar16(char *key) * * "OZ's original sdbm hash" */ -uint32 hashtext(struct varlena *key) +uint32 +hashtext(struct varlena * key) { - int keylen; - char *keydata; - uint32 n; - int loop; - - keydata = VARDATA(key); - keylen = VARSIZE(key); - - /* keylen includes the four bytes in which string keylength is stored */ - keylen -= sizeof(VARSIZE(key)); - -#define HASHC n = *keydata++ + 65599 * n - - n = 0; - if (keylen > 0) { - loop = (keylen + 8 - 1) >> 3; - - switch (keylen & (8 - 1)) { - case 0: - do { /* All fall throughs */ - HASHC; - case 7: - HASHC; - case 6: - HASHC; - case 5: - HASHC; - case 4: - HASHC; - case 3: - HASHC; - case 2: - HASHC; - case 1: - HASHC; - } while (--loop); + int keylen; + char *keydata; + uint32 n; + int loop; + + keydata = VARDATA(key); + keylen = VARSIZE(key); + + /* keylen includes the four bytes in which string keylength is stored */ + keylen -= sizeof(VARSIZE(key)); + +#define HASHC n = *keydata++ + 65599 * n + + n = 0; + if (keylen > 0) + { + loop = (keylen + 8 - 1) >> 3; + + switch (keylen & (8 - 1)) + { + case 0: + do + { /* All fall throughs */ + HASHC; + case 7: + HASHC; + case 6: + HASHC; + case 5: + HASHC; + case 4: + HASHC; + case 3: + HASHC; + case 2: + HASHC; + case 1: + HASHC; + } while (--loop); + } } - } - return (n); -} + return (n); +} diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index f1233c68b2d..4829093589a 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -1,19 +1,19 @@ /*------------------------------------------------------------------------- * * hashinsert.c-- - * Item insertion in hash tables for Postgres. + * Item insertion in hash tables for Postgres. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.8 1997/08/12 22:51:30 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.9 1997/09/07 04:37:56 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <storage/bufmgr.h> #include <utils/memutils.h> @@ -22,211 +22,221 @@ static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, S static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem); /* - * _hash_doinsert() -- Handle insertion of a single HashItem in the table. + * _hash_doinsert() -- Handle insertion of a single HashItem in the table. * - * This routine is called by the public interface routines, hashbuild - * and hashinsert. By here, hashitem is filled in, and has a unique - * (xid, seqno) pair. The datum to be used as a "key" is in the - * hashitem. + * This routine is called by the public interface routines, hashbuild + * and hashinsert. By here, hashitem is filled in, and has a unique + * (xid, seqno) pair. The datum to be used as a "key" is in the + * hashitem. */ InsertIndexResult _hash_doinsert(Relation rel, HashItem hitem) { - Buffer buf; - Buffer metabuf; - BlockNumber blkno; - HashMetaPage metap; - IndexTuple itup; - InsertIndexResult res; - ScanKey itup_scankey; - int natts; - Page page; - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - /* we need a scan key to do our search, so build one */ - itup = &(hitem->hash_itup); - if ((natts = rel->rd_rel->relnatts) != 1) - elog(WARN, "Hash indices valid for only one index key."); - itup_scankey = _hash_mkscankey(rel, itup, metap); - - /* - * find the first page in the bucket chain containing this key and - * place it in buf. _hash_search obtains a read lock for us. - */ - _hash_search(rel, natts, itup_scankey, &buf, metap); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE); - - /* - * trade in our read lock for a write lock so that we can do the - * insertion. - */ - blkno = BufferGetBlockNumber(buf); - _hash_relbuf(rel, buf, HASH_READ); - buf = _hash_getbuf(rel, blkno, HASH_WRITE); - - - /* - * XXX btree comment (haven't decided what to do in hash): don't - * think the bucket can be split while we're reading the metapage. - * - * If the page was split between the time that we surrendered our - * read lock and acquired our write lock, then this page may no - * longer be the right place for the key we want to insert. 
- */ - - /* do the insertion */ - res = _hash_insertonpg(rel, buf, natts, itup_scankey, - hitem, metabuf); - - /* be tidy */ - _hash_freeskey(itup_scankey); - - return (res); + Buffer buf; + Buffer metabuf; + BlockNumber blkno; + HashMetaPage metap; + IndexTuple itup; + InsertIndexResult res; + ScanKey itup_scankey; + int natts; + Page page; + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* we need a scan key to do our search, so build one */ + itup = &(hitem->hash_itup); + if ((natts = rel->rd_rel->relnatts) != 1) + elog(WARN, "Hash indices valid for only one index key."); + itup_scankey = _hash_mkscankey(rel, itup, metap); + + /* + * find the first page in the bucket chain containing this key and + * place it in buf. _hash_search obtains a read lock for us. + */ + _hash_search(rel, natts, itup_scankey, &buf, metap); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + + /* + * trade in our read lock for a write lock so that we can do the + * insertion. + */ + blkno = BufferGetBlockNumber(buf); + _hash_relbuf(rel, buf, HASH_READ); + buf = _hash_getbuf(rel, blkno, HASH_WRITE); + + + /* + * XXX btree comment (haven't decided what to do in hash): don't think + * the bucket can be split while we're reading the metapage. + * + * If the page was split between the time that we surrendered our read + * lock and acquired our write lock, then this page may no longer be + * the right place for the key we want to insert. + */ + + /* do the insertion */ + res = _hash_insertonpg(rel, buf, natts, itup_scankey, + hitem, metabuf); + + /* be tidy */ + _hash_freeskey(itup_scankey); + + return (res); } /* - * _hash_insertonpg() -- Insert a tuple on a particular page in the table. + * _hash_insertonpg() -- Insert a tuple on a particular page in the table. * - * This recursive procedure does the following things: + * This recursive procedure does the following things: * - * + if necessary, splits the target page. - * + inserts the tuple. + * + if necessary, splits the target page. + * + inserts the tuple. * - * On entry, we must have the right buffer on which to do the - * insertion, and the buffer must be pinned and locked. On return, - * we will have dropped both the pin and the write lock on the buffer. + * On entry, we must have the right buffer on which to do the + * insertion, and the buffer must be pinned and locked. On return, + * we will have dropped both the pin and the write lock on the buffer. 
* */ -static InsertIndexResult +static InsertIndexResult _hash_insertonpg(Relation rel, - Buffer buf, - int keysz, - ScanKey scankey, - HashItem hitem, - Buffer metabuf) + Buffer buf, + int keysz, + ScanKey scankey, + HashItem hitem, + Buffer metabuf) { - InsertIndexResult res; - Page page; - BlockNumber itup_blkno; - OffsetNumber itup_off; - int itemsz; - HashPageOpaque pageopaque; - bool do_expand = false; - Buffer ovflbuf; - HashMetaPage metap; - Bucket bucket; - - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); - bucket = pageopaque->hasho_bucket; - - itemsz = IndexTupleDSize(hitem->hash_itup) - + (sizeof(HashItemData) - sizeof(IndexTupleData)); - itemsz = DOUBLEALIGN(itemsz); - - while (PageGetFreeSpace(page) < itemsz) { - /* - * no space on this page; check for an overflow page - */ - if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) { - /* - * ovfl page exists; go get it. if it doesn't have room, - * we'll find out next pass through the loop test above. - */ - ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno, - HASH_WRITE); - _hash_relbuf(rel, buf, HASH_WRITE); - buf = ovflbuf; - page = BufferGetPage(buf); - } else { - /* - * we're at the end of the bucket chain and we haven't - * found a page with enough room. allocate a new overflow - * page. - */ - do_expand = true; - ovflbuf = _hash_addovflpage(rel, &metabuf, buf); - _hash_relbuf(rel, buf, HASH_WRITE); - buf = ovflbuf; - page = BufferGetPage(buf); - - if (PageGetFreeSpace(page) < itemsz) { - /* it doesn't fit on an empty page -- give up */ - elog(WARN, "hash item too large"); - } - } - _hash_checkpage(page, LH_OVERFLOW_PAGE); + InsertIndexResult res; + Page page; + BlockNumber itup_blkno; + OffsetNumber itup_off; + int itemsz; + HashPageOpaque pageopaque; + bool do_expand = false; + Buffer ovflbuf; + HashMetaPage metap; + Bucket bucket; + + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); - Assert(pageopaque->hasho_bucket == bucket); - } - - itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem); - itup_blkno = BufferGetBlockNumber(buf); - - /* by here, the new tuple is inserted */ - res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); - - ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); - - if (res != NULL) { - /* - * Increment the number of keys in the table. - * We switch lock access type just for a moment - * to allow greater accessibility to the metapage. 
- */ - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, - HASH_READ, HASH_WRITE); - metap->hashm_nkeys += 1; - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, - HASH_WRITE, HASH_READ); - - } - - _hash_wrtbuf(rel, buf); - - if (do_expand || - (metap->hashm_nkeys / (metap->hashm_maxbucket + 1)) - > metap->hashm_ffactor) { - _hash_expandtable(rel, metabuf); - } - _hash_relbuf(rel, metabuf, HASH_READ); - return (res); -} + bucket = pageopaque->hasho_bucket; + + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + itemsz = DOUBLEALIGN(itemsz); + + while (PageGetFreeSpace(page) < itemsz) + { + + /* + * no space on this page; check for an overflow page + */ + if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) + { + + /* + * ovfl page exists; go get it. if it doesn't have room, + * we'll find out next pass through the loop test above. + */ + ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno, + HASH_WRITE); + _hash_relbuf(rel, buf, HASH_WRITE); + buf = ovflbuf; + page = BufferGetPage(buf); + } + else + { + + /* + * we're at the end of the bucket chain and we haven't found a + * page with enough room. allocate a new overflow page. + */ + do_expand = true; + ovflbuf = _hash_addovflpage(rel, &metabuf, buf); + _hash_relbuf(rel, buf, HASH_WRITE); + buf = ovflbuf; + page = BufferGetPage(buf); + + if (PageGetFreeSpace(page) < itemsz) + { + /* it doesn't fit on an empty page -- give up */ + elog(WARN, "hash item too large"); + } + } + _hash_checkpage(page, LH_OVERFLOW_PAGE); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(pageopaque->hasho_bucket == bucket); + } + + itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem); + itup_blkno = BufferGetBlockNumber(buf); + + /* by here, the new tuple is inserted */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + + ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); + + if (res != NULL) + { + + /* + * Increment the number of keys in the table. We switch lock + * access type just for a moment to allow greater accessibility to + * the metapage. + */ + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, + HASH_READ, HASH_WRITE); + metap->hashm_nkeys += 1; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, + HASH_WRITE, HASH_READ); + + } + + _hash_wrtbuf(rel, buf); + + if (do_expand || + (metap->hashm_nkeys / (metap->hashm_maxbucket + 1)) + > metap->hashm_ffactor) + { + _hash_expandtable(rel, metabuf); + } + _hash_relbuf(rel, metabuf, HASH_READ); + return (res); +} /* - * _hash_pgaddtup() -- add a tuple to a particular page in the index. + * _hash_pgaddtup() -- add a tuple to a particular page in the index. * - * This routine adds the tuple to the page as requested, and keeps the - * write lock and reference associated with the page's buffer. It is - * an error to call pgaddtup() without a write lock and reference. + * This routine adds the tuple to the page as requested, and keeps the + * write lock and reference associated with the page's buffer. It is + * an error to call pgaddtup() without a write lock and reference. 
*/ -static OffsetNumber +static OffsetNumber _hash_pgaddtup(Relation rel, - Buffer buf, - int keysz, - ScanKey itup_scankey, - Size itemsize, - HashItem hitem) + Buffer buf, + int keysz, + ScanKey itup_scankey, + Size itemsize, + HashItem hitem) { - OffsetNumber itup_off; - Page page; - - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - - itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); - PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED); - - /* write the buffer, but hold our lock */ - _hash_wrtnorelbuf(rel, buf); - - return (itup_off); + OffsetNumber itup_off; + Page page; + + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + + itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED); + + /* write the buffer, but hold our lock */ + _hash_wrtnorelbuf(rel, buf); + + return (itup_off); } diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index d976c4818c8..b6882d4d3e1 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -1,400 +1,423 @@ /*------------------------------------------------------------------------- * * hashovfl.c-- - * Overflow page management code for the Postgres hash access method + * Overflow page management code for the Postgres hash access method * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.9 1997/08/12 22:51:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.10 1997/09/07 04:37:57 momjian Exp $ * * NOTES - * Overflow pages look like ordinary relation pages. + * Overflow pages look like ordinary relation pages. * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <storage/bufmgr.h> #include <utils/memutils.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer *metabufp); -static uint32 _hash_firstfreebit(uint32 map); +static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer * metabufp); +static uint32 _hash_firstfreebit(uint32 map); /* - * _hash_addovflpage + * _hash_addovflpage + * + * Add an overflow page to the page currently pointed to by the buffer + * argument 'buf'. * - * Add an overflow page to the page currently pointed to by the buffer - * argument 'buf'. + * *Metabufp has a read lock upon entering the function; buf has a + * write lock. * - * *Metabufp has a read lock upon entering the function; buf has a - * write lock. 
- * */ Buffer -_hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf) +_hash_addovflpage(Relation rel, Buffer * metabufp, Buffer buf) { - - OverflowPageAddress oaddr; - BlockNumber ovflblkno; - Buffer ovflbuf; - HashMetaPage metap; - HashPageOpaque ovflopaque; - HashPageOpaque pageopaque; - Page page; - Page ovflpage; - - /* this had better be the last page in a bucket chain */ - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); - Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno)); - - metap = (HashMetaPage) BufferGetPage(*metabufp); - _hash_checkpage((Page) metap, LH_META_PAGE); - - /* allocate an empty overflow page */ - oaddr = _hash_getovfladdr(rel, metabufp); - if (oaddr == InvalidOvflAddress) { - elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr."); - } - ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr))); - Assert(BlockNumberIsValid(ovflblkno)); - ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE); - Assert(BufferIsValid(ovflbuf)); - ovflpage = BufferGetPage(ovflbuf); - - /* initialize the new overflow page */ - _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf)); - ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); - ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf); - ovflopaque->hasho_nextblkno = InvalidBlockNumber; - ovflopaque->hasho_flag = LH_OVERFLOW_PAGE; - ovflopaque->hasho_oaddr = oaddr; - ovflopaque->hasho_bucket = pageopaque->hasho_bucket; - _hash_wrtnorelbuf(rel, ovflbuf); - - /* logically chain overflow page to previous page */ - pageopaque->hasho_nextblkno = ovflblkno; - _hash_wrtnorelbuf(rel, buf); - return (ovflbuf); + + OverflowPageAddress oaddr; + BlockNumber ovflblkno; + Buffer ovflbuf; + HashMetaPage metap; + HashPageOpaque ovflopaque; + HashPageOpaque pageopaque; + Page page; + Page ovflpage; + + /* this had better be the last page in a bucket chain */ + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno)); + + metap = (HashMetaPage) BufferGetPage(*metabufp); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* allocate an empty overflow page */ + oaddr = _hash_getovfladdr(rel, metabufp); + if (oaddr == InvalidOvflAddress) + { + elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr."); + } + ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr))); + Assert(BlockNumberIsValid(ovflblkno)); + ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE); + Assert(BufferIsValid(ovflbuf)); + ovflpage = BufferGetPage(ovflbuf); + + /* initialize the new overflow page */ + _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf)); + ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); + ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf); + ovflopaque->hasho_nextblkno = InvalidBlockNumber; + ovflopaque->hasho_flag = LH_OVERFLOW_PAGE; + ovflopaque->hasho_oaddr = oaddr; + ovflopaque->hasho_bucket = pageopaque->hasho_bucket; + _hash_wrtnorelbuf(rel, ovflbuf); + + /* logically chain overflow page to previous page */ + pageopaque->hasho_nextblkno = ovflblkno; + _hash_wrtnorelbuf(rel, buf); + return (ovflbuf); } /* - * _hash_getovfladdr() + * _hash_getovfladdr() * - * Find an available overflow page and return its address. + * Find an available overflow page and return its address. 
* - * When we enter this function, we have a read lock on *metabufp which - * we change to a write lock immediately. Before exiting, the write lock - * is exchanged for a read lock. + * When we enter this function, we have a read lock on *metabufp which + * we change to a write lock immediately. Before exiting, the write lock + * is exchanged for a read lock. * */ -static OverflowPageAddress -_hash_getovfladdr(Relation rel, Buffer *metabufp) +static OverflowPageAddress +_hash_getovfladdr(Relation rel, Buffer * metabufp) { - HashMetaPage metap; - Buffer mapbuf = 0; - BlockNumber blkno; - PageOffset offset; - OverflowPageAddress oaddr; - SplitNumber splitnum; - uint32 *freep = NULL; - uint32 max_free; - uint32 bit; - uint32 first_page; - uint32 free_bit; - uint32 free_page; - uint32 in_use_bits; - uint32 i, j; - - metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE); - - splitnum = metap->OVFL_POINT; - max_free = metap->SPARES[splitnum]; - - free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT); - free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1); - - /* Look through all the free maps to find the first free block */ - first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT); - for ( i = first_page; i <= free_page; i++ ) { - Page mappage; - - blkno = metap->hashm_mapp[i]; - mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); - mappage = BufferGetPage(mapbuf); - _hash_checkpage(mappage, LH_BITMAP_PAGE); - freep = HashPageGetBitmap(mappage); - Assert(freep); - - if (i == free_page) - in_use_bits = free_bit; - else - in_use_bits = BMPGSZ_BIT(metap) - 1; - - if (i == first_page) { - bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1); - j = bit / BITS_PER_MAP; - bit = bit & ~(BITS_PER_MAP - 1); - } else { - bit = 0; - j = 0; + HashMetaPage metap; + Buffer mapbuf = 0; + BlockNumber blkno; + PageOffset offset; + OverflowPageAddress oaddr; + SplitNumber splitnum; + uint32 *freep = NULL; + uint32 max_free; + uint32 bit; + uint32 first_page; + uint32 free_bit; + uint32 free_page; + uint32 in_use_bits; + uint32 i, + j; + + metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE); + + splitnum = metap->OVFL_POINT; + max_free = metap->SPARES[splitnum]; + + free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT); + free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1); + + /* Look through all the free maps to find the first free block */ + first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT); + for (i = first_page; i <= free_page; i++) + { + Page mappage; + + blkno = metap->hashm_mapp[i]; + mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); + mappage = BufferGetPage(mapbuf); + _hash_checkpage(mappage, LH_BITMAP_PAGE); + freep = HashPageGetBitmap(mappage); + Assert(freep); + + if (i == free_page) + in_use_bits = free_bit; + else + in_use_bits = BMPGSZ_BIT(metap) - 1; + + if (i == first_page) + { + bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1); + j = bit / BITS_PER_MAP; + bit = bit & ~(BITS_PER_MAP - 1); + } + else + { + bit = 0; + j = 0; + } + for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) + if (freep[j] != ALL_SET) + goto found; + } + + /* No Free Page Found - have to allocate a new page */ + metap->LAST_FREED = metap->SPARES[splitnum]; + metap->SPARES[splitnum]++; + offset = metap->SPARES[splitnum] - + (splitnum ? metap->SPARES[splitnum - 1] : 0); + +#define OVMSG "HASH: Out of overflow pages. 
Out of luck.\n" + + if (offset > SPLITMASK) + { + if (++splitnum >= NCACHED) + { + elog(WARN, OVMSG); + } + metap->OVFL_POINT = splitnum; + metap->SPARES[splitnum] = metap->SPARES[splitnum - 1]; + metap->SPARES[splitnum - 1]--; + offset = 0; } - for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) - if (freep[j] != ALL_SET) - goto found; - } - - /* No Free Page Found - have to allocate a new page */ - metap->LAST_FREED = metap->SPARES[splitnum]; - metap->SPARES[splitnum]++; - offset = metap->SPARES[splitnum] - - (splitnum ? metap->SPARES[splitnum - 1] : 0); - -#define OVMSG "HASH: Out of overflow pages. Out of luck.\n" - - if (offset > SPLITMASK) { - if (++splitnum >= NCACHED) { - elog(WARN, OVMSG); + + /* Check if we need to allocate a new bitmap page */ + if (free_bit == BMPGSZ_BIT(metap) - 1) + { + /* won't be needing old map page */ + + _hash_relbuf(rel, mapbuf, HASH_WRITE); + + free_page++; + if (free_page >= NCACHED) + { + elog(WARN, OVMSG); + } + + /* + * This is tricky. The 1 indicates that you want the new page + * allocated with 1 clear bit. Actually, you are going to + * allocate 2 pages from this map. The first is going to be the + * map page, the second is the overflow page we were looking for. + * The init_bitmap routine automatically, sets the first bit of + * itself to indicate that the bitmap itself is in use. We would + * explicitly set the second bit, but don't have to if we tell + * init_bitmap not to leave it clear in the first place. + */ + if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset), + 1, free_page)) + { + elog(WARN, "overflow_page: problem with _hash_initbitmap."); + } + metap->SPARES[splitnum]++; + offset++; + if (offset > SPLITMASK) + { + if (++splitnum >= NCACHED) + { + elog(WARN, OVMSG); + } + metap->OVFL_POINT = splitnum; + metap->SPARES[splitnum] = metap->SPARES[splitnum - 1]; + metap->SPARES[splitnum - 1]--; + offset = 0; + } } - metap->OVFL_POINT = splitnum; - metap->SPARES[splitnum] = metap->SPARES[splitnum-1]; - metap->SPARES[splitnum-1]--; - offset = 0; - } - - /* Check if we need to allocate a new bitmap page */ - if (free_bit == BMPGSZ_BIT(metap) - 1) { - /* won't be needing old map page */ - - _hash_relbuf(rel, mapbuf, HASH_WRITE); - - free_page++; - if (free_page >= NCACHED) { - elog(WARN, OVMSG); + else + { + + /* + * Free_bit addresses the last used bit. Bump it to address the + * first available bit. + */ + free_bit++; + SETBIT(freep, free_bit); + _hash_wrtbuf(rel, mapbuf); } - + + /* Calculate address of the new overflow page */ + oaddr = OADDR_OF(splitnum, offset); + _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); + return (oaddr); + +found: + bit = bit + _hash_firstfreebit(freep[j]); + SETBIT(freep, bit); + _hash_wrtbuf(rel, mapbuf); + /* - * This is tricky. The 1 indicates that you want the new page - * allocated with 1 clear bit. Actually, you are going to - * allocate 2 pages from this map. The first is going to be - * the map page, the second is the overflow page we were - * looking for. The init_bitmap routine automatically, sets - * the first bit of itself to indicate that the bitmap itself - * is in use. We would explicitly set the second bit, but - * don't have to if we tell init_bitmap not to leave it clear - * in the first place. + * Bits are addressed starting with 0, but overflow pages are + * addressed beginning at 1. Bit is a bit addressnumber, so we need to + * increment it to convert it to a page number. 
*/ - if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset), - 1, free_page)) { - elog(WARN, "overflow_page: problem with _hash_initbitmap."); + + bit = 1 + bit + (i * BMPGSZ_BIT(metap)); + if (bit >= metap->LAST_FREED) + { + metap->LAST_FREED = bit - 1; } - metap->SPARES[splitnum]++; - offset++; - if (offset > SPLITMASK) { - if (++splitnum >= NCACHED) { + + /* Calculate the split number for this page */ + for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++) + ; + offset = (i ? bit - metap->SPARES[i - 1] : bit); + if (offset >= SPLITMASK) + { elog(WARN, OVMSG); - } - metap->OVFL_POINT = splitnum; - metap->SPARES[splitnum] = metap->SPARES[splitnum-1]; - metap->SPARES[splitnum-1]--; - offset = 0; } - } else { - - /* - * Free_bit addresses the last used bit. Bump it to address - * the first available bit. - */ - free_bit++; - SETBIT(freep, free_bit); - _hash_wrtbuf(rel, mapbuf); - } - - /* Calculate address of the new overflow page */ - oaddr = OADDR_OF(splitnum, offset); - _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); - return (oaddr); - - found: - bit = bit + _hash_firstfreebit(freep[j]); - SETBIT(freep, bit); - _hash_wrtbuf(rel, mapbuf); - - /* - * Bits are addressed starting with 0, but overflow pages are addressed - * beginning at 1. Bit is a bit addressnumber, so we need to increment - * it to convert it to a page number. - */ - - bit = 1 + bit + (i * BMPGSZ_BIT(metap)); - if (bit >= metap->LAST_FREED) { - metap->LAST_FREED = bit - 1; - } - - /* Calculate the split number for this page */ - for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++) - ; - offset = (i ? bit - metap->SPARES[i - 1] : bit); - if (offset >= SPLITMASK) { - elog(WARN, OVMSG); - } - - /* initialize this page */ - oaddr = OADDR_OF(i, offset); - _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); - return (oaddr); + + /* initialize this page */ + oaddr = OADDR_OF(i, offset); + _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); + return (oaddr); } /* - * _hash_firstfreebit() + * _hash_firstfreebit() + * + * Return the first bit that is not set in the argument 'map'. This + * function is used to find an available overflow page within a + * splitnumber. * - * Return the first bit that is not set in the argument 'map'. This - * function is used to find an available overflow page within a - * splitnumber. - * */ -static uint32 +static uint32 _hash_firstfreebit(uint32 map) { - uint32 i, mask; - - mask = 0x1; - for (i = 0; i < BITS_PER_MAP; i++) { - if (!(mask & map)) - return (i); - mask = mask << 1; - } - return (i); + uint32 i, + mask; + + mask = 0x1; + for (i = 0; i < BITS_PER_MAP; i++) + { + if (!(mask & map)) + return (i); + mask = mask << 1; + } + return (i); } /* - * _hash_freeovflpage() - + * _hash_freeovflpage() - * - * Mark this overflow page as free and return a buffer with - * the page that follows it (which may be defined as - * InvalidBuffer). + * Mark this overflow page as free and return a buffer with + * the page that follows it (which may be defined as + * InvalidBuffer). 
* */ Buffer _hash_freeovflpage(Relation rel, Buffer ovflbuf) { - HashMetaPage metap; - Buffer metabuf; - Buffer mapbuf; - BlockNumber prevblkno; - BlockNumber blkno; - BlockNumber nextblkno; - HashPageOpaque ovflopaque; - Page ovflpage; - Page mappage; - OverflowPageAddress addr; - SplitNumber splitnum; - uint32 *freep; - uint32 ovflpgno; - int32 bitmappage, bitmapbit; - Bucket bucket; - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - ovflpage = BufferGetPage(ovflbuf); - _hash_checkpage(ovflpage, LH_OVERFLOW_PAGE); - ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); - addr = ovflopaque->hasho_oaddr; - nextblkno = ovflopaque->hasho_nextblkno; - prevblkno = ovflopaque->hasho_prevblkno; - bucket = ovflopaque->hasho_bucket; - memset(ovflpage, 0, BufferGetPageSize(ovflbuf)); - _hash_wrtbuf(rel, ovflbuf); - - /* - * fix up the bucket chain. this is a doubly-linked list, so we - * must fix up the bucket chain members behind and ahead of the - * overflow page being deleted. - * - * XXX this should look like: - * - lock prev/next - * - modify/write prev/next (how to do write ordering with a - * doubly-linked list?) - * - unlock prev/next - */ - if (BlockNumberIsValid(prevblkno)) { - Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE); - Page prevpage = BufferGetPage(prevbuf); - HashPageOpaque prevopaque = - (HashPageOpaque) PageGetSpecialPointer(prevpage); - - _hash_checkpage(prevpage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - Assert(prevopaque->hasho_bucket == bucket); - prevopaque->hasho_nextblkno = nextblkno; - _hash_wrtbuf(rel, prevbuf); - } - if (BlockNumberIsValid(nextblkno)) { - Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE); - Page nextpage = BufferGetPage(nextbuf); - HashPageOpaque nextopaque = - (HashPageOpaque) PageGetSpecialPointer(nextpage); - - _hash_checkpage(nextpage, LH_OVERFLOW_PAGE); - Assert(nextopaque->hasho_bucket == bucket); - nextopaque->hasho_prevblkno = prevblkno; - _hash_wrtbuf(rel, nextbuf); - } - - /* - * Fix up the overflow page bitmap that tracks this particular - * overflow page. The bitmap can be found in the MetaPageData - * array element hashm_mapp[bitmappage]. - */ - splitnum = (addr >> SPLITSHIFT); - ovflpgno = - (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1; - - if (ovflpgno < metap->LAST_FREED) { - metap->LAST_FREED = ovflpgno; - } - - bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT)); - bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1); - - blkno = metap->hashm_mapp[bitmappage]; - mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); - mappage = BufferGetPage(mapbuf); - _hash_checkpage(mappage, LH_BITMAP_PAGE); - freep = HashPageGetBitmap(mappage); - CLRBIT(freep, bitmapbit); - _hash_wrtbuf(rel, mapbuf); - - _hash_relbuf(rel, metabuf, HASH_WRITE); - - /* - * now instantiate the page that replaced this one, - * if it exists, and return that buffer with a write lock. 
- */ - if (BlockNumberIsValid(nextblkno)) { - return (_hash_getbuf(rel, nextblkno, HASH_WRITE)); - } else { - return (InvalidBuffer); - } + HashMetaPage metap; + Buffer metabuf; + Buffer mapbuf; + BlockNumber prevblkno; + BlockNumber blkno; + BlockNumber nextblkno; + HashPageOpaque ovflopaque; + Page ovflpage; + Page mappage; + OverflowPageAddress addr; + SplitNumber splitnum; + uint32 *freep; + uint32 ovflpgno; + int32 bitmappage, + bitmapbit; + Bucket bucket; + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + ovflpage = BufferGetPage(ovflbuf); + _hash_checkpage(ovflpage, LH_OVERFLOW_PAGE); + ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); + addr = ovflopaque->hasho_oaddr; + nextblkno = ovflopaque->hasho_nextblkno; + prevblkno = ovflopaque->hasho_prevblkno; + bucket = ovflopaque->hasho_bucket; + memset(ovflpage, 0, BufferGetPageSize(ovflbuf)); + _hash_wrtbuf(rel, ovflbuf); + + /* + * fix up the bucket chain. this is a doubly-linked list, so we must + * fix up the bucket chain members behind and ahead of the overflow + * page being deleted. + * + * XXX this should look like: - lock prev/next - modify/write prev/next + * (how to do write ordering with a doubly-linked list?) - unlock + * prev/next + */ + if (BlockNumberIsValid(prevblkno)) + { + Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE); + Page prevpage = BufferGetPage(prevbuf); + HashPageOpaque prevopaque = + (HashPageOpaque) PageGetSpecialPointer(prevpage); + + _hash_checkpage(prevpage, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + Assert(prevopaque->hasho_bucket == bucket); + prevopaque->hasho_nextblkno = nextblkno; + _hash_wrtbuf(rel, prevbuf); + } + if (BlockNumberIsValid(nextblkno)) + { + Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE); + Page nextpage = BufferGetPage(nextbuf); + HashPageOpaque nextopaque = + (HashPageOpaque) PageGetSpecialPointer(nextpage); + + _hash_checkpage(nextpage, LH_OVERFLOW_PAGE); + Assert(nextopaque->hasho_bucket == bucket); + nextopaque->hasho_prevblkno = prevblkno; + _hash_wrtbuf(rel, nextbuf); + } + + /* + * Fix up the overflow page bitmap that tracks this particular + * overflow page. The bitmap can be found in the MetaPageData array + * element hashm_mapp[bitmappage]. + */ + splitnum = (addr >> SPLITSHIFT); + ovflpgno = + (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1; + + if (ovflpgno < metap->LAST_FREED) + { + metap->LAST_FREED = ovflpgno; + } + + bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT)); + bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1); + + blkno = metap->hashm_mapp[bitmappage]; + mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); + mappage = BufferGetPage(mapbuf); + _hash_checkpage(mappage, LH_BITMAP_PAGE); + freep = HashPageGetBitmap(mappage); + CLRBIT(freep, bitmapbit); + _hash_wrtbuf(rel, mapbuf); + + _hash_relbuf(rel, metabuf, HASH_WRITE); + + /* + * now instantiate the page that replaced this one, if it exists, and + * return that buffer with a write lock. + */ + if (BlockNumberIsValid(nextblkno)) + { + return (_hash_getbuf(rel, nextblkno, HASH_WRITE)); + } + else + { + return (InvalidBuffer); + } } /* - * _hash_initbitmap() - * - * Initialize a new bitmap page. The metapage has a write-lock upon - * entering the function. + * _hash_initbitmap() + * + * Initialize a new bitmap page. The metapage has a write-lock upon + * entering the function. * * 'pnum' is the OverflowPageAddress of the new bitmap page. 
* 'nbits' is how many bits to clear (i.e., make available) in the new @@ -404,211 +427,219 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf) * metapage's array of bitmap page OverflowPageAddresses. */ -#define INT_MASK ((1 << INT_TO_BIT) -1) +#define INT_MASK ((1 << INT_TO_BIT) -1) int32 _hash_initbitmap(Relation rel, - HashMetaPage metap, - int32 pnum, - int32 nbits, - int32 ndx) + HashMetaPage metap, + int32 pnum, + int32 nbits, + int32 ndx) { - Buffer buf; - BlockNumber blkno; - Page pg; - HashPageOpaque op; - uint32 *freep; - int clearbytes, clearints; - - blkno = OADDR_TO_BLKNO(pnum); - buf = _hash_getbuf(rel, blkno, HASH_WRITE); - pg = BufferGetPage(buf); - _hash_pageinit(pg, BufferGetPageSize(buf)); - op = (HashPageOpaque) PageGetSpecialPointer(pg); - op->hasho_oaddr = InvalidOvflAddress; - op->hasho_prevblkno = InvalidBlockNumber; - op->hasho_nextblkno = InvalidBlockNumber; - op->hasho_flag = LH_BITMAP_PAGE; - op->hasho_bucket = -1; - - freep = HashPageGetBitmap(pg); - - /* set all of the bits above 'nbits' to 1 */ - clearints = ((nbits - 1) >> INT_TO_BIT) + 1; - clearbytes = clearints << INT_TO_BYTE; - memset((char *) freep, 0, clearbytes); - memset(((char *) freep) + clearbytes, 0xFF, - BMPGSZ_BYTE(metap) - clearbytes); - freep[clearints - 1] = ALL_SET << (nbits & INT_MASK); - - /* bit 0 represents the new bitmap page */ - SETBIT(freep, 0); - - /* metapage already has a write lock */ - metap->hashm_nmaps++; - metap->hashm_mapp[ndx] = blkno; - - /* write out the new bitmap page (releasing its locks) */ - _hash_wrtbuf(rel, buf); - - return (0); + Buffer buf; + BlockNumber blkno; + Page pg; + HashPageOpaque op; + uint32 *freep; + int clearbytes, + clearints; + + blkno = OADDR_TO_BLKNO(pnum); + buf = _hash_getbuf(rel, blkno, HASH_WRITE); + pg = BufferGetPage(buf); + _hash_pageinit(pg, BufferGetPageSize(buf)); + op = (HashPageOpaque) PageGetSpecialPointer(pg); + op->hasho_oaddr = InvalidOvflAddress; + op->hasho_prevblkno = InvalidBlockNumber; + op->hasho_nextblkno = InvalidBlockNumber; + op->hasho_flag = LH_BITMAP_PAGE; + op->hasho_bucket = -1; + + freep = HashPageGetBitmap(pg); + + /* set all of the bits above 'nbits' to 1 */ + clearints = ((nbits - 1) >> INT_TO_BIT) + 1; + clearbytes = clearints << INT_TO_BYTE; + memset((char *) freep, 0, clearbytes); + memset(((char *) freep) + clearbytes, 0xFF, + BMPGSZ_BYTE(metap) - clearbytes); + freep[clearints - 1] = ALL_SET << (nbits & INT_MASK); + + /* bit 0 represents the new bitmap page */ + SETBIT(freep, 0); + + /* metapage already has a write lock */ + metap->hashm_nmaps++; + metap->hashm_mapp[ndx] = blkno; + + /* write out the new bitmap page (releasing its locks) */ + _hash_wrtbuf(rel, buf); + + return (0); } /* - * _hash_squeezebucket(rel, bucket) + * _hash_squeezebucket(rel, bucket) * - * Try to squeeze the tuples onto pages occuring earlier in the - * bucket chain in an attempt to free overflow pages. When we start - * the "squeezing", the page from which we start taking tuples (the - * "read" page) is the last bucket in the bucket chain and the page - * onto which we start squeezing tuples (the "write" page) is the - * first page in the bucket chain. The read page works backward and - * the write page works forward; the procedure terminates when the - * read page and write page are the same page. + * Try to squeeze the tuples onto pages occuring earlier in the + * bucket chain in an attempt to free overflow pages. 
When we start + * the "squeezing", the page from which we start taking tuples (the + * "read" page) is the last bucket in the bucket chain and the page + * onto which we start squeezing tuples (the "write" page) is the + * first page in the bucket chain. The read page works backward and + * the write page works forward; the procedure terminates when the + * read page and write page are the same page. */ void _hash_squeezebucket(Relation rel, - HashMetaPage metap, - Bucket bucket) + HashMetaPage metap, + Bucket bucket) { - Buffer wbuf; - Buffer rbuf = 0; - BlockNumber wblkno; - BlockNumber rblkno; - Page wpage; - Page rpage; - HashPageOpaque wopaque; - HashPageOpaque ropaque; - OffsetNumber woffnum; - OffsetNumber roffnum; - HashItem hitem; - int itemsz; - -/* elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */ - - /* - * start squeezing into the base bucket page. - */ - wblkno = BUCKET_TO_BLKNO(bucket); - wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); - wpage = BufferGetPage(wbuf); - _hash_checkpage(wpage, LH_BUCKET_PAGE); - wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); - - /* - * if there aren't any overflow pages, there's nothing to squeeze. - */ - if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) { - _hash_relbuf(rel, wbuf, HASH_WRITE); - return; - } - - /* - * find the last page in the bucket chain by starting at the base - * bucket page and working forward. - * - * XXX if chains tend to be long, we should probably move forward - * using HASH_READ and then _hash_chgbufaccess to HASH_WRITE when - * we reach the end. if they are short we probably don't care - * very much. if the hash function is working at all, they had - * better be short.. - */ - ropaque = wopaque; - do { - rblkno = ropaque->hasho_nextblkno; - if (ropaque != wopaque) { - _hash_relbuf(rel, rbuf, HASH_WRITE); - } - rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); - rpage = BufferGetPage(rbuf); - _hash_checkpage(rpage, LH_OVERFLOW_PAGE); - Assert(!PageIsEmpty(rpage)); - ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); - Assert(ropaque->hasho_bucket == bucket); - } while (BlockNumberIsValid(ropaque->hasho_nextblkno)); - - /* - * squeeze the tuples. - */ - roffnum = FirstOffsetNumber; - for(;;) { - hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum)); - itemsz = IndexTupleDSize(hitem->hash_itup) - + (sizeof(HashItemData) - sizeof(IndexTupleData)); - itemsz = DOUBLEALIGN(itemsz); - + Buffer wbuf; + Buffer rbuf = 0; + BlockNumber wblkno; + BlockNumber rblkno; + Page wpage; + Page rpage; + HashPageOpaque wopaque; + HashPageOpaque ropaque; + OffsetNumber woffnum; + OffsetNumber roffnum; + HashItem hitem; + int itemsz; + +/* elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */ + /* - * walk up the bucket chain, looking for a page big enough for - * this item. + * start squeezing into the base bucket page. */ - while (PageGetFreeSpace(wpage) < itemsz) { - wblkno = wopaque->hasho_nextblkno; + wblkno = BUCKET_TO_BLKNO(bucket); + wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); + wpage = BufferGetPage(wbuf); + _hash_checkpage(wpage, LH_BUCKET_PAGE); + wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); - _hash_wrtbuf(rel, wbuf); - - if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) { - _hash_wrtbuf(rel, rbuf); - /* wbuf is already released */ + /* + * if there aren't any overflow pages, there's nothing to squeeze. 
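+	 * With no overflow chain the primary bucket page is both the "read"
+	 * and the "write" page, so the two pointers have already met.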
+ */ + if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) + { + _hash_relbuf(rel, wbuf, HASH_WRITE); return; - } - - wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); - wpage = BufferGetPage(wbuf); - _hash_checkpage(wpage, LH_OVERFLOW_PAGE); - Assert(!PageIsEmpty(wpage)); - wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); - Assert(wopaque->hasho_bucket == bucket); } - - /* - * if we're here, we have found room so insert on the "write" - * page. - */ - woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage)); - PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED); - - /* - * delete the tuple from the "read" page. - * PageIndexTupleDelete repacks the ItemId array, so 'roffnum' - * will be "advanced" to the "next" ItemId. + + /* + * find the last page in the bucket chain by starting at the base + * bucket page and working forward. + * + * XXX if chains tend to be long, we should probably move forward using + * HASH_READ and then _hash_chgbufaccess to HASH_WRITE when we reach + * the end. if they are short we probably don't care very much. if + * the hash function is working at all, they had better be short.. */ - PageIndexTupleDelete(rpage, roffnum); - _hash_wrtnorelbuf(rel, rbuf); - + ropaque = wopaque; + do + { + rblkno = ropaque->hasho_nextblkno; + if (ropaque != wopaque) + { + _hash_relbuf(rel, rbuf, HASH_WRITE); + } + rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); + rpage = BufferGetPage(rbuf); + _hash_checkpage(rpage, LH_OVERFLOW_PAGE); + Assert(!PageIsEmpty(rpage)); + ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); + Assert(ropaque->hasho_bucket == bucket); + } while (BlockNumberIsValid(ropaque->hasho_nextblkno)); + /* - * if the "read" page is now empty because of the deletion, - * free it. + * squeeze the tuples. */ - if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) { - rblkno = ropaque->hasho_prevblkno; - Assert(BlockNumberIsValid(rblkno)); - - /* - * free this overflow page. the extra _hash_relbuf is - * because _hash_freeovflpage gratuitously returns the - * next page (we want the previous page and will get it - * ourselves later). - */ - rbuf = _hash_freeovflpage(rel, rbuf); - if (BufferIsValid(rbuf)) { - _hash_relbuf(rel, rbuf, HASH_WRITE); - } - - if (rblkno == wblkno) { - /* rbuf is already released */ - _hash_wrtbuf(rel, wbuf); - return; - } - - rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); - rpage = BufferGetPage(rbuf); - _hash_checkpage(rpage, LH_OVERFLOW_PAGE); - Assert(!PageIsEmpty(rpage)); - ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); - Assert(ropaque->hasho_bucket == bucket); - - roffnum = FirstOffsetNumber; + roffnum = FirstOffsetNumber; + for (;;) + { + hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum)); + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + itemsz = DOUBLEALIGN(itemsz); + + /* + * walk up the bucket chain, looking for a page big enough for + * this item. 
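+		 *
+		 * "Big enough" means PageGetFreeSpace() can cover the
+		 * DOUBLEALIGNed size computed above; e.g. with 8-byte double
+		 * alignment an item whose raw size works out to 34 bytes is
+		 * charged DOUBLEALIGN(34) == 40 bytes of free space.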
+ */ + while (PageGetFreeSpace(wpage) < itemsz) + { + wblkno = wopaque->hasho_nextblkno; + + _hash_wrtbuf(rel, wbuf); + + if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) + { + _hash_wrtbuf(rel, rbuf); + /* wbuf is already released */ + return; + } + + wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); + wpage = BufferGetPage(wbuf); + _hash_checkpage(wpage, LH_OVERFLOW_PAGE); + Assert(!PageIsEmpty(wpage)); + wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); + Assert(wopaque->hasho_bucket == bucket); + } + + /* + * if we're here, we have found room so insert on the "write" + * page. + */ + woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage)); + PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED); + + /* + * delete the tuple from the "read" page. PageIndexTupleDelete + * repacks the ItemId array, so 'roffnum' will be "advanced" to + * the "next" ItemId. + */ + PageIndexTupleDelete(rpage, roffnum); + _hash_wrtnorelbuf(rel, rbuf); + + /* + * if the "read" page is now empty because of the deletion, free + * it. + */ + if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) + { + rblkno = ropaque->hasho_prevblkno; + Assert(BlockNumberIsValid(rblkno)); + + /* + * free this overflow page. the extra _hash_relbuf is because + * _hash_freeovflpage gratuitously returns the next page (we + * want the previous page and will get it ourselves later). + */ + rbuf = _hash_freeovflpage(rel, rbuf); + if (BufferIsValid(rbuf)) + { + _hash_relbuf(rel, rbuf, HASH_WRITE); + } + + if (rblkno == wblkno) + { + /* rbuf is already released */ + _hash_wrtbuf(rel, wbuf); + return; + } + + rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); + rpage = BufferGetPage(rbuf); + _hash_checkpage(rpage, LH_OVERFLOW_PAGE); + Assert(!PageIsEmpty(rpage)); + ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); + Assert(ropaque->hasho_bucket == bucket); + + roffnum = FirstOffsetNumber; + } } - } } diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 49c8f03f524..6c819b652d2 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -1,30 +1,30 @@ /*------------------------------------------------------------------------- * * hashpage.c-- - * Hash table page management code for the Postgres hash access method + * Hash table page management code for the Postgres hash access method * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.9 1997/08/18 20:51:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.10 1997/09/07 04:38:00 momjian Exp $ * * NOTES - * Postgres hash pages look like ordinary relation pages. The opaque - * data at high addresses includes information about the page including - * whether a page is an overflow page or a true bucket, the block - * numbers of the preceding and following pages, and the overflow - * address of the page if it is an overflow page. + * Postgres hash pages look like ordinary relation pages. The opaque + * data at high addresses includes information about the page including + * whether a page is an overflow page or a true bucket, the block + * numbers of the preceding and following pages, and the overflow + * address of the page if it is an overflow page. * - * The first page in a hash relation, page zero, is special -- it stores - * information describing the hash table; it is referred to as teh - * "meta page." Pages one and higher store the actual data. 
+ * The first page in a hash relation, page zero, is special -- it stores + * information describing the hash table; it is referred to as teh + * "meta page." Pages one and higher store the actual data. * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <storage/bufmgr.h> #include <miscadmin.h> @@ -33,411 +33,429 @@ #include <access/genam.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access); -static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access); -static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket); - -/* - * We use high-concurrency locking on hash indices. There are two cases in - * which we don't do locking. One is when we're building the index. - * Since the creating transaction has not committed, no one can see - * the index, and there's no reason to share locks. The second case - * is when we're just starting up the database system. We use some - * special-purpose initialization code in the relation cache manager - * (see utils/cache/relcache.c) to allow us to do indexed scans on - * the system catalogs before we'd normally be able to. This happens - * before the lock table is fully initialized, so we can't use it. - * Strictly speaking, this violates 2pl, but we don't do 2pl on the - * system catalogs anyway. +static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access); +static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access); +static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket); + +/* + * We use high-concurrency locking on hash indices. There are two cases in + * which we don't do locking. One is when we're building the index. + * Since the creating transaction has not committed, no one can see + * the index, and there's no reason to share locks. The second case + * is when we're just starting up the database system. We use some + * special-purpose initialization code in the relation cache manager + * (see utils/cache/relcache.c) to allow us to do indexed scans on + * the system catalogs before we'd normally be able to. This happens + * before the lock table is fully initialized, so we can't use it. + * Strictly speaking, this violates 2pl, but we don't do 2pl on the + * system catalogs anyway. */ -#define USELOCKING (!BuildingHash && !IsInitProcessingMode()) +#define USELOCKING (!BuildingHash && !IsInitProcessingMode()) /* - * _hash_metapinit() -- Initialize the metadata page of a hash index, - * the two buckets that we begin with and the initial - * bitmap page. + * _hash_metapinit() -- Initialize the metadata page of a hash index, + * the two buckets that we begin with and the initial + * bitmap page. */ void _hash_metapinit(Relation rel) { - HashMetaPage metap; - HashPageOpaque pageopaque; - Buffer metabuf; - Buffer buf; - Page pg; - int nbuckets; - uint32 nelem; /* number elements */ - uint32 lg2nelem; /* _hash_log2(nelem) */ - uint32 nblocks; - uint16 i; - - /* can't be sharing this with anyone, now... 
*/ - if (USELOCKING) - RelationSetLockForWrite(rel); - - if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) { - elog(WARN, "Cannot initialize non-empty hash table %s", - RelationGetRelationName(rel)); - } - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); - pg = BufferGetPage(metabuf); - metap = (HashMetaPage) pg; - _hash_pageinit(pg, BufferGetPageSize(metabuf)); - - metap->hashm_magic = HASH_MAGIC; - metap->hashm_version = HASH_VERSION; - metap->hashm_nkeys = 0; - metap->hashm_nmaps = 0; - metap->hashm_ffactor = DEFAULT_FFACTOR; - metap->hashm_bsize = BufferGetPageSize(metabuf); - metap->hashm_bshift = _hash_log2(metap->hashm_bsize); - for (i = metap->hashm_bshift; i > 0; --i) { - if ((1 << i) < (metap->hashm_bsize - - (DOUBLEALIGN(sizeof(PageHeaderData)) + - DOUBLEALIGN(sizeof(HashPageOpaqueData))))) { - break; + HashMetaPage metap; + HashPageOpaque pageopaque; + Buffer metabuf; + Buffer buf; + Page pg; + int nbuckets; + uint32 nelem; /* number elements */ + uint32 lg2nelem; /* _hash_log2(nelem) */ + uint32 nblocks; + uint16 i; + + /* can't be sharing this with anyone, now... */ + if (USELOCKING) + RelationSetLockForWrite(rel); + + if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) + { + elog(WARN, "Cannot initialize non-empty hash table %s", + RelationGetRelationName(rel)); + } + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); + pg = BufferGetPage(metabuf); + metap = (HashMetaPage) pg; + _hash_pageinit(pg, BufferGetPageSize(metabuf)); + + metap->hashm_magic = HASH_MAGIC; + metap->hashm_version = HASH_VERSION; + metap->hashm_nkeys = 0; + metap->hashm_nmaps = 0; + metap->hashm_ffactor = DEFAULT_FFACTOR; + metap->hashm_bsize = BufferGetPageSize(metabuf); + metap->hashm_bshift = _hash_log2(metap->hashm_bsize); + for (i = metap->hashm_bshift; i > 0; --i) + { + if ((1 << i) < (metap->hashm_bsize - + (DOUBLEALIGN(sizeof(PageHeaderData)) + + DOUBLEALIGN(sizeof(HashPageOpaqueData))))) + { + break; + } } - } - Assert(i); - metap->hashm_bmsize = 1 << i; - metap->hashm_procid = index_getprocid(rel, 1, HASHPROC); - - /* - * Make nelem = 2 rather than 0 so that we end up allocating space - * for the next greater power of two number of buckets. - */ - nelem = 2; - lg2nelem = 1; /*_hash_log2(MAX(nelem, 2)) */ - nbuckets = 2; /*1 << lg2nelem */ - - memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares)); - memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp)); - - metap->hashm_spares[lg2nelem] = 2; /* lg2nelem + 1 */ - metap->hashm_spares[lg2nelem + 1] = 2; /* lg2nelem + 1 */ - metap->hashm_ovflpoint = 1; /* lg2nelem */ - metap->hashm_lastfreed = 2; - - metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */ - metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */ - - pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); - pageopaque->hasho_oaddr = InvalidOvflAddress; - pageopaque->hasho_prevblkno = InvalidBlockNumber; - pageopaque->hasho_nextblkno = InvalidBlockNumber; - pageopaque->hasho_flag = LH_META_PAGE; - pageopaque->hasho_bucket = -1; - - /* - * First bitmap page is at: splitpoint lg2nelem page offset 1 which - * turns out to be page 3. Couldn't initialize page 3 until we created - * the first two buckets above. 
- */ - if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0)) - elog(WARN, "Problem with _hash_initbitmap."); - - /* all done */ - _hash_wrtnorelbuf(rel, metabuf); - - /* - * initialize the first two buckets - */ - for (i = 0; i <= 1; i++) { - buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE); - pg = BufferGetPage(buf); - _hash_pageinit(pg, BufferGetPageSize(buf)); + Assert(i); + metap->hashm_bmsize = 1 << i; + metap->hashm_procid = index_getprocid(rel, 1, HASHPROC); + + /* + * Make nelem = 2 rather than 0 so that we end up allocating space for + * the next greater power of two number of buckets. + */ + nelem = 2; + lg2nelem = 1; /* _hash_log2(MAX(nelem, 2)) */ + nbuckets = 2; /* 1 << lg2nelem */ + + memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares)); + memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp)); + + metap->hashm_spares[lg2nelem] = 2; /* lg2nelem + 1 */ + metap->hashm_spares[lg2nelem + 1] = 2; /* lg2nelem + 1 */ + metap->hashm_ovflpoint = 1; /* lg2nelem */ + metap->hashm_lastfreed = 2; + + metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */ + metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */ + pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); pageopaque->hasho_oaddr = InvalidOvflAddress; pageopaque->hasho_prevblkno = InvalidBlockNumber; pageopaque->hasho_nextblkno = InvalidBlockNumber; - pageopaque->hasho_flag = LH_BUCKET_PAGE; - pageopaque->hasho_bucket = i; - _hash_wrtbuf(rel, buf); - } - - _hash_relbuf(rel, metabuf, HASH_WRITE); - - if (USELOCKING) - RelationUnsetLockForWrite(rel); + pageopaque->hasho_flag = LH_META_PAGE; + pageopaque->hasho_bucket = -1; + + /* + * First bitmap page is at: splitpoint lg2nelem page offset 1 which + * turns out to be page 3. Couldn't initialize page 3 until we + * created the first two buckets above. + */ + if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0)) + elog(WARN, "Problem with _hash_initbitmap."); + + /* all done */ + _hash_wrtnorelbuf(rel, metabuf); + + /* + * initialize the first two buckets + */ + for (i = 0; i <= 1; i++) + { + buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE); + pg = BufferGetPage(buf); + _hash_pageinit(pg, BufferGetPageSize(buf)); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); + pageopaque->hasho_oaddr = InvalidOvflAddress; + pageopaque->hasho_prevblkno = InvalidBlockNumber; + pageopaque->hasho_nextblkno = InvalidBlockNumber; + pageopaque->hasho_flag = LH_BUCKET_PAGE; + pageopaque->hasho_bucket = i; + _hash_wrtbuf(rel, buf); + } + + _hash_relbuf(rel, metabuf, HASH_WRITE); + + if (USELOCKING) + RelationUnsetLockForWrite(rel); } /* - * _hash_getbuf() -- Get a buffer by block number for read or write. + * _hash_getbuf() -- Get a buffer by block number for read or write. * - * When this routine returns, the appropriate lock is set on the - * requested buffer its reference count is correct. + * When this routine returns, the appropriate lock is set on the + * requested buffer its reference count is correct. * - * XXX P_NEW is not used because, unlike the tree structures, we - * need the bucket blocks to be at certain block numbers. we must - * depend on the caller to call _hash_pageinit on the block if it - * knows that this is a new block. + * XXX P_NEW is not used because, unlike the tree structures, we + * need the bucket blocks to be at certain block numbers. we must + * depend on the caller to call _hash_pageinit on the block if it + * knows that this is a new block. 
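+ *
+ * The usual calling pattern in this file pairs this routine with
+ * _hash_relbuf (read case shown; HASH_WRITE is symmetric):
+ *
+ *		buf = _hash_getbuf(rel, blkno, HASH_READ);
+ *		page = BufferGetPage(buf);
+ *		...examine 'page'...
+ *		_hash_relbuf(rel, buf, HASH_READ);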
*/ Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access) { - Buffer buf; - - if (blkno == P_NEW) { - elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW"); - } - switch (access) { - case HASH_WRITE: - case HASH_READ: - _hash_setpagelock(rel, blkno, access); - break; - default: - elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %s", - access, RelationGetRelationName(rel)); - break; - } - buf = ReadBuffer(rel, blkno); - - /* ref count and lock type are correct */ - return (buf); + Buffer buf; + + if (blkno == P_NEW) + { + elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW"); + } + switch (access) + { + case HASH_WRITE: + case HASH_READ: + _hash_setpagelock(rel, blkno, access); + break; + default: + elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %s", + access, RelationGetRelationName(rel)); + break; + } + buf = ReadBuffer(rel, blkno); + + /* ref count and lock type are correct */ + return (buf); } /* - * _hash_relbuf() -- release a locked buffer. + * _hash_relbuf() -- release a locked buffer. */ void _hash_relbuf(Relation rel, Buffer buf, int access) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - - switch (access) { - case HASH_WRITE: - case HASH_READ: - _hash_unsetpagelock(rel, blkno, access); - break; - default: - elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %s", - access, blkno, RelationGetRelationName(rel)); - } - - ReleaseBuffer(buf); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + + switch (access) + { + case HASH_WRITE: + case HASH_READ: + _hash_unsetpagelock(rel, blkno, access); + break; + default: + elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %s", + access, blkno, RelationGetRelationName(rel)); + } + + ReleaseBuffer(buf); } /* - * _hash_wrtbuf() -- write a hash page to disk. + * _hash_wrtbuf() -- write a hash page to disk. * - * This routine releases the lock held on the buffer and our reference - * to it. It is an error to call _hash_wrtbuf() without a write lock - * or a reference to the buffer. + * This routine releases the lock held on the buffer and our reference + * to it. It is an error to call _hash_wrtbuf() without a write lock + * or a reference to the buffer. */ void _hash_wrtbuf(Relation rel, Buffer buf) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - WriteBuffer(buf); - _hash_unsetpagelock(rel, blkno, HASH_WRITE); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteBuffer(buf); + _hash_unsetpagelock(rel, blkno, HASH_WRITE); } /* - * _hash_wrtnorelbuf() -- write a hash page to disk, but do not release - * our reference or lock. + * _hash_wrtnorelbuf() -- write a hash page to disk, but do not release + * our reference or lock. * - * It is an error to call _hash_wrtnorelbuf() without a write lock - * or a reference to the buffer. + * It is an error to call _hash_wrtnorelbuf() without a write lock + * or a reference to the buffer. 
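+ *
+ * The two write routines thus support two calling patterns:
+ *
+ *		_hash_wrtbuf(rel, buf);			write; lock and pin are gone
+ *
+ *		_hash_wrtnorelbuf(rel, buf);	write; keep working on the page
+ *		...further updates to 'buf'...
+ *		_hash_wrtbuf(rel, buf);			final write; release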
*/ void _hash_wrtnorelbuf(Relation rel, Buffer buf) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - WriteNoReleaseBuffer(buf); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteNoReleaseBuffer(buf); } Page _hash_chgbufaccess(Relation rel, - Buffer *bufp, - int from_access, - int to_access) + Buffer * bufp, + int from_access, + int to_access) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(*bufp); - - switch (from_access) { - case HASH_WRITE: - _hash_wrtbuf(rel, *bufp); - break; - case HASH_READ: - _hash_relbuf(rel, *bufp, from_access); - break; - default: - elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %s", - from_access, blkno, RelationGetRelationName(rel)); - break; - } - *bufp = _hash_getbuf(rel, blkno, to_access); - return (BufferGetPage(*bufp)); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(*bufp); + + switch (from_access) + { + case HASH_WRITE: + _hash_wrtbuf(rel, *bufp); + break; + case HASH_READ: + _hash_relbuf(rel, *bufp, from_access); + break; + default: + elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %s", + from_access, blkno, RelationGetRelationName(rel)); + break; + } + *bufp = _hash_getbuf(rel, blkno, to_access); + return (BufferGetPage(*bufp)); } /* - * _hash_pageinit() -- Initialize a new page. + * _hash_pageinit() -- Initialize a new page. */ void _hash_pageinit(Page page, Size size) { - Assert(((PageHeader) page)->pd_lower == 0); - Assert(((PageHeader) page)->pd_upper == 0); - Assert(((PageHeader) page)->pd_special == 0); - - /* - * Cargo-cult programming -- don't really need this to be zero, but - * creating new pages is an infrequent occurrence and it makes me feel - * good when I know they're empty. - */ - memset(page, 0, size); - - PageInit(page, size, sizeof(HashPageOpaqueData)); + Assert(((PageHeader) page)->pd_lower == 0); + Assert(((PageHeader) page)->pd_upper == 0); + Assert(((PageHeader) page)->pd_special == 0); + + /* + * Cargo-cult programming -- don't really need this to be zero, but + * creating new pages is an infrequent occurrence and it makes me feel + * good when I know they're empty. 
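+	 *
+	 * Callers follow the initialization with their own special-space
+	 * setup, as in the bucket loop of _hash_metapinit above:
+	 *
+	 *		_hash_pageinit(pg, BufferGetPageSize(buf));
+	 *		pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
+	 *		pageopaque->hasho_flag = LH_BUCKET_PAGE;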
+ */ + memset(page, 0, size); + + PageInit(page, size, sizeof(HashPageOpaqueData)); } static void _hash_setpagelock(Relation rel, - BlockNumber blkno, - int access) + BlockNumber blkno, + int access) { - ItemPointerData iptr; - - if (USELOCKING) { - ItemPointerSet(&iptr, blkno, 1); - - switch (access) { - case HASH_WRITE: - RelationSetSingleWLockPage(rel, &iptr); - break; - case HASH_READ: - RelationSetSingleRLockPage(rel, &iptr); - break; - default: - elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %s", - access, blkno, RelationGetRelationName(rel)); - break; + ItemPointerData iptr; + + if (USELOCKING) + { + ItemPointerSet(&iptr, blkno, 1); + + switch (access) + { + case HASH_WRITE: + RelationSetSingleWLockPage(rel, &iptr); + break; + case HASH_READ: + RelationSetSingleRLockPage(rel, &iptr); + break; + default: + elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %s", + access, blkno, RelationGetRelationName(rel)); + break; + } } - } } static void _hash_unsetpagelock(Relation rel, - BlockNumber blkno, - int access) + BlockNumber blkno, + int access) { - ItemPointerData iptr; - - if (USELOCKING) { - ItemPointerSet(&iptr, blkno, 1); - - switch (access) { - case HASH_WRITE: - RelationUnsetSingleWLockPage(rel, &iptr); - break; - case HASH_READ: - RelationUnsetSingleRLockPage(rel, &iptr); - break; - default: - elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %s", - access, blkno, RelationGetRelationName(rel)); - break; + ItemPointerData iptr; + + if (USELOCKING) + { + ItemPointerSet(&iptr, blkno, 1); + + switch (access) + { + case HASH_WRITE: + RelationUnsetSingleWLockPage(rel, &iptr); + break; + case HASH_READ: + RelationUnsetSingleRLockPage(rel, &iptr); + break; + default: + elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %s", + access, blkno, RelationGetRelationName(rel)); + break; + } } - } } void _hash_pagedel(Relation rel, ItemPointer tid) { - Buffer buf; - Buffer metabuf; - Page page; - BlockNumber blkno; - OffsetNumber offno; - HashMetaPage metap; - HashPageOpaque opaque; - - blkno = ItemPointerGetBlockNumber(tid); - offno = ItemPointerGetOffsetNumber(tid); - - buf = _hash_getbuf(rel, blkno, HASH_WRITE); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - - PageIndexTupleDelete(page, offno); - _hash_wrtnorelbuf(rel, buf); - - if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) { - buf = _hash_freeovflpage(rel, buf); - if (BufferIsValid(buf)) { - _hash_relbuf(rel, buf, HASH_WRITE); + Buffer buf; + Buffer metabuf; + Page page; + BlockNumber blkno; + OffsetNumber offno; + HashMetaPage metap; + HashPageOpaque opaque; + + blkno = ItemPointerGetBlockNumber(tid); + offno = ItemPointerGetOffsetNumber(tid); + + buf = _hash_getbuf(rel, blkno, HASH_WRITE); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + PageIndexTupleDelete(page, offno); + _hash_wrtnorelbuf(rel, buf); + + if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) + { + buf = _hash_freeovflpage(rel, buf); + if (BufferIsValid(buf)) + { + _hash_relbuf(rel, buf, HASH_WRITE); + } } - } else { - _hash_relbuf(rel, buf, HASH_WRITE); - } - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - ++metap->hashm_nkeys; - _hash_wrtbuf(rel, metabuf); + else + { + 
_hash_relbuf(rel, buf, HASH_WRITE); + } + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + ++metap->hashm_nkeys; + _hash_wrtbuf(rel, metabuf); } void _hash_expandtable(Relation rel, Buffer metabuf) { - HashMetaPage metap; - Bucket old_bucket; - Bucket new_bucket; - uint32 spare_ndx; - -/* elog(DEBUG, "_hash_expandtable: expanding..."); */ - - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); - new_bucket = ++metap->MAX_BUCKET; - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); - old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK); - - /* - * If the split point is increasing (MAX_BUCKET's log base 2 - * * increases), we need to copy the current contents of the spare - * split bucket to the next bucket. - */ - spare_ndx = _hash_log2(metap->MAX_BUCKET + 1); - if (spare_ndx > metap->OVFL_POINT) { - - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); - metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT]; - metap->OVFL_POINT = spare_ndx; - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); - } - - if (new_bucket > metap->HIGH_MASK) { - - /* Starting a new doubling */ - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); - metap->LOW_MASK = metap->HIGH_MASK; - metap->HIGH_MASK = new_bucket | metap->LOW_MASK; - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); - - } - /* Relocate records to the new bucket */ - _hash_splitpage(rel, metabuf, old_bucket, new_bucket); + HashMetaPage metap; + Bucket old_bucket; + Bucket new_bucket; + uint32 spare_ndx; + +/* elog(DEBUG, "_hash_expandtable: expanding..."); */ + + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); + new_bucket = ++metap->MAX_BUCKET; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); + old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK); + + /* + * If the split point is increasing (MAX_BUCKET's log base 2 * + * increases), we need to copy the current contents of the spare split + * bucket to the next bucket. 
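+	 *
+	 * For example, when MAX_BUCKET advances from 3 to 4 we get
+	 * _hash_log2(4 + 1) == 3; if OVFL_POINT is still 2, the code below
+	 * carries the spare count forward:
+	 *
+	 *		metap->SPARES[3] = metap->SPARES[2];
+	 *		metap->OVFL_POINT = 3;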
+ */ + spare_ndx = _hash_log2(metap->MAX_BUCKET + 1); + if (spare_ndx > metap->OVFL_POINT) + { + + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); + metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT]; + metap->OVFL_POINT = spare_ndx; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); + } + + if (new_bucket > metap->HIGH_MASK) + { + + /* Starting a new doubling */ + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); + metap->LOW_MASK = metap->HIGH_MASK; + metap->HIGH_MASK = new_bucket | metap->LOW_MASK; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); + + } + /* Relocate records to the new bucket */ + _hash_splitpage(rel, metabuf, old_bucket, new_bucket); } @@ -450,224 +468,243 @@ _hash_expandtable(Relation rel, Buffer metabuf) */ static void _hash_splitpage(Relation rel, - Buffer metabuf, - Bucket obucket, - Bucket nbucket) + Buffer metabuf, + Bucket obucket, + Bucket nbucket) { - Bucket bucket; - Buffer obuf; - Buffer nbuf; - Buffer ovflbuf; - BlockNumber oblkno; - BlockNumber nblkno; - bool null; - Datum datum; - HashItem hitem; - HashPageOpaque oopaque; - HashPageOpaque nopaque; - HashMetaPage metap; - IndexTuple itup; - int itemsz; - OffsetNumber ooffnum; - OffsetNumber noffnum; - OffsetNumber omaxoffnum; - Page opage; - Page npage; - TupleDesc itupdesc; - -/* elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d", - obucket, obucket, nbucket); + Bucket bucket; + Buffer obuf; + Buffer nbuf; + Buffer ovflbuf; + BlockNumber oblkno; + BlockNumber nblkno; + bool null; + Datum datum; + HashItem hitem; + HashPageOpaque oopaque; + HashPageOpaque nopaque; + HashMetaPage metap; + IndexTuple itup; + int itemsz; + OffsetNumber ooffnum; + OffsetNumber noffnum; + OffsetNumber omaxoffnum; + Page opage; + Page npage; + TupleDesc itupdesc; + +/* elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d", + obucket, obucket, nbucket); */ - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - /* get the buffers & pages */ - oblkno = BUCKET_TO_BLKNO(obucket); - nblkno = BUCKET_TO_BLKNO(nbucket); - obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); - nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE); - opage = BufferGetPage(obuf); - npage = BufferGetPage(nbuf); - - /* initialize the new bucket */ - _hash_pageinit(npage, BufferGetPageSize(nbuf)); - nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); - nopaque->hasho_prevblkno = InvalidBlockNumber; - nopaque->hasho_nextblkno = InvalidBlockNumber; - nopaque->hasho_flag = LH_BUCKET_PAGE; - nopaque->hasho_oaddr = InvalidOvflAddress; - nopaque->hasho_bucket = nbucket; - _hash_wrtnorelbuf(rel, nbuf); - - /* - * make sure the old bucket isn't empty. advance 'opage' and - * friends through the overflow bucket chain until we find a - * non-empty page. - * - * XXX we should only need this once, if we are careful to - * preserve the invariant that overflow pages are never empty. - */ - _hash_checkpage(opage, LH_BUCKET_PAGE); - oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); - if (PageIsEmpty(opage)) { - oblkno = oopaque->hasho_nextblkno; - _hash_relbuf(rel, obuf, HASH_WRITE); - if (!BlockNumberIsValid(oblkno)) { - /* - * the old bucket is completely empty; of course, the new - * bucket will be as well, but since it's a base bucket - * page we don't care. 
- */ - _hash_relbuf(rel, nbuf, HASH_WRITE); - return; - } + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* get the buffers & pages */ + oblkno = BUCKET_TO_BLKNO(obucket); + nblkno = BUCKET_TO_BLKNO(nbucket); obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); + nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE); opage = BufferGetPage(obuf); - _hash_checkpage(opage, LH_OVERFLOW_PAGE); - if (PageIsEmpty(opage)) { - elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno); - } - oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); - } - - /* - * we are now guaranteed that 'opage' is not empty. partition the - * tuples in the old bucket between the old bucket and the new - * bucket, advancing along their respective overflow bucket chains - * and adding overflow pages as needed. - */ - ooffnum = FirstOffsetNumber; - omaxoffnum = PageGetMaxOffsetNumber(opage); - for (;;) { + npage = BufferGetPage(nbuf); + + /* initialize the new bucket */ + _hash_pageinit(npage, BufferGetPageSize(nbuf)); + nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); + nopaque->hasho_prevblkno = InvalidBlockNumber; + nopaque->hasho_nextblkno = InvalidBlockNumber; + nopaque->hasho_flag = LH_BUCKET_PAGE; + nopaque->hasho_oaddr = InvalidOvflAddress; + nopaque->hasho_bucket = nbucket; + _hash_wrtnorelbuf(rel, nbuf); + /* - * at each iteration through this loop, each of these variables - * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum + * make sure the old bucket isn't empty. advance 'opage' and friends + * through the overflow bucket chain until we find a non-empty page. + * + * XXX we should only need this once, if we are careful to preserve the + * invariant that overflow pages are never empty. */ + _hash_checkpage(opage, LH_BUCKET_PAGE); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + if (PageIsEmpty(opage)) + { + oblkno = oopaque->hasho_nextblkno; + _hash_relbuf(rel, obuf, HASH_WRITE); + if (!BlockNumberIsValid(oblkno)) + { - /* check if we're at the end of the page */ - if (ooffnum > omaxoffnum) { - /* at end of page, but check for overflow page */ - oblkno = oopaque->hasho_nextblkno; - if (BlockNumberIsValid(oblkno)) { - /* - * we ran out of tuples on this particular page, but - * we have more overflow pages; re-init values. - */ - _hash_wrtbuf(rel, obuf); + /* + * the old bucket is completely empty; of course, the new + * bucket will be as well, but since it's a base bucket page + * we don't care. + */ + _hash_relbuf(rel, nbuf, HASH_WRITE); + return; + } obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); opage = BufferGetPage(obuf); _hash_checkpage(opage, LH_OVERFLOW_PAGE); - oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); - - /* we're guaranteed that an ovfl page has at least 1 tuple */ - if (PageIsEmpty(opage)) { - elog(WARN, "_hash_splitpage: empty ovfl page %d!", - oblkno); + if (PageIsEmpty(opage)) + { + elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno); } - ooffnum = FirstOffsetNumber; - omaxoffnum = PageGetMaxOffsetNumber(opage); - } else { + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + } + + /* + * we are now guaranteed that 'opage' is not empty. partition the + * tuples in the old bucket between the old bucket and the new bucket, + * advancing along their respective overflow bucket chains and adding + * overflow pages as needed. 
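+	 *
+	 * Only two outcomes are possible for each tuple: under linear
+	 * hashing, obucket and nbucket differ only in the newly-significant
+	 * mask bit, so re-hashing a tuple from obucket yields either
+	 * obucket (it stays) or nbucket (it moves), as the _hash_call test
+	 * below decides.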
+ */ + ooffnum = FirstOffsetNumber; + omaxoffnum = PageGetMaxOffsetNumber(opage); + for (;;) + { + /* - * we're at the end of the bucket chain, so now we're - * really done with everything. before quitting, call - * _hash_squeezebucket to ensure the tuples in the - * bucket (including the overflow pages) are packed as - * tightly as possible. + * at each iteration through this loop, each of these variables + * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum */ - _hash_wrtbuf(rel, obuf); - _hash_wrtbuf(rel, nbuf); - _hash_squeezebucket(rel, metap, obucket); - return; - } - } - - /* hash on the tuple */ - hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum)); - itup = &(hitem->hash_itup); - itupdesc = RelationGetTupleDescriptor(rel); - datum = index_getattr(itup, 1, itupdesc, &null); - bucket = _hash_call(rel, metap, datum); - - if (bucket == nbucket) { - /* - * insert the tuple into the new bucket. if it doesn't - * fit on the current page in the new bucket, we must - * allocate a new overflow page and place the tuple on - * that page instead. - */ - itemsz = IndexTupleDSize(hitem->hash_itup) - + (sizeof(HashItemData) - sizeof(IndexTupleData)); - - itemsz = DOUBLEALIGN(itemsz); - - if (PageGetFreeSpace(npage) < itemsz) { - ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf); - _hash_wrtbuf(rel, nbuf); - nbuf = ovflbuf; - npage = BufferGetPage(nbuf); - _hash_checkpage(npage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - } - - noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage)); - PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED); - _hash_wrtnorelbuf(rel, nbuf); - - /* - * now delete the tuple from the old bucket. after this - * section of code, 'ooffnum' will actually point to the - * ItemId to which we would point if we had advanced it - * before the deletion (PageIndexTupleDelete repacks the - * ItemId array). this also means that 'omaxoffnum' is - * exactly one less than it used to be, so we really can - * just decrement it instead of calling - * PageGetMaxOffsetNumber. - */ - PageIndexTupleDelete(opage, ooffnum); - _hash_wrtnorelbuf(rel, obuf); - omaxoffnum = OffsetNumberPrev(omaxoffnum); - - /* - * tidy up. if the old page was an overflow page and it - * is now empty, we must free it (we want to preserve the - * invariant that overflow pages cannot be empty). - */ - if (PageIsEmpty(opage) && - (oopaque->hasho_flag & LH_OVERFLOW_PAGE)) { - obuf = _hash_freeovflpage(rel, obuf); - - /* check that we're not through the bucket chain */ - if (BufferIsInvalid(obuf)) { - _hash_wrtbuf(rel, nbuf); - _hash_squeezebucket(rel, metap, obucket); - return; + + /* check if we're at the end of the page */ + if (ooffnum > omaxoffnum) + { + /* at end of page, but check for overflow page */ + oblkno = oopaque->hasho_nextblkno; + if (BlockNumberIsValid(oblkno)) + { + + /* + * we ran out of tuples on this particular page, but we + * have more overflow pages; re-init values. + */ + _hash_wrtbuf(rel, obuf); + obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); + opage = BufferGetPage(obuf); + _hash_checkpage(opage, LH_OVERFLOW_PAGE); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + + /* we're guaranteed that an ovfl page has at least 1 tuple */ + if (PageIsEmpty(opage)) + { + elog(WARN, "_hash_splitpage: empty ovfl page %d!", + oblkno); + } + ooffnum = FirstOffsetNumber; + omaxoffnum = PageGetMaxOffsetNumber(opage); + } + else + { + + /* + * we're at the end of the bucket chain, so now we're + * really done with everything. 
before quitting, call + * _hash_squeezebucket to ensure the tuples in the bucket + * (including the overflow pages) are packed as tightly as + * possible. + */ + _hash_wrtbuf(rel, obuf); + _hash_wrtbuf(rel, nbuf); + _hash_squeezebucket(rel, metap, obucket); + return; + } } - - /* - * re-init. again, we're guaranteed that an ovfl page - * has at least one tuple. - */ - opage = BufferGetPage(obuf); - _hash_checkpage(opage, LH_OVERFLOW_PAGE); - oblkno = BufferGetBlockNumber(obuf); - oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); - if (PageIsEmpty(opage)) { - elog(WARN, "_hash_splitpage: empty overflow page %d", - oblkno); + + /* hash on the tuple */ + hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum)); + itup = &(hitem->hash_itup); + itupdesc = RelationGetTupleDescriptor(rel); + datum = index_getattr(itup, 1, itupdesc, &null); + bucket = _hash_call(rel, metap, datum); + + if (bucket == nbucket) + { + + /* + * insert the tuple into the new bucket. if it doesn't fit on + * the current page in the new bucket, we must allocate a new + * overflow page and place the tuple on that page instead. + */ + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + + itemsz = DOUBLEALIGN(itemsz); + + if (PageGetFreeSpace(npage) < itemsz) + { + ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf); + _hash_wrtbuf(rel, nbuf); + nbuf = ovflbuf; + npage = BufferGetPage(nbuf); + _hash_checkpage(npage, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + } + + noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage)); + PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED); + _hash_wrtnorelbuf(rel, nbuf); + + /* + * now delete the tuple from the old bucket. after this + * section of code, 'ooffnum' will actually point to the + * ItemId to which we would point if we had advanced it before + * the deletion (PageIndexTupleDelete repacks the ItemId + * array). this also means that 'omaxoffnum' is exactly one + * less than it used to be, so we really can just decrement it + * instead of calling PageGetMaxOffsetNumber. + */ + PageIndexTupleDelete(opage, ooffnum); + _hash_wrtnorelbuf(rel, obuf); + omaxoffnum = OffsetNumberPrev(omaxoffnum); + + /* + * tidy up. if the old page was an overflow page and it is + * now empty, we must free it (we want to preserve the + * invariant that overflow pages cannot be empty). + */ + if (PageIsEmpty(opage) && + (oopaque->hasho_flag & LH_OVERFLOW_PAGE)) + { + obuf = _hash_freeovflpage(rel, obuf); + + /* check that we're not through the bucket chain */ + if (BufferIsInvalid(obuf)) + { + _hash_wrtbuf(rel, nbuf); + _hash_squeezebucket(rel, metap, obucket); + return; + } + + /* + * re-init. again, we're guaranteed that an ovfl page has + * at least one tuple. + */ + opage = BufferGetPage(obuf); + _hash_checkpage(opage, LH_OVERFLOW_PAGE); + oblkno = BufferGetBlockNumber(obuf); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + if (PageIsEmpty(opage)) + { + elog(WARN, "_hash_splitpage: empty overflow page %d", + oblkno); + } + ooffnum = FirstOffsetNumber; + omaxoffnum = PageGetMaxOffsetNumber(opage); + } + } + else + { + + /* + * the tuple stays on this page. we didn't move anything, so + * we didn't delete anything and therefore we don't have to + * change 'omaxoffnum'. + * + * XXX any hash value from [0, nbucket-1] will map to this + * bucket, which doesn't make sense to me. 
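+			 *
+			 * (The mapping is decided in _hash_call, which in outline
+			 * computes
+			 *
+			 *		bucket = hashvalue & metap->HIGH_MASK;
+			 *		if (bucket > metap->MAX_BUCKET)
+			 *			bucket = bucket & metap->LOW_MASK;
+			 *
+			 * so a tuple stays in obucket exactly when the new mask
+			 * bit of its hash value is zero.)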
+ */ + ooffnum = OffsetNumberNext(ooffnum); } - ooffnum = FirstOffsetNumber; - omaxoffnum = PageGetMaxOffsetNumber(opage); - } - } else { - /* - * the tuple stays on this page. we didn't move anything, - * so we didn't delete anything and therefore we don't - * have to change 'omaxoffnum'. - * - * XXX any hash value from [0, nbucket-1] will map to this - * bucket, which doesn't make sense to me. - */ - ooffnum = OffsetNumberNext(ooffnum); } - } - /*NOTREACHED*/ + /* NOTREACHED */ } diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c index bd776d68c0d..79fa33f747c 100644 --- a/src/backend/access/hash/hashscan.c +++ b/src/backend/access/hash/hashscan.c @@ -1,160 +1,167 @@ /*------------------------------------------------------------------------- * * hashscan.c-- - * manage scans on hash tables + * manage scans on hash tables * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.8 1996/11/15 18:36:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.9 1997/09/07 04:38:01 momjian Exp $ * * NOTES - * Because we can be doing an index scan on a relation while we - * update it, we need to avoid missing data that moves around in - * the index. The routines and global variables in this file - * guarantee that all scans in the local address space stay - * correctly positioned. This is all we need to worry about, since - * write locking guarantees that no one else will be on the same - * page at the same time as we are. + * Because we can be doing an index scan on a relation while we + * update it, we need to avoid missing data that moves around in + * the index. The routines and global variables in this file + * guarantee that all scans in the local address space stay + * correctly positioned. This is all we need to worry about, since + * write locking guarantees that no one else will be on the same + * page at the same time as we are. * - * The scheme is to manage a list of active scans in the current - * backend. Whenever we add or remove records from an index, we - * check the list of active scans to see if any has been affected. - * A scan is affected only if it is on the same relation, and the - * same page, as the update. + * The scheme is to manage a list of active scans in the current + * backend. Whenever we add or remove records from an index, we + * check the list of active scans to see if any has been affected. + * A scan is affected only if it is on the same relation, and the + * same page, as the update. 
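+ *
+ * In outline, the delete path in this access method does:
+ *
+ *		_hash_adjscans(rel, tid);
+ *		...then PageIndexTupleDelete() on the page...
+ *
+ * so that any scan positioned at or after the doomed item on that page
+ * is repositioned before the ItemId array is repacked.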
* *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> -static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); -static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); +static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); +static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); -typedef struct HashScanListData { - IndexScanDesc hashsl_scan; - struct HashScanListData *hashsl_next; -} HashScanListData; +typedef struct HashScanListData +{ + IndexScanDesc hashsl_scan; + struct HashScanListData *hashsl_next; +} HashScanListData; -typedef HashScanListData *HashScanList; +typedef HashScanListData *HashScanList; -static HashScanList HashScans = (HashScanList) NULL; +static HashScanList HashScans = (HashScanList) NULL; /* - * _Hash_regscan() -- register a new scan. + * _Hash_regscan() -- register a new scan. */ void _hash_regscan(IndexScanDesc scan) { - HashScanList new_el; - - new_el = (HashScanList) palloc(sizeof(HashScanListData)); - new_el->hashsl_scan = scan; - new_el->hashsl_next = HashScans; - HashScans = new_el; + HashScanList new_el; + + new_el = (HashScanList) palloc(sizeof(HashScanListData)); + new_el->hashsl_scan = scan; + new_el->hashsl_next = HashScans; + HashScans = new_el; } /* - * _hash_dropscan() -- drop a scan from the scan list + * _hash_dropscan() -- drop a scan from the scan list */ void _hash_dropscan(IndexScanDesc scan) { - HashScanList chk, last; - - last = (HashScanList) NULL; - for (chk = HashScans; - chk != (HashScanList) NULL && chk->hashsl_scan != scan; - chk = chk->hashsl_next) { - last = chk; - } - - if (chk == (HashScanList) NULL) - elog(WARN, "hash scan list trashed; can't find 0x%lx", scan); - - if (last == (HashScanList) NULL) - HashScans = chk->hashsl_next; - else - last->hashsl_next = chk->hashsl_next; - - pfree (chk); + HashScanList chk, + last; + + last = (HashScanList) NULL; + for (chk = HashScans; + chk != (HashScanList) NULL && chk->hashsl_scan != scan; + chk = chk->hashsl_next) + { + last = chk; + } + + if (chk == (HashScanList) NULL) + elog(WARN, "hash scan list trashed; can't find 0x%lx", scan); + + if (last == (HashScanList) NULL) + HashScans = chk->hashsl_next; + else + last->hashsl_next = chk->hashsl_next; + + pfree(chk); } void _hash_adjscans(Relation rel, ItemPointer tid) { - HashScanList l; - Oid relid; - - relid = rel->rd_id; - for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next) { - if (relid == l->hashsl_scan->relation->rd_id) - _hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid), - ItemPointerGetOffsetNumber(tid)); - } + HashScanList l; + Oid relid; + + relid = rel->rd_id; + for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next) + { + if (relid == l->hashsl_scan->relation->rd_id) + _hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid), + ItemPointerGetOffsetNumber(tid)); + } } static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno) { - ItemPointer current; - Buffer buf; - Buffer metabuf; - HashScanOpaque so; - - if (!_hash_scantouched(scan, blkno, offno)) - return; - - metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ); - - so = (HashScanOpaque) scan->opaque; - buf = so->hashso_curbuf; - - current = &(scan->currentItemData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) { 
- _hash_step(scan, &buf, BackwardScanDirection, metabuf); - so->hashso_curbuf = buf; - } - - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) { - ItemPointerData tmp; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; - _hash_step(scan, &buf, BackwardScanDirection, metabuf); - so->hashso_mrkbuf = buf; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; - } + ItemPointer current; + Buffer buf; + Buffer metabuf; + HashScanOpaque so; + + if (!_hash_scantouched(scan, blkno, offno)) + return; + + metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ); + + so = (HashScanOpaque) scan->opaque; + buf = so->hashso_curbuf; + + current = &(scan->currentItemData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + { + _hash_step(scan, &buf, BackwardScanDirection, metabuf); + so->hashso_curbuf = buf; + } + + current = &(scan->currentMarkData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + { + ItemPointerData tmp; + + tmp = *current; + *current = scan->currentItemData; + scan->currentItemData = tmp; + _hash_step(scan, &buf, BackwardScanDirection, metabuf); + so->hashso_mrkbuf = buf; + tmp = *current; + *current = scan->currentItemData; + scan->currentItemData = tmp; + } } -static bool +static bool _hash_scantouched(IndexScanDesc scan, - BlockNumber blkno, - OffsetNumber offno) + BlockNumber blkno, + OffsetNumber offno) { - ItemPointer current; - - current = &(scan->currentItemData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - return (true); - - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - return (true); - - return (false); + ItemPointer current; + + current = &(scan->currentItemData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + return (true); + + current = &(scan->currentMarkData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + return (true); + + return (false); } diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index bc67b7f5aac..0a42ad05065 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -1,423 +1,467 @@ /*------------------------------------------------------------------------- * * hashsearch.c-- - * search code for postgres hash tables + * search code for postgres hash tables * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.10 1997/06/28 05:45:40 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.11 1997/09/07 04:38:02 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <storage/bufmgr.h> #ifndef HAVE_MEMMOVE -# include "regex/utils.h" +#include "regex/utils.h" #else -# include <string.h> -#endif +#include <string.h> 
+#endif
 
 /*
- * _hash_search() -- Finds the page/bucket that the contains the
- *	scankey and loads it into *bufP.  the buffer has a read lock.
+ * _hash_search() -- Finds the page/bucket that contains the
+ *	scankey and loads it into *bufP.  the buffer has a read lock.
 */
 void
 _hash_search(Relation rel,
-	     int keysz,
-	     ScanKey scankey,
-	     Buffer *bufP,
-	     HashMetaPage metap)
+			 int keysz,
+			 ScanKey scankey,
+			 Buffer * bufP,
+			 HashMetaPage metap)
 {
-    BlockNumber blkno;
-    Datum keyDatum;
-    Bucket bucket;
-
-    if (scankey == (ScanKey) NULL ||
-	(keyDatum = scankey[0].sk_argument) == (Datum) NULL) {
-	/*
-	 * If the scankey argument is NULL, all tuples will satisfy
-	 * the scan so we start the scan at the first bucket (bucket
-	 * 0).
-	 */
-	bucket = 0;
-    } else {
-	bucket = _hash_call(rel, metap, keyDatum);
-    }
-
-    blkno = BUCKET_TO_BLKNO(bucket);
-
-    *bufP = _hash_getbuf(rel, blkno, HASH_READ);
+	BlockNumber blkno;
+	Datum		keyDatum;
+	Bucket		bucket;
+
+	if (scankey == (ScanKey) NULL ||
+		(keyDatum = scankey[0].sk_argument) == (Datum) NULL)
+	{
+
+		/*
+		 * If the scankey argument is NULL, all tuples will satisfy the
+		 * scan so we start the scan at the first bucket (bucket 0).
+		 */
+		bucket = 0;
+	}
+	else
+	{
+		bucket = _hash_call(rel, metap, keyDatum);
+	}
+
+	blkno = BUCKET_TO_BLKNO(bucket);
+
+	*bufP = _hash_getbuf(rel, blkno, HASH_READ);
 }
 
 /*
- * _hash_next() -- Get the next item in a scan.
+ * _hash_next() -- Get the next item in a scan.
 *
- *	On entry, we have a valid currentItemData in the scan, and a
- *	read lock on the page that contains that item.  We do not have
- *	the page pinned.  We return the next item in the scan.  On
- *	exit, we have the page containing the next item locked but not
- *	pinned.
+ *	On entry, we have a valid currentItemData in the scan, and a
+ *	read lock on the page that contains that item.  We do not have
+ *	the page pinned.  We return the next item in the scan.  On
+ *	exit, we have the page containing the next item locked but not
+ *	pinned.
 */
 RetrieveIndexResult
 _hash_next(IndexScanDesc scan, ScanDirection dir)
 {
-    Relation rel;
-    Buffer buf;
-    Buffer metabuf;
-    Page page;
-    OffsetNumber offnum;
-    RetrieveIndexResult res;
-    ItemPointer current;
-    HashItem hitem;
-    IndexTuple itup;
-    HashScanOpaque so;
-
-    rel = scan->relation;
-    so = (HashScanOpaque) scan->opaque;
-    current = &(scan->currentItemData);
-
-    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
-
-    /*
-     * XXX 10 may 91: somewhere there's a bug in our management of the
-     * cached buffer for this scan.  wei discovered it.  the following
-     * is a workaround so he can work until i figure out what's going on.
-     */
-
-    if (!BufferIsValid(so->hashso_curbuf)) {
-	so->hashso_curbuf = _hash_getbuf(rel,
-					 ItemPointerGetBlockNumber(current),
-					 HASH_READ);
-    }
-
-    /* we still have the buffer pinned and locked */
-    buf = so->hashso_curbuf;
-
-    /*
-     * step to next valid tuple.  note that _hash_step releases our
-     * lock on 'metabuf'; if we switch to a new 'buf' while looking
-     * for the next tuple, we come back with a lock on that buffer.
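
_hash_search() above reduces to: hash the key, mask the hash value down to a bucket, and read that bucket's block. The masking is done by _hash_call() (shown later in hashutil.c, in the diff for that file); a standalone sketch of that linear-hashing step, with illustrative types and the hash value assumed to be computed already:

    #include <stdint.h>

    typedef uint32_t Bucket;    /* illustrative stand-in */

    /*
     * Mirror of the _hash_call() logic: mask with the current high
     * mask; if the result names a bucket that has not been created
     * yet, fall back to the smaller mask, which names the bucket
     * that has not yet been split.
     */
    static Bucket
    key_to_bucket(uint32_t hashval, Bucket maxbucket,
                  uint32_t highmask, uint32_t lowmask)
    {
        Bucket      bucket = hashval & highmask;

        if (bucket > maxbucket)
            bucket &= lowmask;
        return bucket;
    }
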
- */ - if (!_hash_step(scan, &buf, dir, metabuf)) { - return ((RetrieveIndexResult) NULL); - } - - /* if we're here, _hash_step found a valid tuple */ - current = &(scan->currentItemData); - offnum = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &hitem->hash_itup; - res = FormRetrieveIndexResult(current, &(itup->t_tid)); - - return (res); + Relation rel; + Buffer buf; + Buffer metabuf; + Page page; + OffsetNumber offnum; + RetrieveIndexResult res; + ItemPointer current; + HashItem hitem; + IndexTuple itup; + HashScanOpaque so; + + rel = scan->relation; + so = (HashScanOpaque) scan->opaque; + current = &(scan->currentItemData); + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); + + /* + * XXX 10 may 91: somewhere there's a bug in our management of the + * cached buffer for this scan. wei discovered it. the following is + * a workaround so he can work until i figure out what's going on. + */ + + if (!BufferIsValid(so->hashso_curbuf)) + { + so->hashso_curbuf = _hash_getbuf(rel, + ItemPointerGetBlockNumber(current), + HASH_READ); + } + + /* we still have the buffer pinned and locked */ + buf = so->hashso_curbuf; + + /* + * step to next valid tuple. note that _hash_step releases our lock + * on 'metabuf'; if we switch to a new 'buf' while looking for the + * next tuple, we come back with a lock on that buffer. + */ + if (!_hash_step(scan, &buf, dir, metabuf)) + { + return ((RetrieveIndexResult) NULL); + } + + /* if we're here, _hash_step found a valid tuple */ + current = &(scan->currentItemData); + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &hitem->hash_itup; + res = FormRetrieveIndexResult(current, &(itup->t_tid)); + + return (res); } static void _hash_readnext(Relation rel, - Buffer *bufp, Page *pagep, HashPageOpaque *opaquep) + Buffer * bufp, Page * pagep, HashPageOpaque * opaquep) { - BlockNumber blkno; - - blkno = (*opaquep)->hasho_nextblkno; - _hash_relbuf(rel, *bufp, HASH_READ); - *bufp = InvalidBuffer; - if (BlockNumberIsValid(blkno)) { - *bufp = _hash_getbuf(rel, blkno, HASH_READ); - *pagep = BufferGetPage(*bufp); - _hash_checkpage(*pagep, LH_OVERFLOW_PAGE); - *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); - Assert(!PageIsEmpty(*pagep)); - } + BlockNumber blkno; + + blkno = (*opaquep)->hasho_nextblkno; + _hash_relbuf(rel, *bufp, HASH_READ); + *bufp = InvalidBuffer; + if (BlockNumberIsValid(blkno)) + { + *bufp = _hash_getbuf(rel, blkno, HASH_READ); + *pagep = BufferGetPage(*bufp); + _hash_checkpage(*pagep, LH_OVERFLOW_PAGE); + *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); + Assert(!PageIsEmpty(*pagep)); + } } static void _hash_readprev(Relation rel, - Buffer *bufp, Page *pagep, HashPageOpaque *opaquep) + Buffer * bufp, Page * pagep, HashPageOpaque * opaquep) { - BlockNumber blkno; - - blkno = (*opaquep)->hasho_prevblkno; - _hash_relbuf(rel, *bufp, HASH_READ); - *bufp = InvalidBuffer; - if (BlockNumberIsValid(blkno)) { - *bufp = _hash_getbuf(rel, blkno, HASH_READ); - *pagep = BufferGetPage(*bufp); - _hash_checkpage(*pagep, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); - if (PageIsEmpty(*pagep)) { - Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE); - _hash_relbuf(rel, *bufp, HASH_READ); - 
*bufp = InvalidBuffer; + BlockNumber blkno; + + blkno = (*opaquep)->hasho_prevblkno; + _hash_relbuf(rel, *bufp, HASH_READ); + *bufp = InvalidBuffer; + if (BlockNumberIsValid(blkno)) + { + *bufp = _hash_getbuf(rel, blkno, HASH_READ); + *pagep = BufferGetPage(*bufp); + _hash_checkpage(*pagep, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); + if (PageIsEmpty(*pagep)) + { + Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE); + _hash_relbuf(rel, *bufp, HASH_READ); + *bufp = InvalidBuffer; + } } - } } /* - * _hash_first() -- Find the first item in a scan. + * _hash_first() -- Find the first item in a scan. * - * Return the RetrieveIndexResult of the first item in the tree that - * satisfies the qualificatin associated with the scan descriptor. On - * exit, the page containing the current index tuple is read locked - * and pinned, and the scan's opaque data entry is updated to - * include the buffer. + * Return the RetrieveIndexResult of the first item in the tree that + * satisfies the qualificatin associated with the scan descriptor. On + * exit, the page containing the current index tuple is read locked + * and pinned, and the scan's opaque data entry is updated to + * include the buffer. */ RetrieveIndexResult _hash_first(IndexScanDesc scan, ScanDirection dir) { - Relation rel; - Buffer buf; - Buffer metabuf; - Page page; - HashPageOpaque opaque; - HashMetaPage metap; - HashItem hitem; - IndexTuple itup; - ItemPointer current; - OffsetNumber offnum; - RetrieveIndexResult res; - HashScanOpaque so; - - rel = scan->relation; - so = (HashScanOpaque) scan->opaque; - current = &(scan->currentItemData); - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - /* - * XXX -- The attribute number stored in the scan key is the attno - * in the heap relation. We need to transmogrify this into - * the index relation attno here. For the moment, we have - * hardwired attno == 1. - */ - - /* find the correct bucket page and load it into buf */ - _hash_search(rel, 1, scan->keyData, &buf, metap); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - - /* - * if we are scanning forward, we need to find the first non-empty - * page (if any) in the bucket chain. since overflow pages are - * never empty, this had better be either the bucket page or the - * first overflow page. - * - * if we are scanning backward, we always go all the way to the - * end of the bucket chain. - */ - if (PageIsEmpty(page)) { - if (BlockNumberIsValid(opaque->hasho_nextblkno)) { - _hash_readnext(rel, &buf, &page, &opaque); - } else { - ItemPointerSetInvalid(current); - so->hashso_curbuf = InvalidBuffer; - /* - * If there is no scankeys, all tuples will satisfy - * the scan - so we continue in _hash_step to get - * tuples from all buckets. 
- vadim 04/29/97 - */ - if ( scan->numberOfKeys >= 1 ) - { - _hash_relbuf(rel, buf, HASH_READ); - _hash_relbuf(rel, metabuf, HASH_READ); - return ((RetrieveIndexResult) NULL); - } + Relation rel; + Buffer buf; + Buffer metabuf; + Page page; + HashPageOpaque opaque; + HashMetaPage metap; + HashItem hitem; + IndexTuple itup; + ItemPointer current; + OffsetNumber offnum; + RetrieveIndexResult res; + HashScanOpaque so; + + rel = scan->relation; + so = (HashScanOpaque) scan->opaque; + current = &(scan->currentItemData); + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* + * XXX -- The attribute number stored in the scan key is the attno in + * the heap relation. We need to transmogrify this into the index + * relation attno here. For the moment, we have hardwired attno == 1. + */ + + /* find the correct bucket page and load it into buf */ + _hash_search(rel, 1, scan->keyData, &buf, metap); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + /* + * if we are scanning forward, we need to find the first non-empty + * page (if any) in the bucket chain. since overflow pages are never + * empty, this had better be either the bucket page or the first + * overflow page. + * + * if we are scanning backward, we always go all the way to the end of + * the bucket chain. + */ + if (PageIsEmpty(page)) + { + if (BlockNumberIsValid(opaque->hasho_nextblkno)) + { + _hash_readnext(rel, &buf, &page, &opaque); + } + else + { + ItemPointerSetInvalid(current); + so->hashso_curbuf = InvalidBuffer; + + /* + * If there is no scankeys, all tuples will satisfy the scan - + * so we continue in _hash_step to get tuples from all + * buckets. - vadim 04/29/97 + */ + if (scan->numberOfKeys >= 1) + { + _hash_relbuf(rel, buf, HASH_READ); + _hash_relbuf(rel, metabuf, HASH_READ); + return ((RetrieveIndexResult) NULL); + } + } } - } - if (ScanDirectionIsBackward(dir)) { - while (BlockNumberIsValid(opaque->hasho_nextblkno)) { - _hash_readnext(rel, &buf, &page, &opaque); + if (ScanDirectionIsBackward(dir)) + { + while (BlockNumberIsValid(opaque->hasho_nextblkno)) + { + _hash_readnext(rel, &buf, &page, &opaque); + } + } + + if (!_hash_step(scan, &buf, dir, metabuf)) + { + return ((RetrieveIndexResult) NULL); } - } - - if (!_hash_step(scan, &buf, dir, metabuf)) { - return ((RetrieveIndexResult) NULL); - } - - /* if we're here, _hash_step found a valid tuple */ - current = &(scan->currentItemData); - offnum = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &hitem->hash_itup; - res = FormRetrieveIndexResult(current, &(itup->t_tid)); - - return (res); + + /* if we're here, _hash_step found a valid tuple */ + current = &(scan->currentItemData); + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &hitem->hash_itup; + res = FormRetrieveIndexResult(current, &(itup->t_tid)); + + return (res); } /* - * _hash_step() -- step to the next valid item in a scan in the bucket. + * _hash_step() -- step to the next valid item in a scan in the bucket. * - * If no valid record exists in the requested direction, return - * false. 
Else, return true and set the CurrentItemData for the - * scan to the right thing. - * - * 'bufP' points to the buffer which contains the current page - * that we'll step through. + * If no valid record exists in the requested direction, return + * false. Else, return true and set the CurrentItemData for the + * scan to the right thing. * - * 'metabuf' is released when this returns. + * 'bufP' points to the buffer which contains the current page + * that we'll step through. + * + * 'metabuf' is released when this returns. */ bool -_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf) +_hash_step(IndexScanDesc scan, Buffer * bufP, ScanDirection dir, Buffer metabuf) { - Relation rel; - ItemPointer current; - HashScanOpaque so; - int allbuckets; - HashMetaPage metap; - Buffer buf; - Page page; - HashPageOpaque opaque; - OffsetNumber maxoff; - OffsetNumber offnum; - Bucket bucket; - BlockNumber blkno; - HashItem hitem; - IndexTuple itup; - - rel = scan->relation; - current = &(scan->currentItemData); - so = (HashScanOpaque) scan->opaque; - allbuckets = (scan->numberOfKeys < 1); - - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - buf = *bufP; - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - - /* - * If _hash_step is called from _hash_first, current will not be - * valid, so we can't dereference it. However, in that case, we - * presumably want to start at the beginning/end of the page... - */ - maxoff = PageGetMaxOffsetNumber(page); - if (ItemPointerIsValid(current)) { - offnum = ItemPointerGetOffsetNumber(current); - } else { - offnum = InvalidOffsetNumber; - } - - /* - * 'offnum' now points to the last tuple we have seen (if any). - * - * continue to step through tuples until: - * 1) we get to the end of the bucket chain or - * 2) we find a valid tuple. - */ - do { - bucket = opaque->hasho_bucket; - - switch (dir) { - case ForwardScanDirection: - if (offnum != InvalidOffsetNumber) { - offnum = OffsetNumberNext(offnum); /* move forward */ - } else { - offnum = FirstOffsetNumber; /* new page */ - } - while (offnum > maxoff) { - /* - * either this page is empty (maxoff == - * InvalidOffsetNumber) or we ran off the end. 
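
For orientation, the loop being reindented here advances an (offset, page) pair: step within the page, follow the overflow chain when the offset runs past the last item, and report end-of-chain so the caller can try the next bucket on unkeyed scans. A compact model with an illustrative page type, using 0-based slots rather than Postgres's 1-based offsets:

    typedef struct PageSketch
    {
        int                 nitems; /* PageGetMaxOffsetNumber analogue */
        struct PageSketch  *next;   /* hasho_nextblkno analogue */
    } PageSketch;

    /* advance one item forward; returns 0 at the end of the bucket chain */
    static int
    step_forward(PageSketch **page, int *slot)
    {
        ++*slot;
        while (*slot >= (*page)->nitems)    /* empty page, or ran off the end */
        {
            if ((*page)->next == NULL)
                return 0;                   /* caller may move to the next bucket */
            *page = (*page)->next;          /* like _hash_readnext() */
            *slot = 0;
        }
        return 1;
    }
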
- */ - _hash_readnext(rel, &buf, &page, &opaque); - if (BufferIsInvalid(buf)) { /* end of chain */ - if (allbuckets && bucket < metap->hashm_maxbucket) { - ++bucket; - blkno = BUCKET_TO_BLKNO(bucket); - buf = _hash_getbuf(rel, blkno, HASH_READ); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - Assert(opaque->hasho_bucket == bucket); - while (PageIsEmpty(page) && - BlockNumberIsValid(opaque->hasho_nextblkno)) { - _hash_readnext(rel, &buf, &page, &opaque); + Relation rel; + ItemPointer current; + HashScanOpaque so; + int allbuckets; + HashMetaPage metap; + Buffer buf; + Page page; + HashPageOpaque opaque; + OffsetNumber maxoff; + OffsetNumber offnum; + Bucket bucket; + BlockNumber blkno; + HashItem hitem; + IndexTuple itup; + + rel = scan->relation; + current = &(scan->currentItemData); + so = (HashScanOpaque) scan->opaque; + allbuckets = (scan->numberOfKeys < 1); + + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + buf = *bufP; + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + /* + * If _hash_step is called from _hash_first, current will not be + * valid, so we can't dereference it. However, in that case, we + * presumably want to start at the beginning/end of the page... + */ + maxoff = PageGetMaxOffsetNumber(page); + if (ItemPointerIsValid(current)) + { + offnum = ItemPointerGetOffsetNumber(current); + } + else + { + offnum = InvalidOffsetNumber; + } + + /* + * 'offnum' now points to the last tuple we have seen (if any). + * + * continue to step through tuples until: 1) we get to the end of the + * bucket chain or 2) we find a valid tuple. + */ + do + { + bucket = opaque->hasho_bucket; + + switch (dir) + { + case ForwardScanDirection: + if (offnum != InvalidOffsetNumber) + { + offnum = OffsetNumberNext(offnum); /* move forward */ } - maxoff = PageGetMaxOffsetNumber(page); - offnum = FirstOffsetNumber; - } else { - maxoff = offnum = InvalidOffsetNumber; - break; /* while */ - } - } else { - /* _hash_readnext never returns an empty page */ - maxoff = PageGetMaxOffsetNumber(page); - offnum = FirstOffsetNumber; - } - } - break; - case BackwardScanDirection: - if (offnum != InvalidOffsetNumber) { - offnum = OffsetNumberPrev(offnum); /* move back */ - } else { - offnum = maxoff; /* new page */ - } - while (offnum < FirstOffsetNumber) { - /* - * either this page is empty (offnum == - * InvalidOffsetNumber) or we ran off the end. - */ - _hash_readprev(rel, &buf, &page, &opaque); - if (BufferIsInvalid(buf)) { /* end of chain */ - if (allbuckets && bucket > 0) { - --bucket; - blkno = BUCKET_TO_BLKNO(bucket); - buf = _hash_getbuf(rel, blkno, HASH_READ); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - Assert(opaque->hasho_bucket == bucket); - while (BlockNumberIsValid(opaque->hasho_nextblkno)) { - _hash_readnext(rel, &buf, &page, &opaque); + else + { + offnum = FirstOffsetNumber; /* new page */ + } + while (offnum > maxoff) + { + + /* + * either this page is empty (maxoff == + * InvalidOffsetNumber) or we ran off the end. 
+ */ + _hash_readnext(rel, &buf, &page, &opaque); + if (BufferIsInvalid(buf)) + { /* end of chain */ + if (allbuckets && bucket < metap->hashm_maxbucket) + { + ++bucket; + blkno = BUCKET_TO_BLKNO(bucket); + buf = _hash_getbuf(rel, blkno, HASH_READ); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(opaque->hasho_bucket == bucket); + while (PageIsEmpty(page) && + BlockNumberIsValid(opaque->hasho_nextblkno)) + { + _hash_readnext(rel, &buf, &page, &opaque); + } + maxoff = PageGetMaxOffsetNumber(page); + offnum = FirstOffsetNumber; + } + else + { + maxoff = offnum = InvalidOffsetNumber; + break; /* while */ + } + } + else + { + /* _hash_readnext never returns an empty page */ + maxoff = PageGetMaxOffsetNumber(page); + offnum = FirstOffsetNumber; + } + } + break; + case BackwardScanDirection: + if (offnum != InvalidOffsetNumber) + { + offnum = OffsetNumberPrev(offnum); /* move back */ + } + else + { + offnum = maxoff;/* new page */ } - maxoff = offnum = PageGetMaxOffsetNumber(page); - } else { - maxoff = offnum = InvalidOffsetNumber; - break; /* while */ - } - } else { - /* _hash_readprev never returns an empty page */ - maxoff = offnum = PageGetMaxOffsetNumber(page); + while (offnum < FirstOffsetNumber) + { + + /* + * either this page is empty (offnum == + * InvalidOffsetNumber) or we ran off the end. + */ + _hash_readprev(rel, &buf, &page, &opaque); + if (BufferIsInvalid(buf)) + { /* end of chain */ + if (allbuckets && bucket > 0) + { + --bucket; + blkno = BUCKET_TO_BLKNO(bucket); + buf = _hash_getbuf(rel, blkno, HASH_READ); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(opaque->hasho_bucket == bucket); + while (BlockNumberIsValid(opaque->hasho_nextblkno)) + { + _hash_readnext(rel, &buf, &page, &opaque); + } + maxoff = offnum = PageGetMaxOffsetNumber(page); + } + else + { + maxoff = offnum = InvalidOffsetNumber; + break; /* while */ + } + } + else + { + /* _hash_readprev never returns an empty page */ + maxoff = offnum = PageGetMaxOffsetNumber(page); + } + } + break; + default: + /* NoMovementScanDirection */ + /* this should not be reached */ + break; } - } - break; - default: - /* NoMovementScanDirection */ - /* this should not be reached */ - break; - } - /* we ran off the end of the world without finding a match */ - if (offnum == InvalidOffsetNumber) { - _hash_relbuf(rel, metabuf, HASH_READ); - *bufP = so->hashso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(current); - return(false); - } - - /* get ready to check this tuple */ - hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &hitem->hash_itup; - } while (!_hash_checkqual(scan, itup)); - - /* if we made it to here, we've found a valid tuple */ - _hash_relbuf(rel, metabuf, HASH_READ); - blkno = BufferGetBlockNumber(buf); - *bufP = so->hashso_curbuf = buf; - ItemPointerSet(current, blkno, offnum); - return(true); + /* we ran off the end of the world without finding a match */ + if (offnum == InvalidOffsetNumber) + { + _hash_relbuf(rel, metabuf, HASH_READ); + *bufP = so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(current); + return (false); + } + + /* get ready to check this tuple */ + hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &hitem->hash_itup; + } while (!_hash_checkqual(scan, itup)); + + /* if we made it to here, we've found a valid tuple */ + _hash_relbuf(rel, metabuf, HASH_READ); + blkno 
= BufferGetBlockNumber(buf);
+	*bufP = so->hashso_curbuf = buf;
+	ItemPointerSet(current, blkno, offnum);
+	return (true);
 }
diff --git a/src/backend/access/hash/hashstrat.c b/src/backend/access/hash/hashstrat.c
index d2f1e513c38..f1bdbdb8a3a 100644
--- a/src/backend/access/hash/hashstrat.c
+++ b/src/backend/access/hash/hashstrat.c
@@ -1,80 +1,83 @@
 /*-------------------------------------------------------------------------
 *
 * btstrat.c--
- *	  Srategy map entries for the btree indexed access method
+ *	  Strategy map entries for the hash indexed access method
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/hashstrat.c,v 1.9 1997/08/20 02:01:42 vadim Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/hashstrat.c,v 1.10 1997/09/07 04:38:03 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include <postgres.h>
-
+
 #include <access/hash.h>
 #include <access/istrat.h>
 
-/*
- * only one valid strategy for hash tables: equality.
+/*
+ * only one valid strategy for hash tables: equality.
 */
 #ifdef NOT_USED
-static StrategyNumber HTNegate[1] = {
-    InvalidStrategy
+static StrategyNumber HTNegate[1] = {
+	InvalidStrategy
 };
 
-static StrategyNumber HTCommute[1] = {
-    HTEqualStrategyNumber
+static StrategyNumber HTCommute[1] = {
+	HTEqualStrategyNumber
 };
 
-static StrategyNumber HTNegateCommute[1] = {
-    InvalidStrategy
+static StrategyNumber HTNegateCommute[1] = {
+	InvalidStrategy
 };
 
-static StrategyEvaluationData HTEvaluationData = {
-    /* XXX static for simplicity */
+static StrategyEvaluationData HTEvaluationData = {
+	/* XXX static for simplicity */
 
-    HTMaxStrategyNumber,
-    (StrategyTransformMap)HTNegate,
-    (StrategyTransformMap)HTCommute,
-    (StrategyTransformMap)HTNegateCommute,
-    {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
+	HTMaxStrategyNumber,
+	(StrategyTransformMap) HTNegate,
+	(StrategyTransformMap) HTCommute,
+	(StrategyTransformMap) HTNegateCommute,
+	{NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
 };
+
 #endif
 
 /* ----------------------------------------------------------------
- *	RelationGetHashStrategy
+ *		RelationGetHashStrategy
 * ----------------------------------------------------------------
 */
 #ifdef NOT_USED
-static StrategyNumber
+static StrategyNumber
 _hash_getstrat(Relation rel,
-	       AttrNumber attno,
-	       RegProcedure proc)
+			   AttrNumber attno,
+			   RegProcedure proc)
 {
-    StrategyNumber strat;
+	StrategyNumber strat;
 
-    strat = RelationGetStrategy(rel, attno, &HTEvaluationData, proc);
+	strat = RelationGetStrategy(rel, attno, &HTEvaluationData, proc);
 
-    Assert(StrategyNumberIsValid(strat));
+	Assert(StrategyNumberIsValid(strat));
 
-    return (strat);
+	return (strat);
 }
+
 #endif
 
 #ifdef NOT_USED
-static bool
+static bool
 _hash_invokestrat(Relation rel,
-		  AttrNumber attno,
-		  StrategyNumber strat,
-		  Datum left,
-		  Datum right)
+				  AttrNumber attno,
+				  StrategyNumber strat,
+				  Datum left,
+				  Datum right)
 {
-    return (RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat,
-				   left, right));
+	return (RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat,
+								   left, right));
 }
+
 #endif
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index dd0b4737454..f9fbe0e2d17 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -1,109 +1,110 @@
 /*-------------------------------------------------------------------------
 *
 * btutils.c--
- *	  Utility 
code for Postgres btree implementation. + * Utility code for Postgres btree implementation. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.9 1997/08/14 05:01:32 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.10 1997/09/07 04:38:04 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <fmgr.h> #include <utils/memutils.h> #include <access/iqual.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif ScanKey _hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap) { - ScanKey skey; - TupleDesc itupdesc; - int natts; - AttrNumber i; - Datum arg; - RegProcedure proc; - bool null; - - natts = rel->rd_rel->relnatts; - itupdesc = RelationGetTupleDescriptor(rel); - - skey = (ScanKey) palloc(natts * sizeof(ScanKeyData)); - - for (i = 0; i < natts; i++) { - arg = index_getattr(itup, i + 1, itupdesc, &null); - proc = metap->hashm_procid; - ScanKeyEntryInitialize(&skey[i], - 0x0, (AttrNumber) (i + 1), proc, arg); - } - - return (skey); -} + ScanKey skey; + TupleDesc itupdesc; + int natts; + AttrNumber i; + Datum arg; + RegProcedure proc; + bool null; + + natts = rel->rd_rel->relnatts; + itupdesc = RelationGetTupleDescriptor(rel); + + skey = (ScanKey) palloc(natts * sizeof(ScanKeyData)); + + for (i = 0; i < natts; i++) + { + arg = index_getattr(itup, i + 1, itupdesc, &null); + proc = metap->hashm_procid; + ScanKeyEntryInitialize(&skey[i], + 0x0, (AttrNumber) (i + 1), proc, arg); + } + + return (skey); +} void _hash_freeskey(ScanKey skey) { - pfree(skey); + pfree(skey); } bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup) { - if (scan->numberOfKeys > 0) - return (index_keytest(itup, - RelationGetTupleDescriptor(scan->relation), - scan->numberOfKeys, scan->keyData)); - else - return (true); + if (scan->numberOfKeys > 0) + return (index_keytest(itup, + RelationGetTupleDescriptor(scan->relation), + scan->numberOfKeys, scan->keyData)); + else + return (true); } HashItem _hash_formitem(IndexTuple itup) { - int nbytes_hitem; - HashItem hitem; - Size tuplen; - - /* disallow nulls in hash keys */ - if (itup->t_info & INDEX_NULL_MASK) - elog(WARN, "hash indices cannot include null keys"); - - /* make a copy of the index tuple with room for the sequence number */ - tuplen = IndexTupleSize(itup); - nbytes_hitem = tuplen + - (sizeof(HashItemData) - sizeof(IndexTupleData)); - - hitem = (HashItem) palloc(nbytes_hitem); - memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen); - - return (hitem); + int nbytes_hitem; + HashItem hitem; + Size tuplen; + + /* disallow nulls in hash keys */ + if (itup->t_info & INDEX_NULL_MASK) + elog(WARN, "hash indices cannot include null keys"); + + /* make a copy of the index tuple with room for the sequence number */ + tuplen = IndexTupleSize(itup); + nbytes_hitem = tuplen + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + + hitem = (HashItem) palloc(nbytes_hitem); + memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen); + + return (hitem); } Bucket _hash_call(Relation rel, HashMetaPage metap, Datum key) { - uint32 n; - Bucket bucket; - RegProcedure proc; - - proc = metap->hashm_procid; - n = (uint32) fmgr(proc, key); - bucket = n & metap->hashm_highmask; - if (bucket > metap->hashm_maxbucket) - bucket = bucket & metap->hashm_lowmask; - return 
(bucket);
+	uint32		n;
+	Bucket		bucket;
+	RegProcedure proc;
+
+	proc = metap->hashm_procid;
+	n = (uint32) fmgr(proc, key);
+	bucket = n & metap->hashm_highmask;
+	if (bucket > metap->hashm_maxbucket)
+		bucket = bucket & metap->hashm_lowmask;
+	return (bucket);
 }
 
 /*
@@ -112,12 +113,13 @@ _hash_call(Relation rel, HashMetaPage metap, Datum key)
 uint32
 _hash_log2(uint32 num)
 {
-    uint32 i, limit;
-
-    limit = 1;
-    for (i = 0; limit < num; limit = limit << 1, i++)
-	;
-    return (i);
+	uint32		i,
+				limit;
+
+	limit = 1;
+	for (i = 0; limit < num; limit = limit << 1, i++)
+		;
+	return (i);
 }
 
 /*
@@ -126,19 +128,20 @@ _hash_log2(uint32 num)
 void
 _hash_checkpage(Page page, int flags)
 {
-    HashPageOpaque opaque;
+	HashPageOpaque opaque;
 
-    Assert(page);
-    Assert(((PageHeader)(page))->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData)));
+	Assert(page);
+	Assert(((PageHeader) (page))->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData)));
 #if 1
-    Assert(((PageHeader)(page))->pd_upper <=
-	   (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
-    Assert(((PageHeader)(page))->pd_special ==
-	   (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
-    Assert(((PageHeader)(page))->pd_opaque.od_pagesize == BLCKSZ);
+	Assert(((PageHeader) (page))->pd_upper <=
+		   (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
+	Assert(((PageHeader) (page))->pd_special ==
+		   (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
+	Assert(((PageHeader) (page))->pd_opaque.od_pagesize == BLCKSZ);
#endif
-    if (flags) {
-	opaque = (HashPageOpaque) PageGetSpecialPointer(page);
-	Assert(opaque->hasho_flag & flags);
-    }
+	if (flags)
+	{
+		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+		Assert(opaque->hasho_flag & flags);
+	}
 }
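
_hash_log2(num) above returns the smallest i such that (1 << i) >= num, a ceiling log2: for example _hash_log2(6) == 3, _hash_log2(8) == 3, and _hash_log2(9) == 4. An equivalent standalone form with a small self-check:

    #include <assert.h>
    #include <stdint.h>

    /* smallest i with (1 << i) >= num, matching _hash_log2() above */
    static uint32_t
    ceil_log2(uint32_t num)
    {
        uint32_t    i = 0;
        uint32_t    limit = 1;

        while (limit < num)
        {
            limit <<= 1;
            i++;
        }
        return i;
    }

    int
    main(void)
    {
        assert(ceil_log2(1) == 0);
        assert(ceil_log2(6) == 3);
        assert(ceil_log2(8) == 3);
        assert(ceil_log2(9) == 4);
        return 0;
    }
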