Diffstat (limited to 'src/backend/access/hash/hashpage.c')
-rw-r--r-- | src/backend/access/hash/hashpage.c | 134
1 file changed, 64 insertions, 70 deletions
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 883f2a73121..b40c20b480b 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.51 2005/06/09 21:01:25 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.52 2005/10/15 02:49:08 momjian Exp $
  *
  * NOTES
  *    Postgres hash pages look like ordinary relation pages. The opaque
@@ -240,13 +240,13 @@ _hash_metapinit(Relation rel)
              RelationGetRelationName(rel));
 
     /*
-     * Determine the target fill factor (tuples per bucket) for this
-     * index. The idea is to make the fill factor correspond to pages
-     * about 3/4ths full. We can compute it exactly if the index datatype
-     * is fixed-width, but for var-width there's some guessing involved.
+     * Determine the target fill factor (tuples per bucket) for this index.
+     * The idea is to make the fill factor correspond to pages about 3/4ths
+     * full. We can compute it exactly if the index datatype is fixed-width,
+     * but for var-width there's some guessing involved.
      */
     data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
-                                 RelationGetDescr(rel)->attrs[0]->atttypmod);
+                           RelationGetDescr(rel)->attrs[0]->atttypmod);
     item_width = MAXALIGN(sizeof(HashItemData)) + MAXALIGN(data_width) +
         sizeof(ItemIdData);        /* include the line pointer */
     ffactor = (BLCKSZ * 3 / 4) / item_width;
@@ -289,9 +289,8 @@ _hash_metapinit(Relation rel)
     metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
 
     /*
-     * We initialize the index with two buckets, 0 and 1, occupying
-     * physical blocks 1 and 2. The first freespace bitmap page is in
-     * block 3.
+     * We initialize the index with two buckets, 0 and 1, occupying physical
+     * blocks 1 and 2. The first freespace bitmap page is in block 3.
      */
     metap->hashm_maxbucket = metap->hashm_lowmask = 1;    /* nbuckets - 1 */
     metap->hashm_highmask = 3;    /* (nbuckets << 1) - 1 */
@@ -321,8 +320,8 @@ _hash_metapinit(Relation rel)
     }
 
     /*
-     * Initialize first bitmap page. Can't do this until we create the
-     * first two buckets, else smgr will complain.
+     * Initialize first bitmap page. Can't do this until we create the first
+     * two buckets, else smgr will complain.
      */
     _hash_initbitmap(rel, metap, 3);
 
@@ -367,15 +366,14 @@ _hash_expandtable(Relation rel, Buffer metabuf)
      * Obtain the page-zero lock to assert the right to begin a split (see
      * README).
      *
-     * Note: deadlock should be impossible here. Our own backend could only
-     * be holding bucket sharelocks due to stopped indexscans; those will
-     * not block other holders of the page-zero lock, who are only
-     * interested in acquiring bucket sharelocks themselves. Exclusive
-     * bucket locks are only taken here and in hashbulkdelete, and neither
-     * of these operations needs any additional locks to complete. (If,
-     * due to some flaw in this reasoning, we manage to deadlock anyway,
-     * it's okay to error out; the index will be left in a consistent
-     * state.)
+     * Note: deadlock should be impossible here. Our own backend could only be
+     * holding bucket sharelocks due to stopped indexscans; those will not
+     * block other holders of the page-zero lock, who are only interested in
+     * acquiring bucket sharelocks themselves. Exclusive bucket locks are
+     * only taken here and in hashbulkdelete, and neither of these operations
+     * needs any additional locks to complete. (If, due to some flaw in this
+     * reasoning, we manage to deadlock anyway, it's okay to error out; the
+     * index will be left in a consistent state.)
      */
     _hash_getlock(rel, 0, HASH_EXCLUSIVE);
 
@@ -386,8 +384,8 @@ _hash_expandtable(Relation rel, Buffer metabuf)
     _hash_checkpage(rel, (Page) metap, LH_META_PAGE);
 
     /*
-     * Check to see if split is still needed; someone else might have
-     * already done one while we waited for the lock.
+     * Check to see if split is still needed; someone else might have already
+     * done one while we waited for the lock.
      *
      * Make sure this stays in sync with _hash_doinsert()
      */
@@ -402,11 +400,11 @@ _hash_expandtable(Relation rel, Buffer metabuf)
      * The lock protects us against other backends, but not against our own
      * backend. Must check for active scans separately.
      *
-     * Ideally we would lock the new bucket too before proceeding, but if we
-     * are about to cross a splitpoint then the BUCKET_TO_BLKNO mapping
-     * isn't correct yet. For simplicity we update the metapage first and
-     * then lock. This should be okay because no one else should be
-     * trying to lock the new bucket yet...
+     * Ideally we would lock the new bucket too before proceeding, but if we are
+     * about to cross a splitpoint then the BUCKET_TO_BLKNO mapping isn't
+     * correct yet. For simplicity we update the metapage first and then
+     * lock. This should be okay because no one else should be trying to lock
+     * the new bucket yet...
      */
     new_bucket = metap->hashm_maxbucket + 1;
     old_bucket = (new_bucket & metap->hashm_lowmask);
@@ -420,14 +418,13 @@ _hash_expandtable(Relation rel, Buffer metabuf)
         goto fail;
 
     /*
-     * Okay to proceed with split. Update the metapage bucket mapping
-     * info.
+     * Okay to proceed with split. Update the metapage bucket mapping info.
      *
-     * Since we are scribbling on the metapage data right in the shared
-     * buffer, any failure in this next little bit leaves us with a big
-     * problem: the metapage is effectively corrupt but could get written
-     * back to disk. We don't really expect any failure, but just to be
-     * sure, establish a critical section.
+     * Since we are scribbling on the metapage data right in the shared buffer,
+     * any failure in this next little bit leaves us with a big problem: the
+     * metapage is effectively corrupt but could get written back to disk. We
+     * don't really expect any failure, but just to be sure, establish a
+     * critical section.
      */
     START_CRIT_SECTION();
 
@@ -443,8 +440,8 @@ _hash_expandtable(Relation rel, Buffer metabuf)
     /*
      * If the split point is increasing (hashm_maxbucket's log base 2
      * increases), we need to adjust the hashm_spares[] array and
-     * hashm_ovflpoint so that future overflow pages will be created
-     * beyond this new batch of bucket pages.
+     * hashm_ovflpoint so that future overflow pages will be created beyond
+     * this new batch of bucket pages.
      *
      * XXX should initialize new bucket pages to prevent out-of-order page
     * creation? Don't wanna do it right here though.
@@ -471,10 +468,9 @@ _hash_expandtable(Relation rel, Buffer metabuf)
     /*
      * Copy bucket mapping info now; this saves re-accessing the meta page
      * inside _hash_splitbucket's inner loop. Note that once we drop the
-     * split lock, other splits could begin, so these values might be out
-     * of date before _hash_splitbucket finishes. That's okay, since all
-     * it needs is to tell which of these two buckets to map hashkeys
-     * into.
+     * split lock, other splits could begin, so these values might be out of
+     * date before _hash_splitbucket finishes. That's okay, since all it
+     * needs is to tell which of these two buckets to map hashkeys into.
      */
     maxbucket = metap->hashm_maxbucket;
     highmask = metap->hashm_highmask;
@@ -554,9 +550,9 @@ _hash_splitbucket(Relation rel,
     TupleDesc    itupdesc = RelationGetDescr(rel);
 
     /*
-     * It should be okay to simultaneously write-lock pages from each
-     * bucket, since no one else can be trying to acquire buffer lock on
-     * pages of either bucket.
+     * It should be okay to simultaneously write-lock pages from each bucket,
+     * since no one else can be trying to acquire buffer lock on pages of
+     * either bucket.
      */
     oblkno = start_oblkno;
     nblkno = start_nblkno;
@@ -578,17 +574,17 @@ _hash_splitbucket(Relation rel,
     nopaque->hasho_filler = HASHO_FILL;
 
     /*
-     * Partition the tuples in the old bucket between the old bucket and
-     * the new bucket, advancing along the old bucket's overflow bucket
-     * chain and adding overflow pages to the new bucket as needed.
+     * Partition the tuples in the old bucket between the old bucket and the
+     * new bucket, advancing along the old bucket's overflow bucket chain and
+     * adding overflow pages to the new bucket as needed.
      */
     ooffnum = FirstOffsetNumber;
     omaxoffnum = PageGetMaxOffsetNumber(opage);
     for (;;)
     {
         /*
-         * at each iteration through this loop, each of these variables
-         * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum
+         * at each iteration through this loop, each of these variables should
+         * be up-to-date: obuf opage oopaque ooffnum omaxoffnum
          */
 
         /* check if we're at the end of the page */
@@ -600,8 +596,8 @@ _hash_splitbucket(Relation rel,
                 break;
 
             /*
-             * we ran out of tuples on this particular page, but we have
-             * more overflow pages; advance to next page.
+             * we ran out of tuples on this particular page, but we have more
+             * overflow pages; advance to next page.
              */
             _hash_wrtbuf(rel, obuf);
 
@@ -618,8 +614,7 @@ _hash_splitbucket(Relation rel,
          * Re-hash the tuple to determine which bucket it now belongs in.
          *
          * It is annoying to call the hash function while holding locks, but
-         * releasing and relocking the page for each tuple is unappealing
-         * too.
+         * releasing and relocking the page for each tuple is unappealing too.
          */
         hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));
         itup = &(hitem->hash_itup);
@@ -632,9 +627,9 @@ _hash_splitbucket(Relation rel,
         if (bucket == nbucket)
         {
             /*
-             * insert the tuple into the new bucket. if it doesn't fit on
-             * the current page in the new bucket, we must allocate a new
-             * overflow page and place the tuple on that page instead.
+             * insert the tuple into the new bucket. if it doesn't fit on the
+             * current page in the new bucket, we must allocate a new overflow
+             * page and place the tuple on that page instead.
              */
             itemsz = IndexTupleDSize(hitem->hash_itup)
                 + (sizeof(HashItemData) - sizeof(IndexTupleData));
@@ -659,13 +654,13 @@ _hash_splitbucket(Relation rel,
                         RelationGetRelationName(rel));
 
             /*
-             * now delete the tuple from the old bucket. after this
-             * section of code, 'ooffnum' will actually point to the
-             * ItemId to which we would point if we had advanced it before
-             * the deletion (PageIndexTupleDelete repacks the ItemId
-             * array). this also means that 'omaxoffnum' is exactly one
-             * less than it used to be, so we really can just decrement it
-             * instead of calling PageGetMaxOffsetNumber.
+             * now delete the tuple from the old bucket. after this section
+             * of code, 'ooffnum' will actually point to the ItemId to which
+             * we would point if we had advanced it before the deletion
+             * (PageIndexTupleDelete repacks the ItemId array). this also
+             * means that 'omaxoffnum' is exactly one less than it used to be,
+             * so we really can just decrement it instead of calling
+             * PageGetMaxOffsetNumber.
              */
             PageIndexTupleDelete(opage, ooffnum);
             omaxoffnum = OffsetNumberPrev(omaxoffnum);
@@ -673,9 +668,9 @@ _hash_splitbucket(Relation rel,
         else
         {
             /*
-             * the tuple stays on this page. we didn't move anything, so
-             * we didn't delete anything and therefore we don't have to
-             * change 'omaxoffnum'.
+             * the tuple stays on this page. we didn't move anything, so we
+             * didn't delete anything and therefore we don't have to change
+             * 'omaxoffnum'.
              */
             Assert(bucket == obucket);
             ooffnum = OffsetNumberNext(ooffnum);
@@ -683,11 +678,10 @@ _hash_splitbucket(Relation rel,
         }
     }
 
     /*
-     * We're at the end of the old bucket chain, so we're done
-     * partitioning the tuples. Before quitting, call _hash_squeezebucket
-     * to ensure the tuples remaining in the old bucket (including the
-     * overflow pages) are packed as tightly as possible. The new bucket
-     * is already tight.
+     * We're at the end of the old bucket chain, so we're done partitioning
+     * the tuples. Before quitting, call _hash_squeezebucket to ensure the
+     * tuples remaining in the old bucket (including the overflow pages) are
+     * packed as tightly as possible. The new bucket is already tight.
      */
     _hash_wrtbuf(rel, obuf);
     _hash_wrtbuf(rel, nbuf);
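
The first code hunk above reflows the comment for the fill-factor computation, whose arithmetic is small enough to check in isolation. Below is a minimal standalone sketch of that computation. BLCKSZ, the MAXALIGN rule, and the two struct sizes are illustrative stand-ins, not the server's definitions (in the server they come from the build configuration and the hash access-method headers).

/*
 * Standalone sketch of _hash_metapinit()'s fill-factor arithmetic.
 * All the sizes below are stand-ins chosen for illustration.
 */
#include <stdio.h>

#define BLCKSZ 8192                         /* assume the default 8K page */
#define MAXALIGN(LEN) (((LEN) + 7) & ~7)    /* assume 8-byte alignment */

int
main(void)
{
    int     data_width = 4;     /* e.g. a fixed-width int4 hash key */
    int     item_hdr = 8;       /* stand-in for sizeof(HashItemData) */
    int     line_ptr = 4;       /* stand-in for sizeof(ItemIdData) */
    int     item_width;
    int     ffactor;

    /* same shape as the code in the hunk above */
    item_width = MAXALIGN(item_hdr) + MAXALIGN(data_width) +
        line_ptr;               /* include the line pointer */
    ffactor = (BLCKSZ * 3 / 4) / item_width;    /* pages ~3/4ths full */

    printf("item_width = %d, ffactor = %d\n", item_width, ffactor);
    return 0;
}

With these stand-in sizes, item_width comes out to 20 bytes and ffactor to 307 tuples per bucket, i.e. a budget of three quarters of a page, as the reflowed comment describes.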
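Several of the reflowed comments (hashm_maxbucket, the high/low masks, new_bucket & lowmask) rest on the linear-hashing scheme that lets buckets split one at a time. The sketch below is modeled on PostgreSQL's _hash_hashkey2bucket() together with the mask bookkeeping _hash_expandtable performs when a split crosses a power of two; the plain variables stand in for the hashm_ fields of the metapage, and the details are simplified for illustration.

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the hashkey-to-bucket mapping used during incremental
 * splitting, modeled on _hash_hashkey2bucket().  The parameters stand
 * in for the metapage fields hashm_maxbucket, hashm_highmask, and
 * hashm_lowmask.
 */
static uint32_t
hashkey2bucket(uint32_t hashkey, uint32_t maxbucket,
               uint32_t highmask, uint32_t lowmask)
{
    uint32_t    bucket = hashkey & highmask;

    if (bucket > maxbucket)         /* that bucket doesn't exist yet... */
        bucket = bucket & lowmask;  /* ...so fall back to its "parent" */
    return bucket;
}

int
main(void)
{
    /* state right after _hash_metapinit: buckets 0 and 1 exist */
    uint32_t    maxbucket = 1;  /* nbuckets - 1 */
    uint32_t    lowmask = 1;
    uint32_t    highmask = 3;   /* (nbuckets << 1) - 1 */
    uint32_t    new_bucket;

    /* one _hash_expandtable step: split old_bucket into new_bucket */
    new_bucket = maxbucket + 1;
    printf("split bucket %u -> new bucket %u\n",
           new_bucket & lowmask, new_bucket);
    maxbucket = new_bucket;
    if (maxbucket > highmask)
    {
        /* crossed a splitpoint: widen both masks */
        lowmask = highmask;
        highmask = maxbucket | lowmask;
    }

    /* keys rehash to either the old or the new bucket, nothing else */
    printf("key 0x2a maps to bucket %u\n",
           hashkey2bucket(0x2a, maxbucket, highmask, lowmask));
    return 0;
}

The invariant is that a key whose masked value names a not-yet-created bucket must still live in that bucket's parent, which is exactly what masking with lowmask recovers; that is also why _hash_splitbucket only ever needs the copied maxbucket/highmask/lowmask values to decide between the two buckets being split.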
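The longest comment in the diff, about PageIndexTupleDelete repacking the ItemId array, describes a general delete-while-scanning pattern: when deletion compacts the array underneath the scan, a delete means "keep the index, shrink the bound" rather than advancing. Here is a toy version with a plain int array standing in for the page's line-pointer array; the names and the odd/even "move" rule are invented for the illustration.

#include <stdio.h>

/*
 * Toy illustration of the delete-while-scanning pattern used in
 * _hash_splitbucket: deleting element i shifts its successors left,
 * so after a delete the scan keeps the same index and decrements the
 * maximum, mirroring PageIndexTupleDelete compacting the ItemId array.
 */
static void
delete_at(int *a, int *max, int i)
{
    for (int j = i; j < *max - 1; j++)
        a[j] = a[j + 1];        /* repack, like PageIndexTupleDelete */
    (*max)--;
}

int
main(void)
{
    int     a[] = {5, 12, 7, 18, 3};    /* pretend odd values "move" */
    int     max = 5;
    int     i = 0;

    while (i < max)
    {
        if (a[i] % 2 != 0)
            delete_at(a, &max, i);  /* deleted: do NOT advance i */
        else
            i++;                    /* kept: advance normally */
    }
    for (int j = 0; j < max; j++)
        printf("%d ", a[j]);        /* prints: 12 18 */
    printf("\n");
    return 0;
}

Running it prints 12 18: every "moved" element is removed in a single left-to-right pass, and the bound shrinks by exactly one per delete, which is why the real loop can simply decrement omaxoffnum instead of calling PageGetMaxOffsetNumber again.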