Diffstat (limited to 'src/backend/storage/buffer')
-rw-r--r--  src/backend/storage/buffer/buf_init.c  |  407
-rw-r--r--  src/backend/storage/buffer/buf_table.c |  217
-rw-r--r--  src/backend/storage/buffer/bufmgr.c    | 2695
-rw-r--r--  src/backend/storage/buffer/freelist.c  |  373
-rw-r--r--  src/backend/storage/buffer/localbuf.c  |  370
5 files changed, 2132 insertions, 1930 deletions
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 20f8195d1e9..4ce064d6713 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * buf_init.c-- - * buffer manager initialization routines + * buffer manager initialization routines * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.10 1997/07/28 00:54:33 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.11 1997/09/07 04:48:15 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -35,98 +35,103 @@ #include "utils/dynahash.h" #include "utils/hsearch.h" #include "utils/memutils.h" -#include "executor/execdebug.h" /* for NDirectFileRead */ +#include "executor/execdebug.h" /* for NDirectFileRead */ #include "catalog/catalog.h" /* - * if BMTRACE is defined, we trace the last 200 buffer allocations and - * deallocations in a circular buffer in shared memory. + * if BMTRACE is defined, we trace the last 200 buffer allocations and + * deallocations in a circular buffer in shared memory. */ #ifdef BMTRACE -bmtrace *TraceBuf; -long *CurTraceBuf; -#define BMT_LIMIT 200 -#endif /* BMTRACE */ -int ShowPinTrace = 0; - -int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */ -int Data_Descriptors; -int Free_List_Descriptor; -int Lookup_List_Descriptor; -int Num_Descriptors; - -BufferDesc *BufferDescriptors; -BufferBlock BufferBlocks; +bmtrace *TraceBuf; +long *CurTraceBuf; + +#define BMT_LIMIT 200 +#endif /* BMTRACE */ +int ShowPinTrace = 0; + +int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */ +int Data_Descriptors; +int Free_List_Descriptor; +int Lookup_List_Descriptor; +int Num_Descriptors; + +BufferDesc *BufferDescriptors; +BufferBlock BufferBlocks; + #ifndef HAS_TEST_AND_SET -long *NWaitIOBackendP; +long *NWaitIOBackendP; + #endif -extern IpcSemaphoreId WaitIOSemId; +extern IpcSemaphoreId WaitIOSemId; + +long *PrivateRefCount;/* also used in freelist.c */ +long *LastRefCount; /* refcounts of last ExecMain level */ +long *CommitInfoNeedsSave; /* to write buffers where we have + * filled in */ -long *PrivateRefCount; /* also used in freelist.c */ -long *LastRefCount; /* refcounts of last ExecMain level */ -long *CommitInfoNeedsSave; /* to write buffers where we have filled in */ - /* t_tmin (or t_tmax) */ + /* t_tmin (or t_tmax) */ /* * Data Structures: - * buffers live in a freelist and a lookup data structure. - * + * buffers live in a freelist and a lookup data structure. + * * * Buffer Lookup: - * Two important notes. First, the buffer has to be - * available for lookup BEFORE an IO begins. Otherwise - * a second process trying to read the buffer will - * allocate its own copy and the buffeer pool will - * become inconsistent. + * Two important notes. First, the buffer has to be + * available for lookup BEFORE an IO begins. Otherwise + * a second process trying to read the buffer will + * allocate its own copy and the buffeer pool will + * become inconsistent. * * Buffer Replacement: - * see freelist.c. A buffer cannot be replaced while in - * use either by data manager or during IO. + * see freelist.c. A buffer cannot be replaced while in + * use either by data manager or during IO. 
* * WriteBufferBack: - * currently, a buffer is only written back at the time - * it is selected for replacement. It should - * be done sooner if possible to reduce latency of - * BufferAlloc(). Maybe there should be a daemon process. + * currently, a buffer is only written back at the time + * it is selected for replacement. It should + * be done sooner if possible to reduce latency of + * BufferAlloc(). Maybe there should be a daemon process. * * Synchronization/Locking: * - * BufMgrLock lock -- must be acquired before manipulating the - * buffer queues (lookup/freelist). Must be released - * before exit and before doing any IO. + * BufMgrLock lock -- must be acquired before manipulating the + * buffer queues (lookup/freelist). Must be released + * before exit and before doing any IO. * * IO_IN_PROGRESS -- this is a flag in the buffer descriptor. - * It must be set when an IO is initiated and cleared at - * the end of the IO. It is there to make sure that one - * process doesn't start to use a buffer while another is - * faulting it in. see IOWait/IOSignal. + * It must be set when an IO is initiated and cleared at + * the end of the IO. It is there to make sure that one + * process doesn't start to use a buffer while another is + * faulting it in. see IOWait/IOSignal. * - * refcount -- A buffer is pinned during IO and immediately - * after a BufferAlloc(). A buffer is always either pinned - * or on the freelist but never both. The buffer must be - * released, written, or flushed before the end of - * transaction. + * refcount -- A buffer is pinned during IO and immediately + * after a BufferAlloc(). A buffer is always either pinned + * or on the freelist but never both. The buffer must be + * released, written, or flushed before the end of + * transaction. * * PrivateRefCount -- Each buffer also has a private refcount the keeps - * track of the number of times the buffer is pinned in the current - * processes. This is used for two purposes, first, if we pin a - * a buffer more than once, we only need to change the shared refcount - * once, thus only lock the buffer pool once, second, when a transaction - * aborts, it should only unpin the buffers exactly the number of times it - * has pinned them, so that it will not blow away buffers of another - * backend. + * track of the number of times the buffer is pinned in the current + * processes. This is used for two purposes, first, if we pin a + * a buffer more than once, we only need to change the shared refcount + * once, thus only lock the buffer pool once, second, when a transaction + * aborts, it should only unpin the buffers exactly the number of times it + * has pinned them, so that it will not blow away buffers of another + * backend. 
* */ -SPINLOCK BufMgrLock; +SPINLOCK BufMgrLock; -long int ReadBufferCount; -long int ReadLocalBufferCount; -long int BufferHitCount; -long int LocalBufferHitCount; -long int BufferFlushCount; -long int LocalBufferFlushCount; +long int ReadBufferCount; +long int ReadLocalBufferCount; +long int BufferHitCount; +long int LocalBufferHitCount; +long int BufferFlushCount; +long int LocalBufferFlushCount; /* @@ -138,111 +143,121 @@ long int LocalBufferFlushCount; void InitBufferPool(IPCKey key) { - bool foundBufs,foundDescs; - int i; - - /* check padding of BufferDesc and BufferHdr */ - /* we need both checks because a sbufdesc_padded > PADDED_SBUFDESC_SIZE - will shrink sbufdesc to the required size, which is bad */ - if (sizeof(struct sbufdesc) != PADDED_SBUFDESC_SIZE || - sizeof(struct sbufdesc_unpadded) > PADDED_SBUFDESC_SIZE) - elog(WARN,"Internal error: sbufdesc does not have the proper size, " - "contact the Postgres developers"); - if (sizeof(struct sbufdesc_unpadded) <= PADDED_SBUFDESC_SIZE/2) - elog(WARN,"Internal error: sbufdesc is greatly over-sized, " - "contact the Postgres developers"); - - Data_Descriptors = NBuffers; - Free_List_Descriptor = Data_Descriptors; - Lookup_List_Descriptor = Data_Descriptors + 1; - Num_Descriptors = Data_Descriptors + 1; - - SpinAcquire(BufMgrLock); - + bool foundBufs, + foundDescs; + int i; + + /* check padding of BufferDesc and BufferHdr */ + + /* + * we need both checks because a sbufdesc_padded > + * PADDED_SBUFDESC_SIZE will shrink sbufdesc to the required size, + * which is bad + */ + if (sizeof(struct sbufdesc) != PADDED_SBUFDESC_SIZE || + sizeof(struct sbufdesc_unpadded) > PADDED_SBUFDESC_SIZE) + elog(WARN, "Internal error: sbufdesc does not have the proper size, " + "contact the Postgres developers"); + if (sizeof(struct sbufdesc_unpadded) <= PADDED_SBUFDESC_SIZE / 2) + elog(WARN, "Internal error: sbufdesc is greatly over-sized, " + "contact the Postgres developers"); + + Data_Descriptors = NBuffers; + Free_List_Descriptor = Data_Descriptors; + Lookup_List_Descriptor = Data_Descriptors + 1; + Num_Descriptors = Data_Descriptors + 1; + + SpinAcquire(BufMgrLock); + #ifdef BMTRACE - CurTraceBuf = (long *) ShmemInitStruct("Buffer trace", - (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long), - &foundDescs); - if (!foundDescs) - memset(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long)); - - TraceBuf = (bmtrace *) &(CurTraceBuf[1]); + CurTraceBuf = (long *) ShmemInitStruct("Buffer trace", + (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long), + &foundDescs); + if (!foundDescs) + memset(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long)); + + TraceBuf = (bmtrace *) & (CurTraceBuf[1]); #endif - - BufferDescriptors = (BufferDesc *) - ShmemInitStruct("Buffer Descriptors", - Num_Descriptors*sizeof(BufferDesc),&foundDescs); - - BufferBlocks = (BufferBlock) - ShmemInitStruct("Buffer Blocks", - NBuffers*BLCKSZ,&foundBufs); - + + BufferDescriptors = (BufferDesc *) + ShmemInitStruct("Buffer Descriptors", + Num_Descriptors * sizeof(BufferDesc), &foundDescs); + + BufferBlocks = (BufferBlock) + ShmemInitStruct("Buffer Blocks", + NBuffers * BLCKSZ, &foundBufs); + #ifndef HAS_TEST_AND_SET - { - bool foundNWaitIO; - - NWaitIOBackendP = (long *)ShmemInitStruct("#Backends Waiting IO", - sizeof(long), - &foundNWaitIO); - if (!foundNWaitIO) - *NWaitIOBackendP = 0; - } + { + bool foundNWaitIO; + + NWaitIOBackendP = (long *) ShmemInitStruct("#Backends Waiting IO", + sizeof(long), + &foundNWaitIO); + if (!foundNWaitIO) + *NWaitIOBackendP = 0; + } #endif - - if 
(foundDescs || foundBufs) { - - /* both should be present or neither */ - Assert(foundDescs && foundBufs); - - } else { - BufferDesc *buf; - unsigned long block; - - buf = BufferDescriptors; - block = (unsigned long) BufferBlocks; - - /* - * link the buffers into a circular, doubly-linked list to - * initialize free list. Still don't know anything about - * replacement strategy in this file. - */ - for (i = 0; i < Data_Descriptors; block+=BLCKSZ,buf++,i++) { - Assert(ShmemIsValid((unsigned long)block)); - - buf->freeNext = i+1; - buf->freePrev = i-1; - - CLEAR_BUFFERTAG(&(buf->tag)); - buf->data = MAKE_OFFSET(block); - buf->flags = (BM_DELETED | BM_FREE | BM_VALID); - buf->refcount = 0; - buf->buf_id = i; + + if (foundDescs || foundBufs) + { + + /* both should be present or neither */ + Assert(foundDescs && foundBufs); + + } + else + { + BufferDesc *buf; + unsigned long block; + + buf = BufferDescriptors; + block = (unsigned long) BufferBlocks; + + /* + * link the buffers into a circular, doubly-linked list to + * initialize free list. Still don't know anything about + * replacement strategy in this file. + */ + for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++) + { + Assert(ShmemIsValid((unsigned long) block)); + + buf->freeNext = i + 1; + buf->freePrev = i - 1; + + CLEAR_BUFFERTAG(&(buf->tag)); + buf->data = MAKE_OFFSET(block); + buf->flags = (BM_DELETED | BM_FREE | BM_VALID); + buf->refcount = 0; + buf->buf_id = i; #ifdef HAS_TEST_AND_SET - S_INIT_LOCK(&(buf->io_in_progress_lock)); + S_INIT_LOCK(&(buf->io_in_progress_lock)); #endif + } + + /* close the circular queue */ + BufferDescriptors[0].freePrev = Data_Descriptors - 1; + BufferDescriptors[Data_Descriptors - 1].freeNext = 0; } - - /* close the circular queue */ - BufferDescriptors[0].freePrev = Data_Descriptors-1; - BufferDescriptors[Data_Descriptors-1].freeNext = 0; - } - - /* Init the rest of the module */ - InitBufTable(); - InitFreeList(!foundDescs); - - SpinRelease(BufMgrLock); - + + /* Init the rest of the module */ + InitBufTable(); + InitFreeList(!foundDescs); + + SpinRelease(BufMgrLock); + #ifndef HAS_TEST_AND_SET - { - int status; - WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key), - 1, IPCProtection, 0, 1, &status); - } + { + int status; + + WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key), + 1, IPCProtection, 0, 1, &status); + } #endif - PrivateRefCount = (long *) calloc(NBuffers, sizeof(long)); - LastRefCount = (long *) calloc(NBuffers, sizeof(long)); - CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long)); + PrivateRefCount = (long *) calloc(NBuffers, sizeof(long)); + LastRefCount = (long *) calloc(NBuffers, sizeof(long)); + CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long)); } /* ----------------------------------------------------- @@ -255,43 +270,41 @@ InitBufferPool(IPCKey key) int BufferShmemSize() { - int size = 0; - int nbuckets; - int nsegs; - int tmp; - - nbuckets = 1 << (int)my_log2((NBuffers - 1) / DEF_FFACTOR + 1); - nsegs = 1 << (int)my_log2((nbuckets - 1) / DEF_SEGSIZE + 1); - - /* size of shmem binding table */ - size += MAXALIGN(my_log2(BTABLE_SIZE) * sizeof(void *)); /* HTAB->dir */ - size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */ - size += MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); - size += BUCKET_ALLOC_INCR * - (MAXALIGN(sizeof(BUCKET_INDEX)) + - MAXALIGN(BTABLE_KEYSIZE) + - MAXALIGN(BTABLE_DATASIZE)); - - /* size of buffer descriptors */ - size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc)); - - /* size of data pages */ - size += 
NBuffers * MAXALIGN(BLCKSZ); - - /* size of buffer hash table */ - size += MAXALIGN(my_log2(NBuffers) * sizeof(void *)); /* HTAB->dir */ - size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */ - size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); - tmp = (int)ceil((double)NBuffers/BUCKET_ALLOC_INCR); - size += tmp * BUCKET_ALLOC_INCR * - (MAXALIGN(sizeof(BUCKET_INDEX)) + - MAXALIGN(sizeof(BufferTag)) + - MAXALIGN(sizeof(Buffer))); - + int size = 0; + int nbuckets; + int nsegs; + int tmp; + + nbuckets = 1 << (int) my_log2((NBuffers - 1) / DEF_FFACTOR + 1); + nsegs = 1 << (int) my_log2((nbuckets - 1) / DEF_SEGSIZE + 1); + + /* size of shmem binding table */ + size += MAXALIGN(my_log2(BTABLE_SIZE) * sizeof(void *)); /* HTAB->dir */ + size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */ + size += MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); + size += BUCKET_ALLOC_INCR * + (MAXALIGN(sizeof(BUCKET_INDEX)) + + MAXALIGN(BTABLE_KEYSIZE) + + MAXALIGN(BTABLE_DATASIZE)); + + /* size of buffer descriptors */ + size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc)); + + /* size of data pages */ + size += NBuffers * MAXALIGN(BLCKSZ); + + /* size of buffer hash table */ + size += MAXALIGN(my_log2(NBuffers) * sizeof(void *)); /* HTAB->dir */ + size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */ + size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); + tmp = (int) ceil((double) NBuffers / BUCKET_ALLOC_INCR); + size += tmp * BUCKET_ALLOC_INCR * + (MAXALIGN(sizeof(BUCKET_INDEX)) + + MAXALIGN(sizeof(BufferTag)) + + MAXALIGN(sizeof(Buffer))); + #ifdef BMTRACE - size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long); + size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long); #endif - return size; + return size; } - - diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c index 61e365ce55e..41b2b4d8ee0 100644 --- a/src/backend/storage/buffer/buf_table.c +++ b/src/backend/storage/buffer/buf_table.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * buf_table.c-- - * routines for finding buffers in the buffer pool. + * routines for finding buffers in the buffer pool. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.4 1997/08/19 21:32:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.5 1997/09/07 04:48:17 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -16,30 +16,31 @@ * * Data Structures: * - * Buffers are identified by their BufferTag (buf.h). This + * Buffers are identified by their BufferTag (buf.h). This * file contains routines for allocating a shmem hash table to * map buffer tags to buffer descriptors. * * Synchronization: - * - * All routines in this file assume buffer manager spinlock is - * held by their caller. + * + * All routines in this file assume buffer manager spinlock is + * held by their caller. 
*/ #include "postgres.h" #include "storage/bufmgr.h" -#include "storage/buf_internals.h" /* where the declarations go */ +#include "storage/buf_internals.h" /* where the declarations go */ #include "storage/shmem.h" #include "storage/spin.h" #include "utils/hsearch.h" -static HTAB *SharedBufHash; +static HTAB *SharedBufHash; -typedef struct lookup { - BufferTag key; - Buffer id; -} LookupEnt; +typedef struct lookup +{ + BufferTag key; + Buffer id; +} LookupEnt; /* * Initialize shmem hash table for mapping buffers @@ -47,109 +48,116 @@ typedef struct lookup { void InitBufTable() { - HASHCTL info; - int hash_flags; - - /* assume lock is held */ - - /* BufferTag maps to Buffer */ - info.keysize = sizeof(BufferTag); - info.datasize = sizeof(Buffer); - info.hash = tag_hash; - - hash_flags = (HASH_ELEM | HASH_FUNCTION); - - - SharedBufHash = (HTAB *) ShmemInitHash("Shared Buf Lookup Table", - NBuffers,NBuffers, - &info,hash_flags); - - if (! SharedBufHash) { - elog(FATAL,"couldn't initialize shared buffer pool Hash Tbl"); - exit(1); - } - + HASHCTL info; + int hash_flags; + + /* assume lock is held */ + + /* BufferTag maps to Buffer */ + info.keysize = sizeof(BufferTag); + info.datasize = sizeof(Buffer); + info.hash = tag_hash; + + hash_flags = (HASH_ELEM | HASH_FUNCTION); + + + SharedBufHash = (HTAB *) ShmemInitHash("Shared Buf Lookup Table", + NBuffers, NBuffers, + &info, hash_flags); + + if (!SharedBufHash) + { + elog(FATAL, "couldn't initialize shared buffer pool Hash Tbl"); + exit(1); + } + } -BufferDesc * -BufTableLookup(BufferTag *tagPtr) +BufferDesc * +BufTableLookup(BufferTag * tagPtr) { - LookupEnt * result; - bool found; - - if (tagPtr->blockNum == P_NEW) - return(NULL); - - result = (LookupEnt *) - hash_search(SharedBufHash,(char *) tagPtr,HASH_FIND,&found); - - if (! result){ - elog(WARN,"BufTableLookup: BufferLookup table corrupted"); - return(NULL); - } - if (! found) { - return(NULL); - } - return(&(BufferDescriptors[result->id])); + LookupEnt *result; + bool found; + + if (tagPtr->blockNum == P_NEW) + return (NULL); + + result = (LookupEnt *) + hash_search(SharedBufHash, (char *) tagPtr, HASH_FIND, &found); + + if (!result) + { + elog(WARN, "BufTableLookup: BufferLookup table corrupted"); + return (NULL); + } + if (!found) + { + return (NULL); + } + return (&(BufferDescriptors[result->id])); } /* * BufTableDelete */ bool -BufTableDelete(BufferDesc *buf) +BufTableDelete(BufferDesc * buf) { - LookupEnt * result; - bool found; - - /* buffer not initialized or has been removed from - * table already. BM_DELETED keeps us from removing - * buffer twice. - */ - if (buf->flags & BM_DELETED) { - return(TRUE); - } - - buf->flags |= BM_DELETED; - - result = (LookupEnt *) - hash_search(SharedBufHash,(char *) &(buf->tag),HASH_REMOVE,&found); - - if (! (result && found)) { - elog(WARN,"BufTableDelete: BufferLookup table corrupted"); - return(FALSE); - } - - return(TRUE); + LookupEnt *result; + bool found; + + /* + * buffer not initialized or has been removed from table already. + * BM_DELETED keeps us from removing buffer twice. 
+ */ + if (buf->flags & BM_DELETED) + { + return (TRUE); + } + + buf->flags |= BM_DELETED; + + result = (LookupEnt *) + hash_search(SharedBufHash, (char *) &(buf->tag), HASH_REMOVE, &found); + + if (!(result && found)) + { + elog(WARN, "BufTableDelete: BufferLookup table corrupted"); + return (FALSE); + } + + return (TRUE); } bool -BufTableInsert(BufferDesc *buf) +BufTableInsert(BufferDesc * buf) { - LookupEnt * result; - bool found; - - /* cannot insert it twice */ - Assert (buf->flags & BM_DELETED); - buf->flags &= ~(BM_DELETED); - - result = (LookupEnt *) - hash_search(SharedBufHash,(char *) &(buf->tag),HASH_ENTER,&found); - - if (! result) { - Assert(0); - elog(WARN,"BufTableInsert: BufferLookup table corrupted"); - return(FALSE); - } - /* found something else in the table ! */ - if (found) { - Assert(0); - elog(WARN,"BufTableInsert: BufferLookup table corrupted"); - return(FALSE); - } - - result->id = buf->buf_id; - return(TRUE); + LookupEnt *result; + bool found; + + /* cannot insert it twice */ + Assert(buf->flags & BM_DELETED); + buf->flags &= ~(BM_DELETED); + + result = (LookupEnt *) + hash_search(SharedBufHash, (char *) &(buf->tag), HASH_ENTER, &found); + + if (!result) + { + Assert(0); + elog(WARN, "BufTableInsert: BufferLookup table corrupted"); + return (FALSE); + } + /* found something else in the table ! */ + if (found) + { + Assert(0); + elog(WARN, "BufTableInsert: BufferLookup table corrupted"); + return (FALSE); + } + + result->id = buf->buf_id; + return (TRUE); } /* prints out collision stats for the buf table */ @@ -157,8 +165,9 @@ BufTableInsert(BufferDesc *buf) void DBG_LookupListCheck(int nlookup) { - nlookup = 10; - - hash_stats("Shared",SharedBufHash); + nlookup = 10; + + hash_stats("Shared", SharedBufHash); } + #endif diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 466728c4a46..2a53e6bd78c 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1,44 +1,44 @@ /*------------------------------------------------------------------------- * * bufmgr.c-- - * buffer manager interface routines + * buffer manager interface routines * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.19 1997/08/19 21:32:39 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.20 1997/09/07 04:48:19 momjian Exp $ * *------------------------------------------------------------------------- */ /* * * BufferAlloc() -- lookup a buffer in the buffer table. If - * it isn't there add it, but do not read it into memory. - * This is used when we are about to reinitialize the - * buffer so don't care what the current disk contents are. - * BufferAlloc() pins the new buffer in memory. + * it isn't there add it, but do not read it into memory. + * This is used when we are about to reinitialize the + * buffer so don't care what the current disk contents are. + * BufferAlloc() pins the new buffer in memory. * * ReadBuffer() -- same as BufferAlloc() but reads the data - * on a buffer cache miss. + * on a buffer cache miss. * * ReleaseBuffer() -- unpin the buffer * * WriteNoReleaseBuffer() -- mark the buffer contents as "dirty" - * but don't unpin. The disk IO is delayed until buffer - * replacement if WriteMode is BUFFER_LATE_WRITE. + * but don't unpin. The disk IO is delayed until buffer + * replacement if WriteMode is BUFFER_LATE_WRITE. 
* - * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer() + * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer() * * FlushBuffer() -- as above but never delayed write. * * BufferSync() -- flush all dirty buffers in the buffer pool. - * + * * InitBufferPool() -- Init the buffer module. * - * See other files: - * freelist.c -- chooses victim for buffer replacement - * buf_table.c -- manages the buffer lookup table + * See other files: + * freelist.c -- chooses victim for buffer replacement + * buf_table.c -- manages the buffer lookup table */ #include <sys/types.h> #include <sys/file.h> @@ -66,7 +66,7 @@ #include "utils/palloc.h" #include "utils/memutils.h" #include "utils/relcache.h" -#include "executor/execdebug.h" /* for NDirectFileRead */ +#include "executor/execdebug.h" /* for NDirectFileRead */ #include "catalog/catalog.h" extern SPINLOCK BufMgrLock; @@ -77,76 +77,88 @@ extern long int LocalBufferHitCount; extern long int BufferFlushCount; extern long int LocalBufferFlushCount; -static int WriteMode = BUFFER_LATE_WRITE; /* Delayed write is default */ +static int WriteMode = BUFFER_LATE_WRITE; /* Delayed write is + * default */ + +static void WaitIO(BufferDesc * buf, SPINLOCK spinlock); -static void WaitIO(BufferDesc *buf, SPINLOCK spinlock); #ifndef HAS_TEST_AND_SET -static void SignalIO(BufferDesc *buf); -extern long *NWaitIOBackendP; /* defined in buf_init.c */ -#endif /* HAS_TEST_AND_SET */ - -static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum, - bool bufferLockHeld); -static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, - bool *foundPtr, bool bufferLockHeld); -static int FlushBuffer (Buffer buffer, bool release); -static void BufferSync(void); -static int BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld); +static void SignalIO(BufferDesc * buf); +extern long *NWaitIOBackendP;/* defined in buf_init.c */ + +#endif /* HAS_TEST_AND_SET */ + +static Buffer +ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum, + bool bufferLockHeld); +static BufferDesc * +BufferAlloc(Relation reln, BlockNumber blockNum, + bool * foundPtr, bool bufferLockHeld); +static int FlushBuffer(Buffer buffer, bool release); +static void BufferSync(void); +static int BufferReplace(BufferDesc * bufHdr, bool bufferLockHeld); /* --------------------------------------------------- * RelationGetBufferWithBuffer - * see if the given buffer is what we want - * if yes, we don't need to bother the buffer manager + * see if the given buffer is what we want + * if yes, we don't need to bother the buffer manager * --------------------------------------------------- */ Buffer RelationGetBufferWithBuffer(Relation relation, - BlockNumber blockNumber, - Buffer buffer) + BlockNumber blockNumber, + Buffer buffer) { - BufferDesc *bufHdr; - LRelId lrelId; - - if (BufferIsValid(buffer)) { - if (!BufferIsLocal(buffer)) { - bufHdr = &BufferDescriptors[buffer-1]; - lrelId = RelationGetLRelId(relation); - SpinAcquire(BufMgrLock); - if (bufHdr->tag.blockNum == blockNumber && - bufHdr->tag.relId.relId == lrelId.relId && - bufHdr->tag.relId.dbId == lrelId.dbId) { - SpinRelease(BufMgrLock); - return(buffer); - } - return(ReadBufferWithBufferLock(relation, blockNumber, true)); - } else { - bufHdr = &LocalBufferDescriptors[-buffer-1]; - if (bufHdr->tag.relId.relId == relation->rd_id && - bufHdr->tag.blockNum == blockNumber) { - return(buffer); - } + BufferDesc *bufHdr; + LRelId lrelId; + + if (BufferIsValid(buffer)) + { + if (!BufferIsLocal(buffer)) + { + bufHdr = 
&BufferDescriptors[buffer - 1]; + lrelId = RelationGetLRelId(relation); + SpinAcquire(BufMgrLock); + if (bufHdr->tag.blockNum == blockNumber && + bufHdr->tag.relId.relId == lrelId.relId && + bufHdr->tag.relId.dbId == lrelId.dbId) + { + SpinRelease(BufMgrLock); + return (buffer); + } + return (ReadBufferWithBufferLock(relation, blockNumber, true)); + } + else + { + bufHdr = &LocalBufferDescriptors[-buffer - 1]; + if (bufHdr->tag.relId.relId == relation->rd_id && + bufHdr->tag.blockNum == blockNumber) + { + return (buffer); + } + } } - } - return(ReadBuffer(relation, blockNumber)); + return (ReadBuffer(relation, blockNumber)); } /* * ReadBuffer -- returns a buffer containing the requested - * block of the requested relation. If the blknum - * requested is P_NEW, extend the relation file and - * allocate a new block. + * block of the requested relation. If the blknum + * requested is P_NEW, extend the relation file and + * allocate a new block. * * Returns: the buffer number for the buffer containing - * the block read or NULL on an error. + * the block read or NULL on an error. * * Assume when this function is called, that reln has been - * opened already. + * opened already. */ -extern int ShowPinTrace; +extern int ShowPinTrace; -#undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG defined */ +#undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG + * defined */ /* * ReadBuffer -- @@ -155,7 +167,7 @@ extern int ShowPinTrace; Buffer ReadBuffer(Relation reln, BlockNumber blockNum) { - return ReadBufferWithBufferLock(reln, blockNum, false); + return ReadBufferWithBufferLock(reln, blockNum, false); } /* @@ -164,156 +176,176 @@ ReadBuffer(Relation reln, BlockNumber blockNum) * XXX caller must have already acquired BufMgrLock */ #ifdef NOT_USED -static bool +static bool is_userbuffer(Buffer buffer) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - if (IsSystemRelationName(buf->sb_relname)) - return false; - return true; + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + if (IsSystemRelationName(buf->sb_relname)) + return false; + return true; } + #endif #ifdef NOT_USED Buffer ReadBuffer_Debug(char *file, - int line, - Relation reln, - BlockNumber blockNum) + int line, + Relation reln, + BlockNumber blockNum) { - Buffer buffer; - - buffer = ReadBufferWithBufferLock(reln, blockNum, false); - if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \ + Buffer buffer; + + buffer = ReadBufferWithBufferLock(reln, blockNum, false); + if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } - return buffer; + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } + return buffer; } + #endif /* - * ReadBufferWithBufferLock -- does the work of - * ReadBuffer() but with the possibility that - * the buffer lock has already been held. this - * is yet another effort to reduce the number of - * semops in the system. + * ReadBufferWithBufferLock -- does the work of + * ReadBuffer() but with the possibility that + * the buffer lock has already been held. this + * is yet another effort to reduce the number of + * semops in the system. 
*/ -static Buffer +static Buffer ReadBufferWithBufferLock(Relation reln, - BlockNumber blockNum, - bool bufferLockHeld) + BlockNumber blockNum, + bool bufferLockHeld) { - BufferDesc *bufHdr; - int extend; /* extending the file by one block */ - int status; - bool found; - bool isLocalBuf; - - extend = (blockNum == P_NEW); - isLocalBuf = reln->rd_islocal; - - if (isLocalBuf) { - ReadLocalBufferCount++; - bufHdr = LocalBufferAlloc(reln, blockNum, &found); - if (found) LocalBufferHitCount++; - } else { - ReadBufferCount++; - - /* lookup the buffer. IO_IN_PROGRESS is set if the requested - * block is not currently in memory. - */ - bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld); - if (found) BufferHitCount++; - } - - if (!bufHdr) { - return(InvalidBuffer); - } - - /* if its already in the buffer pool, we're done */ - if (found) { + BufferDesc *bufHdr; + int extend; /* extending the file by one block */ + int status; + bool found; + bool isLocalBuf; + + extend = (blockNum == P_NEW); + isLocalBuf = reln->rd_islocal; + + if (isLocalBuf) + { + ReadLocalBufferCount++; + bufHdr = LocalBufferAlloc(reln, blockNum, &found); + if (found) + LocalBufferHitCount++; + } + else + { + ReadBufferCount++; + + /* + * lookup the buffer. IO_IN_PROGRESS is set if the requested + * block is not currently in memory. + */ + bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld); + if (found) + BufferHitCount++; + } + + if (!bufHdr) + { + return (InvalidBuffer); + } + + /* if its already in the buffer pool, we're done */ + if (found) + { + + /* + * This happens when a bogus buffer was returned previously and is + * floating around in the buffer pool. A routine calling this + * would want this extended. + */ + if (extend) + { + /* new buffers are zero-filled */ + memset((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); + smgrextend(bufHdr->bufsmgr, reln, + (char *) MAKE_PTR(bufHdr->data)); + } + return (BufferDescriptorGetBuffer(bufHdr)); + + } + /* - * This happens when a bogus buffer was returned previously and is - * floating around in the buffer pool. A routine calling this would - * want this extended. + * if we have gotten to this point, the reln pointer must be ok and + * the relation file must be open. */ - if (extend) { - /* new buffers are zero-filled */ - memset((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); - smgrextend(bufHdr->bufsmgr, reln, - (char *) MAKE_PTR(bufHdr->data)); + if (extend) + { + /* new buffers are zero-filled */ + memset((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); + status = smgrextend(bufHdr->bufsmgr, reln, + (char *) MAKE_PTR(bufHdr->data)); } - return (BufferDescriptorGetBuffer(bufHdr)); - - } - - /* - * if we have gotten to this point, the reln pointer must be ok - * and the relation file must be open. - */ - if (extend) { - /* new buffers are zero-filled */ - memset((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); - status = smgrextend(bufHdr->bufsmgr, reln, - (char *) MAKE_PTR(bufHdr->data)); - } else { - status = smgrread(bufHdr->bufsmgr, reln, blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - - if (isLocalBuf) - return (BufferDescriptorGetBuffer(bufHdr)); + else + { + status = smgrread(bufHdr->bufsmgr, reln, blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } + + if (isLocalBuf) + return (BufferDescriptorGetBuffer(bufHdr)); + + /* lock buffer manager again to update IO IN PROGRESS */ + SpinAcquire(BufMgrLock); + + if (status == SM_FAIL) + { + /* IO Failed. 
cleanup the data structures and go home */ + + if (!BufTableDelete(bufHdr)) + { + SpinRelease(BufMgrLock); + elog(FATAL, "BufRead: buffer table broken after IO error\n"); + } + /* remember that BufferAlloc() pinned the buffer */ + UnpinBuffer(bufHdr); - /* lock buffer manager again to update IO IN PROGRESS */ - SpinAcquire(BufMgrLock); - - if (status == SM_FAIL) { - /* IO Failed. cleanup the data structures and go home */ - - if (! BufTableDelete(bufHdr)) { - SpinRelease(BufMgrLock); - elog(FATAL,"BufRead: buffer table broken after IO error\n"); + /* + * Have to reset the flag so that anyone waiting for the buffer + * can tell that the contents are invalid. + */ + bufHdr->flags |= BM_IO_ERROR; + bufHdr->flags &= ~BM_IO_IN_PROGRESS; } - /* remember that BufferAlloc() pinned the buffer */ - UnpinBuffer(bufHdr); - - /* - * Have to reset the flag so that anyone waiting for - * the buffer can tell that the contents are invalid. - */ - bufHdr->flags |= BM_IO_ERROR; - bufHdr->flags &= ~BM_IO_IN_PROGRESS; - } else { - /* IO Succeeded. clear the flags, finish buffer update */ - - bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); - } - - /* If anyone was waiting for IO to complete, wake them up now */ + else + { + /* IO Succeeded. clear the flags, finish buffer update */ + + bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); + } + + /* If anyone was waiting for IO to complete, wake them up now */ #ifdef HAS_TEST_AND_SET - S_UNLOCK(&(bufHdr->io_in_progress_lock)); + S_UNLOCK(&(bufHdr->io_in_progress_lock)); #else - if (bufHdr->refcount > 1) - SignalIO(bufHdr); + if (bufHdr->refcount > 1) + SignalIO(bufHdr); #endif - - SpinRelease(BufMgrLock); - - if (status == SM_FAIL) - return(InvalidBuffer); - - return(BufferDescriptorGetBuffer(bufHdr)); + + SpinRelease(BufMgrLock); + + if (status == SM_FAIL) + return (InvalidBuffer); + + return (BufferDescriptorGetBuffer(bufHdr)); } /* * BufferAlloc -- Get a buffer from the buffer pool but dont - * read it. + * read it. * * Returns: descriptor for buffer * @@ -321,321 +353,339 @@ ReadBufferWithBufferLock(Relation reln, */ static BufferDesc * BufferAlloc(Relation reln, - BlockNumber blockNum, - bool *foundPtr, - bool bufferLockHeld) + BlockNumber blockNum, + bool * foundPtr, + bool bufferLockHeld) { - BufferDesc *buf, *buf2; - BufferTag newTag; /* identity of requested block */ - bool inProgress; /* buffer undergoing IO */ - bool newblock = FALSE; - - /* create a new tag so we can lookup the buffer */ - /* assume that the relation is already open */ - if (blockNum == P_NEW) { - newblock = TRUE; - blockNum = smgrnblocks(reln->rd_rel->relsmgr, reln); - } - - INIT_BUFFERTAG(&newTag,reln,blockNum); - - if (!bufferLockHeld) - SpinAcquire(BufMgrLock); - - /* see if the block is in the buffer pool already */ - buf = BufTableLookup(&newTag); - if (buf != NULL) { - /* Found it. Now, (a) pin the buffer so no - * one steals it from the buffer pool, - * (b) check IO_IN_PROGRESS, someone may be - * faulting the buffer into the buffer pool. - */ - - PinBuffer(buf); - inProgress = (buf->flags & BM_IO_IN_PROGRESS); - - *foundPtr = TRUE; - if (inProgress) { - WaitIO(buf, BufMgrLock); - if (buf->flags & BM_IO_ERROR) { - /* wierd race condition: - * - * We were waiting for someone else to read the buffer. - * While we were waiting, the reader boof'd in some - * way, so the contents of the buffer are still - * invalid. By saying that we didn't find it, we can - * make the caller reinitialize the buffer. 
If two - * processes are waiting for this block, both will - * read the block. The second one to finish may overwrite - * any updates made by the first. (Assume higher level - * synchronization prevents this from happening). - * - * This is never going to happen, don't worry about it. - */ - *foundPtr = FALSE; - } + BufferDesc *buf, + *buf2; + BufferTag newTag; /* identity of requested block */ + bool inProgress; /* buffer undergoing IO */ + bool newblock = FALSE; + + /* create a new tag so we can lookup the buffer */ + /* assume that the relation is already open */ + if (blockNum == P_NEW) + { + newblock = TRUE; + blockNum = smgrnblocks(reln->rd_rel->relsmgr, reln); } + + INIT_BUFFERTAG(&newTag, reln, blockNum); + + if (!bufferLockHeld) + SpinAcquire(BufMgrLock); + + /* see if the block is in the buffer pool already */ + buf = BufTableLookup(&newTag); + if (buf != NULL) + { + + /* + * Found it. Now, (a) pin the buffer so no one steals it from the + * buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting + * the buffer into the buffer pool. + */ + + PinBuffer(buf); + inProgress = (buf->flags & BM_IO_IN_PROGRESS); + + *foundPtr = TRUE; + if (inProgress) + { + WaitIO(buf, BufMgrLock); + if (buf->flags & BM_IO_ERROR) + { + + /* + * wierd race condition: + * + * We were waiting for someone else to read the buffer. While + * we were waiting, the reader boof'd in some way, so the + * contents of the buffer are still invalid. By saying + * that we didn't find it, we can make the caller + * reinitialize the buffer. If two processes are waiting + * for this block, both will read the block. The second + * one to finish may overwrite any updates made by the + * first. (Assume higher level synchronization prevents + * this from happening). + * + * This is never going to happen, don't worry about it. + */ + *foundPtr = FALSE; + } + } #ifdef BMTRACE - _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND); -#endif /* BMTRACE */ - - SpinRelease(BufMgrLock); - - return(buf); - } - - *foundPtr = FALSE; - - /* - * Didn't find it in the buffer pool. We'll have - * to initialize a new buffer. First, grab one from - * the free list. If it's dirty, flush it to disk. - * Remember to unlock BufMgr spinlock while doing the IOs. - */ - inProgress = FALSE; - for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL; ) { - - /* GetFreeBuffer will abort if it can't find a free buffer */ - buf = GetFreeBuffer(); - - /* - * But it can return buf == NULL if we are in aborting - * transaction now and so elog(WARN,...) in GetFreeBuffer - * will not abort again. - */ - if ( buf == NULL ) - return (NULL); - + _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND); +#endif /* BMTRACE */ + + SpinRelease(BufMgrLock); + + return (buf); + } + + *foundPtr = FALSE; + /* - * There should be exactly one pin on the buffer after - * it is allocated -- ours. If it had a pin it wouldn't - * have been on the free list. No one else could have - * pinned it between GetFreeBuffer and here because we - * have the BufMgrLock. + * Didn't find it in the buffer pool. We'll have to initialize a new + * buffer. First, grab one from the free list. If it's dirty, flush + * it to disk. Remember to unlock BufMgr spinlock while doing the IOs. 
*/ - Assert(buf->refcount == 0); - buf->refcount = 1; - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1; - - if (buf->flags & BM_DIRTY) { - bool smok; - /* - * Set BM_IO_IN_PROGRESS to keep anyone from doing anything - * with the contents of the buffer while we write it out. - * We don't really care if they try to read it, but if they - * can complete a BufferAlloc on it they can then scribble - * into it, and we'd really like to avoid that while we are - * flushing the buffer. Setting this flag should block them - * in WaitIO until we're done. - */ - inProgress = TRUE; - buf->flags |= BM_IO_IN_PROGRESS; -#ifdef HAS_TEST_AND_SET - /* - * All code paths that acquire this lock pin the buffer - * first; since no one had it pinned (it just came off the - * free list), no one else can have this lock. - */ - Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); - S_LOCK(&(buf->io_in_progress_lock)); -#endif /* HAS_TEST_AND_SET */ - - /* - * Write the buffer out, being careful to release BufMgrLock - * before starting the I/O. - * - * This #ifndef is here because a few extra semops REALLY kill - * you on machines that don't have spinlocks. If you don't - * operate with much concurrency, well... - */ - smok = BufferReplace(buf, true); -#ifndef OPTIMIZE_SINGLE - SpinAcquire(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ - - if ( smok == FALSE ) - { - elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s", - buf->tag.blockNum, buf->sb_dbname, buf->sb_relname); - inProgress = FALSE; - buf->flags |= BM_IO_ERROR; - buf->flags &= ~BM_IO_IN_PROGRESS; -#ifdef HAS_TEST_AND_SET - S_UNLOCK(&(buf->io_in_progress_lock)); -#else /* !HAS_TEST_AND_SET */ - if (buf->refcount > 1) - SignalIO(buf); -#endif /* !HAS_TEST_AND_SET */ - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - buf->refcount--; - if ( buf->refcount == 0 ) - { - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; - } - buf = (BufferDesc *) NULL; - } - else - { + inProgress = FALSE; + for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;) + { + + /* GetFreeBuffer will abort if it can't find a free buffer */ + buf = GetFreeBuffer(); + /* - * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't - * be setted by anyone. - vadim 01/17/97 + * But it can return buf == NULL if we are in aborting transaction + * now and so elog(WARN,...) in GetFreeBuffer will not abort + * again. */ - if ( buf->flags & BM_JUST_DIRTIED ) - { - elog (FATAL, "BufferAlloc: content of block %u (%s) changed while flushing", - buf->tag.blockNum, buf->sb_relname); - } - else - { - buf->flags &= ~BM_DIRTY; - } - } - - /* - * Somebody could have pinned the buffer while we were - * doing the I/O and had given up the BufMgrLock (though - * they would be waiting for us to clear the BM_IO_IN_PROGRESS - * flag). That's why this is a loop -- if so, we need to clear - * the I/O flags, remove our pin and start all over again. - * - * People may be making buffers free at any time, so there's - * no reason to think that we have an immediate disaster on - * our hands. - */ - if ( buf && buf->refcount > 1 ) - { - inProgress = FALSE; - buf->flags &= ~BM_IO_IN_PROGRESS; -#ifdef HAS_TEST_AND_SET - S_UNLOCK(&(buf->io_in_progress_lock)); -#else /* !HAS_TEST_AND_SET */ - if (buf->refcount > 1) - SignalIO(buf); -#endif /* !HAS_TEST_AND_SET */ - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - buf->refcount--; - buf = (BufferDesc *) NULL; - } - - /* - * Somebody could have allocated another buffer for the - * same block we are about to read in. 
(While we flush out - * the dirty buffer, we don't hold the lock and someone could - * have allocated another buffer for the same block. The problem - * is we haven't gotten around to insert the new tag into - * the buffer table. So we need to check here. -ay 3/95 - */ - buf2 = BufTableLookup(&newTag); - if (buf2 != NULL) { - /* Found it. Someone has already done what we're about - * to do. We'll just handle this as if it were found in - * the buffer pool in the first place. + if (buf == NULL) + return (NULL); + + /* + * There should be exactly one pin on the buffer after it is + * allocated -- ours. If it had a pin it wouldn't have been on + * the free list. No one else could have pinned it between + * GetFreeBuffer and here because we have the BufMgrLock. */ - if ( buf != NULL ) + Assert(buf->refcount == 0); + buf->refcount = 1; + PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1; + + if (buf->flags & BM_DIRTY) { + bool smok; + + /* + * Set BM_IO_IN_PROGRESS to keep anyone from doing anything + * with the contents of the buffer while we write it out. We + * don't really care if they try to read it, but if they can + * complete a BufferAlloc on it they can then scribble into + * it, and we'd really like to avoid that while we are + * flushing the buffer. Setting this flag should block them + * in WaitIO until we're done. + */ + inProgress = TRUE; + buf->flags |= BM_IO_IN_PROGRESS; #ifdef HAS_TEST_AND_SET - S_UNLOCK(&(buf->io_in_progress_lock)); -#else /* !HAS_TEST_AND_SET */ - if (buf->refcount > 1) - SignalIO(buf); -#endif /* !HAS_TEST_AND_SET */ - - /* give up the buffer since we don't need it any more */ - buf->refcount--; - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; - buf->flags &= ~BM_IO_IN_PROGRESS; - } - PinBuffer(buf2); - inProgress = (buf2->flags & BM_IO_IN_PROGRESS); - - *foundPtr = TRUE; - if (inProgress) { - WaitIO(buf2, BufMgrLock); - if (buf2->flags & BM_IO_ERROR) { - *foundPtr = FALSE; - } + /* + * All code paths that acquire this lock pin the buffer first; + * since no one had it pinned (it just came off the free + * list), no one else can have this lock. + */ + Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); + S_LOCK(&(buf->io_in_progress_lock)); +#endif /* HAS_TEST_AND_SET */ + + /* + * Write the buffer out, being careful to release BufMgrLock + * before starting the I/O. + * + * This #ifndef is here because a few extra semops REALLY kill + * you on machines that don't have spinlocks. If you don't + * operate with much concurrency, well... + */ + smok = BufferReplace(buf, true); +#ifndef OPTIMIZE_SINGLE + SpinAcquire(BufMgrLock); +#endif /* OPTIMIZE_SINGLE */ + + if (smok == FALSE) + { + elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s", + buf->tag.blockNum, buf->sb_dbname, buf->sb_relname); + inProgress = FALSE; + buf->flags |= BM_IO_ERROR; + buf->flags &= ~BM_IO_IN_PROGRESS; +#ifdef HAS_TEST_AND_SET + S_UNLOCK(&(buf->io_in_progress_lock)); +#else /* !HAS_TEST_AND_SET */ + if (buf->refcount > 1) + SignalIO(buf); +#endif /* !HAS_TEST_AND_SET */ + PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; + buf->refcount--; + if (buf->refcount == 0) + { + AddBufferToFreelist(buf); + buf->flags |= BM_FREE; + } + buf = (BufferDesc *) NULL; + } + else + { + + /* + * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't + * be setted by anyone. 
- vadim 01/17/97 + */ + if (buf->flags & BM_JUST_DIRTIED) + { + elog(FATAL, "BufferAlloc: content of block %u (%s) changed while flushing", + buf->tag.blockNum, buf->sb_relname); + } + else + { + buf->flags &= ~BM_DIRTY; + } + } + + /* + * Somebody could have pinned the buffer while we were doing + * the I/O and had given up the BufMgrLock (though they would + * be waiting for us to clear the BM_IO_IN_PROGRESS flag). + * That's why this is a loop -- if so, we need to clear the + * I/O flags, remove our pin and start all over again. + * + * People may be making buffers free at any time, so there's no + * reason to think that we have an immediate disaster on our + * hands. + */ + if (buf && buf->refcount > 1) + { + inProgress = FALSE; + buf->flags &= ~BM_IO_IN_PROGRESS; +#ifdef HAS_TEST_AND_SET + S_UNLOCK(&(buf->io_in_progress_lock)); +#else /* !HAS_TEST_AND_SET */ + if (buf->refcount > 1) + SignalIO(buf); +#endif /* !HAS_TEST_AND_SET */ + PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; + buf->refcount--; + buf = (BufferDesc *) NULL; + } + + /* + * Somebody could have allocated another buffer for the same + * block we are about to read in. (While we flush out the + * dirty buffer, we don't hold the lock and someone could have + * allocated another buffer for the same block. The problem is + * we haven't gotten around to insert the new tag into the + * buffer table. So we need to check here. -ay 3/95 + */ + buf2 = BufTableLookup(&newTag); + if (buf2 != NULL) + { + + /* + * Found it. Someone has already done what we're about to + * do. We'll just handle this as if it were found in the + * buffer pool in the first place. + */ + if (buf != NULL) + { +#ifdef HAS_TEST_AND_SET + S_UNLOCK(&(buf->io_in_progress_lock)); +#else /* !HAS_TEST_AND_SET */ + if (buf->refcount > 1) + SignalIO(buf); +#endif /* !HAS_TEST_AND_SET */ + + /* give up the buffer since we don't need it any more */ + buf->refcount--; + PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; + AddBufferToFreelist(buf); + buf->flags |= BM_FREE; + buf->flags &= ~BM_IO_IN_PROGRESS; + } + + PinBuffer(buf2); + inProgress = (buf2->flags & BM_IO_IN_PROGRESS); + + *foundPtr = TRUE; + if (inProgress) + { + WaitIO(buf2, BufMgrLock); + if (buf2->flags & BM_IO_ERROR) + { + *foundPtr = FALSE; + } + } + + SpinRelease(BufMgrLock); + + return (buf2); + } } - + } + + /* + * At this point we should have the sole pin on a non-dirty buffer and + * we may or may not already have the BM_IO_IN_PROGRESS flag set. + */ + + /* + * Change the name of the buffer in the lookup table: + * + * Need to update the lookup table before the read starts. If someone + * comes along looking for the buffer while we are reading it in, we + * don't want them to allocate a new buffer. For the same reason, we + * didn't want to erase the buf table entry for the buffer we were + * writing back until now, either. + */ + + if (!BufTableDelete(buf)) + { SpinRelease(BufMgrLock); - - return(buf2); - } + elog(FATAL, "buffer wasn't in the buffer table\n"); + } - } - /* - * At this point we should have the sole pin on a non-dirty - * buffer and we may or may not already have the BM_IO_IN_PROGRESS - * flag set. - */ - - /* - * Change the name of the buffer in the lookup table: - * - * Need to update the lookup table before the read starts. - * If someone comes along looking for the buffer while - * we are reading it in, we don't want them to allocate - * a new buffer. 
For the same reason, we didn't want - * to erase the buf table entry for the buffer we were - * writing back until now, either. - */ - - if (! BufTableDelete(buf)) { - SpinRelease(BufMgrLock); - elog(FATAL,"buffer wasn't in the buffer table\n"); - - } - - /* record the database name and relation name for this buffer */ - strcpy (buf->sb_relname, reln->rd_rel->relname.data); - strcpy (buf->sb_dbname, GetDatabaseName()); - - /* remember which storage manager is responsible for it */ - buf->bufsmgr = reln->rd_rel->relsmgr; - - INIT_BUFFERTAG(&(buf->tag),reln,blockNum); - if (! BufTableInsert(buf)) { - SpinRelease(BufMgrLock); - elog(FATAL,"Buffer in lookup table twice \n"); - } - - /* Buffer contents are currently invalid. Have - * to mark IO IN PROGRESS so no one fiddles with - * them until the read completes. If this routine - * has been called simply to allocate a buffer, no - * io will be attempted, so the flag isnt set. - */ - if (!inProgress) { - buf->flags |= BM_IO_IN_PROGRESS; + + /* record the database name and relation name for this buffer */ + strcpy(buf->sb_relname, reln->rd_rel->relname.data); + strcpy(buf->sb_dbname, GetDatabaseName()); + + /* remember which storage manager is responsible for it */ + buf->bufsmgr = reln->rd_rel->relsmgr; + + INIT_BUFFERTAG(&(buf->tag), reln, blockNum); + if (!BufTableInsert(buf)) + { + SpinRelease(BufMgrLock); + elog(FATAL, "Buffer in lookup table twice \n"); + } + + /* + * Buffer contents are currently invalid. Have to mark IO IN PROGRESS + * so no one fiddles with them until the read completes. If this + * routine has been called simply to allocate a buffer, no io will be + * attempted, so the flag isnt set. + */ + if (!inProgress) + { + buf->flags |= BM_IO_IN_PROGRESS; #ifdef HAS_TEST_AND_SET - Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); - S_LOCK(&(buf->io_in_progress_lock)); -#endif /* HAS_TEST_AND_SET */ - } - + Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); + S_LOCK(&(buf->io_in_progress_lock)); +#endif /* HAS_TEST_AND_SET */ + } + #ifdef BMTRACE - _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND); -#endif /* BMTRACE */ - - SpinRelease(BufMgrLock); - - return (buf); + _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND); +#endif /* BMTRACE */ + + SpinRelease(BufMgrLock); + + return (buf); } /* * WriteBuffer-- * - * Pushes buffer contents to disk if WriteMode is BUFFER_FLUSH_WRITE. - * Otherwise, marks contents as dirty. + * Pushes buffer contents to disk if WriteMode is BUFFER_FLUSH_WRITE. + * Otherwise, marks contents as dirty. * * Assume that buffer is pinned. Assume that reln is - * valid. + * valid. * * Side Effects: - * Pin count is decremented. + * Pin count is decremented. 
*/ #undef WriteBuffer @@ -643,92 +693,103 @@ BufferAlloc(Relation reln, int WriteBuffer(Buffer buffer) { - BufferDesc *bufHdr; + BufferDesc *bufHdr; - if (WriteMode == BUFFER_FLUSH_WRITE) { - return (FlushBuffer (buffer, TRUE)); - } else { + if (WriteMode == BUFFER_FLUSH_WRITE) + { + return (FlushBuffer(buffer, TRUE)); + } + else + { - if (BufferIsLocal(buffer)) - return WriteLocalBuffer(buffer, TRUE); - - if (BAD_BUFFER_ID(buffer)) - return(FALSE); + if (BufferIsLocal(buffer)) + return WriteLocalBuffer(buffer, TRUE); - bufHdr = &BufferDescriptors[buffer-1]; - - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - UnpinBuffer(bufHdr); - SpinRelease(BufMgrLock); - CommitInfoNeedsSave[buffer - 1] = 0; - } - return(TRUE); -} + if (BAD_BUFFER_ID(buffer)) + return (FALSE); + + bufHdr = &BufferDescriptors[buffer - 1]; + + SpinAcquire(BufMgrLock); + Assert(bufHdr->refcount > 0); + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + UnpinBuffer(bufHdr); + SpinRelease(BufMgrLock); + CommitInfoNeedsSave[buffer - 1] = 0; + } + return (TRUE); +} #ifdef NOT_USED void WriteBuffer_Debug(char *file, int line, Buffer buffer) { - WriteBuffer(buffer); - if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf; - buf = &BufferDescriptors[buffer-1]; - fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \ + WriteBuffer(buffer); + if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf; + + buf = &BufferDescriptors[buffer - 1]; + fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif /* * DirtyBufferCopy() -- For a given dbid/relid/blockno, if the buffer is - * in the cache and is dirty, mark it clean and copy - * it to the requested location. This is a logical - * write, and has been installed to support the cache - * management code for write-once storage managers. + * in the cache and is dirty, mark it clean and copy + * it to the requested location. This is a logical + * write, and has been installed to support the cache + * management code for write-once storage managers. * - * DirtyBufferCopy() -- Copy a given dirty buffer to the requested - * destination. + * DirtyBufferCopy() -- Copy a given dirty buffer to the requested + * destination. * - * We treat this as a write. If the requested buffer is in the pool - * and is dirty, we copy it to the location requested and mark it - * clean. This routine supports the Sony jukebox storage manager, - * which agrees to take responsibility for the data once we mark - * it clean. + * We treat this as a write. If the requested buffer is in the pool + * and is dirty, we copy it to the location requested and mark it + * clean. This routine supports the Sony jukebox storage manager, + * which agrees to take responsibility for the data once we mark + * it clean. 
* - * NOTE: used by sony jukebox code in postgres 4.2 - ay 2/95 + * NOTE: used by sony jukebox code in postgres 4.2 - ay 2/95 */ #ifdef NOT_USED void DirtyBufferCopy(Oid dbid, Oid relid, BlockNumber blkno, char *dest) { - BufferDesc *buf; - BufferTag btag; - - btag.relId.relId = relid; - btag.relId.dbId = dbid; - btag.blockNum = blkno; - - SpinAcquire(BufMgrLock); - buf = BufTableLookup(&btag); - - if (buf == (BufferDesc *) NULL - || !(buf->flags & BM_DIRTY) - || !(buf->flags & BM_VALID)) { + BufferDesc *buf; + BufferTag btag; + + btag.relId.relId = relid; + btag.relId.dbId = dbid; + btag.blockNum = blkno; + + SpinAcquire(BufMgrLock); + buf = BufTableLookup(&btag); + + if (buf == (BufferDesc *) NULL + || !(buf->flags & BM_DIRTY) + || !(buf->flags & BM_VALID)) + { + SpinRelease(BufMgrLock); + return; + } + + /* + * hate to do this holding the lock, but release and reacquire is + * slower + */ + memmove(dest, (char *) MAKE_PTR(buf->data), BLCKSZ); + + buf->flags &= ~BM_DIRTY; + SpinRelease(BufMgrLock); - return; - } - - /* hate to do this holding the lock, but release and reacquire is slower */ - memmove(dest, (char *) MAKE_PTR(buf->data), BLCKSZ); - - buf->flags &= ~BM_DIRTY; - - SpinRelease(BufMgrLock); } + #endif /* @@ -742,504 +803,541 @@ DirtyBufferCopy(Oid dbid, Oid relid, BlockNumber blkno, char *dest) static int FlushBuffer(Buffer buffer, bool release) { - BufferDesc *bufHdr; - Oid bufdb; - Relation bufrel; - int status; - - if (BufferIsLocal(buffer)) - return FlushLocalBuffer(buffer, release); - - if (BAD_BUFFER_ID(buffer)) - return (STATUS_ERROR); - - bufHdr = &BufferDescriptors[buffer-1]; - bufdb = bufHdr->tag.relId.dbId; - - Assert (bufdb == MyDatabaseId || bufdb == (Oid) NULL); - bufrel = RelationIdCacheGetRelation (bufHdr->tag.relId.relId); - Assert (bufrel != (Relation) NULL); - - /* To check if block content changed while flushing. - vadim 01/17/97 */ - SpinAcquire(BufMgrLock); - bufHdr->flags &= ~BM_JUST_DIRTIED; - SpinRelease(BufMgrLock); - - status = smgrflush(bufHdr->bufsmgr, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - - if (status == SM_FAIL) - { - elog(WARN, "FlushBuffer: cannot flush block %u of the relation %s", - bufHdr->tag.blockNum, bufHdr->sb_relname); - return (STATUS_ERROR); - } - BufferFlushCount++; - - SpinAcquire(BufMgrLock); - /* - * If this buffer was marked by someone as DIRTY while - * we were flushing it out we must not clear DIRTY flag - * - vadim 01/17/97 - */ - if ( bufHdr->flags & BM_JUST_DIRTIED ) - { - elog (NOTICE, "FlusfBuffer: content of block %u (%s) changed while flushing", - bufHdr->tag.blockNum, bufHdr->sb_relname); - } - else - { - bufHdr->flags &= ~BM_DIRTY; - } - if ( release ) - UnpinBuffer(bufHdr); - SpinRelease(BufMgrLock); - CommitInfoNeedsSave[buffer - 1] = 0; - - return(STATUS_OK); + BufferDesc *bufHdr; + Oid bufdb; + Relation bufrel; + int status; + + if (BufferIsLocal(buffer)) + return FlushLocalBuffer(buffer, release); + + if (BAD_BUFFER_ID(buffer)) + return (STATUS_ERROR); + + bufHdr = &BufferDescriptors[buffer - 1]; + bufdb = bufHdr->tag.relId.dbId; + + Assert(bufdb == MyDatabaseId || bufdb == (Oid) NULL); + bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); + Assert(bufrel != (Relation) NULL); + + /* To check if block content changed while flushing. 
- vadim 01/17/97 */ + SpinAcquire(BufMgrLock); + bufHdr->flags &= ~BM_JUST_DIRTIED; + SpinRelease(BufMgrLock); + + status = smgrflush(bufHdr->bufsmgr, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + + if (status == SM_FAIL) + { + elog(WARN, "FlushBuffer: cannot flush block %u of the relation %s", + bufHdr->tag.blockNum, bufHdr->sb_relname); + return (STATUS_ERROR); + } + BufferFlushCount++; + + SpinAcquire(BufMgrLock); + + /* + * If this buffer was marked by someone as DIRTY while we were + * flushing it out we must not clear DIRTY flag - vadim 01/17/97 + */ + if (bufHdr->flags & BM_JUST_DIRTIED) + { + elog(NOTICE, "FlusfBuffer: content of block %u (%s) changed while flushing", + bufHdr->tag.blockNum, bufHdr->sb_relname); + } + else + { + bufHdr->flags &= ~BM_DIRTY; + } + if (release) + UnpinBuffer(bufHdr); + SpinRelease(BufMgrLock); + CommitInfoNeedsSave[buffer - 1] = 0; + + return (STATUS_OK); } /* * WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer - * when the operation is complete. + * when the operation is complete. * - * We know that the buffer is for a relation in our private cache, - * because this routine is called only to write out buffers that - * were changed by the executing backend. + * We know that the buffer is for a relation in our private cache, + * because this routine is called only to write out buffers that + * were changed by the executing backend. */ int WriteNoReleaseBuffer(Buffer buffer) { - BufferDesc *bufHdr; - - if (WriteMode == BUFFER_FLUSH_WRITE) { - return (FlushBuffer (buffer, FALSE)); - } else { + BufferDesc *bufHdr; - if (BufferIsLocal(buffer)) - return WriteLocalBuffer(buffer, FALSE); - - if (BAD_BUFFER_ID(buffer)) - return (STATUS_ERROR); + if (WriteMode == BUFFER_FLUSH_WRITE) + { + return (FlushBuffer(buffer, FALSE)); + } + else + { - bufHdr = &BufferDescriptors[buffer-1]; - - SpinAcquire(BufMgrLock); - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - SpinRelease(BufMgrLock); - CommitInfoNeedsSave[buffer - 1] = 0; - } - return(STATUS_OK); + if (BufferIsLocal(buffer)) + return WriteLocalBuffer(buffer, FALSE); + + if (BAD_BUFFER_ID(buffer)) + return (STATUS_ERROR); + + bufHdr = &BufferDescriptors[buffer - 1]; + + SpinAcquire(BufMgrLock); + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + SpinRelease(BufMgrLock); + CommitInfoNeedsSave[buffer - 1] = 0; + } + return (STATUS_OK); } #undef ReleaseAndReadBuffer /* * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer() - * so that only one semop needs to be called. + * so that only one semop needs to be called. 
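
The FlushBuffer() hunk above depends on the BM_JUST_DIRTIED handshake: the flag is cleared under BufMgrLock before the write starts and re-checked once the write finishes; if another backend dirtied the page in between, BM_DIRTY must not be cleared, or that change would be lost. The following is a minimal standalone sketch of that protocol only -- the types and the fake_smgrwrite() stub are invented for the example and are not the bufmgr.c code.

/*
 * Sketch of the clear-before-write / check-after-write protocol behind
 * BM_JUST_DIRTIED.  Stand-in types; no real locking or I/O.
 */
#include <stdbool.h>
#include <stdio.h>

#define BM_DIRTY         (1 << 0)
#define BM_JUST_DIRTIED  (1 << 1)

typedef struct
{
    int     flags;
    char    data[8192];         /* the page image */
} FakeBufferDesc;

/* pretend this writes the page to disk; other backends may run meanwhile */
static bool
fake_smgrwrite(FakeBufferDesc *buf)
{
    (void) buf;
    return true;
}

static bool
flush_buffer(FakeBufferDesc *buf)
{
    buf->flags &= ~BM_JUST_DIRTIED;     /* done under BufMgrLock in the real code */

    if (!fake_smgrwrite(buf))           /* the lock is NOT held during the write */
        return false;

    if (buf->flags & BM_JUST_DIRTIED)
        printf("page re-dirtied while flushing; keeping BM_DIRTY\n");
    else
        buf->flags &= ~BM_DIRTY;        /* lock re-acquired here in the real code */

    return true;
}

int
main(void)
{
    FakeBufferDesc buf = {.flags = BM_DIRTY};

    flush_buffer(&buf);
    printf("flags after flush: 0x%x\n", buf.flags);
    return 0;
}

Clearing the flag before the write and testing it afterwards is necessary because the spinlock is released for the duration of the I/O, so a concurrent re-dirtying can only be detected, not prevented.
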
* */ Buffer ReleaseAndReadBuffer(Buffer buffer, - Relation relation, - BlockNumber blockNum) + Relation relation, + BlockNumber blockNum) { - BufferDesc *bufHdr; - Buffer retbuf; - - if (BufferIsLocal(buffer)) { - Assert(LocalRefCount[-buffer - 1] > 0); - LocalRefCount[-buffer - 1]--; - } else { - if (BufferIsValid(buffer)) { - bufHdr = &BufferDescriptors[buffer-1]; - Assert(PrivateRefCount[buffer - 1] > 0); - PrivateRefCount[buffer - 1]--; - if (PrivateRefCount[buffer - 1] == 0 && - LastRefCount[buffer - 1] == 0) { - /* only release buffer if it is not pinned in previous ExecMain - level */ - SpinAcquire(BufMgrLock); - bufHdr->refcount--; - if (bufHdr->refcount == 0) { - AddBufferToFreelist(bufHdr); - bufHdr->flags |= BM_FREE; - } - if(CommitInfoNeedsSave[buffer - 1]) { - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - CommitInfoNeedsSave[buffer - 1] = 0; + BufferDesc *bufHdr; + Buffer retbuf; + + if (BufferIsLocal(buffer)) + { + Assert(LocalRefCount[-buffer - 1] > 0); + LocalRefCount[-buffer - 1]--; + } + else + { + if (BufferIsValid(buffer)) + { + bufHdr = &BufferDescriptors[buffer - 1]; + Assert(PrivateRefCount[buffer - 1] > 0); + PrivateRefCount[buffer - 1]--; + if (PrivateRefCount[buffer - 1] == 0 && + LastRefCount[buffer - 1] == 0) + { + + /* + * only release buffer if it is not pinned in previous + * ExecMain level + */ + SpinAcquire(BufMgrLock); + bufHdr->refcount--; + if (bufHdr->refcount == 0) + { + AddBufferToFreelist(bufHdr); + bufHdr->flags |= BM_FREE; + } + if (CommitInfoNeedsSave[buffer - 1]) + { + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + CommitInfoNeedsSave[buffer - 1] = 0; + } + retbuf = ReadBufferWithBufferLock(relation, blockNum, true); + return retbuf; + } } - retbuf = ReadBufferWithBufferLock(relation, blockNum, true); - return retbuf; - } } - } - return (ReadBuffer(relation, blockNum)); + return (ReadBuffer(relation, blockNum)); } /* * BufferSync -- Flush all dirty buffers in the pool. * - * This is called at transaction commit time. It does the wrong thing, - * right now. We should flush only our own changes to stable storage, - * and we should obey the lock protocol on the buffer manager metadata - * as we do it. Also, we need to be sure that no other transaction is - * modifying the page as we flush it. This is only a problem for objects - * that use a non-two-phase locking protocol, like btree indices. For - * those objects, we would like to set a write lock for the duration of - * our IO. Another possibility is to code updates to btree pages - * carefully, so that writing them out out of order cannot cause - * any unrecoverable errors. + * This is called at transaction commit time. It does the wrong thing, + * right now. We should flush only our own changes to stable storage, + * and we should obey the lock protocol on the buffer manager metadata + * as we do it. Also, we need to be sure that no other transaction is + * modifying the page as we flush it. This is only a problem for objects + * that use a non-two-phase locking protocol, like btree indices. For + * those objects, we would like to set a write lock for the duration of + * our IO. Another possibility is to code updates to btree pages + * carefully, so that writing them out out of order cannot cause + * any unrecoverable errors. * - * I don't want to think hard about this right now, so I will try - * to come back to it later. + * I don't want to think hard about this right now, so I will try + * to come back to it later. 
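
ReleaseAndReadBuffer() above shows the two-level pin accounting used throughout this file: PrivateRefCount[] counts this backend's own pins, and the shared refcount in the descriptor is touched only on the 0<->1 transitions, with the buffer returning to the freelist when the shared count reaches zero. Below is a small self-contained sketch of that scheme; the structures and names are simplified stand-ins, locking is omitted, and the LastRefCount bookkeeping for outer ExecMain levels is ignored.

/* Sketch of per-backend vs. shared pin counts; not the real descriptors. */
#include <assert.h>
#include <stdio.h>

#define NBUFFERS 4

typedef struct
{
    int     refcount;           /* shared: how many backends pin this page */
    int     free;               /* 1 if the buffer is on the freelist */
} SharedDesc;

static SharedDesc shared[NBUFFERS];
static int  privateRef[NBUFFERS];   /* this backend's pins on each buffer */

static void
pin(int b)
{
    if (privateRef[b] == 0)     /* first pin by this backend */
    {
        shared[b].refcount++;   /* BufMgrLock held here in the real code */
        shared[b].free = 0;
    }
    privateRef[b]++;
}

static void
unpin(int b)
{
    assert(privateRef[b] > 0);
    privateRef[b]--;
    if (privateRef[b] == 0)     /* last pin by this backend */
    {
        shared[b].refcount--;
        if (shared[b].refcount == 0)
            shared[b].free = 1; /* AddBufferToFreelist() in the real code */
    }
}

int
main(void)
{
    pin(2);
    pin(2);                     /* second local pin: shared state untouched */
    unpin(2);
    unpin(2);
    printf("buffer 2: shared refcount=%d, on freelist=%d\n",
           shared[2].refcount, shared[2].free);
    return 0;
}

Keeping the per-backend count private means repeated pins of the same page by one backend touch shared state, and therefore the BufMgrLock, only once.
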
*/ static void BufferSync() -{ - int i; - Oid bufdb; - Oid bufrel; - Relation reln; - BufferDesc *bufHdr; - int status; - - SpinAcquire(BufMgrLock); - for (i=0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) { - if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { - bufdb = bufHdr->tag.relId.dbId; - bufrel = bufHdr->tag.relId.relId; - if (bufdb == MyDatabaseId || bufdb == (Oid) 0) { - reln = RelationIdCacheGetRelation(bufrel); - - /* - * We have to pin buffer to keep anyone from stealing it - * from the buffer pool while we are flushing it or - * waiting in WaitIO. It's bad for GetFreeBuffer in - * BufferAlloc, but there is no other way to prevent - * writing into disk block data from some other buffer, - * getting smgr status of some other block and - * clearing BM_DIRTY of ... - VAdim 09/16/96 - */ - PinBuffer(bufHdr); - if (bufHdr->flags & BM_IO_IN_PROGRESS) +{ + int i; + Oid bufdb; + Oid bufrel; + Relation reln; + BufferDesc *bufHdr; + int status; + + SpinAcquire(BufMgrLock); + for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) + { + if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { - WaitIO(bufHdr, BufMgrLock); - UnpinBuffer(bufHdr); - if (bufHdr->flags & BM_IO_ERROR) - { - elog(WARN, "BufferSync: write error %u for %s", - bufHdr->tag.blockNum, bufHdr->sb_relname); - } - if (reln != (Relation)NULL) - RelationDecrementReferenceCount(reln); - continue; - } - - /* - * To check if block content changed while flushing - * (see below). - vadim 01/17/97 - */ - bufHdr->flags &= ~BM_JUST_DIRTIED; + bufdb = bufHdr->tag.relId.dbId; + bufrel = bufHdr->tag.relId.relId; + if (bufdb == MyDatabaseId || bufdb == (Oid) 0) + { + reln = RelationIdCacheGetRelation(bufrel); + + /* + * We have to pin buffer to keep anyone from stealing it + * from the buffer pool while we are flushing it or + * waiting in WaitIO. It's bad for GetFreeBuffer in + * BufferAlloc, but there is no other way to prevent + * writing into disk block data from some other buffer, + * getting smgr status of some other block and clearing + * BM_DIRTY of ... - VAdim 09/16/96 + */ + PinBuffer(bufHdr); + if (bufHdr->flags & BM_IO_IN_PROGRESS) + { + WaitIO(bufHdr, BufMgrLock); + UnpinBuffer(bufHdr); + if (bufHdr->flags & BM_IO_ERROR) + { + elog(WARN, "BufferSync: write error %u for %s", + bufHdr->tag.blockNum, bufHdr->sb_relname); + } + if (reln != (Relation) NULL) + RelationDecrementReferenceCount(reln); + continue; + } + + /* + * To check if block content changed while flushing (see + * below). - vadim 01/17/97 + */ + bufHdr->flags &= ~BM_JUST_DIRTIED; + + /* + * If we didn't have the reldesc in our local cache, flush + * this page out using the 'blind write' storage manager + * routine. If we did find it, use the standard + * interface. + */ - /* - * If we didn't have the reldesc in our local cache, flush this - * page out using the 'blind write' storage manager routine. If - * we did find it, use the standard interface. 
- */ - #ifndef OPTIMIZE_SINGLE - SpinRelease(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ - if (reln == (Relation) NULL) { - status = smgrblindwrt(bufHdr->bufsmgr, bufHdr->sb_dbname, - bufHdr->sb_relname, bufdb, bufrel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } else { - status = smgrwrite(bufHdr->bufsmgr, reln, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } + SpinRelease(BufMgrLock); +#endif /* OPTIMIZE_SINGLE */ + if (reln == (Relation) NULL) + { + status = smgrblindwrt(bufHdr->bufsmgr, bufHdr->sb_dbname, + bufHdr->sb_relname, bufdb, bufrel, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } + else + { + status = smgrwrite(bufHdr->bufsmgr, reln, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } #ifndef OPTIMIZE_SINGLE - SpinAcquire(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ - - UnpinBuffer(bufHdr); - if (status == SM_FAIL) { - bufHdr->flags |= BM_IO_ERROR; - elog(WARN, "BufferSync: cannot write %u for %s", - bufHdr->tag.blockNum, bufHdr->sb_relname); - } - BufferFlushCount++; - /* - * If this buffer was marked by someone as DIRTY while - * we were flushing it out we must not clear DIRTY flag - * - vadim 01/17/97 - */ - if ( bufHdr->flags & BM_JUST_DIRTIED ) - { - elog (NOTICE, "BufferSync: content of block %u (%s) changed while flushing", - bufHdr->tag.blockNum, bufHdr->sb_relname); - } - else - { - bufHdr->flags &= ~BM_DIRTY; + SpinAcquire(BufMgrLock); +#endif /* OPTIMIZE_SINGLE */ + + UnpinBuffer(bufHdr); + if (status == SM_FAIL) + { + bufHdr->flags |= BM_IO_ERROR; + elog(WARN, "BufferSync: cannot write %u for %s", + bufHdr->tag.blockNum, bufHdr->sb_relname); + } + BufferFlushCount++; + + /* + * If this buffer was marked by someone as DIRTY while we + * were flushing it out we must not clear DIRTY flag - + * vadim 01/17/97 + */ + if (bufHdr->flags & BM_JUST_DIRTIED) + { + elog(NOTICE, "BufferSync: content of block %u (%s) changed while flushing", + bufHdr->tag.blockNum, bufHdr->sb_relname); + } + else + { + bufHdr->flags &= ~BM_DIRTY; + } + if (reln != (Relation) NULL) + RelationDecrementReferenceCount(reln); + } } - if (reln != (Relation)NULL) - RelationDecrementReferenceCount(reln); - } } - } - SpinRelease(BufMgrLock); + SpinRelease(BufMgrLock); - LocalBufferSync(); + LocalBufferSync(); } /* * WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' - * is cleared. Because IO_IN_PROGRESS conflicts are - * expected to be rare, there is only one BufferIO - * lock in the entire system. All processes block - * on this semaphore when they try to use a buffer - * that someone else is faulting in. Whenever a - * process finishes an IO and someone is waiting for - * the buffer, BufferIO is signaled (SignalIO). All - * waiting processes then wake up and check to see - * if their buffer is now ready. This implementation - * is simple, but efficient enough if WaitIO is - * rarely called by multiple processes simultaneously. + * is cleared. Because IO_IN_PROGRESS conflicts are + * expected to be rare, there is only one BufferIO + * lock in the entire system. All processes block + * on this semaphore when they try to use a buffer + * that someone else is faulting in. Whenever a + * process finishes an IO and someone is waiting for + * the buffer, BufferIO is signaled (SignalIO). All + * waiting processes then wake up and check to see + * if their buffer is now ready. This implementation + * is simple, but efficient enough if WaitIO is + * rarely called by multiple processes simultaneously. 
* - * ProcSleep atomically releases the spinlock and goes to - * sleep. + * ProcSleep atomically releases the spinlock and goes to + * sleep. * - * Note: there is an easy fix if the queue becomes long. - * save the id of the buffer we are waiting for in - * the queue structure. That way signal can figure - * out which proc to wake up. + * Note: there is an easy fix if the queue becomes long. + * save the id of the buffer we are waiting for in + * the queue structure. That way signal can figure + * out which proc to wake up. */ #ifdef HAS_TEST_AND_SET static void -WaitIO(BufferDesc *buf, SPINLOCK spinlock) +WaitIO(BufferDesc * buf, SPINLOCK spinlock) { - SpinRelease(spinlock); - S_LOCK(&(buf->io_in_progress_lock)); - S_UNLOCK(&(buf->io_in_progress_lock)); - SpinAcquire(spinlock); + SpinRelease(spinlock); + S_LOCK(&(buf->io_in_progress_lock)); + S_UNLOCK(&(buf->io_in_progress_lock)); + SpinAcquire(spinlock); } -#else /* HAS_TEST_AND_SET */ -IpcSemaphoreId WaitIOSemId; +#else /* HAS_TEST_AND_SET */ +IpcSemaphoreId WaitIOSemId; static void -WaitIO(BufferDesc *buf, SPINLOCK spinlock) +WaitIO(BufferDesc * buf, SPINLOCK spinlock) { - bool inProgress; - - for (;;) { - - /* wait until someone releases IO lock */ - (*NWaitIOBackendP)++; - SpinRelease(spinlock); - IpcSemaphoreLock(WaitIOSemId, 0, 1); - SpinAcquire(spinlock); - inProgress = (buf->flags & BM_IO_IN_PROGRESS); - if (!inProgress) break; - } + bool inProgress; + + for (;;) + { + + /* wait until someone releases IO lock */ + (*NWaitIOBackendP)++; + SpinRelease(spinlock); + IpcSemaphoreLock(WaitIOSemId, 0, 1); + SpinAcquire(spinlock); + inProgress = (buf->flags & BM_IO_IN_PROGRESS); + if (!inProgress) + break; + } } /* * SignalIO -- */ static void -SignalIO(BufferDesc *buf) +SignalIO(BufferDesc * buf) { - /* somebody better be waiting. */ - Assert( buf->refcount > 1); - IpcSemaphoreUnlock(WaitIOSemId, 0, *NWaitIOBackendP); - *NWaitIOBackendP = 0; + /* somebody better be waiting. */ + Assert(buf->refcount > 1); + IpcSemaphoreUnlock(WaitIOSemId, 0, *NWaitIOBackendP); + *NWaitIOBackendP = 0; } -#endif /* HAS_TEST_AND_SET */ -long NDirectFileRead; /* some I/O's are direct file access. bypass bufmgr */ -long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */ +#endif /* HAS_TEST_AND_SET */ + +long NDirectFileRead;/* some I/O's are direct file access. + * bypass bufmgr */ +long NDirectFileWrite; /* e.g., I/O in psort and + * hashjoin. 
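
Both WaitIO() variants above follow the same pattern: give up the buffer-manager lock, sleep until whoever is performing the I/O signals completion, re-take the lock, and re-check the flag, because every waiter is woken regardless of which buffer it was waiting for. A rough sketch of that pattern is shown below, using a pthread mutex and condition variable in place of the spinlock and SysV semaphore of the real code; the types and the io_worker() thread are invented for the example.

/* Sketch of the WaitIO()/SignalIO() hand-off with pthreads stand-ins. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define BM_IO_IN_PROGRESS  (1 << 2)

typedef struct
{
    int     flags;
} FakeBufferDesc;

static pthread_mutex_t buf_mgr_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  io_done = PTHREAD_COND_INITIALIZER;
static FakeBufferDesc buf = {BM_IO_IN_PROGRESS};

/* call with buf_mgr_lock held; returns with it held and the I/O finished */
static void
wait_io(FakeBufferDesc *b)
{
    while (b->flags & BM_IO_IN_PROGRESS)
        pthread_cond_wait(&io_done, &buf_mgr_lock);  /* drop lock, sleep, re-take */
}

/* the backend that started the I/O: finish it and wake all waiters */
static void *
io_worker(void *arg)
{
    (void) arg;
    sleep(1);                   /* pretend the read takes a while */
    pthread_mutex_lock(&buf_mgr_lock);
    buf.flags &= ~BM_IO_IN_PROGRESS;
    pthread_cond_broadcast(&io_done);   /* like SignalIO(): wake everyone */
    pthread_mutex_unlock(&buf_mgr_lock);
    return NULL;
}

int
main(void)
{
    pthread_t   t;

    pthread_create(&t, NULL, io_worker, NULL);

    pthread_mutex_lock(&buf_mgr_lock);
    wait_io(&buf);              /* blocks until io_worker signals */
    pthread_mutex_unlock(&buf_mgr_lock);

    pthread_join(t, NULL);
    printf("I/O finished, flags=0x%x\n", buf.flags);
    return 0;
}

Waking every waiter is crude but, as the comment above notes, cheap enough as long as concurrent WaitIO() calls are rare.
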
*/ void -PrintBufferUsage(FILE *statfp) +PrintBufferUsage(FILE * statfp) { - float hitrate; - float localhitrate; - - if (ReadBufferCount==0) - hitrate = 0.0; - else - hitrate = (float)BufferHitCount * 100.0/ReadBufferCount; - - if (ReadLocalBufferCount==0) - localhitrate = 0.0; - else - localhitrate = (float)LocalBufferHitCount * 100.0/ReadLocalBufferCount; - - fprintf(statfp, "!\tShared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", - ReadBufferCount - BufferHitCount, BufferFlushCount, hitrate); - fprintf(statfp, "!\tLocal blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", - ReadLocalBufferCount - LocalBufferHitCount, LocalBufferFlushCount, localhitrate); - fprintf(statfp, "!\tDirect blocks: %10ld read, %10ld written\n", - NDirectFileRead, NDirectFileWrite); + float hitrate; + float localhitrate; + + if (ReadBufferCount == 0) + hitrate = 0.0; + else + hitrate = (float) BufferHitCount *100.0 / ReadBufferCount; + + if (ReadLocalBufferCount == 0) + localhitrate = 0.0; + else + localhitrate = (float) LocalBufferHitCount *100.0 / ReadLocalBufferCount; + + fprintf(statfp, "!\tShared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", + ReadBufferCount - BufferHitCount, BufferFlushCount, hitrate); + fprintf(statfp, "!\tLocal blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", + ReadLocalBufferCount - LocalBufferHitCount, LocalBufferFlushCount, localhitrate); + fprintf(statfp, "!\tDirect blocks: %10ld read, %10ld written\n", + NDirectFileRead, NDirectFileWrite); } void ResetBufferUsage() { - BufferHitCount = 0; - ReadBufferCount = 0; - BufferFlushCount = 0; - LocalBufferHitCount = 0; - ReadLocalBufferCount = 0; - LocalBufferFlushCount = 0; - NDirectFileRead = 0; - NDirectFileWrite = 0; + BufferHitCount = 0; + ReadBufferCount = 0; + BufferFlushCount = 0; + LocalBufferHitCount = 0; + ReadLocalBufferCount = 0; + LocalBufferFlushCount = 0; + NDirectFileRead = 0; + NDirectFileWrite = 0; } /* ---------------------------------------------- - * ResetBufferPool + * ResetBufferPool * - * this routine is supposed to be called when a transaction aborts. - * it will release all the buffer pins held by the transaciton. + * this routine is supposed to be called when a transaction aborts. + * it will release all the buffer pins held by the transaciton. 
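
For readers decoding the statistics printed by PrintBufferUsage() above: the "read" figure is logical reads minus cache hits (i.e. blocks actually fetched from disk), and the hit rate is hits as a percentage of logical reads. A tiny standalone example with invented counter values:

/* Worked example of the PrintBufferUsage() arithmetic; counts are made up. */
#include <stdio.h>

int
main(void)
{
    long    ReadBufferCount = 1000;     /* logical reads requested */
    long    BufferHitCount = 900;       /* satisfied from the shared pool */
    long    BufferFlushCount = 42;      /* blocks written back */
    float   hitrate = 0.0;

    if (ReadBufferCount != 0)
        hitrate = (float) BufferHitCount * 100.0 / ReadBufferCount;

    /* prints: 100 read, 42 written, buffer hit rate = 90.00% */
    printf("!\tShared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n",
           ReadBufferCount - BufferHitCount, BufferFlushCount, hitrate);
    return 0;
}
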
* * ---------------------------------------------- */ void ResetBufferPool() { - register int i; - for (i=1; i<=NBuffers; i++) { - CommitInfoNeedsSave[i - 1] = 0; - if (BufferIsValid(i)) { - while(PrivateRefCount[i - 1] > 0) { - ReleaseBuffer(i); - } + register int i; + + for (i = 1; i <= NBuffers; i++) + { + CommitInfoNeedsSave[i - 1] = 0; + if (BufferIsValid(i)) + { + while (PrivateRefCount[i - 1] > 0) + { + ReleaseBuffer(i); + } + } + LastRefCount[i - 1] = 0; } - LastRefCount[i - 1] = 0; - } - ResetLocalBufferPool(); + ResetLocalBufferPool(); } /* ----------------------------------------------- - * BufferPoolCheckLeak + * BufferPoolCheckLeak * - * check if there is buffer leak + * check if there is buffer leak * * ----------------------------------------------- */ int BufferPoolCheckLeak() { - register int i; - int error = 0; - - for (i = 1; i <= NBuffers; i++) { - if (BufferIsValid(i)) { - elog(NOTICE, - "buffer leak [%d] detected in BufferPoolCheckLeak()", i-1); - error = 1; + register int i; + int error = 0; + + for (i = 1; i <= NBuffers; i++) + { + if (BufferIsValid(i)) + { + elog(NOTICE, + "buffer leak [%d] detected in BufferPoolCheckLeak()", i - 1); + error = 1; + } } - } - if(error) { - PrintBufferDescs(); - return(1); - } - return(0); + if (error) + { + PrintBufferDescs(); + return (1); + } + return (0); } /* ------------------------------------------------ - * FlushBufferPool + * FlushBufferPool * - * flush all dirty blocks in buffer pool to disk + * flush all dirty blocks in buffer pool to disk * * ------------------------------------------------ */ void FlushBufferPool(int StableMainMemoryFlag) { - if (!StableMainMemoryFlag) { - BufferSync(); - smgrcommit(); - } + if (!StableMainMemoryFlag) + { + BufferSync(); + smgrcommit(); + } } /* * BufferIsValid -- - * True iff the refcnt of the local buffer is > 0 + * True iff the refcnt of the local buffer is > 0 * Note: - * BufferIsValid(InvalidBuffer) is False. - * BufferIsValid(UnknownBuffer) is False. + * BufferIsValid(InvalidBuffer) is False. + * BufferIsValid(UnknownBuffer) is False. */ bool BufferIsValid(Buffer bufnum) { - if (BufferIsLocal(bufnum)) - return (bufnum >= -NLocBuffer && LocalRefCount[-bufnum - 1] > 0); - - if (BAD_BUFFER_ID(bufnum)) - return(false); + if (BufferIsLocal(bufnum)) + return (bufnum >= -NLocBuffer && LocalRefCount[-bufnum - 1] > 0); - return ((bool)(PrivateRefCount[bufnum - 1] > 0)); + if (BAD_BUFFER_ID(bufnum)) + return (false); + + return ((bool) (PrivateRefCount[bufnum - 1] > 0)); } /* * BufferGetBlockNumber -- - * Returns the block number associated with a buffer. + * Returns the block number associated with a buffer. * * Note: - * Assumes that the buffer is valid. + * Assumes that the buffer is valid. */ BlockNumber BufferGetBlockNumber(Buffer buffer) { - Assert(BufferIsValid(buffer)); + Assert(BufferIsValid(buffer)); - /* XXX should be a critical section */ - if (BufferIsLocal(buffer)) - return (LocalBufferDescriptors[-buffer-1].tag.blockNum); - else - return (BufferDescriptors[buffer-1].tag.blockNum); + /* XXX should be a critical section */ + if (BufferIsLocal(buffer)) + return (LocalBufferDescriptors[-buffer - 1].tag.blockNum); + else + return (BufferDescriptors[buffer - 1].tag.blockNum); } /* * BufferGetRelation -- - * Returns the relation desciptor associated with a buffer. + * Returns the relation desciptor associated with a buffer. * * Note: - * Assumes buffer is valid. + * Assumes buffer is valid. 
*/ Relation BufferGetRelation(Buffer buffer) { - Relation relation; - Oid relid; - - Assert(BufferIsValid(buffer)); - Assert(!BufferIsLocal(buffer)); /* not supported for local buffers */ - - /* XXX should be a critical section */ - relid = LRelIdGetRelationId(BufferDescriptors[buffer-1].tag.relId); - relation = RelationIdGetRelation(relid); - - RelationDecrementReferenceCount(relation); - - if (RelationHasReferenceCountZero(relation)) { - /* - elog(NOTICE, "BufferGetRelation: 0->1"); - */ - - RelationIncrementReferenceCount(relation); - } - - return (relation); + Relation relation; + Oid relid; + + Assert(BufferIsValid(buffer)); + Assert(!BufferIsLocal(buffer)); /* not supported for local buffers */ + + /* XXX should be a critical section */ + relid = LRelIdGetRelationId(BufferDescriptors[buffer - 1].tag.relId); + relation = RelationIdGetRelation(relid); + + RelationDecrementReferenceCount(relation); + + if (RelationHasReferenceCountZero(relation)) + { + + /* + * elog(NOTICE, "BufferGetRelation: 0->1"); + */ + + RelationIncrementReferenceCount(relation); + } + + return (relation); } /* @@ -1249,217 +1347,232 @@ BufferGetRelation(Buffer buffer) * */ static int -BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld) +BufferReplace(BufferDesc * bufHdr, bool bufferLockHeld) { - Relation reln; - Oid bufdb, bufrel; - int status; - - if (!bufferLockHeld) - SpinAcquire(BufMgrLock); - - /* - * first try to find the reldesc in the cache, if no luck, - * don't bother to build the reldesc from scratch, just do - * a blind write. - */ - - bufdb = bufHdr->tag.relId.dbId; - bufrel = bufHdr->tag.relId.relId; - - if (bufdb == MyDatabaseId || bufdb == (Oid) NULL) - reln = RelationIdCacheGetRelation(bufrel); - else - reln = (Relation) NULL; - - /* To check if block content changed while flushing. - vadim 01/17/97 */ - bufHdr->flags &= ~BM_JUST_DIRTIED; - - SpinRelease(BufMgrLock); - - if (reln != (Relation) NULL) { - status = smgrflush(bufHdr->bufsmgr, reln, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } else { - - /* blind write always flushes */ - status = smgrblindwrt(bufHdr->bufsmgr, bufHdr->sb_dbname, - bufHdr->sb_relname, bufdb, bufrel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - - if (status == SM_FAIL) - return (FALSE); - - BufferFlushCount++; - - return (TRUE); + Relation reln; + Oid bufdb, + bufrel; + int status; + + if (!bufferLockHeld) + SpinAcquire(BufMgrLock); + + /* + * first try to find the reldesc in the cache, if no luck, don't + * bother to build the reldesc from scratch, just do a blind write. + */ + + bufdb = bufHdr->tag.relId.dbId; + bufrel = bufHdr->tag.relId.relId; + + if (bufdb == MyDatabaseId || bufdb == (Oid) NULL) + reln = RelationIdCacheGetRelation(bufrel); + else + reln = (Relation) NULL; + + /* To check if block content changed while flushing. - vadim 01/17/97 */ + bufHdr->flags &= ~BM_JUST_DIRTIED; + + SpinRelease(BufMgrLock); + + if (reln != (Relation) NULL) + { + status = smgrflush(bufHdr->bufsmgr, reln, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } + else + { + + /* blind write always flushes */ + status = smgrblindwrt(bufHdr->bufsmgr, bufHdr->sb_dbname, + bufHdr->sb_relname, bufdb, bufrel, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } + + if (status == SM_FAIL) + return (FALSE); + + BufferFlushCount++; + + return (TRUE); } /* * RelationGetNumberOfBlocks -- - * Returns the buffer descriptor associated with a page in a relation. + * Returns the buffer descriptor associated with a page in a relation. 
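
BufferReplace() above falls back to a "blind write" when the relation descriptor is no longer in the backend's relcache: the write is then addressed purely by the database and relation names and OIDs that were saved in the buffer header when the page was read in. The sketch below shows only that decision; the stub functions stand in for smgrflush()/smgrblindwrt(), and all the values are invented for the example.

/* Sketch of the blind-write fallback; stand-in types and stub smgr calls. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int Oid;

typedef struct
{
    Oid             dbid;
    Oid             relid;
    unsigned int    blocknum;
    char            dbname[64];     /* saved when the page was read in */
    char            relname[64];
} FakeBufferDesc;

/* stand-in for smgrflush(): we still have a relation descriptor */
static bool
write_via_relcache(Oid relid, unsigned int blocknum)
{
    printf("normal write: rel %u, block %u\n", relid, blocknum);
    return true;
}

/* stand-in for smgrblindwrt(): addressed by the saved names and OIDs only */
static bool
blind_write(const FakeBufferDesc *buf)
{
    printf("blind write: %s/%s (%u/%u), block %u\n",
           buf->dbname, buf->relname, buf->dbid, buf->relid, buf->blocknum);
    return true;
}

static bool
replace_buffer(const FakeBufferDesc *buf, bool have_relcache_entry)
{
    if (have_relcache_entry)
        return write_via_relcache(buf->relid, buf->blocknum);

    /* no reldesc available: don't rebuild it, just write blind */
    return blind_write(buf);
}

int
main(void)
{
    FakeBufferDesc buf = {1, 1259, 7, "testdb", "pg_class"};

    replace_buffer(&buf, false);
    return 0;
}
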
* * Note: - * XXX may fail for huge relations. - * XXX should be elsewhere. - * XXX maybe should be hidden + * XXX may fail for huge relations. + * XXX should be elsewhere. + * XXX maybe should be hidden */ BlockNumber RelationGetNumberOfBlocks(Relation relation) { - return - ((relation->rd_islocal) ? relation->rd_nblocks : - smgrnblocks(relation->rd_rel->relsmgr, relation)); + return + ((relation->rd_islocal) ? relation->rd_nblocks : + smgrnblocks(relation->rd_rel->relsmgr, relation)); } /* * BufferGetBlock -- - * Returns a reference to a disk page image associated with a buffer. + * Returns a reference to a disk page image associated with a buffer. * * Note: - * Assumes buffer is valid. + * Assumes buffer is valid. */ Block BufferGetBlock(Buffer buffer) { - Assert(BufferIsValid(buffer)); + Assert(BufferIsValid(buffer)); - if (BufferIsLocal(buffer)) - return((Block)MAKE_PTR(LocalBufferDescriptors[-buffer-1].data)); - else - return((Block)MAKE_PTR(BufferDescriptors[buffer-1].data)); + if (BufferIsLocal(buffer)) + return ((Block) MAKE_PTR(LocalBufferDescriptors[-buffer - 1].data)); + else + return ((Block) MAKE_PTR(BufferDescriptors[buffer - 1].data)); } /* --------------------------------------------------------------------- - * ReleaseRelationBuffers + * ReleaseRelationBuffers * - * this function unmarks all the dirty pages of a relation - * in the buffer pool so that at the end of transaction - * these pages will not be flushed. - * XXX currently it sequentially searches the buffer pool, should be - * changed to more clever ways of searching. + * this function unmarks all the dirty pages of a relation + * in the buffer pool so that at the end of transaction + * these pages will not be flushed. + * XXX currently it sequentially searches the buffer pool, should be + * changed to more clever ways of searching. 
* -------------------------------------------------------------------- */ void -ReleaseRelationBuffers (Relation rdesc) +ReleaseRelationBuffers(Relation rdesc) { - register int i; - int holding = 0; - BufferDesc *buf; - - if ( rdesc->rd_islocal ) - { - for (i = 0; i < NLocBuffer; i++) - { - buf = &LocalBufferDescriptors[i]; - if ((buf->flags & BM_DIRTY) && - (buf->tag.relId.relId == rdesc->rd_id)) - { - buf->flags &= ~BM_DIRTY; - } - } - return; - } - - for (i=1; i<=NBuffers; i++) { - buf = &BufferDescriptors[i-1]; - if (!holding) { - SpinAcquire(BufMgrLock); - holding = 1; + register int i; + int holding = 0; + BufferDesc *buf; + + if (rdesc->rd_islocal) + { + for (i = 0; i < NLocBuffer; i++) + { + buf = &LocalBufferDescriptors[i]; + if ((buf->flags & BM_DIRTY) && + (buf->tag.relId.relId == rdesc->rd_id)) + { + buf->flags &= ~BM_DIRTY; + } + } + return; } - if ((buf->flags & BM_DIRTY) && - (buf->tag.relId.dbId == MyDatabaseId) && - (buf->tag.relId.relId == rdesc->rd_id)) { - buf->flags &= ~BM_DIRTY; - if (!(buf->flags & BM_FREE)) { - SpinRelease(BufMgrLock); - holding = 0; - ReleaseBuffer(i); - } + + for (i = 1; i <= NBuffers; i++) + { + buf = &BufferDescriptors[i - 1]; + if (!holding) + { + SpinAcquire(BufMgrLock); + holding = 1; + } + if ((buf->flags & BM_DIRTY) && + (buf->tag.relId.dbId == MyDatabaseId) && + (buf->tag.relId.relId == rdesc->rd_id)) + { + buf->flags &= ~BM_DIRTY; + if (!(buf->flags & BM_FREE)) + { + SpinRelease(BufMgrLock); + holding = 0; + ReleaseBuffer(i); + } + } } - } - if (holding) - SpinRelease(BufMgrLock); + if (holding) + SpinRelease(BufMgrLock); } /* --------------------------------------------------------------------- - * DropBuffers + * DropBuffers * - * This function marks all the buffers in the buffer cache for a - * particular database as clean. This is used when we destroy a - * database, to avoid trying to flush data to disk when the directory - * tree no longer exists. + * This function marks all the buffers in the buffer cache for a + * particular database as clean. This is used when we destroy a + * database, to avoid trying to flush data to disk when the directory + * tree no longer exists. * - * This is an exceedingly non-public interface. + * This is an exceedingly non-public interface. * -------------------------------------------------------------------- */ void DropBuffers(Oid dbid) { - register int i; - BufferDesc *buf; - - SpinAcquire(BufMgrLock); - for (i=1; i<=NBuffers; i++) { - buf = &BufferDescriptors[i-1]; - if ((buf->tag.relId.dbId == dbid) && (buf->flags & BM_DIRTY)) { - buf->flags &= ~BM_DIRTY; - } - } - SpinRelease(BufMgrLock); + register int i; + BufferDesc *buf; + + SpinAcquire(BufMgrLock); + for (i = 1; i <= NBuffers; i++) + { + buf = &BufferDescriptors[i - 1]; + if ((buf->tag.relId.dbId == dbid) && (buf->flags & BM_DIRTY)) + { + buf->flags &= ~BM_DIRTY; + } + } + SpinRelease(BufMgrLock); } /* ----------------------------------------------------------------- - * PrintBufferDescs + * PrintBufferDescs * - * this function prints all the buffer descriptors, for debugging - * use only. + * this function prints all the buffer descriptors, for debugging + * use only. 
* ----------------------------------------------------------------- */ void PrintBufferDescs() { - int i; - BufferDesc *buf = BufferDescriptors; + int i; + BufferDesc *buf = BufferDescriptors; - if (IsUnderPostmaster) { - SpinAcquire(BufMgrLock); - for (i = 0; i < NBuffers; ++i, ++buf) { - elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ + if (IsUnderPostmaster) + { + SpinAcquire(BufMgrLock); + for (i = 0; i < NBuffers; ++i, ++buf) + { + elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ blockNum=%d, flags=0x%x, refcount=%d %d)", - i, buf->freeNext, buf->freePrev, - buf->sb_relname, buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); + i, buf->freeNext, buf->freePrev, + buf->sb_relname, buf->tag.blockNum, buf->flags, + buf->refcount, PrivateRefCount[i]); + } + SpinRelease(BufMgrLock); } - SpinRelease(BufMgrLock); - } else { - /* interactive backend */ - for (i = 0; i < NBuffers; ++i, ++buf) { - printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n", - i, buf->sb_relname, buf->tag.blockNum, - buf->flags, buf->refcount, PrivateRefCount[i]); + else + { + /* interactive backend */ + for (i = 0; i < NBuffers; ++i, ++buf) + { + printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n", + i, buf->sb_relname, buf->tag.blockNum, + buf->flags, buf->refcount, PrivateRefCount[i]); + } } - } } void PrintPinnedBufs() { - int i; - BufferDesc *buf = BufferDescriptors; - - SpinAcquire(BufMgrLock); - for (i = 0; i < NBuffers; ++i, ++buf) { - if (PrivateRefCount[i] > 0) - elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ + int i; + BufferDesc *buf = BufferDescriptors; + + SpinAcquire(BufMgrLock); + for (i = 0; i < NBuffers; ++i, ++buf) + { + if (PrivateRefCount[i] > 0) + elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ blockNum=%d, flags=0x%x, refcount=%d %d)\n", - i, buf->freeNext, buf->freePrev, buf->sb_relname, - buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); - } - SpinRelease(BufMgrLock); + i, buf->freeNext, buf->freePrev, buf->sb_relname, + buf->tag.blockNum, buf->flags, + buf->refcount, PrivateRefCount[i]); + } + SpinRelease(BufMgrLock); } /* @@ -1474,17 +1587,20 @@ blockNum=%d, flags=0x%x, refcount=%d %d)\n", void BufferPoolBlowaway() { - register int i; - - BufferSync(); - for (i=1; i<=NBuffers; i++) { - if (BufferIsValid(i)) { - while(BufferIsValid(i)) - ReleaseBuffer(i); - } - BufTableDelete(&BufferDescriptors[i-1]); - } + register int i; + + BufferSync(); + for (i = 1; i <= NBuffers; i++) + { + if (BufferIsValid(i)) + { + while (BufferIsValid(i)) + ReleaseBuffer(i); + } + BufTableDelete(&BufferDescriptors[i - 1]); + } } + #endif #undef IncrBufferRefCount @@ -1493,297 +1609,328 @@ BufferPoolBlowaway() void IncrBufferRefCount(Buffer buffer) { - if (BufferIsLocal(buffer)) { - Assert(LocalRefCount[-buffer - 1] >= 0); - LocalRefCount[-buffer - 1]++; - } else { - Assert(!BAD_BUFFER_ID(buffer)); - Assert(PrivateRefCount[buffer - 1] >= 0); - PrivateRefCount[buffer - 1]++; - } + if (BufferIsLocal(buffer)) + { + Assert(LocalRefCount[-buffer - 1] >= 0); + LocalRefCount[-buffer - 1]++; + } + else + { + Assert(!BAD_BUFFER_ID(buffer)); + Assert(PrivateRefCount[buffer - 1] >= 0); + PrivateRefCount[buffer - 1]++; + } } /* * ReleaseBuffer -- remove the pin on a buffer without - * marking it dirty. + * marking it dirty. 
* */ int ReleaseBuffer(Buffer buffer) { - BufferDesc *bufHdr; - - if (BufferIsLocal(buffer)) { - Assert(LocalRefCount[-buffer - 1] > 0); - LocalRefCount[-buffer - 1]--; - return (STATUS_OK); - } - - if (BAD_BUFFER_ID(buffer)) - return(STATUS_ERROR); - - bufHdr = &BufferDescriptors[buffer-1]; - - Assert(PrivateRefCount[buffer - 1] > 0); - PrivateRefCount[buffer - 1]--; - if (PrivateRefCount[buffer - 1] == 0 && LastRefCount[buffer - 1] == 0) { - /* only release buffer if it is not pinned in previous ExecMain - levels */ - SpinAcquire(BufMgrLock); - bufHdr->refcount--; - if (bufHdr->refcount == 0) { - AddBufferToFreelist(bufHdr); - bufHdr->flags |= BM_FREE; + BufferDesc *bufHdr; + + if (BufferIsLocal(buffer)) + { + Assert(LocalRefCount[-buffer - 1] > 0); + LocalRefCount[-buffer - 1]--; + return (STATUS_OK); } - if(CommitInfoNeedsSave[buffer - 1]) { - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - CommitInfoNeedsSave[buffer - 1] = 0; + + if (BAD_BUFFER_ID(buffer)) + return (STATUS_ERROR); + + bufHdr = &BufferDescriptors[buffer - 1]; + + Assert(PrivateRefCount[buffer - 1] > 0); + PrivateRefCount[buffer - 1]--; + if (PrivateRefCount[buffer - 1] == 0 && LastRefCount[buffer - 1] == 0) + { + + /* + * only release buffer if it is not pinned in previous ExecMain + * levels + */ + SpinAcquire(BufMgrLock); + bufHdr->refcount--; + if (bufHdr->refcount == 0) + { + AddBufferToFreelist(bufHdr); + bufHdr->flags |= BM_FREE; + } + if (CommitInfoNeedsSave[buffer - 1]) + { + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + CommitInfoNeedsSave[buffer - 1] = 0; + } + SpinRelease(BufMgrLock); } - SpinRelease(BufMgrLock); - } - - return(STATUS_OK); + + return (STATUS_OK); } #ifdef NOT_USED void IncrBufferRefCount_Debug(char *file, int line, Buffer buffer) { - IncrBufferRefCount(buffer); - if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - fprintf(stderr, "PIN(Incr) %ld relname = %s, blockNum = %d, \ + IncrBufferRefCount(buffer); + if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + fprintf(stderr, "PIN(Incr) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif #ifdef NOT_USED void ReleaseBuffer_Debug(char *file, int line, Buffer buffer) { - ReleaseBuffer(buffer); - if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - fprintf(stderr, "UNPIN(Rel) %ld relname = %s, blockNum = %d, \ + ReleaseBuffer(buffer); + if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + fprintf(stderr, "UNPIN(Rel) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif #ifdef NOT_USED int ReleaseAndReadBuffer_Debug(char *file, - int line, - Buffer buffer, - Relation relation, - BlockNumber blockNum) + int line, + Buffer buffer, + Relation relation, + BlockNumber blockNum) { - bool bufferValid; - Buffer b; - - bufferValid = BufferIsValid(buffer); - b = ReleaseAndReadBuffer(buffer, relation, blockNum); - if (ShowPinTrace 
&& bufferValid && BufferIsLocal(buffer) - && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - fprintf(stderr, "UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ + bool bufferValid; + Buffer b; + + bufferValid = BufferIsValid(buffer); + b = ReleaseAndReadBuffer(buffer, relation, blockNum); + if (ShowPinTrace && bufferValid && BufferIsLocal(buffer) + && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + fprintf(stderr, "UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } - if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[b-1]; - - fprintf(stderr, "PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } + if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[b - 1]; + + fprintf(stderr, "PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - b, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[b - 1], file, line); - } - return b; + b, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[b - 1], file, line); + } + return b; } + #endif #ifdef BMTRACE /* - * trace allocations and deallocations in a circular buffer in - * shared memory. check the buffer before doing the allocation, - * and die if there's anything fishy. + * trace allocations and deallocations in a circular buffer in + * shared memory. check the buffer before doing the allocation, + * and die if there's anything fishy. */ _bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType) { - static int mypid = 0; - long start, cur; - bmtrace *tb; - - if (mypid == 0) - mypid = getpid(); - - start = *CurTraceBuf; - - if (start > 0) - cur = start - 1; - else - cur = BMT_LIMIT - 1; - - for (;;) { - tb = &TraceBuf[cur]; - if (tb->bmt_op != BMT_NOTUSED) { - if (tb->bmt_buf == bufNo) { - if ((tb->bmt_op == BMT_DEALLOC) - || (tb->bmt_dbid == dbId && tb->bmt_relid == relId - && tb->bmt_blkno == blkNo)) - goto okay; - - /* die holding the buffer lock */ - _bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur); - } - } - - if (cur == start) - goto okay; - - if (cur == 0) - cur = BMT_LIMIT - 1; + static int mypid = 0; + long start, + cur; + bmtrace *tb; + + if (mypid == 0) + mypid = getpid(); + + start = *CurTraceBuf; + + if (start > 0) + cur = start - 1; else - cur--; - } - - okay: - tb = &TraceBuf[start]; - tb->bmt_pid = mypid; - tb->bmt_buf = bufNo; - tb->bmt_dbid = dbId; - tb->bmt_relid = relId; - tb->bmt_blkno = blkNo; - tb->bmt_op = allocType; - - *CurTraceBuf = (start + 1) % BMT_LIMIT; + cur = BMT_LIMIT - 1; + + for (;;) + { + tb = &TraceBuf[cur]; + if (tb->bmt_op != BMT_NOTUSED) + { + if (tb->bmt_buf == bufNo) + { + if ((tb->bmt_op == BMT_DEALLOC) + || (tb->bmt_dbid == dbId && tb->bmt_relid == relId + && tb->bmt_blkno == blkNo)) + goto okay; + + /* die holding the buffer lock */ + _bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur); + } + } + + if (cur == start) + goto okay; + + if (cur == 0) + cur = BMT_LIMIT - 1; + else + cur--; + } + +okay: + tb = &TraceBuf[start]; + tb->bmt_pid = mypid; + tb->bmt_buf = bufNo; + tb->bmt_dbid = dbId; + tb->bmt_relid = relId; + tb->bmt_blkno = blkNo; + tb->bmt_op = allocType; + + *CurTraceBuf = (start + 1) % BMT_LIMIT; } _bm_die(Oid dbId, Oid relId, int blkNo, int 
bufNo, - int allocType, long start, long cur) + int allocType, long start, long cur) { - FILE *fp; - bmtrace *tb; - int i; - - tb = &TraceBuf[cur]; - - if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL) - elog(FATAL, "buffer alloc trace error and can't open log file"); - - fprintf(fp, "buffer alloc trace detected the following error:\n\n"); - fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n", - bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"), - (tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation")); - - fprintf(fp, "the trace buffer contains:\n"); - - i = start; - for (;;) { - tb = &TraceBuf[i]; - if (tb->bmt_op != BMT_NOTUSED) { - fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%d,%d> ", - i, (i == cur ? " ---> " : "\t"), - tb->bmt_pid, tb->bmt_buf, - tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno); - - switch (tb->bmt_op) { - case BMT_ALLOCFND: + FILE *fp; + bmtrace *tb; + int i; + + tb = &TraceBuf[cur]; + + if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL) + elog(FATAL, "buffer alloc trace error and can't open log file"); + + fprintf(fp, "buffer alloc trace detected the following error:\n\n"); + fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n", + bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"), + (tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation")); + + fprintf(fp, "the trace buffer contains:\n"); + + i = start; + for (;;) + { + tb = &TraceBuf[i]; + if (tb->bmt_op != BMT_NOTUSED) + { + fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%d,%d> ", + i, (i == cur ? " ---> " : "\t"), + tb->bmt_pid, tb->bmt_buf, + tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno); + + switch (tb->bmt_op) + { + case BMT_ALLOCFND: + fprintf(fp, "allocate (found)\n"); + break; + + case BMT_ALLOCNOTFND: + fprintf(fp, "allocate (not found)\n"); + break; + + case BMT_DEALLOC: + fprintf(fp, "deallocate\n"); + break; + + default: + fprintf(fp, "unknown op type %d\n", tb->bmt_op); + break; + } + } + + i = (i + 1) % BMT_LIMIT; + if (i == start) + break; + } + + fprintf(fp, "\noperation causing error:\n"); + fprintf(fp, "\tpid %d buf %d for <%d,%d,%d> ", + getpid(), bufNo, dbId, relId, blkNo); + + switch (allocType) + { + case BMT_ALLOCFND: fprintf(fp, "allocate (found)\n"); break; - - case BMT_ALLOCNOTFND: + + case BMT_ALLOCNOTFND: fprintf(fp, "allocate (not found)\n"); break; - - case BMT_DEALLOC: + + case BMT_DEALLOC: fprintf(fp, "deallocate\n"); break; - - default: - fprintf(fp, "unknown op type %d\n", tb->bmt_op); + + default: + fprintf(fp, "unknown op type %d\n", allocType); break; - } } - - i = (i + 1) % BMT_LIMIT; - if (i == start) - break; - } - - fprintf(fp, "\noperation causing error:\n"); - fprintf(fp, "\tpid %d buf %d for <%d,%d,%d> ", - getpid(), bufNo, dbId, relId, blkNo); - - switch (allocType) { - case BMT_ALLOCFND: - fprintf(fp, "allocate (found)\n"); - break; - - case BMT_ALLOCNOTFND: - fprintf(fp, "allocate (not found)\n"); - break; - - case BMT_DEALLOC: - fprintf(fp, "deallocate\n"); - break; - - default: - fprintf(fp, "unknown op type %d\n", allocType); - break; - } - - FreeFile(fp); - - kill(getpid(), SIGILL); + + FreeFile(fp); + + kill(getpid(), SIGILL); } -#endif /* BMTRACE */ +#endif /* BMTRACE */ void BufferRefCountReset(int *refcountsave) { - int i; - for (i=0; i<NBuffers; i++) { - refcountsave[i] = PrivateRefCount[i]; - LastRefCount[i] += PrivateRefCount[i]; - PrivateRefCount[i] = 0; - } + int i; + + for (i = 0; i < NBuffers; i++) + { + refcountsave[i] = PrivateRefCount[i]; + LastRefCount[i] += 
PrivateRefCount[i]; + PrivateRefCount[i] = 0; + } } void BufferRefCountRestore(int *refcountsave) { - int i; - for (i=0; i<NBuffers; i++) { - PrivateRefCount[i] = refcountsave[i]; - LastRefCount[i] -= refcountsave[i]; - refcountsave[i] = 0; - } + int i; + + for (i = 0; i < NBuffers; i++) + { + PrivateRefCount[i] = refcountsave[i]; + LastRefCount[i] -= refcountsave[i]; + refcountsave[i] = 0; + } } -int SetBufferWriteMode (int mode) +int +SetBufferWriteMode(int mode) { - int old; - - old = WriteMode; - WriteMode = mode; - return (old); + int old; + + old = WriteMode; + WriteMode = mode; + return (old); } -void SetBufferCommitInfoNeedsSave(Buffer buffer) +void +SetBufferCommitInfoNeedsSave(Buffer buffer) { - if ( !BufferIsLocal(buffer) ) - CommitInfoNeedsSave[buffer - 1]++; + if (!BufferIsLocal(buffer)) + CommitInfoNeedsSave[buffer - 1]++; } diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index f4e7bcdc57a..94a8e84b8c6 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * * freelist.c-- - * routines for manipulating the buffer pool's replacement strategy - * freelist. + * routines for manipulating the buffer pool's replacement strategy + * freelist. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.4 1997/08/19 21:32:44 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.5 1997/09/07 04:48:22 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -16,23 +16,23 @@ * OLD COMMENTS * * Data Structures: - * SharedFreeList is a circular queue. Notice that this - * is a shared memory queue so the next/prev "ptrs" are - * buffer ids, not addresses. + * SharedFreeList is a circular queue. Notice that this + * is a shared memory queue so the next/prev "ptrs" are + * buffer ids, not addresses. * * Sync: all routines in this file assume that the buffer - * semaphore has been acquired by the caller. + * semaphore has been acquired by the caller. */ #include <stdio.h> #include "postgres.h" #include "storage/bufmgr.h" -#include "storage/buf_internals.h" /* where declarations go */ +#include "storage/buf_internals.h" /* where declarations go */ #include "storage/spin.h" -static BufferDesc *SharedFreeList; +static BufferDesc *SharedFreeList; /* only actually used in debugging. The lock * should be acquired before calling the freelist manager. @@ -40,40 +40,40 @@ static BufferDesc *SharedFreeList; extern SPINLOCK BufMgrLock; #define IsInQueue(bf) \ - Assert((bf->freeNext != INVALID_DESCRIPTOR));\ - Assert((bf->freePrev != INVALID_DESCRIPTOR));\ - Assert((bf->flags & BM_FREE)) + Assert((bf->freeNext != INVALID_DESCRIPTOR));\ + Assert((bf->freePrev != INVALID_DESCRIPTOR));\ + Assert((bf->flags & BM_FREE)) #define NotInQueue(bf) \ - Assert((bf->freeNext == INVALID_DESCRIPTOR));\ - Assert((bf->freePrev == INVALID_DESCRIPTOR));\ - Assert(! (bf->flags & BM_FREE)) + Assert((bf->freeNext == INVALID_DESCRIPTOR));\ + Assert((bf->freePrev == INVALID_DESCRIPTOR));\ + Assert(! (bf->flags & BM_FREE)) /* - * AddBufferToFreelist -- + * AddBufferToFreelist -- * * In theory, this is the only routine that needs to be changed - * if the buffer replacement strategy changes. Just change + * if the buffer replacement strategy changes. 
Just change * the manner in which buffers are added to the freelist queue. * Currently, they are added on an LRU basis. */ void -AddBufferToFreelist(BufferDesc *bf) +AddBufferToFreelist(BufferDesc * bf) { #ifdef BMTRACE - _bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum, - BufferDescriptorGetBuffer(bf), BMT_DEALLOC); -#endif /* BMTRACE */ - NotInQueue(bf); - - /* change bf so it points to inFrontOfNew and its successor */ - bf->freePrev = SharedFreeList->freePrev; - bf->freeNext = Free_List_Descriptor; - - /* insert new into chain */ - BufferDescriptors[bf->freeNext].freePrev = bf->buf_id; - BufferDescriptors[bf->freePrev].freeNext = bf->buf_id; + _bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum, + BufferDescriptorGetBuffer(bf), BMT_DEALLOC); +#endif /* BMTRACE */ + NotInQueue(bf); + + /* change bf so it points to inFrontOfNew and its successor */ + bf->freePrev = SharedFreeList->freePrev; + bf->freeNext = Free_List_Descriptor; + + /* insert new into chain */ + BufferDescriptors[bf->freeNext].freePrev = bf->buf_id; + BufferDescriptors[bf->freePrev].freeNext = bf->buf_id; } #undef PinBuffer @@ -82,47 +82,52 @@ AddBufferToFreelist(BufferDesc *bf) * PinBuffer -- make buffer unavailable for replacement. */ void -PinBuffer(BufferDesc *buf) +PinBuffer(BufferDesc * buf) { - long b; - - /* Assert (buf->refcount < 25); */ - - if (buf->refcount == 0) { - IsInQueue(buf); - - /* remove from freelist queue */ - BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; - BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; - buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; - - /* mark buffer as no longer free */ - buf->flags &= ~BM_FREE; - } else { - NotInQueue(buf); - } - - b = BufferDescriptorGetBuffer(buf) - 1; - Assert(PrivateRefCount[b] >= 0); - if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) - buf->refcount++; - PrivateRefCount[b]++; + long b; + + /* Assert (buf->refcount < 25); */ + + if (buf->refcount == 0) + { + IsInQueue(buf); + + /* remove from freelist queue */ + BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; + BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; + buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; + + /* mark buffer as no longer free */ + buf->flags &= ~BM_FREE; + } + else + { + NotInQueue(buf); + } + + b = BufferDescriptorGetBuffer(buf) - 1; + Assert(PrivateRefCount[b] >= 0); + if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) + buf->refcount++; + PrivateRefCount[b]++; } #ifdef NOT_USED void -PinBuffer_Debug(char *file, int line, BufferDesc *buf) +PinBuffer_Debug(char *file, int line, BufferDesc * buf) { - PinBuffer(buf); - if (ShowPinTrace) { - Buffer buffer = BufferDescriptorGetBuffer(buf); - - fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \ + PinBuffer(buf); + if (ShowPinTrace) + { + Buffer buffer = BufferDescriptorGetBuffer(buf); + + fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif #undef UnpinBuffer @@ -131,95 +136,102 @@ refcount = %ld, file: %s, line: %d\n", * UnpinBuffer -- make buffer available for replacement. 
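
AddBufferToFreelist() and PinBuffer() above maintain the shared freelist as a circular doubly-linked list threaded through the descriptor array, using buffer ids rather than pointers as links, since the list lives in shared memory and addresses differ between backends. Below is a self-contained sketch of inserting at the tail and unlinking on pin, with a dummy head playing the role of SharedFreeList; the array size and names are invented.

/* Sketch of the id-linked circular freelist; stand-in structures only. */
#include <stdio.h>

#define NBUFS       4
#define FREE_HEAD   NBUFS           /* dummy descriptor used as the list head */
#define INVALID     (-1)

typedef struct
{
    int     freeNext;
    int     freePrev;
} FakeDesc;

static FakeDesc descs[NBUFS + 1];   /* last slot is the dummy head */

static void
init_freelist(void)
{
    int     i;

    /* link every descriptor, head included, into one circle */
    for (i = 0; i <= NBUFS; i++)
    {
        descs[i].freeNext = (i + 1) % (NBUFS + 1);
        descs[i].freePrev = (i + NBUFS) % (NBUFS + 1);
    }
}

/* like AddBufferToFreelist(): insert just before the head, i.e. at the LRU tail */
static void
add_to_freelist(int b)
{
    descs[b].freePrev = descs[FREE_HEAD].freePrev;
    descs[b].freeNext = FREE_HEAD;
    descs[descs[b].freeNext].freePrev = b;
    descs[descs[b].freePrev].freeNext = b;
}

/* like PinBuffer()/GetFreeBuffer(): unlink a buffer from the circle */
static void
remove_from_freelist(int b)
{
    descs[descs[b].freeNext].freePrev = descs[b].freePrev;
    descs[descs[b].freePrev].freeNext = descs[b].freeNext;
    descs[b].freeNext = descs[b].freePrev = INVALID;
}

int
main(void)
{
    int     victim;

    init_freelist();
    remove_from_freelist(2);            /* pin buffer 2 */
    add_to_freelist(2);                 /* unpin: goes to the LRU tail */

    victim = descs[FREE_HEAD].freeNext; /* the replacement code takes the head */
    printf("next replacement victim: buffer %d\n", victim);
    return 0;
}

Taking victims from the head and re-inserting unpinned buffers at the tail is what makes the replacement policy LRU; changing only the insertion point would change the policy.
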
*/ void -UnpinBuffer(BufferDesc *buf) +UnpinBuffer(BufferDesc * buf) { - long b = BufferDescriptorGetBuffer(buf) - 1; - - Assert(buf->refcount); - Assert(PrivateRefCount[b] > 0); - PrivateRefCount[b]--; - if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) - buf->refcount--; - NotInQueue(buf); - - if (buf->refcount == 0) { - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; - } else { - /* do nothing */ - } + long b = BufferDescriptorGetBuffer(buf) - 1; + + Assert(buf->refcount); + Assert(PrivateRefCount[b] > 0); + PrivateRefCount[b]--; + if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) + buf->refcount--; + NotInQueue(buf); + + if (buf->refcount == 0) + { + AddBufferToFreelist(buf); + buf->flags |= BM_FREE; + } + else + { + /* do nothing */ + } } #ifdef NOT_USED void -UnpinBuffer_Debug(char *file, int line, BufferDesc *buf) +UnpinBuffer_Debug(char *file, int line, BufferDesc * buf) { - UnpinBuffer(buf); - if (ShowPinTrace) { - Buffer buffer = BufferDescriptorGetBuffer(buf); - - fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \ + UnpinBuffer(buf); + if (ShowPinTrace) + { + Buffer buffer = BufferDescriptorGetBuffer(buf); + + fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif /* * GetFreeBuffer() -- get the 'next' buffer from the freelist. * */ -BufferDesc * +BufferDesc * GetFreeBuffer() { - BufferDesc *buf; - - if (Free_List_Descriptor == SharedFreeList->freeNext) { - - /* queue is empty. All buffers in the buffer pool are pinned. */ - elog(WARN,"out of free buffers: time to abort !\n"); - return(NULL); - } - buf = &(BufferDescriptors[SharedFreeList->freeNext]); - - /* remove from freelist queue */ - BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; - BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; - buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; - - buf->flags &= ~(BM_FREE); - - return(buf); + BufferDesc *buf; + + if (Free_List_Descriptor == SharedFreeList->freeNext) + { + + /* queue is empty. All buffers in the buffer pool are pinned. */ + elog(WARN, "out of free buffers: time to abort !\n"); + return (NULL); + } + buf = &(BufferDescriptors[SharedFreeList->freeNext]); + + /* remove from freelist queue */ + BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; + BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; + buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; + + buf->flags &= ~(BM_FREE); + + return (buf); } /* * InitFreeList -- initialize the dummy buffer descriptor used - * as a freelist head. + * as a freelist head. * * Assume: All of the buffers are already linked in a circular - * queue. Only called by postmaster and only during - * initialization. + * queue. Only called by postmaster and only during + * initialization. 
*/ void InitFreeList(bool init) { - SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]); - - if (init) { - /* we only do this once, normally the postmaster */ - SharedFreeList->data = INVALID_OFFSET; - SharedFreeList->flags = 0; - SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE); - SharedFreeList->buf_id = Free_List_Descriptor; - - /* insert it into a random spot in the circular queue */ - SharedFreeList->freeNext = BufferDescriptors[0].freeNext; - SharedFreeList->freePrev = 0; - BufferDescriptors[SharedFreeList->freeNext].freePrev = - BufferDescriptors[SharedFreeList->freePrev].freeNext = - Free_List_Descriptor; - } + SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]); + + if (init) + { + /* we only do this once, normally the postmaster */ + SharedFreeList->data = INVALID_OFFSET; + SharedFreeList->flags = 0; + SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE); + SharedFreeList->buf_id = Free_List_Descriptor; + + /* insert it into a random spot in the circular queue */ + SharedFreeList->freeNext = BufferDescriptors[0].freeNext; + SharedFreeList->freePrev = 0; + BufferDescriptors[SharedFreeList->freeNext].freePrev = + BufferDescriptors[SharedFreeList->freePrev].freeNext = + Free_List_Descriptor; + } } @@ -230,67 +242,78 @@ InitFreeList(bool init) void DBG_FreeListCheck(int nfree) { - int i; - BufferDesc *buf; - - buf = &(BufferDescriptors[SharedFreeList->freeNext]); - for (i=0;i<nfree;i++,buf = &(BufferDescriptors[buf->freeNext])) { - - if (! (buf->flags & (BM_FREE))){ - if (buf != SharedFreeList) { - printf("\tfree list corrupted: %d flags %x\n", - buf->buf_id,buf->flags); - } else { - printf("\tfree list corrupted: too short -- %d not %d\n", - i,nfree); - - } - - + int i; + BufferDesc *buf; + + buf = &(BufferDescriptors[SharedFreeList->freeNext]); + for (i = 0; i < nfree; i++, buf = &(BufferDescriptors[buf->freeNext])) + { + + if (!(buf->flags & (BM_FREE))) + { + if (buf != SharedFreeList) + { + printf("\tfree list corrupted: %d flags %x\n", + buf->buf_id, buf->flags); + } + else + { + printf("\tfree list corrupted: too short -- %d not %d\n", + i, nfree); + + } + + + } + if ((BufferDescriptors[buf->freeNext].freePrev != buf->buf_id) || + (BufferDescriptors[buf->freePrev].freeNext != buf->buf_id)) + { + printf("\tfree list links corrupted: %d %ld %ld\n", + buf->buf_id, buf->freePrev, buf->freeNext); + } + } - if ((BufferDescriptors[buf->freeNext].freePrev != buf->buf_id) || - (BufferDescriptors[buf->freePrev].freeNext != buf->buf_id)) { - printf("\tfree list links corrupted: %d %ld %ld\n", - buf->buf_id,buf->freePrev,buf->freeNext); + if (buf != SharedFreeList) + { + printf("\tfree list corrupted: %d-th buffer is %d\n", + nfree, buf->buf_id); + } - - } - if (buf != SharedFreeList) { - printf("\tfree list corrupted: %d-th buffer is %d\n", - nfree,buf->buf_id); - - } } + #endif #ifdef NOT_USED /* * PrintBufferFreeList - - * prints the buffer free list, for debugging + * prints the buffer free list, for debugging */ static void PrintBufferFreeList() { - BufferDesc *buf; - - if (SharedFreeList->freeNext == Free_List_Descriptor) { - printf("free list is empty.\n"); - return; - } - - buf = &(BufferDescriptors[SharedFreeList->freeNext]); - for (;;) { - int i = (buf - BufferDescriptors); - printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld, nxt=%ld prv=%ld)\n", - i, buf->sb_relname, buf->tag.blockNum, - buf->flags, buf->refcount, PrivateRefCount[i], - buf->freeNext, buf->freePrev); - - if (buf->freeNext == Free_List_Descriptor) - break; - - buf = 
&(BufferDescriptors[buf->freeNext]); - } + BufferDesc *buf; + + if (SharedFreeList->freeNext == Free_List_Descriptor) + { + printf("free list is empty.\n"); + return; + } + + buf = &(BufferDescriptors[SharedFreeList->freeNext]); + for (;;) + { + int i = (buf - BufferDescriptors); + + printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld, nxt=%ld prv=%ld)\n", + i, buf->sb_relname, buf->tag.blockNum, + buf->flags, buf->refcount, PrivateRefCount[i], + buf->freeNext, buf->freePrev); + + if (buf->freeNext == Free_List_Descriptor) + break; + + buf = &(BufferDescriptors[buf->freeNext]); + } } #endif diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 910cb668d7a..072830b3dd6 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -1,21 +1,21 @@ /*------------------------------------------------------------------------- * * localbuf.c-- - * local buffer manager. Fast buffer manager for temporary tables - * or special cases when the operation is not visible to other backends. + * local buffer manager. Fast buffer manager for temporary tables + * or special cases when the operation is not visible to other backends. * - * When a relation is being created, the descriptor will have rd_islocal - * set to indicate that the local buffer manager should be used. During - * the same transaction the relation is being created, any inserts or - * selects from the newly created relation will use the local buffer - * pool. rd_islocal is reset at the end of a transaction (commit/abort). - * This is useful for queries like SELECT INTO TABLE and create index. + * When a relation is being created, the descriptor will have rd_islocal + * set to indicate that the local buffer manager should be used. During + * the same transaction the relation is being created, any inserts or + * selects from the newly created relation will use the local buffer + * pool. rd_islocal is reset at the end of a transaction (commit/abort). + * This is useful for queries like SELECT INTO TABLE and create index. * * Copyright (c) 1994-5, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.8 1997/07/28 00:54:48 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.9 1997/09/07 04:48:23 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -45,252 +45,262 @@ #include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/relcache.h" -#include "executor/execdebug.h" /* for NDirectFileRead */ +#include "executor/execdebug.h" /* for NDirectFileRead */ #include "catalog/catalog.h" extern long int LocalBufferFlushCount; -int NLocBuffer = 64; -BufferDesc *LocalBufferDescriptors = NULL; -long *LocalRefCount = NULL; +int NLocBuffer = 64; +BufferDesc *LocalBufferDescriptors = NULL; +long *LocalRefCount = NULL; -static int nextFreeLocalBuf = 0; +static int nextFreeLocalBuf = 0; /*#define LBDEBUG*/ /* * LocalBufferAlloc - - * allocate a local buffer. We do round robin allocation for now. + * allocate a local buffer. We do round robin allocation for now. 
*/ -BufferDesc * -LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) +BufferDesc * +LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool * foundPtr) { - int i; - BufferDesc *bufHdr = (BufferDesc *) NULL; + int i; + BufferDesc *bufHdr = (BufferDesc *) NULL; - if (blockNum == P_NEW) { - blockNum = reln->rd_nblocks; - reln->rd_nblocks++; - } + if (blockNum == P_NEW) + { + blockNum = reln->rd_nblocks; + reln->rd_nblocks++; + } - /* a low tech search for now -- not optimized for scans */ - for (i=0; i < NLocBuffer; i++) { - if (LocalBufferDescriptors[i].tag.relId.relId == reln->rd_id && - LocalBufferDescriptors[i].tag.blockNum == blockNum) { + /* a low tech search for now -- not optimized for scans */ + for (i = 0; i < NLocBuffer; i++) + { + if (LocalBufferDescriptors[i].tag.relId.relId == reln->rd_id && + LocalBufferDescriptors[i].tag.blockNum == blockNum) + { #ifdef LBDEBUG - fprintf(stderr, "LB ALLOC (%d,%d) %d\n", - reln->rd_id, blockNum, -i-1); -#endif - LocalRefCount[i]++; - *foundPtr = TRUE; - return &LocalBufferDescriptors[i]; + fprintf(stderr, "LB ALLOC (%d,%d) %d\n", + reln->rd_id, blockNum, -i - 1); +#endif + LocalRefCount[i]++; + *foundPtr = TRUE; + return &LocalBufferDescriptors[i]; + } } - } #ifdef LBDEBUG - fprintf(stderr, "LB ALLOC (%d,%d) %d\n", - reln->rd_id, blockNum, -nextFreeLocalBuf-1); -#endif - - /* need to get a new buffer (round robin for now) */ - for(i=0; i < NLocBuffer; i++) { - int b = (nextFreeLocalBuf + i) % NLocBuffer; - - if (LocalRefCount[b]==0) { - bufHdr = &LocalBufferDescriptors[b]; - LocalRefCount[b]++; - nextFreeLocalBuf = (b + 1) % NLocBuffer; - break; + fprintf(stderr, "LB ALLOC (%d,%d) %d\n", + reln->rd_id, blockNum, -nextFreeLocalBuf - 1); +#endif + + /* need to get a new buffer (round robin for now) */ + for (i = 0; i < NLocBuffer; i++) + { + int b = (nextFreeLocalBuf + i) % NLocBuffer; + + if (LocalRefCount[b] == 0) + { + bufHdr = &LocalBufferDescriptors[b]; + LocalRefCount[b]++; + nextFreeLocalBuf = (b + 1) % NLocBuffer; + break; + } } - } - if (bufHdr==NULL) - elog(WARN, "no empty local buffer."); - - /* - * this buffer is not referenced but it might still be dirty (the - * last transaction to touch it doesn't need its contents but has - * not flushed it). if that's the case, write it out before - * reusing it! - */ - if (bufHdr->flags & BM_DIRTY) { - Relation bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); + if (bufHdr == NULL) + elog(WARN, "no empty local buffer."); - Assert(bufrel != NULL); - - /* flush this page */ - smgrwrite(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - LocalBufferFlushCount++; - } - - /* - * it's all ours now. - */ - bufHdr->tag.relId.relId = reln->rd_id; - bufHdr->tag.blockNum = blockNum; - bufHdr->flags &= ~BM_DIRTY; - - /* - * lazy memory allocation. (see MAKE_PTR for why we need to do - * MAKE_OFFSET.) - */ - if (bufHdr->data == (SHMEM_OFFSET)0) { - char *data = (char *)malloc(BLCKSZ); - - bufHdr->data = MAKE_OFFSET(data); - } - - *foundPtr = FALSE; - return bufHdr; + /* + * this buffer is not referenced but it might still be dirty (the last + * transaction to touch it doesn't need its contents but has not + * flushed it). if that's the case, write it out before reusing it! 
+ */ + if (bufHdr->flags & BM_DIRTY) + { + Relation bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); + + Assert(bufrel != NULL); + + /* flush this page */ + smgrwrite(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + LocalBufferFlushCount++; + } + + /* + * it's all ours now. + */ + bufHdr->tag.relId.relId = reln->rd_id; + bufHdr->tag.blockNum = blockNum; + bufHdr->flags &= ~BM_DIRTY; + + /* + * lazy memory allocation. (see MAKE_PTR for why we need to do + * MAKE_OFFSET.) + */ + if (bufHdr->data == (SHMEM_OFFSET) 0) + { + char *data = (char *) malloc(BLCKSZ); + + bufHdr->data = MAKE_OFFSET(data); + } + + *foundPtr = FALSE; + return bufHdr; } /* * WriteLocalBuffer - - * writes out a local buffer + * writes out a local buffer */ int WriteLocalBuffer(Buffer buffer, bool release) { - int bufid; + int bufid; - Assert(BufferIsLocal(buffer)); + Assert(BufferIsLocal(buffer)); #ifdef LBDEBUG - fprintf(stderr, "LB WRITE %d\n", buffer); -#endif - - bufid = - (buffer + 1); - LocalBufferDescriptors[bufid].flags |= BM_DIRTY; + fprintf(stderr, "LB WRITE %d\n", buffer); +#endif - if (release) { - Assert(LocalRefCount[bufid] > 0); - LocalRefCount[bufid]--; - } + bufid = -(buffer + 1); + LocalBufferDescriptors[bufid].flags |= BM_DIRTY; + + if (release) + { + Assert(LocalRefCount[bufid] > 0); + LocalRefCount[bufid]--; + } - return true; + return true; } /* * FlushLocalBuffer - - * flushes a local buffer + * flushes a local buffer */ int FlushLocalBuffer(Buffer buffer, bool release) { - int bufid; - Relation bufrel; - BufferDesc *bufHdr; + int bufid; + Relation bufrel; + BufferDesc *bufHdr; - Assert(BufferIsLocal(buffer)); + Assert(BufferIsLocal(buffer)); #ifdef LBDEBUG - fprintf(stderr, "LB FLUSH %d\n", buffer); -#endif - - bufid = - (buffer + 1); - bufHdr = &LocalBufferDescriptors[bufid]; - bufHdr->flags &= ~BM_DIRTY; - bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); - - Assert(bufrel != NULL); - smgrflush(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - LocalBufferFlushCount++; - - Assert(LocalRefCount[bufid] > 0); - if ( release ) - LocalRefCount[bufid]--; - - return true; + fprintf(stderr, "LB FLUSH %d\n", buffer); +#endif + + bufid = -(buffer + 1); + bufHdr = &LocalBufferDescriptors[bufid]; + bufHdr->flags &= ~BM_DIRTY; + bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); + + Assert(bufrel != NULL); + smgrflush(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + LocalBufferFlushCount++; + + Assert(LocalRefCount[bufid] > 0); + if (release) + LocalRefCount[bufid]--; + + return true; } /* * InitLocalBuffer - - * init the local buffer cache. Since most queries (esp. multi-user ones) - * don't involve local buffers, we delay allocating memory for actual the - * buffer until we need it. + * init the local buffer cache. Since most queries (esp. multi-user ones) + * don't involve local buffers, we delay allocating memory for actual the + * buffer until we need it. */ void InitLocalBuffer(void) { - int i; - - /* - * these aren't going away. I'm not gonna use palloc. - */ - LocalBufferDescriptors = - (BufferDesc *)malloc(sizeof(BufferDesc) * NLocBuffer); - memset(LocalBufferDescriptors, 0, sizeof(BufferDesc) * NLocBuffer); - nextFreeLocalBuf = 0; - - for (i = 0; i < NLocBuffer; i++) { - BufferDesc *buf = &LocalBufferDescriptors[i]; + int i; /* - * negative to indicate local buffer. This is tricky: shared buffers - * start with 0. 
We have to start with -2. (Note that the routine - * BufferDescriptorGetBuffer adds 1 to buf_id so our first buffer id - * is -1.) + * these aren't going away. I'm not gonna use palloc. */ - buf->buf_id = - i - 2; - } + LocalBufferDescriptors = + (BufferDesc *) malloc(sizeof(BufferDesc) * NLocBuffer); + memset(LocalBufferDescriptors, 0, sizeof(BufferDesc) * NLocBuffer); + nextFreeLocalBuf = 0; + + for (i = 0; i < NLocBuffer; i++) + { + BufferDesc *buf = &LocalBufferDescriptors[i]; + + /* + * negative to indicate local buffer. This is tricky: shared + * buffers start with 0. We have to start with -2. (Note that the + * routine BufferDescriptorGetBuffer adds 1 to buf_id so our first + * buffer id is -1.) + */ + buf->buf_id = -i - 2; + } - LocalRefCount = - (long *)malloc(sizeof(long) * NLocBuffer); - memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); + LocalRefCount = + (long *) malloc(sizeof(long) * NLocBuffer); + memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); } /* * LocalBufferSync - - * flush all dirty buffers in the local buffer cache. Since the buffer - * cache is only used for keeping relations visible during a transaction, - * we will not need these buffers again. + * flush all dirty buffers in the local buffer cache. Since the buffer + * cache is only used for keeping relations visible during a transaction, + * we will not need these buffers again. */ void LocalBufferSync(void) { - int i; - - for (i = 0; i < NLocBuffer; i++) { - BufferDesc *buf = &LocalBufferDescriptors[i]; - Relation bufrel; + int i; + + for (i = 0; i < NLocBuffer; i++) + { + BufferDesc *buf = &LocalBufferDescriptors[i]; + Relation bufrel; - if (buf->flags & BM_DIRTY) { + if (buf->flags & BM_DIRTY) + { #ifdef LBDEBUG - fprintf(stderr, "LB SYNC %d\n", -i-1); -#endif - bufrel = RelationIdCacheGetRelation(buf->tag.relId.relId); - - Assert(bufrel != NULL); - - smgrwrite(bufrel->rd_rel->relsmgr, bufrel, buf->tag.blockNum, - (char *) MAKE_PTR(buf->data)); - LocalBufferFlushCount++; - - buf->tag.relId.relId = InvalidOid; - buf->flags &= ~BM_DIRTY; + fprintf(stderr, "LB SYNC %d\n", -i - 1); +#endif + bufrel = RelationIdCacheGetRelation(buf->tag.relId.relId); + + Assert(bufrel != NULL); + + smgrwrite(bufrel->rd_rel->relsmgr, bufrel, buf->tag.blockNum, + (char *) MAKE_PTR(buf->data)); + LocalBufferFlushCount++; + + buf->tag.relId.relId = InvalidOid; + buf->flags &= ~BM_DIRTY; + } } - } - memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); - nextFreeLocalBuf = 0; + memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); + nextFreeLocalBuf = 0; } void ResetLocalBufferPool(void) { - int i; + int i; - for (i = 0; i < NLocBuffer; i++) - { - BufferDesc *buf = &LocalBufferDescriptors[i]; + for (i = 0; i < NLocBuffer; i++) + { + BufferDesc *buf = &LocalBufferDescriptors[i]; - buf->tag.relId.relId = InvalidOid; - buf->flags &= ~BM_DIRTY; - buf->buf_id = - i - 2; - } + buf->tag.relId.relId = InvalidOid; + buf->flags &= ~BM_DIRTY; + buf->buf_id = -i - 2; + } - memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); - nextFreeLocalBuf = 0; + memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); + nextFreeLocalBuf = 0; } |
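The shared freelist touched by UnpinBuffer, GetFreeBuffer and InitFreeList above is a circular doubly linked list threaded through the descriptor array by index, with a dummy descriptor serving as the list head. The following is only a minimal standalone sketch of that index-linked scheme, not the PostgreSQL code itself: the type Desc, the array descs, and the insertion point chosen in unpin_buffer are simplifying assumptions made for illustration.

#include <assert.h>
#include <stdio.h>

#define NBUFS 8
#define FREELIST_HEAD NBUFS            /* dummy descriptor used as list head */
#define INVALID_DESCRIPTOR (-1)

typedef struct
{
	int			freeNext;              /* index of next free descriptor */
	int			freePrev;              /* index of previous free descriptor */
	int			refcount;
} Desc;

static Desc descs[NBUFS + 1];          /* last slot is the dummy head */

/* Link every descriptor (plus the dummy head) into one circular queue. */
static void
init_freelist(void)
{
	for (int i = 0; i <= NBUFS; i++)
	{
		descs[i].freeNext = (i + 1) % (NBUFS + 1);
		descs[i].freePrev = (i - 1 + NBUFS + 1) % (NBUFS + 1);
		descs[i].refcount = 0;
	}
}

/* Take the descriptor after the head; fail if only the head remains. */
static Desc *
get_free_buffer(void)
{
	int			b = descs[FREELIST_HEAD].freeNext;

	if (b == FREELIST_HEAD)
		return NULL;                   /* every buffer is pinned */

	/* unlink b from the circular queue, as GetFreeBuffer does above */
	descs[descs[b].freeNext].freePrev = descs[b].freePrev;
	descs[descs[b].freePrev].freeNext = descs[b].freeNext;
	descs[b].freeNext = descs[b].freePrev = INVALID_DESCRIPTOR;
	return &descs[b];
}

/* Drop a pin; when the count reaches zero, relink before the head. */
static void
unpin_buffer(Desc * buf)
{
	assert(buf->refcount > 0);
	if (--buf->refcount == 0)
	{
		int			b = (int) (buf - descs);

		buf->freePrev = descs[FREELIST_HEAD].freePrev;
		buf->freeNext = FREELIST_HEAD;
		descs[buf->freePrev].freeNext = b;
		descs[FREELIST_HEAD].freePrev = b;
	}
}

int
main(void)
{
	init_freelist();
	Desc	   *d = get_free_buffer();

	d->refcount = 1;
	unpin_buffer(d);
	printf("freelist demo ok\n");
	return 0;
}

Because the links are array indices rather than pointers, the same structure works unchanged in shared memory, which is why the real descriptors carry freeNext/freePrev fields instead of pointers.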
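LocalBufferAlloc above first does a linear probe for an existing (relation, block) entry and otherwise reuses the next unpinned slot round robin, writing the old contents out first if that slot is still dirty. Below is a toy sketch of that search order under simplified, assumed types (LocalDesc, lbuf, write_page_stub are invented for the example; relid 0 stands in for InvalidOid, and the smgr write is reduced to a stub).

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NLOCBUF 4
#define BM_DIRTY 0x01

typedef struct
{
	int			relid;
	int			blocknum;
	int			flags;
	int			buf_id;                /* -2, -3, ... for local buffers */
	long		refcount;
} LocalDesc;

static LocalDesc lbuf[NLOCBUF];
static int	nextFree = 0;

static void
write_page_stub(const LocalDesc * d)
{
	printf("flush rel %d block %d\n", d->relid, d->blocknum);
}

static void
init_local_buffers(void)
{
	memset(lbuf, 0, sizeof(lbuf));
	for (int i = 0; i < NLOCBUF; i++)
		lbuf[i].buf_id = -i - 2;       /* negative ids mark local buffers */
}

static LocalDesc *
local_buffer_alloc(int relid, int blocknum, bool * found)
{
	/* low-tech search for an existing entry, as in the original */
	for (int i = 0; i < NLOCBUF; i++)
		if (lbuf[i].relid == relid && lbuf[i].blocknum == blocknum)
		{
			lbuf[i].refcount++;
			*found = true;
			return &lbuf[i];
		}

	/* otherwise reuse the next unpinned slot, round robin */
	for (int i = 0; i < NLOCBUF; i++)
	{
		int			b = (nextFree + i) % NLOCBUF;

		if (lbuf[b].refcount == 0)
		{
			if (lbuf[b].flags & BM_DIRTY)
				write_page_stub(&lbuf[b]);	/* old contents hit disk first */
			lbuf[b].relid = relid;
			lbuf[b].blocknum = blocknum;
			lbuf[b].flags &= ~BM_DIRTY;
			lbuf[b].refcount = 1;
			nextFree = (b + 1) % NLOCBUF;
			*found = false;
			return &lbuf[b];
		}
	}
	return NULL;                       /* every local buffer is pinned */
}

int
main(void)
{
	bool		found;

	init_local_buffers();
	LocalDesc  *d = local_buffer_alloc(1000, 0, &found);

	printf("buf_id=%d found=%d\n", d->buf_id, (int) found);
	return 0;
}

The point of the write-before-reuse branch is the same as in the original comment: an unreferenced slot can still be dirty, because the transaction that last touched it marked the page dirty without flushing it.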
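WriteLocalBuffer and FlushLocalBuffer recover the array index from a negative buffer number with bufid = -(buffer + 1); combined with buf_id = -i - 2 in InitLocalBuffer and the +1 applied by BufferDescriptorGetBuffer, slot i round-trips through buffer number -(i + 1). A few standalone lines to check that arithmetic (names are local to the example):

#include <assert.h>
#include <stdio.h>

int
main(void)
{
	for (int i = 0; i < 64; i++)
	{
		int			buf_id = -i - 2;   /* stored in the local descriptor */
		int			buffer = buf_id + 1;	/* what BufferDescriptorGetBuffer yields */
		int			bufid = -(buffer + 1);	/* index recovered in WriteLocalBuffer */

		assert(buffer < 0);            /* negative buffer number means "local" */
		assert(bufid == i);            /* round-trips back to the array slot */
	}
	printf("local buffer numbering round-trips\n");
	return 0;
}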