author | Bruce Momjian <bruce@momjian.us> | 1997-09-07 05:04:48 +0000
---|---|---
committer | Bruce Momjian <bruce@momjian.us> | 1997-09-07 05:04:48 +0000
commit | 1ccd423235a48739d6f7a4d7889705b5f9ecc69b (patch) |
tree | 8001c4e839dfad8f29ceda7f8c5f5dbb8759b564 | /src/backend/storage
parent | 8fecd4febf8357f3cc20383ed29ced484877d5ac (diff) |
download | postgresql-1ccd423235a48739d6f7a4d7889705b5f9ecc69b.tar.gz | postgresql-1ccd423235a48739d6f7a4d7889705b5f9ecc69b.zip
Massive commit to run PGINDENT on all *.c and *.h files.
Diffstat (limited to 'src/backend/storage')
26 files changed, 10162 insertions, 9462 deletions
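The hunks below are almost entirely mechanical re-indentation rather than functional change, which makes the 10162-insertion/9462-deletion diffstat less alarming than it looks. As a rough illustration of the layout pgindent produces (a hypothetical fragment, not code taken from this commit): braces move onto their own lines, packed declarations are split one name per line, and comment blocks are re-wrapped.

```c
/*
 * Hypothetical before/after fragment showing the pgindent layout seen
 * throughout the hunks below; it is not taken from this commit.
 */

/* before: K&R-style braces, several declarators per line */
static int
old_style(int found, int hits, int reads) {
    int h = hits, r = reads;
    if (found) {
        h++;
    } else {
        r++;
    }
    return h + r;
}

/* after: braces on their own lines, one declaration per line */
static int
new_style(int found, int hits, int reads)
{
	int			h = hits;
	int			r = reads;

	if (found)
	{
		h++;
	}
	else
	{
		r++;
	}
	return h + r;
}
```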
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 20f8195d1e9..4ce064d6713 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * buf_init.c-- - * buffer manager initialization routines + * buffer manager initialization routines * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.10 1997/07/28 00:54:33 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.11 1997/09/07 04:48:15 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -35,98 +35,103 @@ #include "utils/dynahash.h" #include "utils/hsearch.h" #include "utils/memutils.h" -#include "executor/execdebug.h" /* for NDirectFileRead */ +#include "executor/execdebug.h" /* for NDirectFileRead */ #include "catalog/catalog.h" /* - * if BMTRACE is defined, we trace the last 200 buffer allocations and - * deallocations in a circular buffer in shared memory. + * if BMTRACE is defined, we trace the last 200 buffer allocations and + * deallocations in a circular buffer in shared memory. */ #ifdef BMTRACE -bmtrace *TraceBuf; -long *CurTraceBuf; -#define BMT_LIMIT 200 -#endif /* BMTRACE */ -int ShowPinTrace = 0; - -int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */ -int Data_Descriptors; -int Free_List_Descriptor; -int Lookup_List_Descriptor; -int Num_Descriptors; - -BufferDesc *BufferDescriptors; -BufferBlock BufferBlocks; +bmtrace *TraceBuf; +long *CurTraceBuf; + +#define BMT_LIMIT 200 +#endif /* BMTRACE */ +int ShowPinTrace = 0; + +int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */ +int Data_Descriptors; +int Free_List_Descriptor; +int Lookup_List_Descriptor; +int Num_Descriptors; + +BufferDesc *BufferDescriptors; +BufferBlock BufferBlocks; + #ifndef HAS_TEST_AND_SET -long *NWaitIOBackendP; +long *NWaitIOBackendP; + #endif -extern IpcSemaphoreId WaitIOSemId; +extern IpcSemaphoreId WaitIOSemId; + +long *PrivateRefCount;/* also used in freelist.c */ +long *LastRefCount; /* refcounts of last ExecMain level */ +long *CommitInfoNeedsSave; /* to write buffers where we have + * filled in */ -long *PrivateRefCount; /* also used in freelist.c */ -long *LastRefCount; /* refcounts of last ExecMain level */ -long *CommitInfoNeedsSave; /* to write buffers where we have filled in */ - /* t_tmin (or t_tmax) */ + /* t_tmin (or t_tmax) */ /* * Data Structures: - * buffers live in a freelist and a lookup data structure. - * + * buffers live in a freelist and a lookup data structure. + * * * Buffer Lookup: - * Two important notes. First, the buffer has to be - * available for lookup BEFORE an IO begins. Otherwise - * a second process trying to read the buffer will - * allocate its own copy and the buffeer pool will - * become inconsistent. + * Two important notes. First, the buffer has to be + * available for lookup BEFORE an IO begins. Otherwise + * a second process trying to read the buffer will + * allocate its own copy and the buffeer pool will + * become inconsistent. * * Buffer Replacement: - * see freelist.c. A buffer cannot be replaced while in - * use either by data manager or during IO. + * see freelist.c. A buffer cannot be replaced while in + * use either by data manager or during IO. 
* * WriteBufferBack: - * currently, a buffer is only written back at the time - * it is selected for replacement. It should - * be done sooner if possible to reduce latency of - * BufferAlloc(). Maybe there should be a daemon process. + * currently, a buffer is only written back at the time + * it is selected for replacement. It should + * be done sooner if possible to reduce latency of + * BufferAlloc(). Maybe there should be a daemon process. * * Synchronization/Locking: * - * BufMgrLock lock -- must be acquired before manipulating the - * buffer queues (lookup/freelist). Must be released - * before exit and before doing any IO. + * BufMgrLock lock -- must be acquired before manipulating the + * buffer queues (lookup/freelist). Must be released + * before exit and before doing any IO. * * IO_IN_PROGRESS -- this is a flag in the buffer descriptor. - * It must be set when an IO is initiated and cleared at - * the end of the IO. It is there to make sure that one - * process doesn't start to use a buffer while another is - * faulting it in. see IOWait/IOSignal. + * It must be set when an IO is initiated and cleared at + * the end of the IO. It is there to make sure that one + * process doesn't start to use a buffer while another is + * faulting it in. see IOWait/IOSignal. * - * refcount -- A buffer is pinned during IO and immediately - * after a BufferAlloc(). A buffer is always either pinned - * or on the freelist but never both. The buffer must be - * released, written, or flushed before the end of - * transaction. + * refcount -- A buffer is pinned during IO and immediately + * after a BufferAlloc(). A buffer is always either pinned + * or on the freelist but never both. The buffer must be + * released, written, or flushed before the end of + * transaction. * * PrivateRefCount -- Each buffer also has a private refcount the keeps - * track of the number of times the buffer is pinned in the current - * processes. This is used for two purposes, first, if we pin a - * a buffer more than once, we only need to change the shared refcount - * once, thus only lock the buffer pool once, second, when a transaction - * aborts, it should only unpin the buffers exactly the number of times it - * has pinned them, so that it will not blow away buffers of another - * backend. + * track of the number of times the buffer is pinned in the current + * processes. This is used for two purposes, first, if we pin a + * a buffer more than once, we only need to change the shared refcount + * once, thus only lock the buffer pool once, second, when a transaction + * aborts, it should only unpin the buffers exactly the number of times it + * has pinned them, so that it will not blow away buffers of another + * backend. 
* */ -SPINLOCK BufMgrLock; +SPINLOCK BufMgrLock; -long int ReadBufferCount; -long int ReadLocalBufferCount; -long int BufferHitCount; -long int LocalBufferHitCount; -long int BufferFlushCount; -long int LocalBufferFlushCount; +long int ReadBufferCount; +long int ReadLocalBufferCount; +long int BufferHitCount; +long int LocalBufferHitCount; +long int BufferFlushCount; +long int LocalBufferFlushCount; /* @@ -138,111 +143,121 @@ long int LocalBufferFlushCount; void InitBufferPool(IPCKey key) { - bool foundBufs,foundDescs; - int i; - - /* check padding of BufferDesc and BufferHdr */ - /* we need both checks because a sbufdesc_padded > PADDED_SBUFDESC_SIZE - will shrink sbufdesc to the required size, which is bad */ - if (sizeof(struct sbufdesc) != PADDED_SBUFDESC_SIZE || - sizeof(struct sbufdesc_unpadded) > PADDED_SBUFDESC_SIZE) - elog(WARN,"Internal error: sbufdesc does not have the proper size, " - "contact the Postgres developers"); - if (sizeof(struct sbufdesc_unpadded) <= PADDED_SBUFDESC_SIZE/2) - elog(WARN,"Internal error: sbufdesc is greatly over-sized, " - "contact the Postgres developers"); - - Data_Descriptors = NBuffers; - Free_List_Descriptor = Data_Descriptors; - Lookup_List_Descriptor = Data_Descriptors + 1; - Num_Descriptors = Data_Descriptors + 1; - - SpinAcquire(BufMgrLock); - + bool foundBufs, + foundDescs; + int i; + + /* check padding of BufferDesc and BufferHdr */ + + /* + * we need both checks because a sbufdesc_padded > + * PADDED_SBUFDESC_SIZE will shrink sbufdesc to the required size, + * which is bad + */ + if (sizeof(struct sbufdesc) != PADDED_SBUFDESC_SIZE || + sizeof(struct sbufdesc_unpadded) > PADDED_SBUFDESC_SIZE) + elog(WARN, "Internal error: sbufdesc does not have the proper size, " + "contact the Postgres developers"); + if (sizeof(struct sbufdesc_unpadded) <= PADDED_SBUFDESC_SIZE / 2) + elog(WARN, "Internal error: sbufdesc is greatly over-sized, " + "contact the Postgres developers"); + + Data_Descriptors = NBuffers; + Free_List_Descriptor = Data_Descriptors; + Lookup_List_Descriptor = Data_Descriptors + 1; + Num_Descriptors = Data_Descriptors + 1; + + SpinAcquire(BufMgrLock); + #ifdef BMTRACE - CurTraceBuf = (long *) ShmemInitStruct("Buffer trace", - (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long), - &foundDescs); - if (!foundDescs) - memset(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long)); - - TraceBuf = (bmtrace *) &(CurTraceBuf[1]); + CurTraceBuf = (long *) ShmemInitStruct("Buffer trace", + (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long), + &foundDescs); + if (!foundDescs) + memset(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long)); + + TraceBuf = (bmtrace *) & (CurTraceBuf[1]); #endif - - BufferDescriptors = (BufferDesc *) - ShmemInitStruct("Buffer Descriptors", - Num_Descriptors*sizeof(BufferDesc),&foundDescs); - - BufferBlocks = (BufferBlock) - ShmemInitStruct("Buffer Blocks", - NBuffers*BLCKSZ,&foundBufs); - + + BufferDescriptors = (BufferDesc *) + ShmemInitStruct("Buffer Descriptors", + Num_Descriptors * sizeof(BufferDesc), &foundDescs); + + BufferBlocks = (BufferBlock) + ShmemInitStruct("Buffer Blocks", + NBuffers * BLCKSZ, &foundBufs); + #ifndef HAS_TEST_AND_SET - { - bool foundNWaitIO; - - NWaitIOBackendP = (long *)ShmemInitStruct("#Backends Waiting IO", - sizeof(long), - &foundNWaitIO); - if (!foundNWaitIO) - *NWaitIOBackendP = 0; - } + { + bool foundNWaitIO; + + NWaitIOBackendP = (long *) ShmemInitStruct("#Backends Waiting IO", + sizeof(long), + &foundNWaitIO); + if (!foundNWaitIO) + *NWaitIOBackendP = 0; + } #endif - - if 
(foundDescs || foundBufs) { - - /* both should be present or neither */ - Assert(foundDescs && foundBufs); - - } else { - BufferDesc *buf; - unsigned long block; - - buf = BufferDescriptors; - block = (unsigned long) BufferBlocks; - - /* - * link the buffers into a circular, doubly-linked list to - * initialize free list. Still don't know anything about - * replacement strategy in this file. - */ - for (i = 0; i < Data_Descriptors; block+=BLCKSZ,buf++,i++) { - Assert(ShmemIsValid((unsigned long)block)); - - buf->freeNext = i+1; - buf->freePrev = i-1; - - CLEAR_BUFFERTAG(&(buf->tag)); - buf->data = MAKE_OFFSET(block); - buf->flags = (BM_DELETED | BM_FREE | BM_VALID); - buf->refcount = 0; - buf->buf_id = i; + + if (foundDescs || foundBufs) + { + + /* both should be present or neither */ + Assert(foundDescs && foundBufs); + + } + else + { + BufferDesc *buf; + unsigned long block; + + buf = BufferDescriptors; + block = (unsigned long) BufferBlocks; + + /* + * link the buffers into a circular, doubly-linked list to + * initialize free list. Still don't know anything about + * replacement strategy in this file. + */ + for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++) + { + Assert(ShmemIsValid((unsigned long) block)); + + buf->freeNext = i + 1; + buf->freePrev = i - 1; + + CLEAR_BUFFERTAG(&(buf->tag)); + buf->data = MAKE_OFFSET(block); + buf->flags = (BM_DELETED | BM_FREE | BM_VALID); + buf->refcount = 0; + buf->buf_id = i; #ifdef HAS_TEST_AND_SET - S_INIT_LOCK(&(buf->io_in_progress_lock)); + S_INIT_LOCK(&(buf->io_in_progress_lock)); #endif + } + + /* close the circular queue */ + BufferDescriptors[0].freePrev = Data_Descriptors - 1; + BufferDescriptors[Data_Descriptors - 1].freeNext = 0; } - - /* close the circular queue */ - BufferDescriptors[0].freePrev = Data_Descriptors-1; - BufferDescriptors[Data_Descriptors-1].freeNext = 0; - } - - /* Init the rest of the module */ - InitBufTable(); - InitFreeList(!foundDescs); - - SpinRelease(BufMgrLock); - + + /* Init the rest of the module */ + InitBufTable(); + InitFreeList(!foundDescs); + + SpinRelease(BufMgrLock); + #ifndef HAS_TEST_AND_SET - { - int status; - WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key), - 1, IPCProtection, 0, 1, &status); - } + { + int status; + + WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key), + 1, IPCProtection, 0, 1, &status); + } #endif - PrivateRefCount = (long *) calloc(NBuffers, sizeof(long)); - LastRefCount = (long *) calloc(NBuffers, sizeof(long)); - CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long)); + PrivateRefCount = (long *) calloc(NBuffers, sizeof(long)); + LastRefCount = (long *) calloc(NBuffers, sizeof(long)); + CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long)); } /* ----------------------------------------------------- @@ -255,43 +270,41 @@ InitBufferPool(IPCKey key) int BufferShmemSize() { - int size = 0; - int nbuckets; - int nsegs; - int tmp; - - nbuckets = 1 << (int)my_log2((NBuffers - 1) / DEF_FFACTOR + 1); - nsegs = 1 << (int)my_log2((nbuckets - 1) / DEF_SEGSIZE + 1); - - /* size of shmem binding table */ - size += MAXALIGN(my_log2(BTABLE_SIZE) * sizeof(void *)); /* HTAB->dir */ - size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */ - size += MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); - size += BUCKET_ALLOC_INCR * - (MAXALIGN(sizeof(BUCKET_INDEX)) + - MAXALIGN(BTABLE_KEYSIZE) + - MAXALIGN(BTABLE_DATASIZE)); - - /* size of buffer descriptors */ - size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc)); - - /* size of data pages */ - size += 
NBuffers * MAXALIGN(BLCKSZ); - - /* size of buffer hash table */ - size += MAXALIGN(my_log2(NBuffers) * sizeof(void *)); /* HTAB->dir */ - size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */ - size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); - tmp = (int)ceil((double)NBuffers/BUCKET_ALLOC_INCR); - size += tmp * BUCKET_ALLOC_INCR * - (MAXALIGN(sizeof(BUCKET_INDEX)) + - MAXALIGN(sizeof(BufferTag)) + - MAXALIGN(sizeof(Buffer))); - + int size = 0; + int nbuckets; + int nsegs; + int tmp; + + nbuckets = 1 << (int) my_log2((NBuffers - 1) / DEF_FFACTOR + 1); + nsegs = 1 << (int) my_log2((nbuckets - 1) / DEF_SEGSIZE + 1); + + /* size of shmem binding table */ + size += MAXALIGN(my_log2(BTABLE_SIZE) * sizeof(void *)); /* HTAB->dir */ + size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */ + size += MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); + size += BUCKET_ALLOC_INCR * + (MAXALIGN(sizeof(BUCKET_INDEX)) + + MAXALIGN(BTABLE_KEYSIZE) + + MAXALIGN(BTABLE_DATASIZE)); + + /* size of buffer descriptors */ + size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc)); + + /* size of data pages */ + size += NBuffers * MAXALIGN(BLCKSZ); + + /* size of buffer hash table */ + size += MAXALIGN(my_log2(NBuffers) * sizeof(void *)); /* HTAB->dir */ + size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */ + size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); + tmp = (int) ceil((double) NBuffers / BUCKET_ALLOC_INCR); + size += tmp * BUCKET_ALLOC_INCR * + (MAXALIGN(sizeof(BUCKET_INDEX)) + + MAXALIGN(sizeof(BufferTag)) + + MAXALIGN(sizeof(Buffer))); + #ifdef BMTRACE - size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long); + size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long); #endif - return size; + return size; } - - diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c index 61e365ce55e..41b2b4d8ee0 100644 --- a/src/backend/storage/buffer/buf_table.c +++ b/src/backend/storage/buffer/buf_table.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * buf_table.c-- - * routines for finding buffers in the buffer pool. + * routines for finding buffers in the buffer pool. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.4 1997/08/19 21:32:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.5 1997/09/07 04:48:17 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -16,30 +16,31 @@ * * Data Structures: * - * Buffers are identified by their BufferTag (buf.h). This + * Buffers are identified by their BufferTag (buf.h). This * file contains routines for allocating a shmem hash table to * map buffer tags to buffer descriptors. * * Synchronization: - * - * All routines in this file assume buffer manager spinlock is - * held by their caller. + * + * All routines in this file assume buffer manager spinlock is + * held by their caller. 
*/ #include "postgres.h" #include "storage/bufmgr.h" -#include "storage/buf_internals.h" /* where the declarations go */ +#include "storage/buf_internals.h" /* where the declarations go */ #include "storage/shmem.h" #include "storage/spin.h" #include "utils/hsearch.h" -static HTAB *SharedBufHash; +static HTAB *SharedBufHash; -typedef struct lookup { - BufferTag key; - Buffer id; -} LookupEnt; +typedef struct lookup +{ + BufferTag key; + Buffer id; +} LookupEnt; /* * Initialize shmem hash table for mapping buffers @@ -47,109 +48,116 @@ typedef struct lookup { void InitBufTable() { - HASHCTL info; - int hash_flags; - - /* assume lock is held */ - - /* BufferTag maps to Buffer */ - info.keysize = sizeof(BufferTag); - info.datasize = sizeof(Buffer); - info.hash = tag_hash; - - hash_flags = (HASH_ELEM | HASH_FUNCTION); - - - SharedBufHash = (HTAB *) ShmemInitHash("Shared Buf Lookup Table", - NBuffers,NBuffers, - &info,hash_flags); - - if (! SharedBufHash) { - elog(FATAL,"couldn't initialize shared buffer pool Hash Tbl"); - exit(1); - } - + HASHCTL info; + int hash_flags; + + /* assume lock is held */ + + /* BufferTag maps to Buffer */ + info.keysize = sizeof(BufferTag); + info.datasize = sizeof(Buffer); + info.hash = tag_hash; + + hash_flags = (HASH_ELEM | HASH_FUNCTION); + + + SharedBufHash = (HTAB *) ShmemInitHash("Shared Buf Lookup Table", + NBuffers, NBuffers, + &info, hash_flags); + + if (!SharedBufHash) + { + elog(FATAL, "couldn't initialize shared buffer pool Hash Tbl"); + exit(1); + } + } -BufferDesc * -BufTableLookup(BufferTag *tagPtr) +BufferDesc * +BufTableLookup(BufferTag * tagPtr) { - LookupEnt * result; - bool found; - - if (tagPtr->blockNum == P_NEW) - return(NULL); - - result = (LookupEnt *) - hash_search(SharedBufHash,(char *) tagPtr,HASH_FIND,&found); - - if (! result){ - elog(WARN,"BufTableLookup: BufferLookup table corrupted"); - return(NULL); - } - if (! found) { - return(NULL); - } - return(&(BufferDescriptors[result->id])); + LookupEnt *result; + bool found; + + if (tagPtr->blockNum == P_NEW) + return (NULL); + + result = (LookupEnt *) + hash_search(SharedBufHash, (char *) tagPtr, HASH_FIND, &found); + + if (!result) + { + elog(WARN, "BufTableLookup: BufferLookup table corrupted"); + return (NULL); + } + if (!found) + { + return (NULL); + } + return (&(BufferDescriptors[result->id])); } /* * BufTableDelete */ bool -BufTableDelete(BufferDesc *buf) +BufTableDelete(BufferDesc * buf) { - LookupEnt * result; - bool found; - - /* buffer not initialized or has been removed from - * table already. BM_DELETED keeps us from removing - * buffer twice. - */ - if (buf->flags & BM_DELETED) { - return(TRUE); - } - - buf->flags |= BM_DELETED; - - result = (LookupEnt *) - hash_search(SharedBufHash,(char *) &(buf->tag),HASH_REMOVE,&found); - - if (! (result && found)) { - elog(WARN,"BufTableDelete: BufferLookup table corrupted"); - return(FALSE); - } - - return(TRUE); + LookupEnt *result; + bool found; + + /* + * buffer not initialized or has been removed from table already. + * BM_DELETED keeps us from removing buffer twice. 
+ */ + if (buf->flags & BM_DELETED) + { + return (TRUE); + } + + buf->flags |= BM_DELETED; + + result = (LookupEnt *) + hash_search(SharedBufHash, (char *) &(buf->tag), HASH_REMOVE, &found); + + if (!(result && found)) + { + elog(WARN, "BufTableDelete: BufferLookup table corrupted"); + return (FALSE); + } + + return (TRUE); } bool -BufTableInsert(BufferDesc *buf) +BufTableInsert(BufferDesc * buf) { - LookupEnt * result; - bool found; - - /* cannot insert it twice */ - Assert (buf->flags & BM_DELETED); - buf->flags &= ~(BM_DELETED); - - result = (LookupEnt *) - hash_search(SharedBufHash,(char *) &(buf->tag),HASH_ENTER,&found); - - if (! result) { - Assert(0); - elog(WARN,"BufTableInsert: BufferLookup table corrupted"); - return(FALSE); - } - /* found something else in the table ! */ - if (found) { - Assert(0); - elog(WARN,"BufTableInsert: BufferLookup table corrupted"); - return(FALSE); - } - - result->id = buf->buf_id; - return(TRUE); + LookupEnt *result; + bool found; + + /* cannot insert it twice */ + Assert(buf->flags & BM_DELETED); + buf->flags &= ~(BM_DELETED); + + result = (LookupEnt *) + hash_search(SharedBufHash, (char *) &(buf->tag), HASH_ENTER, &found); + + if (!result) + { + Assert(0); + elog(WARN, "BufTableInsert: BufferLookup table corrupted"); + return (FALSE); + } + /* found something else in the table ! */ + if (found) + { + Assert(0); + elog(WARN, "BufTableInsert: BufferLookup table corrupted"); + return (FALSE); + } + + result->id = buf->buf_id; + return (TRUE); } /* prints out collision stats for the buf table */ @@ -157,8 +165,9 @@ BufTableInsert(BufferDesc *buf) void DBG_LookupListCheck(int nlookup) { - nlookup = 10; - - hash_stats("Shared",SharedBufHash); + nlookup = 10; + + hash_stats("Shared", SharedBufHash); } + #endif diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 466728c4a46..2a53e6bd78c 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1,44 +1,44 @@ /*------------------------------------------------------------------------- * * bufmgr.c-- - * buffer manager interface routines + * buffer manager interface routines * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.19 1997/08/19 21:32:39 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.20 1997/09/07 04:48:19 momjian Exp $ * *------------------------------------------------------------------------- */ /* * * BufferAlloc() -- lookup a buffer in the buffer table. If - * it isn't there add it, but do not read it into memory. - * This is used when we are about to reinitialize the - * buffer so don't care what the current disk contents are. - * BufferAlloc() pins the new buffer in memory. + * it isn't there add it, but do not read it into memory. + * This is used when we are about to reinitialize the + * buffer so don't care what the current disk contents are. + * BufferAlloc() pins the new buffer in memory. * * ReadBuffer() -- same as BufferAlloc() but reads the data - * on a buffer cache miss. + * on a buffer cache miss. * * ReleaseBuffer() -- unpin the buffer * * WriteNoReleaseBuffer() -- mark the buffer contents as "dirty" - * but don't unpin. The disk IO is delayed until buffer - * replacement if WriteMode is BUFFER_LATE_WRITE. + * but don't unpin. The disk IO is delayed until buffer + * replacement if WriteMode is BUFFER_LATE_WRITE. 
* - * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer() + * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer() * * FlushBuffer() -- as above but never delayed write. * * BufferSync() -- flush all dirty buffers in the buffer pool. - * + * * InitBufferPool() -- Init the buffer module. * - * See other files: - * freelist.c -- chooses victim for buffer replacement - * buf_table.c -- manages the buffer lookup table + * See other files: + * freelist.c -- chooses victim for buffer replacement + * buf_table.c -- manages the buffer lookup table */ #include <sys/types.h> #include <sys/file.h> @@ -66,7 +66,7 @@ #include "utils/palloc.h" #include "utils/memutils.h" #include "utils/relcache.h" -#include "executor/execdebug.h" /* for NDirectFileRead */ +#include "executor/execdebug.h" /* for NDirectFileRead */ #include "catalog/catalog.h" extern SPINLOCK BufMgrLock; @@ -77,76 +77,88 @@ extern long int LocalBufferHitCount; extern long int BufferFlushCount; extern long int LocalBufferFlushCount; -static int WriteMode = BUFFER_LATE_WRITE; /* Delayed write is default */ +static int WriteMode = BUFFER_LATE_WRITE; /* Delayed write is + * default */ + +static void WaitIO(BufferDesc * buf, SPINLOCK spinlock); -static void WaitIO(BufferDesc *buf, SPINLOCK spinlock); #ifndef HAS_TEST_AND_SET -static void SignalIO(BufferDesc *buf); -extern long *NWaitIOBackendP; /* defined in buf_init.c */ -#endif /* HAS_TEST_AND_SET */ - -static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum, - bool bufferLockHeld); -static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, - bool *foundPtr, bool bufferLockHeld); -static int FlushBuffer (Buffer buffer, bool release); -static void BufferSync(void); -static int BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld); +static void SignalIO(BufferDesc * buf); +extern long *NWaitIOBackendP;/* defined in buf_init.c */ + +#endif /* HAS_TEST_AND_SET */ + +static Buffer +ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum, + bool bufferLockHeld); +static BufferDesc * +BufferAlloc(Relation reln, BlockNumber blockNum, + bool * foundPtr, bool bufferLockHeld); +static int FlushBuffer(Buffer buffer, bool release); +static void BufferSync(void); +static int BufferReplace(BufferDesc * bufHdr, bool bufferLockHeld); /* --------------------------------------------------- * RelationGetBufferWithBuffer - * see if the given buffer is what we want - * if yes, we don't need to bother the buffer manager + * see if the given buffer is what we want + * if yes, we don't need to bother the buffer manager * --------------------------------------------------- */ Buffer RelationGetBufferWithBuffer(Relation relation, - BlockNumber blockNumber, - Buffer buffer) + BlockNumber blockNumber, + Buffer buffer) { - BufferDesc *bufHdr; - LRelId lrelId; - - if (BufferIsValid(buffer)) { - if (!BufferIsLocal(buffer)) { - bufHdr = &BufferDescriptors[buffer-1]; - lrelId = RelationGetLRelId(relation); - SpinAcquire(BufMgrLock); - if (bufHdr->tag.blockNum == blockNumber && - bufHdr->tag.relId.relId == lrelId.relId && - bufHdr->tag.relId.dbId == lrelId.dbId) { - SpinRelease(BufMgrLock); - return(buffer); - } - return(ReadBufferWithBufferLock(relation, blockNumber, true)); - } else { - bufHdr = &LocalBufferDescriptors[-buffer-1]; - if (bufHdr->tag.relId.relId == relation->rd_id && - bufHdr->tag.blockNum == blockNumber) { - return(buffer); - } + BufferDesc *bufHdr; + LRelId lrelId; + + if (BufferIsValid(buffer)) + { + if (!BufferIsLocal(buffer)) + { + bufHdr = 
&BufferDescriptors[buffer - 1]; + lrelId = RelationGetLRelId(relation); + SpinAcquire(BufMgrLock); + if (bufHdr->tag.blockNum == blockNumber && + bufHdr->tag.relId.relId == lrelId.relId && + bufHdr->tag.relId.dbId == lrelId.dbId) + { + SpinRelease(BufMgrLock); + return (buffer); + } + return (ReadBufferWithBufferLock(relation, blockNumber, true)); + } + else + { + bufHdr = &LocalBufferDescriptors[-buffer - 1]; + if (bufHdr->tag.relId.relId == relation->rd_id && + bufHdr->tag.blockNum == blockNumber) + { + return (buffer); + } + } } - } - return(ReadBuffer(relation, blockNumber)); + return (ReadBuffer(relation, blockNumber)); } /* * ReadBuffer -- returns a buffer containing the requested - * block of the requested relation. If the blknum - * requested is P_NEW, extend the relation file and - * allocate a new block. + * block of the requested relation. If the blknum + * requested is P_NEW, extend the relation file and + * allocate a new block. * * Returns: the buffer number for the buffer containing - * the block read or NULL on an error. + * the block read or NULL on an error. * * Assume when this function is called, that reln has been - * opened already. + * opened already. */ -extern int ShowPinTrace; +extern int ShowPinTrace; -#undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG defined */ +#undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG + * defined */ /* * ReadBuffer -- @@ -155,7 +167,7 @@ extern int ShowPinTrace; Buffer ReadBuffer(Relation reln, BlockNumber blockNum) { - return ReadBufferWithBufferLock(reln, blockNum, false); + return ReadBufferWithBufferLock(reln, blockNum, false); } /* @@ -164,156 +176,176 @@ ReadBuffer(Relation reln, BlockNumber blockNum) * XXX caller must have already acquired BufMgrLock */ #ifdef NOT_USED -static bool +static bool is_userbuffer(Buffer buffer) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - if (IsSystemRelationName(buf->sb_relname)) - return false; - return true; + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + if (IsSystemRelationName(buf->sb_relname)) + return false; + return true; } + #endif #ifdef NOT_USED Buffer ReadBuffer_Debug(char *file, - int line, - Relation reln, - BlockNumber blockNum) + int line, + Relation reln, + BlockNumber blockNum) { - Buffer buffer; - - buffer = ReadBufferWithBufferLock(reln, blockNum, false); - if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \ + Buffer buffer; + + buffer = ReadBufferWithBufferLock(reln, blockNum, false); + if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } - return buffer; + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } + return buffer; } + #endif /* - * ReadBufferWithBufferLock -- does the work of - * ReadBuffer() but with the possibility that - * the buffer lock has already been held. this - * is yet another effort to reduce the number of - * semops in the system. + * ReadBufferWithBufferLock -- does the work of + * ReadBuffer() but with the possibility that + * the buffer lock has already been held. this + * is yet another effort to reduce the number of + * semops in the system. 
*/ -static Buffer +static Buffer ReadBufferWithBufferLock(Relation reln, - BlockNumber blockNum, - bool bufferLockHeld) + BlockNumber blockNum, + bool bufferLockHeld) { - BufferDesc *bufHdr; - int extend; /* extending the file by one block */ - int status; - bool found; - bool isLocalBuf; - - extend = (blockNum == P_NEW); - isLocalBuf = reln->rd_islocal; - - if (isLocalBuf) { - ReadLocalBufferCount++; - bufHdr = LocalBufferAlloc(reln, blockNum, &found); - if (found) LocalBufferHitCount++; - } else { - ReadBufferCount++; - - /* lookup the buffer. IO_IN_PROGRESS is set if the requested - * block is not currently in memory. - */ - bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld); - if (found) BufferHitCount++; - } - - if (!bufHdr) { - return(InvalidBuffer); - } - - /* if its already in the buffer pool, we're done */ - if (found) { + BufferDesc *bufHdr; + int extend; /* extending the file by one block */ + int status; + bool found; + bool isLocalBuf; + + extend = (blockNum == P_NEW); + isLocalBuf = reln->rd_islocal; + + if (isLocalBuf) + { + ReadLocalBufferCount++; + bufHdr = LocalBufferAlloc(reln, blockNum, &found); + if (found) + LocalBufferHitCount++; + } + else + { + ReadBufferCount++; + + /* + * lookup the buffer. IO_IN_PROGRESS is set if the requested + * block is not currently in memory. + */ + bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld); + if (found) + BufferHitCount++; + } + + if (!bufHdr) + { + return (InvalidBuffer); + } + + /* if its already in the buffer pool, we're done */ + if (found) + { + + /* + * This happens when a bogus buffer was returned previously and is + * floating around in the buffer pool. A routine calling this + * would want this extended. + */ + if (extend) + { + /* new buffers are zero-filled */ + memset((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); + smgrextend(bufHdr->bufsmgr, reln, + (char *) MAKE_PTR(bufHdr->data)); + } + return (BufferDescriptorGetBuffer(bufHdr)); + + } + /* - * This happens when a bogus buffer was returned previously and is - * floating around in the buffer pool. A routine calling this would - * want this extended. + * if we have gotten to this point, the reln pointer must be ok and + * the relation file must be open. */ - if (extend) { - /* new buffers are zero-filled */ - memset((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); - smgrextend(bufHdr->bufsmgr, reln, - (char *) MAKE_PTR(bufHdr->data)); + if (extend) + { + /* new buffers are zero-filled */ + memset((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); + status = smgrextend(bufHdr->bufsmgr, reln, + (char *) MAKE_PTR(bufHdr->data)); } - return (BufferDescriptorGetBuffer(bufHdr)); - - } - - /* - * if we have gotten to this point, the reln pointer must be ok - * and the relation file must be open. - */ - if (extend) { - /* new buffers are zero-filled */ - memset((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); - status = smgrextend(bufHdr->bufsmgr, reln, - (char *) MAKE_PTR(bufHdr->data)); - } else { - status = smgrread(bufHdr->bufsmgr, reln, blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - - if (isLocalBuf) - return (BufferDescriptorGetBuffer(bufHdr)); + else + { + status = smgrread(bufHdr->bufsmgr, reln, blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } + + if (isLocalBuf) + return (BufferDescriptorGetBuffer(bufHdr)); + + /* lock buffer manager again to update IO IN PROGRESS */ + SpinAcquire(BufMgrLock); + + if (status == SM_FAIL) + { + /* IO Failed. 
cleanup the data structures and go home */ + + if (!BufTableDelete(bufHdr)) + { + SpinRelease(BufMgrLock); + elog(FATAL, "BufRead: buffer table broken after IO error\n"); + } + /* remember that BufferAlloc() pinned the buffer */ + UnpinBuffer(bufHdr); - /* lock buffer manager again to update IO IN PROGRESS */ - SpinAcquire(BufMgrLock); - - if (status == SM_FAIL) { - /* IO Failed. cleanup the data structures and go home */ - - if (! BufTableDelete(bufHdr)) { - SpinRelease(BufMgrLock); - elog(FATAL,"BufRead: buffer table broken after IO error\n"); + /* + * Have to reset the flag so that anyone waiting for the buffer + * can tell that the contents are invalid. + */ + bufHdr->flags |= BM_IO_ERROR; + bufHdr->flags &= ~BM_IO_IN_PROGRESS; } - /* remember that BufferAlloc() pinned the buffer */ - UnpinBuffer(bufHdr); - - /* - * Have to reset the flag so that anyone waiting for - * the buffer can tell that the contents are invalid. - */ - bufHdr->flags |= BM_IO_ERROR; - bufHdr->flags &= ~BM_IO_IN_PROGRESS; - } else { - /* IO Succeeded. clear the flags, finish buffer update */ - - bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); - } - - /* If anyone was waiting for IO to complete, wake them up now */ + else + { + /* IO Succeeded. clear the flags, finish buffer update */ + + bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); + } + + /* If anyone was waiting for IO to complete, wake them up now */ #ifdef HAS_TEST_AND_SET - S_UNLOCK(&(bufHdr->io_in_progress_lock)); + S_UNLOCK(&(bufHdr->io_in_progress_lock)); #else - if (bufHdr->refcount > 1) - SignalIO(bufHdr); + if (bufHdr->refcount > 1) + SignalIO(bufHdr); #endif - - SpinRelease(BufMgrLock); - - if (status == SM_FAIL) - return(InvalidBuffer); - - return(BufferDescriptorGetBuffer(bufHdr)); + + SpinRelease(BufMgrLock); + + if (status == SM_FAIL) + return (InvalidBuffer); + + return (BufferDescriptorGetBuffer(bufHdr)); } /* * BufferAlloc -- Get a buffer from the buffer pool but dont - * read it. + * read it. * * Returns: descriptor for buffer * @@ -321,321 +353,339 @@ ReadBufferWithBufferLock(Relation reln, */ static BufferDesc * BufferAlloc(Relation reln, - BlockNumber blockNum, - bool *foundPtr, - bool bufferLockHeld) + BlockNumber blockNum, + bool * foundPtr, + bool bufferLockHeld) { - BufferDesc *buf, *buf2; - BufferTag newTag; /* identity of requested block */ - bool inProgress; /* buffer undergoing IO */ - bool newblock = FALSE; - - /* create a new tag so we can lookup the buffer */ - /* assume that the relation is already open */ - if (blockNum == P_NEW) { - newblock = TRUE; - blockNum = smgrnblocks(reln->rd_rel->relsmgr, reln); - } - - INIT_BUFFERTAG(&newTag,reln,blockNum); - - if (!bufferLockHeld) - SpinAcquire(BufMgrLock); - - /* see if the block is in the buffer pool already */ - buf = BufTableLookup(&newTag); - if (buf != NULL) { - /* Found it. Now, (a) pin the buffer so no - * one steals it from the buffer pool, - * (b) check IO_IN_PROGRESS, someone may be - * faulting the buffer into the buffer pool. - */ - - PinBuffer(buf); - inProgress = (buf->flags & BM_IO_IN_PROGRESS); - - *foundPtr = TRUE; - if (inProgress) { - WaitIO(buf, BufMgrLock); - if (buf->flags & BM_IO_ERROR) { - /* wierd race condition: - * - * We were waiting for someone else to read the buffer. - * While we were waiting, the reader boof'd in some - * way, so the contents of the buffer are still - * invalid. By saying that we didn't find it, we can - * make the caller reinitialize the buffer. 
If two - * processes are waiting for this block, both will - * read the block. The second one to finish may overwrite - * any updates made by the first. (Assume higher level - * synchronization prevents this from happening). - * - * This is never going to happen, don't worry about it. - */ - *foundPtr = FALSE; - } + BufferDesc *buf, + *buf2; + BufferTag newTag; /* identity of requested block */ + bool inProgress; /* buffer undergoing IO */ + bool newblock = FALSE; + + /* create a new tag so we can lookup the buffer */ + /* assume that the relation is already open */ + if (blockNum == P_NEW) + { + newblock = TRUE; + blockNum = smgrnblocks(reln->rd_rel->relsmgr, reln); } + + INIT_BUFFERTAG(&newTag, reln, blockNum); + + if (!bufferLockHeld) + SpinAcquire(BufMgrLock); + + /* see if the block is in the buffer pool already */ + buf = BufTableLookup(&newTag); + if (buf != NULL) + { + + /* + * Found it. Now, (a) pin the buffer so no one steals it from the + * buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting + * the buffer into the buffer pool. + */ + + PinBuffer(buf); + inProgress = (buf->flags & BM_IO_IN_PROGRESS); + + *foundPtr = TRUE; + if (inProgress) + { + WaitIO(buf, BufMgrLock); + if (buf->flags & BM_IO_ERROR) + { + + /* + * wierd race condition: + * + * We were waiting for someone else to read the buffer. While + * we were waiting, the reader boof'd in some way, so the + * contents of the buffer are still invalid. By saying + * that we didn't find it, we can make the caller + * reinitialize the buffer. If two processes are waiting + * for this block, both will read the block. The second + * one to finish may overwrite any updates made by the + * first. (Assume higher level synchronization prevents + * this from happening). + * + * This is never going to happen, don't worry about it. + */ + *foundPtr = FALSE; + } + } #ifdef BMTRACE - _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND); -#endif /* BMTRACE */ - - SpinRelease(BufMgrLock); - - return(buf); - } - - *foundPtr = FALSE; - - /* - * Didn't find it in the buffer pool. We'll have - * to initialize a new buffer. First, grab one from - * the free list. If it's dirty, flush it to disk. - * Remember to unlock BufMgr spinlock while doing the IOs. - */ - inProgress = FALSE; - for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL; ) { - - /* GetFreeBuffer will abort if it can't find a free buffer */ - buf = GetFreeBuffer(); - - /* - * But it can return buf == NULL if we are in aborting - * transaction now and so elog(WARN,...) in GetFreeBuffer - * will not abort again. - */ - if ( buf == NULL ) - return (NULL); - + _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND); +#endif /* BMTRACE */ + + SpinRelease(BufMgrLock); + + return (buf); + } + + *foundPtr = FALSE; + /* - * There should be exactly one pin on the buffer after - * it is allocated -- ours. If it had a pin it wouldn't - * have been on the free list. No one else could have - * pinned it between GetFreeBuffer and here because we - * have the BufMgrLock. + * Didn't find it in the buffer pool. We'll have to initialize a new + * buffer. First, grab one from the free list. If it's dirty, flush + * it to disk. Remember to unlock BufMgr spinlock while doing the IOs. 
*/ - Assert(buf->refcount == 0); - buf->refcount = 1; - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1; - - if (buf->flags & BM_DIRTY) { - bool smok; - /* - * Set BM_IO_IN_PROGRESS to keep anyone from doing anything - * with the contents of the buffer while we write it out. - * We don't really care if they try to read it, but if they - * can complete a BufferAlloc on it they can then scribble - * into it, and we'd really like to avoid that while we are - * flushing the buffer. Setting this flag should block them - * in WaitIO until we're done. - */ - inProgress = TRUE; - buf->flags |= BM_IO_IN_PROGRESS; -#ifdef HAS_TEST_AND_SET - /* - * All code paths that acquire this lock pin the buffer - * first; since no one had it pinned (it just came off the - * free list), no one else can have this lock. - */ - Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); - S_LOCK(&(buf->io_in_progress_lock)); -#endif /* HAS_TEST_AND_SET */ - - /* - * Write the buffer out, being careful to release BufMgrLock - * before starting the I/O. - * - * This #ifndef is here because a few extra semops REALLY kill - * you on machines that don't have spinlocks. If you don't - * operate with much concurrency, well... - */ - smok = BufferReplace(buf, true); -#ifndef OPTIMIZE_SINGLE - SpinAcquire(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ - - if ( smok == FALSE ) - { - elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s", - buf->tag.blockNum, buf->sb_dbname, buf->sb_relname); - inProgress = FALSE; - buf->flags |= BM_IO_ERROR; - buf->flags &= ~BM_IO_IN_PROGRESS; -#ifdef HAS_TEST_AND_SET - S_UNLOCK(&(buf->io_in_progress_lock)); -#else /* !HAS_TEST_AND_SET */ - if (buf->refcount > 1) - SignalIO(buf); -#endif /* !HAS_TEST_AND_SET */ - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - buf->refcount--; - if ( buf->refcount == 0 ) - { - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; - } - buf = (BufferDesc *) NULL; - } - else - { + inProgress = FALSE; + for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;) + { + + /* GetFreeBuffer will abort if it can't find a free buffer */ + buf = GetFreeBuffer(); + /* - * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't - * be setted by anyone. - vadim 01/17/97 + * But it can return buf == NULL if we are in aborting transaction + * now and so elog(WARN,...) in GetFreeBuffer will not abort + * again. */ - if ( buf->flags & BM_JUST_DIRTIED ) - { - elog (FATAL, "BufferAlloc: content of block %u (%s) changed while flushing", - buf->tag.blockNum, buf->sb_relname); - } - else - { - buf->flags &= ~BM_DIRTY; - } - } - - /* - * Somebody could have pinned the buffer while we were - * doing the I/O and had given up the BufMgrLock (though - * they would be waiting for us to clear the BM_IO_IN_PROGRESS - * flag). That's why this is a loop -- if so, we need to clear - * the I/O flags, remove our pin and start all over again. - * - * People may be making buffers free at any time, so there's - * no reason to think that we have an immediate disaster on - * our hands. - */ - if ( buf && buf->refcount > 1 ) - { - inProgress = FALSE; - buf->flags &= ~BM_IO_IN_PROGRESS; -#ifdef HAS_TEST_AND_SET - S_UNLOCK(&(buf->io_in_progress_lock)); -#else /* !HAS_TEST_AND_SET */ - if (buf->refcount > 1) - SignalIO(buf); -#endif /* !HAS_TEST_AND_SET */ - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - buf->refcount--; - buf = (BufferDesc *) NULL; - } - - /* - * Somebody could have allocated another buffer for the - * same block we are about to read in. 
(While we flush out - * the dirty buffer, we don't hold the lock and someone could - * have allocated another buffer for the same block. The problem - * is we haven't gotten around to insert the new tag into - * the buffer table. So we need to check here. -ay 3/95 - */ - buf2 = BufTableLookup(&newTag); - if (buf2 != NULL) { - /* Found it. Someone has already done what we're about - * to do. We'll just handle this as if it were found in - * the buffer pool in the first place. + if (buf == NULL) + return (NULL); + + /* + * There should be exactly one pin on the buffer after it is + * allocated -- ours. If it had a pin it wouldn't have been on + * the free list. No one else could have pinned it between + * GetFreeBuffer and here because we have the BufMgrLock. */ - if ( buf != NULL ) + Assert(buf->refcount == 0); + buf->refcount = 1; + PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1; + + if (buf->flags & BM_DIRTY) { + bool smok; + + /* + * Set BM_IO_IN_PROGRESS to keep anyone from doing anything + * with the contents of the buffer while we write it out. We + * don't really care if they try to read it, but if they can + * complete a BufferAlloc on it they can then scribble into + * it, and we'd really like to avoid that while we are + * flushing the buffer. Setting this flag should block them + * in WaitIO until we're done. + */ + inProgress = TRUE; + buf->flags |= BM_IO_IN_PROGRESS; #ifdef HAS_TEST_AND_SET - S_UNLOCK(&(buf->io_in_progress_lock)); -#else /* !HAS_TEST_AND_SET */ - if (buf->refcount > 1) - SignalIO(buf); -#endif /* !HAS_TEST_AND_SET */ - - /* give up the buffer since we don't need it any more */ - buf->refcount--; - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; - buf->flags &= ~BM_IO_IN_PROGRESS; - } - PinBuffer(buf2); - inProgress = (buf2->flags & BM_IO_IN_PROGRESS); - - *foundPtr = TRUE; - if (inProgress) { - WaitIO(buf2, BufMgrLock); - if (buf2->flags & BM_IO_ERROR) { - *foundPtr = FALSE; - } + /* + * All code paths that acquire this lock pin the buffer first; + * since no one had it pinned (it just came off the free + * list), no one else can have this lock. + */ + Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); + S_LOCK(&(buf->io_in_progress_lock)); +#endif /* HAS_TEST_AND_SET */ + + /* + * Write the buffer out, being careful to release BufMgrLock + * before starting the I/O. + * + * This #ifndef is here because a few extra semops REALLY kill + * you on machines that don't have spinlocks. If you don't + * operate with much concurrency, well... + */ + smok = BufferReplace(buf, true); +#ifndef OPTIMIZE_SINGLE + SpinAcquire(BufMgrLock); +#endif /* OPTIMIZE_SINGLE */ + + if (smok == FALSE) + { + elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s", + buf->tag.blockNum, buf->sb_dbname, buf->sb_relname); + inProgress = FALSE; + buf->flags |= BM_IO_ERROR; + buf->flags &= ~BM_IO_IN_PROGRESS; +#ifdef HAS_TEST_AND_SET + S_UNLOCK(&(buf->io_in_progress_lock)); +#else /* !HAS_TEST_AND_SET */ + if (buf->refcount > 1) + SignalIO(buf); +#endif /* !HAS_TEST_AND_SET */ + PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; + buf->refcount--; + if (buf->refcount == 0) + { + AddBufferToFreelist(buf); + buf->flags |= BM_FREE; + } + buf = (BufferDesc *) NULL; + } + else + { + + /* + * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't + * be setted by anyone. 
- vadim 01/17/97 + */ + if (buf->flags & BM_JUST_DIRTIED) + { + elog(FATAL, "BufferAlloc: content of block %u (%s) changed while flushing", + buf->tag.blockNum, buf->sb_relname); + } + else + { + buf->flags &= ~BM_DIRTY; + } + } + + /* + * Somebody could have pinned the buffer while we were doing + * the I/O and had given up the BufMgrLock (though they would + * be waiting for us to clear the BM_IO_IN_PROGRESS flag). + * That's why this is a loop -- if so, we need to clear the + * I/O flags, remove our pin and start all over again. + * + * People may be making buffers free at any time, so there's no + * reason to think that we have an immediate disaster on our + * hands. + */ + if (buf && buf->refcount > 1) + { + inProgress = FALSE; + buf->flags &= ~BM_IO_IN_PROGRESS; +#ifdef HAS_TEST_AND_SET + S_UNLOCK(&(buf->io_in_progress_lock)); +#else /* !HAS_TEST_AND_SET */ + if (buf->refcount > 1) + SignalIO(buf); +#endif /* !HAS_TEST_AND_SET */ + PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; + buf->refcount--; + buf = (BufferDesc *) NULL; + } + + /* + * Somebody could have allocated another buffer for the same + * block we are about to read in. (While we flush out the + * dirty buffer, we don't hold the lock and someone could have + * allocated another buffer for the same block. The problem is + * we haven't gotten around to insert the new tag into the + * buffer table. So we need to check here. -ay 3/95 + */ + buf2 = BufTableLookup(&newTag); + if (buf2 != NULL) + { + + /* + * Found it. Someone has already done what we're about to + * do. We'll just handle this as if it were found in the + * buffer pool in the first place. + */ + if (buf != NULL) + { +#ifdef HAS_TEST_AND_SET + S_UNLOCK(&(buf->io_in_progress_lock)); +#else /* !HAS_TEST_AND_SET */ + if (buf->refcount > 1) + SignalIO(buf); +#endif /* !HAS_TEST_AND_SET */ + + /* give up the buffer since we don't need it any more */ + buf->refcount--; + PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; + AddBufferToFreelist(buf); + buf->flags |= BM_FREE; + buf->flags &= ~BM_IO_IN_PROGRESS; + } + + PinBuffer(buf2); + inProgress = (buf2->flags & BM_IO_IN_PROGRESS); + + *foundPtr = TRUE; + if (inProgress) + { + WaitIO(buf2, BufMgrLock); + if (buf2->flags & BM_IO_ERROR) + { + *foundPtr = FALSE; + } + } + + SpinRelease(BufMgrLock); + + return (buf2); + } } - + } + + /* + * At this point we should have the sole pin on a non-dirty buffer and + * we may or may not already have the BM_IO_IN_PROGRESS flag set. + */ + + /* + * Change the name of the buffer in the lookup table: + * + * Need to update the lookup table before the read starts. If someone + * comes along looking for the buffer while we are reading it in, we + * don't want them to allocate a new buffer. For the same reason, we + * didn't want to erase the buf table entry for the buffer we were + * writing back until now, either. + */ + + if (!BufTableDelete(buf)) + { SpinRelease(BufMgrLock); - - return(buf2); - } + elog(FATAL, "buffer wasn't in the buffer table\n"); + } - } - /* - * At this point we should have the sole pin on a non-dirty - * buffer and we may or may not already have the BM_IO_IN_PROGRESS - * flag set. - */ - - /* - * Change the name of the buffer in the lookup table: - * - * Need to update the lookup table before the read starts. - * If someone comes along looking for the buffer while - * we are reading it in, we don't want them to allocate - * a new buffer. 
For the same reason, we didn't want - * to erase the buf table entry for the buffer we were - * writing back until now, either. - */ - - if (! BufTableDelete(buf)) { - SpinRelease(BufMgrLock); - elog(FATAL,"buffer wasn't in the buffer table\n"); - - } - - /* record the database name and relation name for this buffer */ - strcpy (buf->sb_relname, reln->rd_rel->relname.data); - strcpy (buf->sb_dbname, GetDatabaseName()); - - /* remember which storage manager is responsible for it */ - buf->bufsmgr = reln->rd_rel->relsmgr; - - INIT_BUFFERTAG(&(buf->tag),reln,blockNum); - if (! BufTableInsert(buf)) { - SpinRelease(BufMgrLock); - elog(FATAL,"Buffer in lookup table twice \n"); - } - - /* Buffer contents are currently invalid. Have - * to mark IO IN PROGRESS so no one fiddles with - * them until the read completes. If this routine - * has been called simply to allocate a buffer, no - * io will be attempted, so the flag isnt set. - */ - if (!inProgress) { - buf->flags |= BM_IO_IN_PROGRESS; + + /* record the database name and relation name for this buffer */ + strcpy(buf->sb_relname, reln->rd_rel->relname.data); + strcpy(buf->sb_dbname, GetDatabaseName()); + + /* remember which storage manager is responsible for it */ + buf->bufsmgr = reln->rd_rel->relsmgr; + + INIT_BUFFERTAG(&(buf->tag), reln, blockNum); + if (!BufTableInsert(buf)) + { + SpinRelease(BufMgrLock); + elog(FATAL, "Buffer in lookup table twice \n"); + } + + /* + * Buffer contents are currently invalid. Have to mark IO IN PROGRESS + * so no one fiddles with them until the read completes. If this + * routine has been called simply to allocate a buffer, no io will be + * attempted, so the flag isnt set. + */ + if (!inProgress) + { + buf->flags |= BM_IO_IN_PROGRESS; #ifdef HAS_TEST_AND_SET - Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); - S_LOCK(&(buf->io_in_progress_lock)); -#endif /* HAS_TEST_AND_SET */ - } - + Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); + S_LOCK(&(buf->io_in_progress_lock)); +#endif /* HAS_TEST_AND_SET */ + } + #ifdef BMTRACE - _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND); -#endif /* BMTRACE */ - - SpinRelease(BufMgrLock); - - return (buf); + _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND); +#endif /* BMTRACE */ + + SpinRelease(BufMgrLock); + + return (buf); } /* * WriteBuffer-- * - * Pushes buffer contents to disk if WriteMode is BUFFER_FLUSH_WRITE. - * Otherwise, marks contents as dirty. + * Pushes buffer contents to disk if WriteMode is BUFFER_FLUSH_WRITE. + * Otherwise, marks contents as dirty. * * Assume that buffer is pinned. Assume that reln is - * valid. + * valid. * * Side Effects: - * Pin count is decremented. + * Pin count is decremented. 
*/ #undef WriteBuffer @@ -643,92 +693,103 @@ BufferAlloc(Relation reln, int WriteBuffer(Buffer buffer) { - BufferDesc *bufHdr; + BufferDesc *bufHdr; - if (WriteMode == BUFFER_FLUSH_WRITE) { - return (FlushBuffer (buffer, TRUE)); - } else { + if (WriteMode == BUFFER_FLUSH_WRITE) + { + return (FlushBuffer(buffer, TRUE)); + } + else + { - if (BufferIsLocal(buffer)) - return WriteLocalBuffer(buffer, TRUE); - - if (BAD_BUFFER_ID(buffer)) - return(FALSE); + if (BufferIsLocal(buffer)) + return WriteLocalBuffer(buffer, TRUE); - bufHdr = &BufferDescriptors[buffer-1]; - - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - UnpinBuffer(bufHdr); - SpinRelease(BufMgrLock); - CommitInfoNeedsSave[buffer - 1] = 0; - } - return(TRUE); -} + if (BAD_BUFFER_ID(buffer)) + return (FALSE); + + bufHdr = &BufferDescriptors[buffer - 1]; + + SpinAcquire(BufMgrLock); + Assert(bufHdr->refcount > 0); + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + UnpinBuffer(bufHdr); + SpinRelease(BufMgrLock); + CommitInfoNeedsSave[buffer - 1] = 0; + } + return (TRUE); +} #ifdef NOT_USED void WriteBuffer_Debug(char *file, int line, Buffer buffer) { - WriteBuffer(buffer); - if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf; - buf = &BufferDescriptors[buffer-1]; - fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \ + WriteBuffer(buffer); + if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf; + + buf = &BufferDescriptors[buffer - 1]; + fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif /* * DirtyBufferCopy() -- For a given dbid/relid/blockno, if the buffer is - * in the cache and is dirty, mark it clean and copy - * it to the requested location. This is a logical - * write, and has been installed to support the cache - * management code for write-once storage managers. + * in the cache and is dirty, mark it clean and copy + * it to the requested location. This is a logical + * write, and has been installed to support the cache + * management code for write-once storage managers. * - * DirtyBufferCopy() -- Copy a given dirty buffer to the requested - * destination. + * DirtyBufferCopy() -- Copy a given dirty buffer to the requested + * destination. * - * We treat this as a write. If the requested buffer is in the pool - * and is dirty, we copy it to the location requested and mark it - * clean. This routine supports the Sony jukebox storage manager, - * which agrees to take responsibility for the data once we mark - * it clean. + * We treat this as a write. If the requested buffer is in the pool + * and is dirty, we copy it to the location requested and mark it + * clean. This routine supports the Sony jukebox storage manager, + * which agrees to take responsibility for the data once we mark + * it clean. 
* - * NOTE: used by sony jukebox code in postgres 4.2 - ay 2/95 + * NOTE: used by sony jukebox code in postgres 4.2 - ay 2/95 */ #ifdef NOT_USED void DirtyBufferCopy(Oid dbid, Oid relid, BlockNumber blkno, char *dest) { - BufferDesc *buf; - BufferTag btag; - - btag.relId.relId = relid; - btag.relId.dbId = dbid; - btag.blockNum = blkno; - - SpinAcquire(BufMgrLock); - buf = BufTableLookup(&btag); - - if (buf == (BufferDesc *) NULL - || !(buf->flags & BM_DIRTY) - || !(buf->flags & BM_VALID)) { + BufferDesc *buf; + BufferTag btag; + + btag.relId.relId = relid; + btag.relId.dbId = dbid; + btag.blockNum = blkno; + + SpinAcquire(BufMgrLock); + buf = BufTableLookup(&btag); + + if (buf == (BufferDesc *) NULL + || !(buf->flags & BM_DIRTY) + || !(buf->flags & BM_VALID)) + { + SpinRelease(BufMgrLock); + return; + } + + /* + * hate to do this holding the lock, but release and reacquire is + * slower + */ + memmove(dest, (char *) MAKE_PTR(buf->data), BLCKSZ); + + buf->flags &= ~BM_DIRTY; + SpinRelease(BufMgrLock); - return; - } - - /* hate to do this holding the lock, but release and reacquire is slower */ - memmove(dest, (char *) MAKE_PTR(buf->data), BLCKSZ); - - buf->flags &= ~BM_DIRTY; - - SpinRelease(BufMgrLock); } + #endif /* @@ -742,504 +803,541 @@ DirtyBufferCopy(Oid dbid, Oid relid, BlockNumber blkno, char *dest) static int FlushBuffer(Buffer buffer, bool release) { - BufferDesc *bufHdr; - Oid bufdb; - Relation bufrel; - int status; - - if (BufferIsLocal(buffer)) - return FlushLocalBuffer(buffer, release); - - if (BAD_BUFFER_ID(buffer)) - return (STATUS_ERROR); - - bufHdr = &BufferDescriptors[buffer-1]; - bufdb = bufHdr->tag.relId.dbId; - - Assert (bufdb == MyDatabaseId || bufdb == (Oid) NULL); - bufrel = RelationIdCacheGetRelation (bufHdr->tag.relId.relId); - Assert (bufrel != (Relation) NULL); - - /* To check if block content changed while flushing. - vadim 01/17/97 */ - SpinAcquire(BufMgrLock); - bufHdr->flags &= ~BM_JUST_DIRTIED; - SpinRelease(BufMgrLock); - - status = smgrflush(bufHdr->bufsmgr, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - - if (status == SM_FAIL) - { - elog(WARN, "FlushBuffer: cannot flush block %u of the relation %s", - bufHdr->tag.blockNum, bufHdr->sb_relname); - return (STATUS_ERROR); - } - BufferFlushCount++; - - SpinAcquire(BufMgrLock); - /* - * If this buffer was marked by someone as DIRTY while - * we were flushing it out we must not clear DIRTY flag - * - vadim 01/17/97 - */ - if ( bufHdr->flags & BM_JUST_DIRTIED ) - { - elog (NOTICE, "FlusfBuffer: content of block %u (%s) changed while flushing", - bufHdr->tag.blockNum, bufHdr->sb_relname); - } - else - { - bufHdr->flags &= ~BM_DIRTY; - } - if ( release ) - UnpinBuffer(bufHdr); - SpinRelease(BufMgrLock); - CommitInfoNeedsSave[buffer - 1] = 0; - - return(STATUS_OK); + BufferDesc *bufHdr; + Oid bufdb; + Relation bufrel; + int status; + + if (BufferIsLocal(buffer)) + return FlushLocalBuffer(buffer, release); + + if (BAD_BUFFER_ID(buffer)) + return (STATUS_ERROR); + + bufHdr = &BufferDescriptors[buffer - 1]; + bufdb = bufHdr->tag.relId.dbId; + + Assert(bufdb == MyDatabaseId || bufdb == (Oid) NULL); + bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); + Assert(bufrel != (Relation) NULL); + + /* To check if block content changed while flushing. 
- vadim 01/17/97 */ + SpinAcquire(BufMgrLock); + bufHdr->flags &= ~BM_JUST_DIRTIED; + SpinRelease(BufMgrLock); + + status = smgrflush(bufHdr->bufsmgr, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + + if (status == SM_FAIL) + { + elog(WARN, "FlushBuffer: cannot flush block %u of the relation %s", + bufHdr->tag.blockNum, bufHdr->sb_relname); + return (STATUS_ERROR); + } + BufferFlushCount++; + + SpinAcquire(BufMgrLock); + + /* + * If this buffer was marked by someone as DIRTY while we were + * flushing it out we must not clear DIRTY flag - vadim 01/17/97 + */ + if (bufHdr->flags & BM_JUST_DIRTIED) + { + elog(NOTICE, "FlusfBuffer: content of block %u (%s) changed while flushing", + bufHdr->tag.blockNum, bufHdr->sb_relname); + } + else + { + bufHdr->flags &= ~BM_DIRTY; + } + if (release) + UnpinBuffer(bufHdr); + SpinRelease(BufMgrLock); + CommitInfoNeedsSave[buffer - 1] = 0; + + return (STATUS_OK); } /* * WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer - * when the operation is complete. + * when the operation is complete. * - * We know that the buffer is for a relation in our private cache, - * because this routine is called only to write out buffers that - * were changed by the executing backend. + * We know that the buffer is for a relation in our private cache, + * because this routine is called only to write out buffers that + * were changed by the executing backend. */ int WriteNoReleaseBuffer(Buffer buffer) { - BufferDesc *bufHdr; - - if (WriteMode == BUFFER_FLUSH_WRITE) { - return (FlushBuffer (buffer, FALSE)); - } else { + BufferDesc *bufHdr; - if (BufferIsLocal(buffer)) - return WriteLocalBuffer(buffer, FALSE); - - if (BAD_BUFFER_ID(buffer)) - return (STATUS_ERROR); + if (WriteMode == BUFFER_FLUSH_WRITE) + { + return (FlushBuffer(buffer, FALSE)); + } + else + { - bufHdr = &BufferDescriptors[buffer-1]; - - SpinAcquire(BufMgrLock); - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - SpinRelease(BufMgrLock); - CommitInfoNeedsSave[buffer - 1] = 0; - } - return(STATUS_OK); + if (BufferIsLocal(buffer)) + return WriteLocalBuffer(buffer, FALSE); + + if (BAD_BUFFER_ID(buffer)) + return (STATUS_ERROR); + + bufHdr = &BufferDescriptors[buffer - 1]; + + SpinAcquire(BufMgrLock); + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + SpinRelease(BufMgrLock); + CommitInfoNeedsSave[buffer - 1] = 0; + } + return (STATUS_OK); } #undef ReleaseAndReadBuffer /* * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer() - * so that only one semop needs to be called. + * so that only one semop needs to be called. 
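The part of FlushBuffer worth slowing down for is the BM_JUST_DIRTIED protocol: clear the flag while holding BufMgrLock, write the page with the lock released, then clear BM_DIRTY only if nobody set BM_JUST_DIRTIED in the meantime. A compressed sketch of that control flow, with the lock and the storage-manager call reduced to invented stubs:

    #define BM_DIRTY        0x01
    #define BM_JUST_DIRTIED 0x02

    /* Invented stand-ins for SpinAcquire/SpinRelease and smgrflush. */
    static void lock(void)       { }
    static void unlock(void)     { }
    static int  write_page(void) { return 0; }  /* 0 = success */

    static int model_flush(int *flags)
    {
        lock();
        *flags &= ~BM_JUST_DIRTIED;     /* arm the race detector */
        unlock();

        if (write_page() != 0)
            return -1;                  /* STATUS_ERROR */

        lock();
        if (*flags & BM_JUST_DIRTIED)
        {
            /* someone re-dirtied the page during the write: leave
             * BM_DIRTY set so the page gets written again later */
        }
        else
            *flags &= ~BM_DIRTY;
        unlock();
        return 0;                       /* STATUS_OK */
    }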
* */ Buffer ReleaseAndReadBuffer(Buffer buffer, - Relation relation, - BlockNumber blockNum) + Relation relation, + BlockNumber blockNum) { - BufferDesc *bufHdr; - Buffer retbuf; - - if (BufferIsLocal(buffer)) { - Assert(LocalRefCount[-buffer - 1] > 0); - LocalRefCount[-buffer - 1]--; - } else { - if (BufferIsValid(buffer)) { - bufHdr = &BufferDescriptors[buffer-1]; - Assert(PrivateRefCount[buffer - 1] > 0); - PrivateRefCount[buffer - 1]--; - if (PrivateRefCount[buffer - 1] == 0 && - LastRefCount[buffer - 1] == 0) { - /* only release buffer if it is not pinned in previous ExecMain - level */ - SpinAcquire(BufMgrLock); - bufHdr->refcount--; - if (bufHdr->refcount == 0) { - AddBufferToFreelist(bufHdr); - bufHdr->flags |= BM_FREE; - } - if(CommitInfoNeedsSave[buffer - 1]) { - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - CommitInfoNeedsSave[buffer - 1] = 0; + BufferDesc *bufHdr; + Buffer retbuf; + + if (BufferIsLocal(buffer)) + { + Assert(LocalRefCount[-buffer - 1] > 0); + LocalRefCount[-buffer - 1]--; + } + else + { + if (BufferIsValid(buffer)) + { + bufHdr = &BufferDescriptors[buffer - 1]; + Assert(PrivateRefCount[buffer - 1] > 0); + PrivateRefCount[buffer - 1]--; + if (PrivateRefCount[buffer - 1] == 0 && + LastRefCount[buffer - 1] == 0) + { + + /* + * only release buffer if it is not pinned in previous + * ExecMain level + */ + SpinAcquire(BufMgrLock); + bufHdr->refcount--; + if (bufHdr->refcount == 0) + { + AddBufferToFreelist(bufHdr); + bufHdr->flags |= BM_FREE; + } + if (CommitInfoNeedsSave[buffer - 1]) + { + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + CommitInfoNeedsSave[buffer - 1] = 0; + } + retbuf = ReadBufferWithBufferLock(relation, blockNum, true); + return retbuf; + } } - retbuf = ReadBufferWithBufferLock(relation, blockNum, true); - return retbuf; - } } - } - return (ReadBuffer(relation, blockNum)); + return (ReadBuffer(relation, blockNum)); } /* * BufferSync -- Flush all dirty buffers in the pool. * - * This is called at transaction commit time. It does the wrong thing, - * right now. We should flush only our own changes to stable storage, - * and we should obey the lock protocol on the buffer manager metadata - * as we do it. Also, we need to be sure that no other transaction is - * modifying the page as we flush it. This is only a problem for objects - * that use a non-two-phase locking protocol, like btree indices. For - * those objects, we would like to set a write lock for the duration of - * our IO. Another possibility is to code updates to btree pages - * carefully, so that writing them out out of order cannot cause - * any unrecoverable errors. + * This is called at transaction commit time. It does the wrong thing, + * right now. We should flush only our own changes to stable storage, + * and we should obey the lock protocol on the buffer manager metadata + * as we do it. Also, we need to be sure that no other transaction is + * modifying the page as we flush it. This is only a problem for objects + * that use a non-two-phase locking protocol, like btree indices. For + * those objects, we would like to set a write lock for the duration of + * our IO. Another possibility is to code updates to btree pages + * carefully, so that writing them out out of order cannot cause + * any unrecoverable errors. * - * I don't want to think hard about this right now, so I will try - * to come back to it later. + * I don't want to think hard about this right now, so I will try + * to come back to it later. 
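As a usage note on ReleaseAndReadBuffer above: its whole reason to exist is that the common scan pattern, drop the pin on the old page and pin the next one, otherwise costs two trips through BufMgrLock (two semops). Schematically, with the real function names from this file and simplified call sites:

    /* naive: two acquisitions of BufMgrLock */
    ReleaseBuffer(buf);
    buf = ReadBuffer(rel, blkno);

    /* combined: one acquisition when this backend held the last
     * private pin; otherwise it falls through to ReadBuffer anyway */
    buf = ReleaseAndReadBuffer(buf, rel, blkno);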
*/ static void BufferSync() -{ - int i; - Oid bufdb; - Oid bufrel; - Relation reln; - BufferDesc *bufHdr; - int status; - - SpinAcquire(BufMgrLock); - for (i=0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) { - if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { - bufdb = bufHdr->tag.relId.dbId; - bufrel = bufHdr->tag.relId.relId; - if (bufdb == MyDatabaseId || bufdb == (Oid) 0) { - reln = RelationIdCacheGetRelation(bufrel); - - /* - * We have to pin buffer to keep anyone from stealing it - * from the buffer pool while we are flushing it or - * waiting in WaitIO. It's bad for GetFreeBuffer in - * BufferAlloc, but there is no other way to prevent - * writing into disk block data from some other buffer, - * getting smgr status of some other block and - * clearing BM_DIRTY of ... - VAdim 09/16/96 - */ - PinBuffer(bufHdr); - if (bufHdr->flags & BM_IO_IN_PROGRESS) +{ + int i; + Oid bufdb; + Oid bufrel; + Relation reln; + BufferDesc *bufHdr; + int status; + + SpinAcquire(BufMgrLock); + for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) + { + if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { - WaitIO(bufHdr, BufMgrLock); - UnpinBuffer(bufHdr); - if (bufHdr->flags & BM_IO_ERROR) - { - elog(WARN, "BufferSync: write error %u for %s", - bufHdr->tag.blockNum, bufHdr->sb_relname); - } - if (reln != (Relation)NULL) - RelationDecrementReferenceCount(reln); - continue; - } - - /* - * To check if block content changed while flushing - * (see below). - vadim 01/17/97 - */ - bufHdr->flags &= ~BM_JUST_DIRTIED; + bufdb = bufHdr->tag.relId.dbId; + bufrel = bufHdr->tag.relId.relId; + if (bufdb == MyDatabaseId || bufdb == (Oid) 0) + { + reln = RelationIdCacheGetRelation(bufrel); + + /* + * We have to pin buffer to keep anyone from stealing it + * from the buffer pool while we are flushing it or + * waiting in WaitIO. It's bad for GetFreeBuffer in + * BufferAlloc, but there is no other way to prevent + * writing into disk block data from some other buffer, + * getting smgr status of some other block and clearing + * BM_DIRTY of ... - VAdim 09/16/96 + */ + PinBuffer(bufHdr); + if (bufHdr->flags & BM_IO_IN_PROGRESS) + { + WaitIO(bufHdr, BufMgrLock); + UnpinBuffer(bufHdr); + if (bufHdr->flags & BM_IO_ERROR) + { + elog(WARN, "BufferSync: write error %u for %s", + bufHdr->tag.blockNum, bufHdr->sb_relname); + } + if (reln != (Relation) NULL) + RelationDecrementReferenceCount(reln); + continue; + } + + /* + * To check if block content changed while flushing (see + * below). - vadim 01/17/97 + */ + bufHdr->flags &= ~BM_JUST_DIRTIED; + + /* + * If we didn't have the reldesc in our local cache, flush + * this page out using the 'blind write' storage manager + * routine. If we did find it, use the standard + * interface. + */ - /* - * If we didn't have the reldesc in our local cache, flush this - * page out using the 'blind write' storage manager routine. If - * we did find it, use the standard interface. 
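The branch that follows is the heart of BufferSync: if the relation descriptor is not found in this backend's relcache, the page is written "blind", using the database and relation names that BufferAlloc copied into the descriptor earlier in this diff precisely so that a blind write has enough identity to go on. Reassembled from the hunk, minus error handling:

    if (reln == (Relation) NULL)
        status = smgrblindwrt(bufHdr->bufsmgr, bufHdr->sb_dbname,
                              bufHdr->sb_relname, bufdb, bufrel,
                              bufHdr->tag.blockNum,
                              (char *) MAKE_PTR(bufHdr->data));
    else
        status = smgrwrite(bufHdr->bufsmgr, reln,
                           bufHdr->tag.blockNum,
                           (char *) MAKE_PTR(bufHdr->data));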
- */ - #ifndef OPTIMIZE_SINGLE - SpinRelease(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ - if (reln == (Relation) NULL) { - status = smgrblindwrt(bufHdr->bufsmgr, bufHdr->sb_dbname, - bufHdr->sb_relname, bufdb, bufrel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } else { - status = smgrwrite(bufHdr->bufsmgr, reln, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } + SpinRelease(BufMgrLock); +#endif /* OPTIMIZE_SINGLE */ + if (reln == (Relation) NULL) + { + status = smgrblindwrt(bufHdr->bufsmgr, bufHdr->sb_dbname, + bufHdr->sb_relname, bufdb, bufrel, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } + else + { + status = smgrwrite(bufHdr->bufsmgr, reln, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } #ifndef OPTIMIZE_SINGLE - SpinAcquire(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ - - UnpinBuffer(bufHdr); - if (status == SM_FAIL) { - bufHdr->flags |= BM_IO_ERROR; - elog(WARN, "BufferSync: cannot write %u for %s", - bufHdr->tag.blockNum, bufHdr->sb_relname); - } - BufferFlushCount++; - /* - * If this buffer was marked by someone as DIRTY while - * we were flushing it out we must not clear DIRTY flag - * - vadim 01/17/97 - */ - if ( bufHdr->flags & BM_JUST_DIRTIED ) - { - elog (NOTICE, "BufferSync: content of block %u (%s) changed while flushing", - bufHdr->tag.blockNum, bufHdr->sb_relname); - } - else - { - bufHdr->flags &= ~BM_DIRTY; + SpinAcquire(BufMgrLock); +#endif /* OPTIMIZE_SINGLE */ + + UnpinBuffer(bufHdr); + if (status == SM_FAIL) + { + bufHdr->flags |= BM_IO_ERROR; + elog(WARN, "BufferSync: cannot write %u for %s", + bufHdr->tag.blockNum, bufHdr->sb_relname); + } + BufferFlushCount++; + + /* + * If this buffer was marked by someone as DIRTY while we + * were flushing it out we must not clear DIRTY flag - + * vadim 01/17/97 + */ + if (bufHdr->flags & BM_JUST_DIRTIED) + { + elog(NOTICE, "BufferSync: content of block %u (%s) changed while flushing", + bufHdr->tag.blockNum, bufHdr->sb_relname); + } + else + { + bufHdr->flags &= ~BM_DIRTY; + } + if (reln != (Relation) NULL) + RelationDecrementReferenceCount(reln); + } } - if (reln != (Relation)NULL) - RelationDecrementReferenceCount(reln); - } } - } - SpinRelease(BufMgrLock); + SpinRelease(BufMgrLock); - LocalBufferSync(); + LocalBufferSync(); } /* * WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' - * is cleared. Because IO_IN_PROGRESS conflicts are - * expected to be rare, there is only one BufferIO - * lock in the entire system. All processes block - * on this semaphore when they try to use a buffer - * that someone else is faulting in. Whenever a - * process finishes an IO and someone is waiting for - * the buffer, BufferIO is signaled (SignalIO). All - * waiting processes then wake up and check to see - * if their buffer is now ready. This implementation - * is simple, but efficient enough if WaitIO is - * rarely called by multiple processes simultaneously. + * is cleared. Because IO_IN_PROGRESS conflicts are + * expected to be rare, there is only one BufferIO + * lock in the entire system. All processes block + * on this semaphore when they try to use a buffer + * that someone else is faulting in. Whenever a + * process finishes an IO and someone is waiting for + * the buffer, BufferIO is signaled (SignalIO). All + * waiting processes then wake up and check to see + * if their buffer is now ready. This implementation + * is simple, but efficient enough if WaitIO is + * rarely called by multiple processes simultaneously. 
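For platforms without test-and-set, the WaitIO/SignalIO pair whose bodies follow implements a broadcast wakeup: each waiter bumps a shared counter and sleeps on the single system-wide semaphore, and a finishing IO posts the semaphore counter-many times so every waiter rechecks its own buffer. A toy model of the handshake, using a POSIX semaphore and a mutex in place of IpcSemaphoreLock/Unlock and the spinlock (all model_* names are invented):

    #include <pthread.h>
    #include <semaphore.h>
    #include <stdio.h>

    static pthread_mutex_t model_bufmgr_lock = PTHREAD_MUTEX_INITIALIZER;
    static sem_t model_wait_io;
    static int   model_nwaiters = 0;        /* *NWaitIOBackendP */
    static int   model_io_in_progress = 1;  /* BM_IO_IN_PROGRESS */

    static void model_WaitIO(void)
    {
        pthread_mutex_lock(&model_bufmgr_lock);
        while (model_io_in_progress)
        {
            model_nwaiters++;
            pthread_mutex_unlock(&model_bufmgr_lock);
            sem_wait(&model_wait_io);       /* IpcSemaphoreLock */
            pthread_mutex_lock(&model_bufmgr_lock);
        }
        pthread_mutex_unlock(&model_bufmgr_lock);
    }

    static void model_SignalIO(void)
    {
        pthread_mutex_lock(&model_bufmgr_lock);
        model_io_in_progress = 0;
        /* wake every waiter so each one rechecks its flag */
        for (int i = 0; i < model_nwaiters; i++)
            sem_post(&model_wait_io);       /* IpcSemaphoreUnlock */
        model_nwaiters = 0;
        pthread_mutex_unlock(&model_bufmgr_lock);
    }

    int main(void)
    {
        sem_init(&model_wait_io, 0, 0);
        model_SignalIO();   /* pretend the IO finished first */
        model_WaitIO();     /* returns at once: flag already clear */
        puts("io complete");
        return 0;
    }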
* - * ProcSleep atomically releases the spinlock and goes to - * sleep. + * ProcSleep atomically releases the spinlock and goes to + * sleep. * - * Note: there is an easy fix if the queue becomes long. - * save the id of the buffer we are waiting for in - * the queue structure. That way signal can figure - * out which proc to wake up. + * Note: there is an easy fix if the queue becomes long. + * save the id of the buffer we are waiting for in + * the queue structure. That way signal can figure + * out which proc to wake up. */ #ifdef HAS_TEST_AND_SET static void -WaitIO(BufferDesc *buf, SPINLOCK spinlock) +WaitIO(BufferDesc * buf, SPINLOCK spinlock) { - SpinRelease(spinlock); - S_LOCK(&(buf->io_in_progress_lock)); - S_UNLOCK(&(buf->io_in_progress_lock)); - SpinAcquire(spinlock); + SpinRelease(spinlock); + S_LOCK(&(buf->io_in_progress_lock)); + S_UNLOCK(&(buf->io_in_progress_lock)); + SpinAcquire(spinlock); } -#else /* HAS_TEST_AND_SET */ -IpcSemaphoreId WaitIOSemId; +#else /* HAS_TEST_AND_SET */ +IpcSemaphoreId WaitIOSemId; static void -WaitIO(BufferDesc *buf, SPINLOCK spinlock) +WaitIO(BufferDesc * buf, SPINLOCK spinlock) { - bool inProgress; - - for (;;) { - - /* wait until someone releases IO lock */ - (*NWaitIOBackendP)++; - SpinRelease(spinlock); - IpcSemaphoreLock(WaitIOSemId, 0, 1); - SpinAcquire(spinlock); - inProgress = (buf->flags & BM_IO_IN_PROGRESS); - if (!inProgress) break; - } + bool inProgress; + + for (;;) + { + + /* wait until someone releases IO lock */ + (*NWaitIOBackendP)++; + SpinRelease(spinlock); + IpcSemaphoreLock(WaitIOSemId, 0, 1); + SpinAcquire(spinlock); + inProgress = (buf->flags & BM_IO_IN_PROGRESS); + if (!inProgress) + break; + } } /* * SignalIO -- */ static void -SignalIO(BufferDesc *buf) +SignalIO(BufferDesc * buf) { - /* somebody better be waiting. */ - Assert( buf->refcount > 1); - IpcSemaphoreUnlock(WaitIOSemId, 0, *NWaitIOBackendP); - *NWaitIOBackendP = 0; + /* somebody better be waiting. */ + Assert(buf->refcount > 1); + IpcSemaphoreUnlock(WaitIOSemId, 0, *NWaitIOBackendP); + *NWaitIOBackendP = 0; } -#endif /* HAS_TEST_AND_SET */ -long NDirectFileRead; /* some I/O's are direct file access. bypass bufmgr */ -long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */ +#endif /* HAS_TEST_AND_SET */ + +long NDirectFileRead;/* some I/O's are direct file access. + * bypass bufmgr */ +long NDirectFileWrite; /* e.g., I/O in psort and + * hashjoin. 
*/ void -PrintBufferUsage(FILE *statfp) +PrintBufferUsage(FILE * statfp) { - float hitrate; - float localhitrate; - - if (ReadBufferCount==0) - hitrate = 0.0; - else - hitrate = (float)BufferHitCount * 100.0/ReadBufferCount; - - if (ReadLocalBufferCount==0) - localhitrate = 0.0; - else - localhitrate = (float)LocalBufferHitCount * 100.0/ReadLocalBufferCount; - - fprintf(statfp, "!\tShared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", - ReadBufferCount - BufferHitCount, BufferFlushCount, hitrate); - fprintf(statfp, "!\tLocal blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", - ReadLocalBufferCount - LocalBufferHitCount, LocalBufferFlushCount, localhitrate); - fprintf(statfp, "!\tDirect blocks: %10ld read, %10ld written\n", - NDirectFileRead, NDirectFileWrite); + float hitrate; + float localhitrate; + + if (ReadBufferCount == 0) + hitrate = 0.0; + else + hitrate = (float) BufferHitCount *100.0 / ReadBufferCount; + + if (ReadLocalBufferCount == 0) + localhitrate = 0.0; + else + localhitrate = (float) LocalBufferHitCount *100.0 / ReadLocalBufferCount; + + fprintf(statfp, "!\tShared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", + ReadBufferCount - BufferHitCount, BufferFlushCount, hitrate); + fprintf(statfp, "!\tLocal blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", + ReadLocalBufferCount - LocalBufferHitCount, LocalBufferFlushCount, localhitrate); + fprintf(statfp, "!\tDirect blocks: %10ld read, %10ld written\n", + NDirectFileRead, NDirectFileWrite); } void ResetBufferUsage() { - BufferHitCount = 0; - ReadBufferCount = 0; - BufferFlushCount = 0; - LocalBufferHitCount = 0; - ReadLocalBufferCount = 0; - LocalBufferFlushCount = 0; - NDirectFileRead = 0; - NDirectFileWrite = 0; + BufferHitCount = 0; + ReadBufferCount = 0; + BufferFlushCount = 0; + LocalBufferHitCount = 0; + ReadLocalBufferCount = 0; + LocalBufferFlushCount = 0; + NDirectFileRead = 0; + NDirectFileWrite = 0; } /* ---------------------------------------------- - * ResetBufferPool + * ResetBufferPool * - * this routine is supposed to be called when a transaction aborts. - * it will release all the buffer pins held by the transaciton. + * this routine is supposed to be called when a transaction aborts. + * it will release all the buffer pins held by the transaciton. 
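Two reading aids for PrintBufferUsage above. First, the odd-looking "(float) BufferHitCount *100.0" is still an ordinary multiplication; pgindent mistakes the cast for part of a declaration and glues the asterisk to its right operand. Second, a worked example of the statistics it prints, with invented counts: if ReadBufferCount = 10000 and BufferHitCount = 9250, then

    hitrate = (float) 9250 * 100.0 / 10000 = 92.50%

and the "read" column reports ReadBufferCount - BufferHitCount = 750, the blocks that actually went to the storage manager.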
* * ---------------------------------------------- */ void ResetBufferPool() { - register int i; - for (i=1; i<=NBuffers; i++) { - CommitInfoNeedsSave[i - 1] = 0; - if (BufferIsValid(i)) { - while(PrivateRefCount[i - 1] > 0) { - ReleaseBuffer(i); - } + register int i; + + for (i = 1; i <= NBuffers; i++) + { + CommitInfoNeedsSave[i - 1] = 0; + if (BufferIsValid(i)) + { + while (PrivateRefCount[i - 1] > 0) + { + ReleaseBuffer(i); + } + } + LastRefCount[i - 1] = 0; } - LastRefCount[i - 1] = 0; - } - ResetLocalBufferPool(); + ResetLocalBufferPool(); } /* ----------------------------------------------- - * BufferPoolCheckLeak + * BufferPoolCheckLeak * - * check if there is buffer leak + * check if there is buffer leak * * ----------------------------------------------- */ int BufferPoolCheckLeak() { - register int i; - int error = 0; - - for (i = 1; i <= NBuffers; i++) { - if (BufferIsValid(i)) { - elog(NOTICE, - "buffer leak [%d] detected in BufferPoolCheckLeak()", i-1); - error = 1; + register int i; + int error = 0; + + for (i = 1; i <= NBuffers; i++) + { + if (BufferIsValid(i)) + { + elog(NOTICE, + "buffer leak [%d] detected in BufferPoolCheckLeak()", i - 1); + error = 1; + } } - } - if(error) { - PrintBufferDescs(); - return(1); - } - return(0); + if (error) + { + PrintBufferDescs(); + return (1); + } + return (0); } /* ------------------------------------------------ - * FlushBufferPool + * FlushBufferPool * - * flush all dirty blocks in buffer pool to disk + * flush all dirty blocks in buffer pool to disk * * ------------------------------------------------ */ void FlushBufferPool(int StableMainMemoryFlag) { - if (!StableMainMemoryFlag) { - BufferSync(); - smgrcommit(); - } + if (!StableMainMemoryFlag) + { + BufferSync(); + smgrcommit(); + } } /* * BufferIsValid -- - * True iff the refcnt of the local buffer is > 0 + * True iff the refcnt of the local buffer is > 0 * Note: - * BufferIsValid(InvalidBuffer) is False. - * BufferIsValid(UnknownBuffer) is False. + * BufferIsValid(InvalidBuffer) is False. + * BufferIsValid(UnknownBuffer) is False. */ bool BufferIsValid(Buffer bufnum) { - if (BufferIsLocal(bufnum)) - return (bufnum >= -NLocBuffer && LocalRefCount[-bufnum - 1] > 0); - - if (BAD_BUFFER_ID(bufnum)) - return(false); + if (BufferIsLocal(bufnum)) + return (bufnum >= -NLocBuffer && LocalRefCount[-bufnum - 1] > 0); - return ((bool)(PrivateRefCount[bufnum - 1] > 0)); + if (BAD_BUFFER_ID(bufnum)) + return (false); + + return ((bool) (PrivateRefCount[bufnum - 1] > 0)); } /* * BufferGetBlockNumber -- - * Returns the block number associated with a buffer. + * Returns the block number associated with a buffer. * * Note: - * Assumes that the buffer is valid. + * Assumes that the buffer is valid. */ BlockNumber BufferGetBlockNumber(Buffer buffer) { - Assert(BufferIsValid(buffer)); + Assert(BufferIsValid(buffer)); - /* XXX should be a critical section */ - if (BufferIsLocal(buffer)) - return (LocalBufferDescriptors[-buffer-1].tag.blockNum); - else - return (BufferDescriptors[buffer-1].tag.blockNum); + /* XXX should be a critical section */ + if (BufferIsLocal(buffer)) + return (LocalBufferDescriptors[-buffer - 1].tag.blockNum); + else + return (BufferDescriptors[buffer - 1].tag.blockNum); } /* * BufferGetRelation -- - * Returns the relation desciptor associated with a buffer. + * Returns the relation desciptor associated with a buffer. * * Note: - * Assumes buffer is valid. + * Assumes buffer is valid. 
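ResetBufferPool and BufferPoolCheckLeak above are two halves of one invariant: after a transaction ends, no buffer may remain pinned by this backend; abort drops the pins, commit complains about them. A condensed model of the pair, with invented names and a plain array in place of the real bookkeeping:

    #include <stdio.h>

    #define MODEL_NBUFFERS 4

    static int model_private_ref[MODEL_NBUFFERS];   /* PrivateRefCount */

    /* abort path: drop every pin this backend still holds */
    static void model_reset_pool(void)
    {
        for (int i = 0; i < MODEL_NBUFFERS; i++)
            while (model_private_ref[i] > 0)
                model_private_ref[i]--;     /* ReleaseBuffer(i + 1) */
    }

    /* commit path: a still-pinned buffer is a leak */
    static int model_check_leak(void)
    {
        int leaked = 0;

        for (int i = 0; i < MODEL_NBUFFERS; i++)
            if (model_private_ref[i] > 0)
            {
                fprintf(stderr, "buffer leak [%d] detected\n", i);
                leaked = 1;
            }
        return leaked;
    }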
*/ Relation BufferGetRelation(Buffer buffer) { - Relation relation; - Oid relid; - - Assert(BufferIsValid(buffer)); - Assert(!BufferIsLocal(buffer)); /* not supported for local buffers */ - - /* XXX should be a critical section */ - relid = LRelIdGetRelationId(BufferDescriptors[buffer-1].tag.relId); - relation = RelationIdGetRelation(relid); - - RelationDecrementReferenceCount(relation); - - if (RelationHasReferenceCountZero(relation)) { - /* - elog(NOTICE, "BufferGetRelation: 0->1"); - */ - - RelationIncrementReferenceCount(relation); - } - - return (relation); + Relation relation; + Oid relid; + + Assert(BufferIsValid(buffer)); + Assert(!BufferIsLocal(buffer)); /* not supported for local buffers */ + + /* XXX should be a critical section */ + relid = LRelIdGetRelationId(BufferDescriptors[buffer - 1].tag.relId); + relation = RelationIdGetRelation(relid); + + RelationDecrementReferenceCount(relation); + + if (RelationHasReferenceCountZero(relation)) + { + + /* + * elog(NOTICE, "BufferGetRelation: 0->1"); + */ + + RelationIncrementReferenceCount(relation); + } + + return (relation); } /* @@ -1249,217 +1347,232 @@ BufferGetRelation(Buffer buffer) * */ static int -BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld) +BufferReplace(BufferDesc * bufHdr, bool bufferLockHeld) { - Relation reln; - Oid bufdb, bufrel; - int status; - - if (!bufferLockHeld) - SpinAcquire(BufMgrLock); - - /* - * first try to find the reldesc in the cache, if no luck, - * don't bother to build the reldesc from scratch, just do - * a blind write. - */ - - bufdb = bufHdr->tag.relId.dbId; - bufrel = bufHdr->tag.relId.relId; - - if (bufdb == MyDatabaseId || bufdb == (Oid) NULL) - reln = RelationIdCacheGetRelation(bufrel); - else - reln = (Relation) NULL; - - /* To check if block content changed while flushing. - vadim 01/17/97 */ - bufHdr->flags &= ~BM_JUST_DIRTIED; - - SpinRelease(BufMgrLock); - - if (reln != (Relation) NULL) { - status = smgrflush(bufHdr->bufsmgr, reln, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } else { - - /* blind write always flushes */ - status = smgrblindwrt(bufHdr->bufsmgr, bufHdr->sb_dbname, - bufHdr->sb_relname, bufdb, bufrel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - - if (status == SM_FAIL) - return (FALSE); - - BufferFlushCount++; - - return (TRUE); + Relation reln; + Oid bufdb, + bufrel; + int status; + + if (!bufferLockHeld) + SpinAcquire(BufMgrLock); + + /* + * first try to find the reldesc in the cache, if no luck, don't + * bother to build the reldesc from scratch, just do a blind write. + */ + + bufdb = bufHdr->tag.relId.dbId; + bufrel = bufHdr->tag.relId.relId; + + if (bufdb == MyDatabaseId || bufdb == (Oid) NULL) + reln = RelationIdCacheGetRelation(bufrel); + else + reln = (Relation) NULL; + + /* To check if block content changed while flushing. - vadim 01/17/97 */ + bufHdr->flags &= ~BM_JUST_DIRTIED; + + SpinRelease(BufMgrLock); + + if (reln != (Relation) NULL) + { + status = smgrflush(bufHdr->bufsmgr, reln, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } + else + { + + /* blind write always flushes */ + status = smgrblindwrt(bufHdr->bufsmgr, bufHdr->sb_dbname, + bufHdr->sb_relname, bufdb, bufrel, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } + + if (status == SM_FAIL) + return (FALSE); + + BufferFlushCount++; + + return (TRUE); } /* * RelationGetNumberOfBlocks -- - * Returns the buffer descriptor associated with a page in a relation. + * Returns the buffer descriptor associated with a page in a relation. 
* * Note: - * XXX may fail for huge relations. - * XXX should be elsewhere. - * XXX maybe should be hidden + * XXX may fail for huge relations. + * XXX should be elsewhere. + * XXX maybe should be hidden */ BlockNumber RelationGetNumberOfBlocks(Relation relation) { - return - ((relation->rd_islocal) ? relation->rd_nblocks : - smgrnblocks(relation->rd_rel->relsmgr, relation)); + return + ((relation->rd_islocal) ? relation->rd_nblocks : + smgrnblocks(relation->rd_rel->relsmgr, relation)); } /* * BufferGetBlock -- - * Returns a reference to a disk page image associated with a buffer. + * Returns a reference to a disk page image associated with a buffer. * * Note: - * Assumes buffer is valid. + * Assumes buffer is valid. */ Block BufferGetBlock(Buffer buffer) { - Assert(BufferIsValid(buffer)); + Assert(BufferIsValid(buffer)); - if (BufferIsLocal(buffer)) - return((Block)MAKE_PTR(LocalBufferDescriptors[-buffer-1].data)); - else - return((Block)MAKE_PTR(BufferDescriptors[buffer-1].data)); + if (BufferIsLocal(buffer)) + return ((Block) MAKE_PTR(LocalBufferDescriptors[-buffer - 1].data)); + else + return ((Block) MAKE_PTR(BufferDescriptors[buffer - 1].data)); } /* --------------------------------------------------------------------- - * ReleaseRelationBuffers + * ReleaseRelationBuffers * - * this function unmarks all the dirty pages of a relation - * in the buffer pool so that at the end of transaction - * these pages will not be flushed. - * XXX currently it sequentially searches the buffer pool, should be - * changed to more clever ways of searching. + * this function unmarks all the dirty pages of a relation + * in the buffer pool so that at the end of transaction + * these pages will not be flushed. + * XXX currently it sequentially searches the buffer pool, should be + * changed to more clever ways of searching. 
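One convention worth stating once, since BufferGetBlock, BufferGetBlockNumber, and the refcount code all repeat it: positive Buffer values name shared buffers, negative values name backend-local buffers, and zero is invalid. The descriptor index is recovered as follows (a sketch of the arithmetic only; slot is an invented name):

    if (BufferIsLocal(buffer))      /* buffer < 0 */
        slot = -buffer - 1;         /* LocalBufferDescriptors[slot] */
    else                            /* buffer > 0 */
        slot = buffer - 1;          /* BufferDescriptors[slot] */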
* -------------------------------------------------------------------- */ void -ReleaseRelationBuffers (Relation rdesc) +ReleaseRelationBuffers(Relation rdesc) { - register int i; - int holding = 0; - BufferDesc *buf; - - if ( rdesc->rd_islocal ) - { - for (i = 0; i < NLocBuffer; i++) - { - buf = &LocalBufferDescriptors[i]; - if ((buf->flags & BM_DIRTY) && - (buf->tag.relId.relId == rdesc->rd_id)) - { - buf->flags &= ~BM_DIRTY; - } - } - return; - } - - for (i=1; i<=NBuffers; i++) { - buf = &BufferDescriptors[i-1]; - if (!holding) { - SpinAcquire(BufMgrLock); - holding = 1; + register int i; + int holding = 0; + BufferDesc *buf; + + if (rdesc->rd_islocal) + { + for (i = 0; i < NLocBuffer; i++) + { + buf = &LocalBufferDescriptors[i]; + if ((buf->flags & BM_DIRTY) && + (buf->tag.relId.relId == rdesc->rd_id)) + { + buf->flags &= ~BM_DIRTY; + } + } + return; } - if ((buf->flags & BM_DIRTY) && - (buf->tag.relId.dbId == MyDatabaseId) && - (buf->tag.relId.relId == rdesc->rd_id)) { - buf->flags &= ~BM_DIRTY; - if (!(buf->flags & BM_FREE)) { - SpinRelease(BufMgrLock); - holding = 0; - ReleaseBuffer(i); - } + + for (i = 1; i <= NBuffers; i++) + { + buf = &BufferDescriptors[i - 1]; + if (!holding) + { + SpinAcquire(BufMgrLock); + holding = 1; + } + if ((buf->flags & BM_DIRTY) && + (buf->tag.relId.dbId == MyDatabaseId) && + (buf->tag.relId.relId == rdesc->rd_id)) + { + buf->flags &= ~BM_DIRTY; + if (!(buf->flags & BM_FREE)) + { + SpinRelease(BufMgrLock); + holding = 0; + ReleaseBuffer(i); + } + } } - } - if (holding) - SpinRelease(BufMgrLock); + if (holding) + SpinRelease(BufMgrLock); } /* --------------------------------------------------------------------- - * DropBuffers + * DropBuffers * - * This function marks all the buffers in the buffer cache for a - * particular database as clean. This is used when we destroy a - * database, to avoid trying to flush data to disk when the directory - * tree no longer exists. + * This function marks all the buffers in the buffer cache for a + * particular database as clean. This is used when we destroy a + * database, to avoid trying to flush data to disk when the directory + * tree no longer exists. * - * This is an exceedingly non-public interface. + * This is an exceedingly non-public interface. * -------------------------------------------------------------------- */ void DropBuffers(Oid dbid) { - register int i; - BufferDesc *buf; - - SpinAcquire(BufMgrLock); - for (i=1; i<=NBuffers; i++) { - buf = &BufferDescriptors[i-1]; - if ((buf->tag.relId.dbId == dbid) && (buf->flags & BM_DIRTY)) { - buf->flags &= ~BM_DIRTY; - } - } - SpinRelease(BufMgrLock); + register int i; + BufferDesc *buf; + + SpinAcquire(BufMgrLock); + for (i = 1; i <= NBuffers; i++) + { + buf = &BufferDescriptors[i - 1]; + if ((buf->tag.relId.dbId == dbid) && (buf->flags & BM_DIRTY)) + { + buf->flags &= ~BM_DIRTY; + } + } + SpinRelease(BufMgrLock); } /* ----------------------------------------------------------------- - * PrintBufferDescs + * PrintBufferDescs * - * this function prints all the buffer descriptors, for debugging - * use only. + * this function prints all the buffer descriptors, for debugging + * use only. 
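The "holding" flag in ReleaseRelationBuffers below encodes a constraint that is easy to miss in the reindented diff: ReleaseBuffer takes BufMgrLock itself, so the scan must drop the lock before calling it and take it again on the next iteration. The shape of the loop, with the relation test reduced to invented predicates:

    holding = 0;
    for (i = 1; i <= NBuffers; i++)
    {
        if (!holding)
        {
            SpinAcquire(BufMgrLock);
            holding = 1;
        }
        if (is_dirty_page_of_target_rel(i))     /* invented predicate */
        {
            clear_dirty_flag(i);                /* invented helper */
            if (still_pinned(i))                /* !(flags & BM_FREE) */
            {
                SpinRelease(BufMgrLock);
                holding = 0;
                ReleaseBuffer(i);   /* acquires the lock internally */
            }
        }
    }
    if (holding)
        SpinRelease(BufMgrLock);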
* ----------------------------------------------------------------- */ void PrintBufferDescs() { - int i; - BufferDesc *buf = BufferDescriptors; + int i; + BufferDesc *buf = BufferDescriptors; - if (IsUnderPostmaster) { - SpinAcquire(BufMgrLock); - for (i = 0; i < NBuffers; ++i, ++buf) { - elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ + if (IsUnderPostmaster) + { + SpinAcquire(BufMgrLock); + for (i = 0; i < NBuffers; ++i, ++buf) + { + elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ blockNum=%d, flags=0x%x, refcount=%d %d)", - i, buf->freeNext, buf->freePrev, - buf->sb_relname, buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); + i, buf->freeNext, buf->freePrev, + buf->sb_relname, buf->tag.blockNum, buf->flags, + buf->refcount, PrivateRefCount[i]); + } + SpinRelease(BufMgrLock); } - SpinRelease(BufMgrLock); - } else { - /* interactive backend */ - for (i = 0; i < NBuffers; ++i, ++buf) { - printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n", - i, buf->sb_relname, buf->tag.blockNum, - buf->flags, buf->refcount, PrivateRefCount[i]); + else + { + /* interactive backend */ + for (i = 0; i < NBuffers; ++i, ++buf) + { + printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n", + i, buf->sb_relname, buf->tag.blockNum, + buf->flags, buf->refcount, PrivateRefCount[i]); + } } - } } void PrintPinnedBufs() { - int i; - BufferDesc *buf = BufferDescriptors; - - SpinAcquire(BufMgrLock); - for (i = 0; i < NBuffers; ++i, ++buf) { - if (PrivateRefCount[i] > 0) - elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ + int i; + BufferDesc *buf = BufferDescriptors; + + SpinAcquire(BufMgrLock); + for (i = 0; i < NBuffers; ++i, ++buf) + { + if (PrivateRefCount[i] > 0) + elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ blockNum=%d, flags=0x%x, refcount=%d %d)\n", - i, buf->freeNext, buf->freePrev, buf->sb_relname, - buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); - } - SpinRelease(BufMgrLock); + i, buf->freeNext, buf->freePrev, buf->sb_relname, + buf->tag.blockNum, buf->flags, + buf->refcount, PrivateRefCount[i]); + } + SpinRelease(BufMgrLock); } /* @@ -1474,17 +1587,20 @@ blockNum=%d, flags=0x%x, refcount=%d %d)\n", void BufferPoolBlowaway() { - register int i; - - BufferSync(); - for (i=1; i<=NBuffers; i++) { - if (BufferIsValid(i)) { - while(BufferIsValid(i)) - ReleaseBuffer(i); - } - BufTableDelete(&BufferDescriptors[i-1]); - } + register int i; + + BufferSync(); + for (i = 1; i <= NBuffers; i++) + { + if (BufferIsValid(i)) + { + while (BufferIsValid(i)) + ReleaseBuffer(i); + } + BufTableDelete(&BufferDescriptors[i - 1]); + } } + #endif #undef IncrBufferRefCount @@ -1493,297 +1609,328 @@ BufferPoolBlowaway() void IncrBufferRefCount(Buffer buffer) { - if (BufferIsLocal(buffer)) { - Assert(LocalRefCount[-buffer - 1] >= 0); - LocalRefCount[-buffer - 1]++; - } else { - Assert(!BAD_BUFFER_ID(buffer)); - Assert(PrivateRefCount[buffer - 1] >= 0); - PrivateRefCount[buffer - 1]++; - } + if (BufferIsLocal(buffer)) + { + Assert(LocalRefCount[-buffer - 1] >= 0); + LocalRefCount[-buffer - 1]++; + } + else + { + Assert(!BAD_BUFFER_ID(buffer)); + Assert(PrivateRefCount[buffer - 1] >= 0); + PrivateRefCount[buffer - 1]++; + } } /* * ReleaseBuffer -- remove the pin on a buffer without - * marking it dirty. + * marking it dirty. 
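ReleaseBuffer, whose body follows, and PinBuffer over in freelist.c share a two-level pin scheme: PrivateRefCount[b] counts this backend's pins with no locking at all, and the shared refcount in the descriptor is only adjusted, under BufMgrLock, when the private count crosses zero. LastRefCount holds pins carried over from an outer ExecMain level, so both must be zero before the shared count drops. A single-buffer model of the invariant, all names invented:

    static long private_ref = 0;    /* PrivateRefCount[b] */
    static long last_ref    = 0;    /* LastRefCount[b]    */
    static long shared_ref  = 0;    /* bufHdr->refcount   */

    static void model_pin(void)
    {
        if (private_ref == 0 && last_ref == 0)
            shared_ref++;       /* first pin: tell the shared pool */
        private_ref++;          /* cheap, no lock needed */
    }

    static void model_unpin(void)
    {
        private_ref--;
        if (private_ref == 0 && last_ref == 0)
            shared_ref--;       /* last pin gone: shared unpin */
    }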
* */ int ReleaseBuffer(Buffer buffer) { - BufferDesc *bufHdr; - - if (BufferIsLocal(buffer)) { - Assert(LocalRefCount[-buffer - 1] > 0); - LocalRefCount[-buffer - 1]--; - return (STATUS_OK); - } - - if (BAD_BUFFER_ID(buffer)) - return(STATUS_ERROR); - - bufHdr = &BufferDescriptors[buffer-1]; - - Assert(PrivateRefCount[buffer - 1] > 0); - PrivateRefCount[buffer - 1]--; - if (PrivateRefCount[buffer - 1] == 0 && LastRefCount[buffer - 1] == 0) { - /* only release buffer if it is not pinned in previous ExecMain - levels */ - SpinAcquire(BufMgrLock); - bufHdr->refcount--; - if (bufHdr->refcount == 0) { - AddBufferToFreelist(bufHdr); - bufHdr->flags |= BM_FREE; + BufferDesc *bufHdr; + + if (BufferIsLocal(buffer)) + { + Assert(LocalRefCount[-buffer - 1] > 0); + LocalRefCount[-buffer - 1]--; + return (STATUS_OK); } - if(CommitInfoNeedsSave[buffer - 1]) { - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - CommitInfoNeedsSave[buffer - 1] = 0; + + if (BAD_BUFFER_ID(buffer)) + return (STATUS_ERROR); + + bufHdr = &BufferDescriptors[buffer - 1]; + + Assert(PrivateRefCount[buffer - 1] > 0); + PrivateRefCount[buffer - 1]--; + if (PrivateRefCount[buffer - 1] == 0 && LastRefCount[buffer - 1] == 0) + { + + /* + * only release buffer if it is not pinned in previous ExecMain + * levels + */ + SpinAcquire(BufMgrLock); + bufHdr->refcount--; + if (bufHdr->refcount == 0) + { + AddBufferToFreelist(bufHdr); + bufHdr->flags |= BM_FREE; + } + if (CommitInfoNeedsSave[buffer - 1]) + { + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + CommitInfoNeedsSave[buffer - 1] = 0; + } + SpinRelease(BufMgrLock); } - SpinRelease(BufMgrLock); - } - - return(STATUS_OK); + + return (STATUS_OK); } #ifdef NOT_USED void IncrBufferRefCount_Debug(char *file, int line, Buffer buffer) { - IncrBufferRefCount(buffer); - if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - fprintf(stderr, "PIN(Incr) %ld relname = %s, blockNum = %d, \ + IncrBufferRefCount(buffer); + if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + fprintf(stderr, "PIN(Incr) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif #ifdef NOT_USED void ReleaseBuffer_Debug(char *file, int line, Buffer buffer) { - ReleaseBuffer(buffer); - if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - fprintf(stderr, "UNPIN(Rel) %ld relname = %s, blockNum = %d, \ + ReleaseBuffer(buffer); + if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + fprintf(stderr, "UNPIN(Rel) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif #ifdef NOT_USED int ReleaseAndReadBuffer_Debug(char *file, - int line, - Buffer buffer, - Relation relation, - BlockNumber blockNum) + int line, + Buffer buffer, + Relation relation, + BlockNumber blockNum) { - bool bufferValid; - Buffer b; - - bufferValid = BufferIsValid(buffer); - b = ReleaseAndReadBuffer(buffer, relation, blockNum); - if (ShowPinTrace 
&& bufferValid && BufferIsLocal(buffer) - && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[buffer-1]; - - fprintf(stderr, "UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ + bool bufferValid; + Buffer b; + + bufferValid = BufferIsValid(buffer); + b = ReleaseAndReadBuffer(buffer, relation, blockNum); + if (ShowPinTrace && bufferValid && BufferIsLocal(buffer) + && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[buffer - 1]; + + fprintf(stderr, "UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } - if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) { - BufferDesc *buf = &BufferDescriptors[b-1]; - - fprintf(stderr, "PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } + if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) + { + BufferDesc *buf = &BufferDescriptors[b - 1]; + + fprintf(stderr, "PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - b, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[b - 1], file, line); - } - return b; + b, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[b - 1], file, line); + } + return b; } + #endif #ifdef BMTRACE /* - * trace allocations and deallocations in a circular buffer in - * shared memory. check the buffer before doing the allocation, - * and die if there's anything fishy. + * trace allocations and deallocations in a circular buffer in + * shared memory. check the buffer before doing the allocation, + * and die if there's anything fishy. */ _bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType) { - static int mypid = 0; - long start, cur; - bmtrace *tb; - - if (mypid == 0) - mypid = getpid(); - - start = *CurTraceBuf; - - if (start > 0) - cur = start - 1; - else - cur = BMT_LIMIT - 1; - - for (;;) { - tb = &TraceBuf[cur]; - if (tb->bmt_op != BMT_NOTUSED) { - if (tb->bmt_buf == bufNo) { - if ((tb->bmt_op == BMT_DEALLOC) - || (tb->bmt_dbid == dbId && tb->bmt_relid == relId - && tb->bmt_blkno == blkNo)) - goto okay; - - /* die holding the buffer lock */ - _bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur); - } - } - - if (cur == start) - goto okay; - - if (cur == 0) - cur = BMT_LIMIT - 1; + static int mypid = 0; + long start, + cur; + bmtrace *tb; + + if (mypid == 0) + mypid = getpid(); + + start = *CurTraceBuf; + + if (start > 0) + cur = start - 1; else - cur--; - } - - okay: - tb = &TraceBuf[start]; - tb->bmt_pid = mypid; - tb->bmt_buf = bufNo; - tb->bmt_dbid = dbId; - tb->bmt_relid = relId; - tb->bmt_blkno = blkNo; - tb->bmt_op = allocType; - - *CurTraceBuf = (start + 1) % BMT_LIMIT; + cur = BMT_LIMIT - 1; + + for (;;) + { + tb = &TraceBuf[cur]; + if (tb->bmt_op != BMT_NOTUSED) + { + if (tb->bmt_buf == bufNo) + { + if ((tb->bmt_op == BMT_DEALLOC) + || (tb->bmt_dbid == dbId && tb->bmt_relid == relId + && tb->bmt_blkno == blkNo)) + goto okay; + + /* die holding the buffer lock */ + _bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur); + } + } + + if (cur == start) + goto okay; + + if (cur == 0) + cur = BMT_LIMIT - 1; + else + cur--; + } + +okay: + tb = &TraceBuf[start]; + tb->bmt_pid = mypid; + tb->bmt_buf = bufNo; + tb->bmt_dbid = dbId; + tb->bmt_relid = relId; + tb->bmt_blkno = blkNo; + tb->bmt_op = allocType; + + *CurTraceBuf = (start + 1) % BMT_LIMIT; } _bm_die(Oid dbId, Oid relId, int blkNo, int 
bufNo, - int allocType, long start, long cur) + int allocType, long start, long cur) { - FILE *fp; - bmtrace *tb; - int i; - - tb = &TraceBuf[cur]; - - if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL) - elog(FATAL, "buffer alloc trace error and can't open log file"); - - fprintf(fp, "buffer alloc trace detected the following error:\n\n"); - fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n", - bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"), - (tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation")); - - fprintf(fp, "the trace buffer contains:\n"); - - i = start; - for (;;) { - tb = &TraceBuf[i]; - if (tb->bmt_op != BMT_NOTUSED) { - fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%d,%d> ", - i, (i == cur ? " ---> " : "\t"), - tb->bmt_pid, tb->bmt_buf, - tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno); - - switch (tb->bmt_op) { - case BMT_ALLOCFND: + FILE *fp; + bmtrace *tb; + int i; + + tb = &TraceBuf[cur]; + + if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL) + elog(FATAL, "buffer alloc trace error and can't open log file"); + + fprintf(fp, "buffer alloc trace detected the following error:\n\n"); + fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n", + bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"), + (tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation")); + + fprintf(fp, "the trace buffer contains:\n"); + + i = start; + for (;;) + { + tb = &TraceBuf[i]; + if (tb->bmt_op != BMT_NOTUSED) + { + fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%d,%d> ", + i, (i == cur ? " ---> " : "\t"), + tb->bmt_pid, tb->bmt_buf, + tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno); + + switch (tb->bmt_op) + { + case BMT_ALLOCFND: + fprintf(fp, "allocate (found)\n"); + break; + + case BMT_ALLOCNOTFND: + fprintf(fp, "allocate (not found)\n"); + break; + + case BMT_DEALLOC: + fprintf(fp, "deallocate\n"); + break; + + default: + fprintf(fp, "unknown op type %d\n", tb->bmt_op); + break; + } + } + + i = (i + 1) % BMT_LIMIT; + if (i == start) + break; + } + + fprintf(fp, "\noperation causing error:\n"); + fprintf(fp, "\tpid %d buf %d for <%d,%d,%d> ", + getpid(), bufNo, dbId, relId, blkNo); + + switch (allocType) + { + case BMT_ALLOCFND: fprintf(fp, "allocate (found)\n"); break; - - case BMT_ALLOCNOTFND: + + case BMT_ALLOCNOTFND: fprintf(fp, "allocate (not found)\n"); break; - - case BMT_DEALLOC: + + case BMT_DEALLOC: fprintf(fp, "deallocate\n"); break; - - default: - fprintf(fp, "unknown op type %d\n", tb->bmt_op); + + default: + fprintf(fp, "unknown op type %d\n", allocType); break; - } } - - i = (i + 1) % BMT_LIMIT; - if (i == start) - break; - } - - fprintf(fp, "\noperation causing error:\n"); - fprintf(fp, "\tpid %d buf %d for <%d,%d,%d> ", - getpid(), bufNo, dbId, relId, blkNo); - - switch (allocType) { - case BMT_ALLOCFND: - fprintf(fp, "allocate (found)\n"); - break; - - case BMT_ALLOCNOTFND: - fprintf(fp, "allocate (not found)\n"); - break; - - case BMT_DEALLOC: - fprintf(fp, "deallocate\n"); - break; - - default: - fprintf(fp, "unknown op type %d\n", allocType); - break; - } - - FreeFile(fp); - - kill(getpid(), SIGILL); + + FreeFile(fp); + + kill(getpid(), SIGILL); } -#endif /* BMTRACE */ +#endif /* BMTRACE */ void BufferRefCountReset(int *refcountsave) { - int i; - for (i=0; i<NBuffers; i++) { - refcountsave[i] = PrivateRefCount[i]; - LastRefCount[i] += PrivateRefCount[i]; - PrivateRefCount[i] = 0; - } + int i; + + for (i = 0; i < NBuffers; i++) + { + refcountsave[i] = PrivateRefCount[i]; + LastRefCount[i] += 
PrivateRefCount[i]; + PrivateRefCount[i] = 0; + } } void BufferRefCountRestore(int *refcountsave) { - int i; - for (i=0; i<NBuffers; i++) { - PrivateRefCount[i] = refcountsave[i]; - LastRefCount[i] -= refcountsave[i]; - refcountsave[i] = 0; - } + int i; + + for (i = 0; i < NBuffers; i++) + { + PrivateRefCount[i] = refcountsave[i]; + LastRefCount[i] -= refcountsave[i]; + refcountsave[i] = 0; + } } -int SetBufferWriteMode (int mode) +int +SetBufferWriteMode(int mode) { - int old; - - old = WriteMode; - WriteMode = mode; - return (old); + int old; + + old = WriteMode; + WriteMode = mode; + return (old); } -void SetBufferCommitInfoNeedsSave(Buffer buffer) +void +SetBufferCommitInfoNeedsSave(Buffer buffer) { - if ( !BufferIsLocal(buffer) ) - CommitInfoNeedsSave[buffer - 1]++; + if (!BufferIsLocal(buffer)) + CommitInfoNeedsSave[buffer - 1]++; } diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index f4e7bcdc57a..94a8e84b8c6 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * * freelist.c-- - * routines for manipulating the buffer pool's replacement strategy - * freelist. + * routines for manipulating the buffer pool's replacement strategy + * freelist. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.4 1997/08/19 21:32:44 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.5 1997/09/07 04:48:22 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -16,23 +16,23 @@ * OLD COMMENTS * * Data Structures: - * SharedFreeList is a circular queue. Notice that this - * is a shared memory queue so the next/prev "ptrs" are - * buffer ids, not addresses. + * SharedFreeList is a circular queue. Notice that this + * is a shared memory queue so the next/prev "ptrs" are + * buffer ids, not addresses. * * Sync: all routines in this file assume that the buffer - * semaphore has been acquired by the caller. + * semaphore has been acquired by the caller. */ #include <stdio.h> #include "postgres.h" #include "storage/bufmgr.h" -#include "storage/buf_internals.h" /* where declarations go */ +#include "storage/buf_internals.h" /* where declarations go */ #include "storage/spin.h" -static BufferDesc *SharedFreeList; +static BufferDesc *SharedFreeList; /* only actually used in debugging. The lock * should be acquired before calling the freelist manager. @@ -40,40 +40,40 @@ static BufferDesc *SharedFreeList; extern SPINLOCK BufMgrLock; #define IsInQueue(bf) \ - Assert((bf->freeNext != INVALID_DESCRIPTOR));\ - Assert((bf->freePrev != INVALID_DESCRIPTOR));\ - Assert((bf->flags & BM_FREE)) + Assert((bf->freeNext != INVALID_DESCRIPTOR));\ + Assert((bf->freePrev != INVALID_DESCRIPTOR));\ + Assert((bf->flags & BM_FREE)) #define NotInQueue(bf) \ - Assert((bf->freeNext == INVALID_DESCRIPTOR));\ - Assert((bf->freePrev == INVALID_DESCRIPTOR));\ - Assert(! (bf->flags & BM_FREE)) + Assert((bf->freeNext == INVALID_DESCRIPTOR));\ + Assert((bf->freePrev == INVALID_DESCRIPTOR));\ + Assert(! (bf->flags & BM_FREE)) /* - * AddBufferToFreelist -- + * AddBufferToFreelist -- * * In theory, this is the only routine that needs to be changed - * if the buffer replacement strategy changes. Just change + * if the buffer replacement strategy changes. 
Just change * the manner in which buffers are added to the freelist queue. * Currently, they are added on an LRU basis. */ void -AddBufferToFreelist(BufferDesc *bf) +AddBufferToFreelist(BufferDesc * bf) { #ifdef BMTRACE - _bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum, - BufferDescriptorGetBuffer(bf), BMT_DEALLOC); -#endif /* BMTRACE */ - NotInQueue(bf); - - /* change bf so it points to inFrontOfNew and its successor */ - bf->freePrev = SharedFreeList->freePrev; - bf->freeNext = Free_List_Descriptor; - - /* insert new into chain */ - BufferDescriptors[bf->freeNext].freePrev = bf->buf_id; - BufferDescriptors[bf->freePrev].freeNext = bf->buf_id; + _bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum, + BufferDescriptorGetBuffer(bf), BMT_DEALLOC); +#endif /* BMTRACE */ + NotInQueue(bf); + + /* change bf so it points to inFrontOfNew and its successor */ + bf->freePrev = SharedFreeList->freePrev; + bf->freeNext = Free_List_Descriptor; + + /* insert new into chain */ + BufferDescriptors[bf->freeNext].freePrev = bf->buf_id; + BufferDescriptors[bf->freePrev].freeNext = bf->buf_id; } #undef PinBuffer @@ -82,47 +82,52 @@ AddBufferToFreelist(BufferDesc *bf) * PinBuffer -- make buffer unavailable for replacement. */ void -PinBuffer(BufferDesc *buf) +PinBuffer(BufferDesc * buf) { - long b; - - /* Assert (buf->refcount < 25); */ - - if (buf->refcount == 0) { - IsInQueue(buf); - - /* remove from freelist queue */ - BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; - BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; - buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; - - /* mark buffer as no longer free */ - buf->flags &= ~BM_FREE; - } else { - NotInQueue(buf); - } - - b = BufferDescriptorGetBuffer(buf) - 1; - Assert(PrivateRefCount[b] >= 0); - if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) - buf->refcount++; - PrivateRefCount[b]++; + long b; + + /* Assert (buf->refcount < 25); */ + + if (buf->refcount == 0) + { + IsInQueue(buf); + + /* remove from freelist queue */ + BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; + BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; + buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; + + /* mark buffer as no longer free */ + buf->flags &= ~BM_FREE; + } + else + { + NotInQueue(buf); + } + + b = BufferDescriptorGetBuffer(buf) - 1; + Assert(PrivateRefCount[b] >= 0); + if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) + buf->refcount++; + PrivateRefCount[b]++; } #ifdef NOT_USED void -PinBuffer_Debug(char *file, int line, BufferDesc *buf) +PinBuffer_Debug(char *file, int line, BufferDesc * buf) { - PinBuffer(buf); - if (ShowPinTrace) { - Buffer buffer = BufferDescriptorGetBuffer(buf); - - fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \ + PinBuffer(buf); + if (ShowPinTrace) + { + Buffer buffer = BufferDescriptorGetBuffer(buf); + + fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif #undef UnpinBuffer @@ -131,95 +136,102 @@ refcount = %ld, file: %s, line: %d\n", * UnpinBuffer -- make buffer available for replacement. 
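In AddBufferToFreelist and PinBuffer above, freeNext and freePrev are buffer ids rather than pointers because the list lives in shared memory, and the list is circular with Free_List_Descriptor acting as a dummy head. A standalone miniature of the two operations they perform, tail insert (LRU order) and unlink on pin, with invented names and a HEAD sentinel:

    #include <stdio.h>

    #define NSLOTS 4
    #define HEAD   NSLOTS       /* Free_List_Descriptor: the dummy head */

    static int nextf[NSLOTS + 1];
    static int prevf[NSLOTS + 1];

    static void list_init(void)
    {
        nextf[HEAD] = prevf[HEAD] = HEAD;   /* empty circular list */
    }

    /* AddBufferToFreelist: insert at the tail, i.e. LRU order */
    static void add_tail(int b)
    {
        prevf[b] = prevf[HEAD];
        nextf[b] = HEAD;
        prevf[nextf[b]] = b;
        nextf[prevf[b]] = b;
    }

    /* PinBuffer / GetFreeBuffer: unlink from wherever it sits */
    static void unlink_buf(int b)
    {
        prevf[nextf[b]] = prevf[b];
        nextf[prevf[b]] = nextf[b];
    }

    int main(void)
    {
        list_init();
        add_tail(2);
        add_tail(0);
        unlink_buf(2);
        printf("victim candidate: %d\n", nextf[HEAD]);  /* prints 0 */
        return 0;
    }

An empty list is exactly the state GetFreeBuffer tests for: the head's freeNext pointing back at the head means every buffer is pinned.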
*/ void -UnpinBuffer(BufferDesc *buf) +UnpinBuffer(BufferDesc * buf) { - long b = BufferDescriptorGetBuffer(buf) - 1; - - Assert(buf->refcount); - Assert(PrivateRefCount[b] > 0); - PrivateRefCount[b]--; - if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) - buf->refcount--; - NotInQueue(buf); - - if (buf->refcount == 0) { - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; - } else { - /* do nothing */ - } + long b = BufferDescriptorGetBuffer(buf) - 1; + + Assert(buf->refcount); + Assert(PrivateRefCount[b] > 0); + PrivateRefCount[b]--; + if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) + buf->refcount--; + NotInQueue(buf); + + if (buf->refcount == 0) + { + AddBufferToFreelist(buf); + buf->flags |= BM_FREE; + } + else + { + /* do nothing */ + } } #ifdef NOT_USED void -UnpinBuffer_Debug(char *file, int line, BufferDesc *buf) +UnpinBuffer_Debug(char *file, int line, BufferDesc * buf) { - UnpinBuffer(buf); - if (ShowPinTrace) { - Buffer buffer = BufferDescriptorGetBuffer(buf); - - fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \ + UnpinBuffer(buf); + if (ShowPinTrace) + { + Buffer buffer = BufferDescriptorGetBuffer(buf); + + fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \ refcount = %ld, file: %s, line: %d\n", - buffer, buf->sb_relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } + buffer, buf->sb_relname, buf->tag.blockNum, + PrivateRefCount[buffer - 1], file, line); + } } + #endif /* * GetFreeBuffer() -- get the 'next' buffer from the freelist. * */ -BufferDesc * +BufferDesc * GetFreeBuffer() { - BufferDesc *buf; - - if (Free_List_Descriptor == SharedFreeList->freeNext) { - - /* queue is empty. All buffers in the buffer pool are pinned. */ - elog(WARN,"out of free buffers: time to abort !\n"); - return(NULL); - } - buf = &(BufferDescriptors[SharedFreeList->freeNext]); - - /* remove from freelist queue */ - BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; - BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; - buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; - - buf->flags &= ~(BM_FREE); - - return(buf); + BufferDesc *buf; + + if (Free_List_Descriptor == SharedFreeList->freeNext) + { + + /* queue is empty. All buffers in the buffer pool are pinned. */ + elog(WARN, "out of free buffers: time to abort !\n"); + return (NULL); + } + buf = &(BufferDescriptors[SharedFreeList->freeNext]); + + /* remove from freelist queue */ + BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; + BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; + buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; + + buf->flags &= ~(BM_FREE); + + return (buf); } /* * InitFreeList -- initialize the dummy buffer descriptor used - * as a freelist head. + * as a freelist head. * * Assume: All of the buffers are already linked in a circular - * queue. Only called by postmaster and only during - * initialization. + * queue. Only called by postmaster and only during + * initialization. 
*/ void InitFreeList(bool init) { - SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]); - - if (init) { - /* we only do this once, normally the postmaster */ - SharedFreeList->data = INVALID_OFFSET; - SharedFreeList->flags = 0; - SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE); - SharedFreeList->buf_id = Free_List_Descriptor; - - /* insert it into a random spot in the circular queue */ - SharedFreeList->freeNext = BufferDescriptors[0].freeNext; - SharedFreeList->freePrev = 0; - BufferDescriptors[SharedFreeList->freeNext].freePrev = - BufferDescriptors[SharedFreeList->freePrev].freeNext = - Free_List_Descriptor; - } + SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]); + + if (init) + { + /* we only do this once, normally the postmaster */ + SharedFreeList->data = INVALID_OFFSET; + SharedFreeList->flags = 0; + SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE); + SharedFreeList->buf_id = Free_List_Descriptor; + + /* insert it into a random spot in the circular queue */ + SharedFreeList->freeNext = BufferDescriptors[0].freeNext; + SharedFreeList->freePrev = 0; + BufferDescriptors[SharedFreeList->freeNext].freePrev = + BufferDescriptors[SharedFreeList->freePrev].freeNext = + Free_List_Descriptor; + } } @@ -230,67 +242,78 @@ InitFreeList(bool init) void DBG_FreeListCheck(int nfree) { - int i; - BufferDesc *buf; - - buf = &(BufferDescriptors[SharedFreeList->freeNext]); - for (i=0;i<nfree;i++,buf = &(BufferDescriptors[buf->freeNext])) { - - if (! (buf->flags & (BM_FREE))){ - if (buf != SharedFreeList) { - printf("\tfree list corrupted: %d flags %x\n", - buf->buf_id,buf->flags); - } else { - printf("\tfree list corrupted: too short -- %d not %d\n", - i,nfree); - - } - - + int i; + BufferDesc *buf; + + buf = &(BufferDescriptors[SharedFreeList->freeNext]); + for (i = 0; i < nfree; i++, buf = &(BufferDescriptors[buf->freeNext])) + { + + if (!(buf->flags & (BM_FREE))) + { + if (buf != SharedFreeList) + { + printf("\tfree list corrupted: %d flags %x\n", + buf->buf_id, buf->flags); + } + else + { + printf("\tfree list corrupted: too short -- %d not %d\n", + i, nfree); + + } + + + } + if ((BufferDescriptors[buf->freeNext].freePrev != buf->buf_id) || + (BufferDescriptors[buf->freePrev].freeNext != buf->buf_id)) + { + printf("\tfree list links corrupted: %d %ld %ld\n", + buf->buf_id, buf->freePrev, buf->freeNext); + } + } - if ((BufferDescriptors[buf->freeNext].freePrev != buf->buf_id) || - (BufferDescriptors[buf->freePrev].freeNext != buf->buf_id)) { - printf("\tfree list links corrupted: %d %ld %ld\n", - buf->buf_id,buf->freePrev,buf->freeNext); + if (buf != SharedFreeList) + { + printf("\tfree list corrupted: %d-th buffer is %d\n", + nfree, buf->buf_id); + } - - } - if (buf != SharedFreeList) { - printf("\tfree list corrupted: %d-th buffer is %d\n", - nfree,buf->buf_id); - - } } + #endif #ifdef NOT_USED /* * PrintBufferFreeList - - * prints the buffer free list, for debugging + * prints the buffer free list, for debugging */ static void PrintBufferFreeList() { - BufferDesc *buf; - - if (SharedFreeList->freeNext == Free_List_Descriptor) { - printf("free list is empty.\n"); - return; - } - - buf = &(BufferDescriptors[SharedFreeList->freeNext]); - for (;;) { - int i = (buf - BufferDescriptors); - printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld, nxt=%ld prv=%ld)\n", - i, buf->sb_relname, buf->tag.blockNum, - buf->flags, buf->refcount, PrivateRefCount[i], - buf->freeNext, buf->freePrev); - - if (buf->freeNext == Free_List_Descriptor) - break; - - buf = 
&(BufferDescriptors[buf->freeNext]); - } + BufferDesc *buf; + + if (SharedFreeList->freeNext == Free_List_Descriptor) + { + printf("free list is empty.\n"); + return; + } + + buf = &(BufferDescriptors[SharedFreeList->freeNext]); + for (;;) + { + int i = (buf - BufferDescriptors); + + printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld, nxt=%ld prv=%ld)\n", + i, buf->sb_relname, buf->tag.blockNum, + buf->flags, buf->refcount, PrivateRefCount[i], + buf->freeNext, buf->freePrev); + + if (buf->freeNext == Free_List_Descriptor) + break; + + buf = &(BufferDescriptors[buf->freeNext]); + } } #endif diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 910cb668d7a..072830b3dd6 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -1,21 +1,21 @@ /*------------------------------------------------------------------------- * * localbuf.c-- - * local buffer manager. Fast buffer manager for temporary tables - * or special cases when the operation is not visible to other backends. + * local buffer manager. Fast buffer manager for temporary tables + * or special cases when the operation is not visible to other backends. * - * When a relation is being created, the descriptor will have rd_islocal - * set to indicate that the local buffer manager should be used. During - * the same transaction the relation is being created, any inserts or - * selects from the newly created relation will use the local buffer - * pool. rd_islocal is reset at the end of a transaction (commit/abort). - * This is useful for queries like SELECT INTO TABLE and create index. + * When a relation is being created, the descriptor will have rd_islocal + * set to indicate that the local buffer manager should be used. During + * the same transaction the relation is being created, any inserts or + * selects from the newly created relation will use the local buffer + * pool. rd_islocal is reset at the end of a transaction (commit/abort). + * This is useful for queries like SELECT INTO TABLE and create index. * * Copyright (c) 1994-5, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.8 1997/07/28 00:54:48 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.9 1997/09/07 04:48:23 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -45,252 +45,262 @@ #include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/relcache.h" -#include "executor/execdebug.h" /* for NDirectFileRead */ +#include "executor/execdebug.h" /* for NDirectFileRead */ #include "catalog/catalog.h" extern long int LocalBufferFlushCount; -int NLocBuffer = 64; -BufferDesc *LocalBufferDescriptors = NULL; -long *LocalRefCount = NULL; +int NLocBuffer = 64; +BufferDesc *LocalBufferDescriptors = NULL; +long *LocalRefCount = NULL; -static int nextFreeLocalBuf = 0; +static int nextFreeLocalBuf = 0; /*#define LBDEBUG*/ /* * LocalBufferAlloc - - * allocate a local buffer. We do round robin allocation for now. + * allocate a local buffer. We do round robin allocation for now. 
*/ -BufferDesc * -LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) +BufferDesc * +LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool * foundPtr) { - int i; - BufferDesc *bufHdr = (BufferDesc *) NULL; + int i; + BufferDesc *bufHdr = (BufferDesc *) NULL; - if (blockNum == P_NEW) { - blockNum = reln->rd_nblocks; - reln->rd_nblocks++; - } + if (blockNum == P_NEW) + { + blockNum = reln->rd_nblocks; + reln->rd_nblocks++; + } - /* a low tech search for now -- not optimized for scans */ - for (i=0; i < NLocBuffer; i++) { - if (LocalBufferDescriptors[i].tag.relId.relId == reln->rd_id && - LocalBufferDescriptors[i].tag.blockNum == blockNum) { + /* a low tech search for now -- not optimized for scans */ + for (i = 0; i < NLocBuffer; i++) + { + if (LocalBufferDescriptors[i].tag.relId.relId == reln->rd_id && + LocalBufferDescriptors[i].tag.blockNum == blockNum) + { #ifdef LBDEBUG - fprintf(stderr, "LB ALLOC (%d,%d) %d\n", - reln->rd_id, blockNum, -i-1); -#endif - LocalRefCount[i]++; - *foundPtr = TRUE; - return &LocalBufferDescriptors[i]; + fprintf(stderr, "LB ALLOC (%d,%d) %d\n", + reln->rd_id, blockNum, -i - 1); +#endif + LocalRefCount[i]++; + *foundPtr = TRUE; + return &LocalBufferDescriptors[i]; + } } - } #ifdef LBDEBUG - fprintf(stderr, "LB ALLOC (%d,%d) %d\n", - reln->rd_id, blockNum, -nextFreeLocalBuf-1); -#endif - - /* need to get a new buffer (round robin for now) */ - for(i=0; i < NLocBuffer; i++) { - int b = (nextFreeLocalBuf + i) % NLocBuffer; - - if (LocalRefCount[b]==0) { - bufHdr = &LocalBufferDescriptors[b]; - LocalRefCount[b]++; - nextFreeLocalBuf = (b + 1) % NLocBuffer; - break; + fprintf(stderr, "LB ALLOC (%d,%d) %d\n", + reln->rd_id, blockNum, -nextFreeLocalBuf - 1); +#endif + + /* need to get a new buffer (round robin for now) */ + for (i = 0; i < NLocBuffer; i++) + { + int b = (nextFreeLocalBuf + i) % NLocBuffer; + + if (LocalRefCount[b] == 0) + { + bufHdr = &LocalBufferDescriptors[b]; + LocalRefCount[b]++; + nextFreeLocalBuf = (b + 1) % NLocBuffer; + break; + } } - } - if (bufHdr==NULL) - elog(WARN, "no empty local buffer."); - - /* - * this buffer is not referenced but it might still be dirty (the - * last transaction to touch it doesn't need its contents but has - * not flushed it). if that's the case, write it out before - * reusing it! - */ - if (bufHdr->flags & BM_DIRTY) { - Relation bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); + if (bufHdr == NULL) + elog(WARN, "no empty local buffer."); - Assert(bufrel != NULL); - - /* flush this page */ - smgrwrite(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - LocalBufferFlushCount++; - } - - /* - * it's all ours now. - */ - bufHdr->tag.relId.relId = reln->rd_id; - bufHdr->tag.blockNum = blockNum; - bufHdr->flags &= ~BM_DIRTY; - - /* - * lazy memory allocation. (see MAKE_PTR for why we need to do - * MAKE_OFFSET.) - */ - if (bufHdr->data == (SHMEM_OFFSET)0) { - char *data = (char *)malloc(BLCKSZ); - - bufHdr->data = MAKE_OFFSET(data); - } - - *foundPtr = FALSE; - return bufHdr; + /* + * this buffer is not referenced but it might still be dirty (the last + * transaction to touch it doesn't need its contents but has not + * flushed it). if that's the case, write it out before reusing it! 
+ */ + if (bufHdr->flags & BM_DIRTY) + { + Relation bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); + + Assert(bufrel != NULL); + + /* flush this page */ + smgrwrite(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + LocalBufferFlushCount++; + } + + /* + * it's all ours now. + */ + bufHdr->tag.relId.relId = reln->rd_id; + bufHdr->tag.blockNum = blockNum; + bufHdr->flags &= ~BM_DIRTY; + + /* + * lazy memory allocation. (see MAKE_PTR for why we need to do + * MAKE_OFFSET.) + */ + if (bufHdr->data == (SHMEM_OFFSET) 0) + { + char *data = (char *) malloc(BLCKSZ); + + bufHdr->data = MAKE_OFFSET(data); + } + + *foundPtr = FALSE; + return bufHdr; } /* * WriteLocalBuffer - - * writes out a local buffer + * writes out a local buffer */ int WriteLocalBuffer(Buffer buffer, bool release) { - int bufid; + int bufid; - Assert(BufferIsLocal(buffer)); + Assert(BufferIsLocal(buffer)); #ifdef LBDEBUG - fprintf(stderr, "LB WRITE %d\n", buffer); -#endif - - bufid = - (buffer + 1); - LocalBufferDescriptors[bufid].flags |= BM_DIRTY; + fprintf(stderr, "LB WRITE %d\n", buffer); +#endif - if (release) { - Assert(LocalRefCount[bufid] > 0); - LocalRefCount[bufid]--; - } + bufid = -(buffer + 1); + LocalBufferDescriptors[bufid].flags |= BM_DIRTY; + + if (release) + { + Assert(LocalRefCount[bufid] > 0); + LocalRefCount[bufid]--; + } - return true; + return true; } /* * FlushLocalBuffer - - * flushes a local buffer + * flushes a local buffer */ int FlushLocalBuffer(Buffer buffer, bool release) { - int bufid; - Relation bufrel; - BufferDesc *bufHdr; + int bufid; + Relation bufrel; + BufferDesc *bufHdr; - Assert(BufferIsLocal(buffer)); + Assert(BufferIsLocal(buffer)); #ifdef LBDEBUG - fprintf(stderr, "LB FLUSH %d\n", buffer); -#endif - - bufid = - (buffer + 1); - bufHdr = &LocalBufferDescriptors[bufid]; - bufHdr->flags &= ~BM_DIRTY; - bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); - - Assert(bufrel != NULL); - smgrflush(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - LocalBufferFlushCount++; - - Assert(LocalRefCount[bufid] > 0); - if ( release ) - LocalRefCount[bufid]--; - - return true; + fprintf(stderr, "LB FLUSH %d\n", buffer); +#endif + + bufid = -(buffer + 1); + bufHdr = &LocalBufferDescriptors[bufid]; + bufHdr->flags &= ~BM_DIRTY; + bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); + + Assert(bufrel != NULL); + smgrflush(bufrel->rd_rel->relsmgr, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + LocalBufferFlushCount++; + + Assert(LocalRefCount[bufid] > 0); + if (release) + LocalRefCount[bufid]--; + + return true; } /* * InitLocalBuffer - - * init the local buffer cache. Since most queries (esp. multi-user ones) - * don't involve local buffers, we delay allocating memory for actual the - * buffer until we need it. + * init the local buffer cache. Since most queries (esp. multi-user ones) + * don't involve local buffers, we delay allocating memory for actual the + * buffer until we need it. */ void InitLocalBuffer(void) { - int i; - - /* - * these aren't going away. I'm not gonna use palloc. - */ - LocalBufferDescriptors = - (BufferDesc *)malloc(sizeof(BufferDesc) * NLocBuffer); - memset(LocalBufferDescriptors, 0, sizeof(BufferDesc) * NLocBuffer); - nextFreeLocalBuf = 0; - - for (i = 0; i < NLocBuffer; i++) { - BufferDesc *buf = &LocalBufferDescriptors[i]; + int i; /* - * negative to indicate local buffer. This is tricky: shared buffers - * start with 0. 
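LocalBufferAlloc above combines three steps: scan from nextFreeLocalBuf for the first slot with a zero reference count, write the old page out if that slot is dirty, and malloc the block storage only the first time a slot is used. A toy model of that victim-selection-plus-lazy-allocation loop (NSLOTS, slot_data, and the printf standing in for the smgrwrite flush are all illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    #define NSLOTS 4
    #define BLCKSZ 8192

    static int   refcount[NSLOTS];
    static int   dirty[NSLOTS];
    static char *slot_data[NSLOTS];     /* lazily allocated, like bufHdr->data */
    static int   next_free = 0;         /* round-robin cursor */

    static int
    alloc_slot(void)
    {
        int i;

        for (i = 0; i < NSLOTS; i++)
        {
            int b = (next_free + i) % NSLOTS;

            if (refcount[b] == 0)
            {
                if (dirty[b])
                {
                    printf("flushing old page in slot %d before reuse\n", b);
                    dirty[b] = 0;
                }
                if (slot_data[b] == NULL)       /* lazy allocation */
                    slot_data[b] = malloc(BLCKSZ);
                refcount[b] = 1;
                next_free = (b + 1) % NSLOTS;
                return b;
            }
        }
        return -1;          /* every slot pinned: "no empty local buffer" */
    }

    int
    main(void)
    {
        int a = alloc_slot();
        int b = alloc_slot();

        printf("got slots %d and %d\n", a, b);
        return 0;
    }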
We have to start with -2. (Note that the routine - * BufferDescriptorGetBuffer adds 1 to buf_id so our first buffer id - * is -1.) + * these aren't going away. I'm not gonna use palloc. */ - buf->buf_id = - i - 2; - } + LocalBufferDescriptors = + (BufferDesc *) malloc(sizeof(BufferDesc) * NLocBuffer); + memset(LocalBufferDescriptors, 0, sizeof(BufferDesc) * NLocBuffer); + nextFreeLocalBuf = 0; + + for (i = 0; i < NLocBuffer; i++) + { + BufferDesc *buf = &LocalBufferDescriptors[i]; + + /* + * negative to indicate local buffer. This is tricky: shared + * buffers start with 0. We have to start with -2. (Note that the + * routine BufferDescriptorGetBuffer adds 1 to buf_id so our first + * buffer id is -1.) + */ + buf->buf_id = -i - 2; + } - LocalRefCount = - (long *)malloc(sizeof(long) * NLocBuffer); - memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); + LocalRefCount = + (long *) malloc(sizeof(long) * NLocBuffer); + memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); } /* * LocalBufferSync - - * flush all dirty buffers in the local buffer cache. Since the buffer - * cache is only used for keeping relations visible during a transaction, - * we will not need these buffers again. + * flush all dirty buffers in the local buffer cache. Since the buffer + * cache is only used for keeping relations visible during a transaction, + * we will not need these buffers again. */ void LocalBufferSync(void) { - int i; - - for (i = 0; i < NLocBuffer; i++) { - BufferDesc *buf = &LocalBufferDescriptors[i]; - Relation bufrel; + int i; + + for (i = 0; i < NLocBuffer; i++) + { + BufferDesc *buf = &LocalBufferDescriptors[i]; + Relation bufrel; - if (buf->flags & BM_DIRTY) { + if (buf->flags & BM_DIRTY) + { #ifdef LBDEBUG - fprintf(stderr, "LB SYNC %d\n", -i-1); -#endif - bufrel = RelationIdCacheGetRelation(buf->tag.relId.relId); - - Assert(bufrel != NULL); - - smgrwrite(bufrel->rd_rel->relsmgr, bufrel, buf->tag.blockNum, - (char *) MAKE_PTR(buf->data)); - LocalBufferFlushCount++; - - buf->tag.relId.relId = InvalidOid; - buf->flags &= ~BM_DIRTY; + fprintf(stderr, "LB SYNC %d\n", -i - 1); +#endif + bufrel = RelationIdCacheGetRelation(buf->tag.relId.relId); + + Assert(bufrel != NULL); + + smgrwrite(bufrel->rd_rel->relsmgr, bufrel, buf->tag.blockNum, + (char *) MAKE_PTR(buf->data)); + LocalBufferFlushCount++; + + buf->tag.relId.relId = InvalidOid; + buf->flags &= ~BM_DIRTY; + } } - } - memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); - nextFreeLocalBuf = 0; + memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); + nextFreeLocalBuf = 0; } void ResetLocalBufferPool(void) { - int i; + int i; - for (i = 0; i < NLocBuffer; i++) - { - BufferDesc *buf = &LocalBufferDescriptors[i]; + for (i = 0; i < NLocBuffer; i++) + { + BufferDesc *buf = &LocalBufferDescriptors[i]; - buf->tag.relId.relId = InvalidOid; - buf->flags &= ~BM_DIRTY; - buf->buf_id = - i - 2; - } + buf->tag.relId.relId = InvalidOid; + buf->flags &= ~BM_DIRTY; + buf->buf_id = -i - 2; + } - memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); - nextFreeLocalBuf = 0; + memset(LocalRefCount, 0, sizeof(long) * NLocBuffer); + nextFreeLocalBuf = 0; } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 03605332344..2e998f27196 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -1,12 +1,12 @@ /*------------------------------------------------------------------------- * * fd.c-- - * Virtual file descriptor code. + * Virtual file descriptor code. 
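The buf_id = -i - 2 initialization above only makes sense together with BufferDescriptorGetBuffer, which adds 1, and WriteLocalBuffer/FlushLocalBuffer, which invert the result with bufid = -(buffer + 1). A few lines verifying that the mapping round-trips (the helper functions are stand-ins for the real macros):

    #include <assert.h>
    #include <stdio.h>

    /* array index i -> descriptor id, as in InitLocalBuffer */
    static int desc_id(int i)        { return -i - 2; }
    /* BufferDescriptorGetBuffer adds 1 to buf_id */
    static int buffer_of(int bufid)  { return bufid + 1; }
    /* WriteLocalBuffer recovers the array index with -(buffer + 1) */
    static int index_of(int buffer)  { return -(buffer + 1); }

    int
    main(void)
    {
        int i;

        for (i = 0; i < 64; i++)
        {
            int buffer = buffer_of(desc_id(i));

            assert(buffer < 0);          /* negative marks a local buffer */
            assert(index_of(buffer) == i);
        }
        printf("first local buffer number is %d\n", buffer_of(desc_id(0)));
        return 0;
    }

The assert that the number is negative is the whole point: BufferIsLocal can route a Buffer to the local or shared manager from the sign alone.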
* * Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Id: fd.c,v 1.22 1997/08/19 21:32:48 momjian Exp $ + * $Id: fd.c,v 1.23 1997/09/07 04:48:25 momjian Exp $ * * NOTES: * @@ -48,7 +48,7 @@ #include <fcntl.h> #include "postgres.h" -#include "miscadmin.h" /* for DataDir */ +#include "miscadmin.h" /* for DataDir */ #include "utils/palloc.h" #include "storage/fd.h" @@ -67,8 +67,8 @@ * dynamic loading. Keep this here.) */ #ifndef RESERVE_FOR_LD -#define RESERVE_FOR_LD 10 -#endif +#define RESERVE_FOR_LD 10 +#endif /* * We need to ensure that we have at least some file descriptors @@ -85,9 +85,9 @@ /* Debugging.... */ #ifdef FDDEBUG -# define DO_DB(A) A +#define DO_DB(A) A #else -# define DO_DB(A) /* A */ +#define DO_DB(A) /* A */ #endif #define VFD_CLOSED -1 @@ -97,474 +97,501 @@ #define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED) -typedef struct vfd { - signed short fd; - unsigned short fdstate; +typedef struct vfd +{ + signed short fd; + unsigned short fdstate; -#define FD_DIRTY (1 << 0) +#define FD_DIRTY (1 << 0) - File nextFree; - File lruMoreRecently; - File lruLessRecently; - long seekPos; - char *fileName; - int fileFlags; - int fileMode; -} Vfd; + File nextFree; + File lruMoreRecently; + File lruLessRecently; + long seekPos; + char *fileName; + int fileFlags; + int fileMode; +} Vfd; /* - * Virtual File Descriptor array pointer and size. This grows as + * Virtual File Descriptor array pointer and size. This grows as * needed. */ -static Vfd *VfdCache; -static Size SizeVfdCache = 0; +static Vfd *VfdCache; +static Size SizeVfdCache = 0; /* * Number of file descriptors known to be open. */ -static int nfile = 0; +static int nfile = 0; -static char Sep_char = '/'; +static char Sep_char = '/'; /* * Private Routines * - * Delete - delete a file from the Lru ring - * LruDelete - remove a file from the Lru ring and close - * Insert - put a file at the front of the Lru ring - * LruInsert - put a file at the front of the Lru ring and open + * Delete - delete a file from the Lru ring + * LruDelete - remove a file from the Lru ring and close + * Insert - put a file at the front of the Lru ring + * LruInsert - put a file at the front of the Lru ring and open * AssertLruRoom - make sure that there is a free fd. * * the Last Recently Used ring is a doubly linked list that begins and * ends on element zero. Element zero is special -- it doesn't represent - * a file and its "fd" field always == VFD_CLOSED. Element zero is just an + * a file and its "fd" field always == VFD_CLOSED. Element zero is just an * anchor that shows us the beginning/end of the ring. 
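The ring described above keeps element 0 as a permanent anchor whose neighbours are the most and least recently used files. A minimal sketch of the Insert/Delete pair working against two index arrays, with a move-to-front "touch" as FileAccess performs it (array names are illustrative):

    #include <stdio.h>

    #define NSLOTS 4                /* slot 0 is the anchor, never a real file */

    static int more[NSLOTS];        /* lruMoreRecently links */
    static int less[NSLOTS];        /* lruLessRecently links */

    static void
    ring_delete(int f)              /* like Delete(): unlink from the ring */
    {
        more[less[f]] = more[f];
        less[more[f]] = less[f];
    }

    static void
    ring_insert(int f)              /* like Insert(): new most recently used */
    {
        more[f] = 0;
        less[f] = less[0];
        less[0] = f;
        more[less[f]] = f;
    }

    static void
    dump(void)
    {
        int f;

        printf("MRU ->");
        for (f = less[0]; f != 0; f = less[f])
            printf(" %d", f);
        printf(" <- LRU\n");
    }

    int
    main(void)
    {
        int f;

        more[0] = less[0] = 0;      /* empty ring: anchor points at itself */
        for (f = 1; f < NSLOTS; f++)
            ring_insert(f);
        dump();                     /* MRU -> 3 2 1 <- LRU */
        ring_delete(2);             /* touch slot 2: move it to the front */
        ring_insert(2);
        dump();                     /* MRU -> 2 3 1 <- LRU */
        return 0;
    }

Using the anchor as both ends of the list removes every NULL check: an empty ring is simply element 0 linked to itself.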
* * example: * - * /--less----\ /---------\ - * v \ v \ - * #0 --more---> LeastRecentlyUsed --more-\ \ - * ^\ | | - * \\less--> MostRecentlyUsedFile <---/ | - * \more---/ \--less--/ + * /--less----\ /---------\ + * v \ v \ + * #0 --more---> LeastRecentlyUsed --more-\ \ + * ^\ | | + * \\less--> MostRecentlyUsedFile <---/ | + * \more---/ \--less--/ * - * AllocateVfd - grab a free (or new) file record (from VfdArray) - * FreeVfd - free a file record + * AllocateVfd - grab a free (or new) file record (from VfdArray) + * FreeVfd - free a file record * */ -static void Delete(File file); -static void LruDelete(File file); -static void Insert(File file); -static int LruInsert (File file); -static void AssertLruRoom(void); -static File AllocateVfd(void); -static void FreeVfd(File file); - -static int FileAccess(File file); -static File fileNameOpenFile(FileName fileName, int fileFlags, int fileMode); -static char *filepath(char *filename); -static long pg_nofile(void); +static void Delete(File file); +static void LruDelete(File file); +static void Insert(File file); +static int LruInsert(File file); +static void AssertLruRoom(void); +static File AllocateVfd(void); +static void FreeVfd(File file); + +static int FileAccess(File file); +static File fileNameOpenFile(FileName fileName, int fileFlags, int fileMode); +static char *filepath(char *filename); +static long pg_nofile(void); int pg_fsync(int fd) { - extern int fsyncOff; - return fsyncOff ? 0 : fsync(fd); + extern int fsyncOff; + + return fsyncOff ? 0 : fsync(fd); } + #define fsync pg_fsync long pg_nofile(void) { - static long no_files = 0; + static long no_files = 0; - if (no_files == 0) { -#ifndef HAVE_SYSCONF - no_files = (long)NOFILE; + if (no_files == 0) + { +#ifndef HAVE_SYSCONF + no_files = (long) NOFILE; #else - no_files = sysconf(_SC_OPEN_MAX); - if (no_files == -1) { - elog(DEBUG,"pg_nofile: Unable to get _SC_OPEN_MAX using sysconf() using (%d)", NOFILE); - no_files = (long)NOFILE; + no_files = sysconf(_SC_OPEN_MAX); + if (no_files == -1) + { + elog(DEBUG, "pg_nofile: Unable to get _SC_OPEN_MAX using sysconf() using (%d)", NOFILE); + no_files = (long) NOFILE; } -#endif - } +#endif + } if ((no_files - RESERVE_FOR_LD) < FD_MINFREE) - elog(FATAL,"pg_nofile: insufficient File Descriptors in postmaster to start backend (%ld).\n" - " O/S allows %ld, Postmaster reserves %d, We need %d (MIN) after that.", - no_files - RESERVE_FOR_LD, no_files, RESERVE_FOR_LD, FD_MINFREE); - return no_files - RESERVE_FOR_LD; + elog(FATAL, "pg_nofile: insufficient File Descriptors in postmaster to start backend (%ld).\n" + " O/S allows %ld, Postmaster reserves %d, We need %d (MIN) after that.", + no_files - RESERVE_FOR_LD, no_files, RESERVE_FOR_LD, FD_MINFREE); + return no_files - RESERVE_FOR_LD; } #if defined(FDDEBUG) static void _dump_lru() { - int mru = VfdCache[0].lruLessRecently; - Vfd *vfdP = &VfdCache[mru]; - char buf[2048]; - - sprintf(buf, "LRU: MOST %d ", mru); - while (mru != 0) - { - mru = vfdP->lruLessRecently; - vfdP = &VfdCache[mru]; - sprintf (buf + strlen(buf), "%d ", mru); - } - sprintf(buf + strlen(buf), "LEAST"); - elog (DEBUG, buf); + int mru = VfdCache[0].lruLessRecently; + Vfd *vfdP = &VfdCache[mru]; + char buf[2048]; + + sprintf(buf, "LRU: MOST %d ", mru); + while (mru != 0) + { + mru = vfdP->lruLessRecently; + vfdP = &VfdCache[mru]; + sprintf(buf + strlen(buf), "%d ", mru); + } + sprintf(buf + strlen(buf), "LEAST"); + elog(DEBUG, buf); } -#endif /* FDDEBUG */ + +#endif /* FDDEBUG */ static void Delete(File file) { - Vfd *fileP; - - 
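pg_nofile above derives the per-backend descriptor budget once and caches it: query _SC_OPEN_MAX where sysconf is available, fall back to a compile-time constant otherwise, then hold back RESERVE_FOR_LD descriptors. A sketch of that calculation (FALLBACK_NOFILE stands in for the platform NOFILE constant):

    #include <stdio.h>
    #include <unistd.h>

    #define RESERVE_FOR_LD  10      /* kept back for dynamic loading */
    #define FALLBACK_NOFILE 64      /* stand-in for the NOFILE constant */

    static long
    usable_fds(void)
    {
        long no_files = sysconf(_SC_OPEN_MAX);

        if (no_files == -1)         /* sysconf couldn't say; fall back */
            no_files = FALLBACK_NOFILE;
        return no_files - RESERVE_FOR_LD;
    }

    int
    main(void)
    {
        printf("backend may use up to %ld file descriptors\n", usable_fds());
        return 0;
    }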
DO_DB(elog (DEBUG, "Delete %d (%s)", - file, VfdCache[file].fileName)); - DO_DB(_dump_lru()); - - Assert(file != 0); - - fileP = &VfdCache[file]; - - VfdCache[fileP->lruLessRecently].lruMoreRecently = - VfdCache[file].lruMoreRecently; - VfdCache[fileP->lruMoreRecently].lruLessRecently = - VfdCache[file].lruLessRecently; - - DO_DB(_dump_lru()); + Vfd *fileP; + + DO_DB(elog(DEBUG, "Delete %d (%s)", + file, VfdCache[file].fileName)); + DO_DB(_dump_lru()); + + Assert(file != 0); + + fileP = &VfdCache[file]; + + VfdCache[fileP->lruLessRecently].lruMoreRecently = + VfdCache[file].lruMoreRecently; + VfdCache[fileP->lruMoreRecently].lruLessRecently = + VfdCache[file].lruLessRecently; + + DO_DB(_dump_lru()); } static void LruDelete(File file) { - Vfd *fileP; - int returnValue; - - DO_DB(elog (DEBUG, "LruDelete %d (%s)", - file, VfdCache[file].fileName)); - - Assert(file != 0); - - fileP = &VfdCache[file]; - - /* delete the vfd record from the LRU ring */ - Delete(file); - - /* save the seek position */ - fileP->seekPos = (long) lseek(fileP->fd, 0L, SEEK_CUR); - Assert( fileP->seekPos != -1); - - /* if we have written to the file, sync it */ - if (fileP->fdstate & FD_DIRTY) { - returnValue = fsync(fileP->fd); - Assert(returnValue != -1); - fileP->fdstate &= ~FD_DIRTY; - } - - /* close the file */ - returnValue = close(fileP->fd); - Assert(returnValue != -1); - - --nfile; - fileP->fd = VFD_CLOSED; + Vfd *fileP; + int returnValue; + + DO_DB(elog(DEBUG, "LruDelete %d (%s)", + file, VfdCache[file].fileName)); + + Assert(file != 0); + + fileP = &VfdCache[file]; + + /* delete the vfd record from the LRU ring */ + Delete(file); + + /* save the seek position */ + fileP->seekPos = (long) lseek(fileP->fd, 0L, SEEK_CUR); + Assert(fileP->seekPos != -1); + + /* if we have written to the file, sync it */ + if (fileP->fdstate & FD_DIRTY) + { + returnValue = fsync(fileP->fd); + Assert(returnValue != -1); + fileP->fdstate &= ~FD_DIRTY; + } + + /* close the file */ + returnValue = close(fileP->fd); + Assert(returnValue != -1); + + --nfile; + fileP->fd = VFD_CLOSED; } static void Insert(File file) { - Vfd *vfdP; - - DO_DB(elog(DEBUG, "Insert %d (%s)", - file, VfdCache[file].fileName)); - DO_DB(_dump_lru()); - - vfdP = &VfdCache[file]; - - vfdP->lruMoreRecently = 0; - vfdP->lruLessRecently = VfdCache[0].lruLessRecently; - VfdCache[0].lruLessRecently = file; - VfdCache[vfdP->lruLessRecently].lruMoreRecently = file; - - DO_DB(_dump_lru()); + Vfd *vfdP; + + DO_DB(elog(DEBUG, "Insert %d (%s)", + file, VfdCache[file].fileName)); + DO_DB(_dump_lru()); + + vfdP = &VfdCache[file]; + + vfdP->lruMoreRecently = 0; + vfdP->lruLessRecently = VfdCache[0].lruLessRecently; + VfdCache[0].lruLessRecently = file; + VfdCache[vfdP->lruLessRecently].lruMoreRecently = file; + + DO_DB(_dump_lru()); } static int -LruInsert (File file) +LruInsert(File file) { - Vfd *vfdP; - int returnValue; - - DO_DB(elog(DEBUG, "LruInsert %d (%s)", - file, VfdCache[file].fileName)); - - vfdP = &VfdCache[file]; - - if (FileIsNotOpen(file)) { - - if ( nfile >= pg_nofile() ) - AssertLruRoom(); - - /* - * Note, we check to see if there's a free file descriptor - * before attempting to open a file. One general way to do - * this is to try to open the null device which everybody - * should be able to open all the time. If this fails, we - * assume this is because there's no free file descriptors. 
- */ - tryAgain: - vfdP->fd = open(vfdP->fileName,vfdP->fileFlags,vfdP->fileMode); - if (vfdP->fd < 0 && (errno == EMFILE || errno == ENFILE)) { - errno = 0; - AssertLruRoom(); - goto tryAgain; - } - - if (vfdP->fd < 0) { - DO_DB(elog(DEBUG, "RE_OPEN FAILED: %d", - errno)); - return (vfdP->fd); - } else { - DO_DB(elog (DEBUG, "RE_OPEN SUCCESS")); - ++nfile; - } - - /* seek to the right position */ - if (vfdP->seekPos != 0L) { - returnValue = - lseek(vfdP->fd, vfdP->seekPos, SEEK_SET); - Assert(returnValue != -1); - } - - /* init state on open */ - vfdP->fdstate = 0x0; - - } - - /* - * put it at the head of the Lru ring - */ - - Insert(file); - - return (0); + Vfd *vfdP; + int returnValue; + + DO_DB(elog(DEBUG, "LruInsert %d (%s)", + file, VfdCache[file].fileName)); + + vfdP = &VfdCache[file]; + + if (FileIsNotOpen(file)) + { + + if (nfile >= pg_nofile()) + AssertLruRoom(); + + /* + * Note, we check to see if there's a free file descriptor before + * attempting to open a file. One general way to do this is to try + * to open the null device which everybody should be able to open + * all the time. If this fails, we assume this is because there's + * no free file descriptors. + */ +tryAgain: + vfdP->fd = open(vfdP->fileName, vfdP->fileFlags, vfdP->fileMode); + if (vfdP->fd < 0 && (errno == EMFILE || errno == ENFILE)) + { + errno = 0; + AssertLruRoom(); + goto tryAgain; + } + + if (vfdP->fd < 0) + { + DO_DB(elog(DEBUG, "RE_OPEN FAILED: %d", + errno)); + return (vfdP->fd); + } + else + { + DO_DB(elog(DEBUG, "RE_OPEN SUCCESS")); + ++nfile; + } + + /* seek to the right position */ + if (vfdP->seekPos != 0L) + { + returnValue = + lseek(vfdP->fd, vfdP->seekPos, SEEK_SET); + Assert(returnValue != -1); + } + + /* init state on open */ + vfdP->fdstate = 0x0; + + } + + /* + * put it at the head of the Lru ring + */ + + Insert(file); + + return (0); } static void AssertLruRoom() { - DO_DB(elog(DEBUG, "AssertLruRoom. Opened %d", nfile)); - - if ( nfile <= 0 ) - elog (FATAL, "AssertLruRoom: No opened files - no one can be closed"); - /* - * There are opened files and so there should be at least one used vfd - * in the ring. - */ - Assert(VfdCache[0].lruMoreRecently != 0); - LruDelete(VfdCache[0].lruMoreRecently); + DO_DB(elog(DEBUG, "AssertLruRoom. Opened %d", nfile)); + + if (nfile <= 0) + elog(FATAL, "AssertLruRoom: No opened files - no one can be closed"); + + /* + * There are opened files and so there should be at least one used vfd + * in the ring. + */ + Assert(VfdCache[0].lruMoreRecently != 0); + LruDelete(VfdCache[0].lruMoreRecently); } -static File +static File AllocateVfd() { - Index i; - File file; - - DO_DB(elog(DEBUG, "AllocateVfd. 
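Both LruInsert here and fileNameOpenFile below use the same recovery loop: when open() fails with EMFILE or ENFILE, close the least recently used virtual fd and retry. A sketch of the pattern with a stubbed make_room() standing in for AssertLruRoom (which, in the real code, elogs FATAL when there is nothing left to close):

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>

    /* stand-in for AssertLruRoom(): in fd.c this closes the LRU virtual fd */
    static int
    make_room(void)
    {
        fprintf(stderr, "would close least recently used file here\n");
        return -1;                  /* nothing to close in this toy version */
    }

    static int
    open_with_retry(const char *name, int flags, int mode)
    {
        int fd;

        for (;;)
        {
            fd = open(name, flags, mode);
            if (fd >= 0 || (errno != EMFILE && errno != ENFILE))
                return fd;
            errno = 0;
            if (make_room() < 0)    /* out of both fds and victims */
                return -1;
        }
    }

    int
    main(void)
    {
        int fd = open_with_retry("/tmp/retry-demo", O_RDWR | O_CREAT, 0600);

        printf("fd = %d\n", fd);
        return 0;
    }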
Size %d", SizeVfdCache)); - - if (SizeVfdCache == 0) { - - /* initialize */ - VfdCache = (Vfd *)malloc(sizeof(Vfd)); - VfdCache->nextFree = 0; - VfdCache->lruMoreRecently = 0; - VfdCache->lruLessRecently = 0; - VfdCache->fd = VFD_CLOSED; - VfdCache->fdstate = 0x0; - - SizeVfdCache = 1; - } - - if (VfdCache[0].nextFree == 0) - { - /* - * The free list is empty so it is time to increase the - * size of the array - */ - - VfdCache =(Vfd *)realloc(VfdCache, sizeof(Vfd)*SizeVfdCache*2); - Assert(VfdCache != NULL); - - /* - * Set up the free list for the new entries - */ - - for (i = SizeVfdCache; i < 2*SizeVfdCache; i++) { - memset((char *) &(VfdCache[i]), 0, sizeof(VfdCache[0])); - VfdCache[i].nextFree = i+1; - VfdCache[i].fd = VFD_CLOSED; - } - - /* - * Element 0 is the first and last element of the free - * list - */ - - VfdCache[0].nextFree = SizeVfdCache; - VfdCache[2*SizeVfdCache-1].nextFree = 0; - - /* - * Record the new size - */ - - SizeVfdCache *= 2; - } - file = VfdCache[0].nextFree; - - VfdCache[0].nextFree = VfdCache[file].nextFree; - - return file; + Index i; + File file; + + DO_DB(elog(DEBUG, "AllocateVfd. Size %d", SizeVfdCache)); + + if (SizeVfdCache == 0) + { + + /* initialize */ + VfdCache = (Vfd *) malloc(sizeof(Vfd)); + VfdCache->nextFree = 0; + VfdCache->lruMoreRecently = 0; + VfdCache->lruLessRecently = 0; + VfdCache->fd = VFD_CLOSED; + VfdCache->fdstate = 0x0; + + SizeVfdCache = 1; + } + + if (VfdCache[0].nextFree == 0) + { + + /* + * The free list is empty so it is time to increase the size of + * the array + */ + + VfdCache = (Vfd *) realloc(VfdCache, sizeof(Vfd) * SizeVfdCache * 2); + Assert(VfdCache != NULL); + + /* + * Set up the free list for the new entries + */ + + for (i = SizeVfdCache; i < 2 * SizeVfdCache; i++) + { + memset((char *) &(VfdCache[i]), 0, sizeof(VfdCache[0])); + VfdCache[i].nextFree = i + 1; + VfdCache[i].fd = VFD_CLOSED; + } + + /* + * Element 0 is the first and last element of the free list + */ + + VfdCache[0].nextFree = SizeVfdCache; + VfdCache[2 * SizeVfdCache - 1].nextFree = 0; + + /* + * Record the new size + */ + + SizeVfdCache *= 2; + } + file = VfdCache[0].nextFree; + + VfdCache[0].nextFree = VfdCache[file].nextFree; + + return file; } static void FreeVfd(File file) { - DO_DB(elog(DEBUG, "FreeVfd: %d (%s)", - file, VfdCache[file].fileName)); - - VfdCache[file].nextFree = VfdCache[0].nextFree; - VfdCache[0].nextFree = file; + DO_DB(elog(DEBUG, "FreeVfd: %d (%s)", + file, VfdCache[file].fileName)); + + VfdCache[file].nextFree = VfdCache[0].nextFree; + VfdCache[0].nextFree = file; } -static char * +static char * filepath(char *filename) { - char *buf; - char basename[16]; - int len; - - if (*filename != Sep_char) { - /* Either /base/ or \base\ */ - sprintf(basename, "%cbase%c", Sep_char, Sep_char); - - len = strlen(DataDir) + strlen(basename) + strlen(GetDatabaseName()) - + strlen(filename) + 2; - buf = (char*) palloc(len); - sprintf(buf, "%s%s%s%c%s", - DataDir, basename, GetDatabaseName(), Sep_char, filename); - } else { - buf = (char *) palloc(strlen(filename) + 1); - strcpy(buf, filename); - } - - return(buf); + char *buf; + char basename[16]; + int len; + + if (*filename != Sep_char) + { + /* Either /base/ or \base\ */ + sprintf(basename, "%cbase%c", Sep_char, Sep_char); + + len = strlen(DataDir) + strlen(basename) + strlen(GetDatabaseName()) + + strlen(filename) + 2; + buf = (char *) palloc(len); + sprintf(buf, "%s%s%s%c%s", + DataDir, basename, GetDatabaseName(), Sep_char, filename); + } + else + { + buf = (char *) 
palloc(strlen(filename) + 1); + strcpy(buf, filename); + } + + return (buf); } static int FileAccess(File file) { - int returnValue; - - DO_DB(elog(DEBUG, "FileAccess %d (%s)", - file, VfdCache[file].fileName)); - - /* - * Is the file open? If not, close the least recently used, - * then open it and stick it at the head of the used ring - */ - - if (FileIsNotOpen(file)) { - - returnValue = LruInsert(file); - if (returnValue != 0) - return returnValue; - - } else { - - /* - * We now know that the file is open and that it is not the - * last one accessed, so we need to more it to the head of - * the Lru ring. - */ - - Delete(file); - Insert(file); - } - - return (0); + int returnValue; + + DO_DB(elog(DEBUG, "FileAccess %d (%s)", + file, VfdCache[file].fileName)); + + /* + * Is the file open? If not, close the least recently used, then open + * it and stick it at the head of the used ring + */ + + if (FileIsNotOpen(file)) + { + + returnValue = LruInsert(file); + if (returnValue != 0) + return returnValue; + + } + else + { + + /* + * We now know that the file is open and that it is not the last + * one accessed, so we need to more it to the head of the Lru + * ring. + */ + + Delete(file); + Insert(file); + } + + return (0); } /* - * Called when we get a shared invalidation message on some relation. + * Called when we get a shared invalidation message on some relation. */ #ifdef NOT_USED void FileInvalidate(File file) { - Assert(file > 0); - if (!FileIsNotOpen(file)) { - LruDelete(file); - } + Assert(file > 0); + if (!FileIsNotOpen(file)) + { + LruDelete(file); + } } + #endif /* VARARGS2 */ -static File +static File fileNameOpenFile(FileName fileName, - int fileFlags, - int fileMode) + int fileFlags, + int fileMode) { - File file; - Vfd *vfdP; - - DO_DB(elog(DEBUG, "fileNameOpenFile: %s %x %o", - fileName, fileFlags, fileMode)); - - file = AllocateVfd(); - vfdP = &VfdCache[file]; - - if ( nfile >= pg_nofile() ) - AssertLruRoom(); - - tryAgain: - vfdP->fd = open(fileName,fileFlags,fileMode); - if (vfdP->fd < 0 && (errno == EMFILE || errno == ENFILE)) { - DO_DB(elog(DEBUG, "fileNameOpenFile: not enough descs, retry, er= %d", - errno)); - errno = 0; - AssertLruRoom(); - goto tryAgain; - } - - vfdP->fdstate = 0x0; - - if (vfdP->fd < 0) { - FreeVfd(file); - return -1; - } - ++nfile; - DO_DB(elog(DEBUG, "fileNameOpenFile: success %d", - vfdP->fd)); - - Insert(file); - - if (fileName==NULL) { - elog(WARN, "fileNameOpenFile: NULL fname"); - } - vfdP->fileName = malloc(strlen(fileName)+1); - strcpy(vfdP->fileName,fileName); - - vfdP->fileFlags = fileFlags & ~(O_TRUNC|O_EXCL); - vfdP->fileMode = fileMode; - vfdP->seekPos = 0; - - return file; + File file; + Vfd *vfdP; + + DO_DB(elog(DEBUG, "fileNameOpenFile: %s %x %o", + fileName, fileFlags, fileMode)); + + file = AllocateVfd(); + vfdP = &VfdCache[file]; + + if (nfile >= pg_nofile()) + AssertLruRoom(); + +tryAgain: + vfdP->fd = open(fileName, fileFlags, fileMode); + if (vfdP->fd < 0 && (errno == EMFILE || errno == ENFILE)) + { + DO_DB(elog(DEBUG, "fileNameOpenFile: not enough descs, retry, er= %d", + errno)); + errno = 0; + AssertLruRoom(); + goto tryAgain; + } + + vfdP->fdstate = 0x0; + + if (vfdP->fd < 0) + { + FreeVfd(file); + return -1; + } + ++nfile; + DO_DB(elog(DEBUG, "fileNameOpenFile: success %d", + vfdP->fd)); + + Insert(file); + + if (fileName == NULL) + { + elog(WARN, "fileNameOpenFile: NULL fname"); + } + vfdP->fileName = malloc(strlen(fileName) + 1); + strcpy(vfdP->fileName, fileName); + + vfdP->fileFlags = fileFlags & ~(O_TRUNC | O_EXCL); 
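filepath() above resolves a bare relation file name relative to DataDir/base/<database>/, while names that already begin with the separator pass through untouched. A sketch of the same decision using snprintf, with DataDir and the database name hardcoded purely for the example:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define SEP_CHAR '/'

    static char *
    mini_filepath(const char *datadir, const char *dbname, const char *filename)
    {
        char  *buf;
        size_t len;

        if (filename[0] != SEP_CHAR)    /* relative: put it under base/<db>/ */
        {
            len = strlen(datadir) + strlen("/base/") + strlen(dbname)
                + 1 + strlen(filename) + 1;
            buf = malloc(len);
            snprintf(buf, len, "%s%cbase%c%s%c%s",
                     datadir, SEP_CHAR, SEP_CHAR, dbname, SEP_CHAR, filename);
        }
        else                            /* absolute: caller knew best */
        {
            buf = malloc(strlen(filename) + 1);
            strcpy(buf, filename);
        }
        return buf;
    }

    int
    main(void)
    {
        char *p = mini_filepath("/usr/local/pgsql/data", "testdb", "pg_class");

        printf("%s\n", p);      /* /usr/local/pgsql/data/base/testdb/pg_class */
        free(p);
        return 0;
    }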
+ vfdP->fileMode = fileMode; + vfdP->seekPos = 0; + + return file; } /* @@ -573,13 +600,13 @@ fileNameOpenFile(FileName fileName, File FileNameOpenFile(FileName fileName, int fileFlags, int fileMode) { - File fd; - char *fname; - - fname = filepath(fileName); - fd = fileNameOpenFile(fname, fileFlags, fileMode); - pfree(fname); - return(fd); + File fd; + char *fname; + + fname = filepath(fileName); + fd = fileNameOpenFile(fname, fileFlags, fileMode); + pfree(fname); + return (fd); } /* @@ -588,150 +615,162 @@ FileNameOpenFile(FileName fileName, int fileFlags, int fileMode) File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode) { - return(fileNameOpenFile(fileName, fileFlags, fileMode)); + return (fileNameOpenFile(fileName, fileFlags, fileMode)); } void FileClose(File file) { - int returnValue; - - DO_DB(elog(DEBUG, "FileClose: %d (%s)", - file, VfdCache[file].fileName)); - - if (!FileIsNotOpen(file)) { - - /* remove the file from the lru ring */ - Delete(file); - - /* if we did any writes, sync the file before closing */ - if (VfdCache[file].fdstate & FD_DIRTY) { - returnValue = fsync(VfdCache[file].fd); - Assert(returnValue != -1); - VfdCache[file].fdstate &= ~FD_DIRTY; - } - - /* close the file */ - returnValue = close(VfdCache[file].fd); - Assert(returnValue != -1); - - --nfile; - VfdCache[file].fd = VFD_CLOSED; - } - /* - * Add the Vfd slot to the free list - */ - FreeVfd(file); - /* - * Free the filename string - */ - free(VfdCache[file].fileName); + int returnValue; + + DO_DB(elog(DEBUG, "FileClose: %d (%s)", + file, VfdCache[file].fileName)); + + if (!FileIsNotOpen(file)) + { + + /* remove the file from the lru ring */ + Delete(file); + + /* if we did any writes, sync the file before closing */ + if (VfdCache[file].fdstate & FD_DIRTY) + { + returnValue = fsync(VfdCache[file].fd); + Assert(returnValue != -1); + VfdCache[file].fdstate &= ~FD_DIRTY; + } + + /* close the file */ + returnValue = close(VfdCache[file].fd); + Assert(returnValue != -1); + + --nfile; + VfdCache[file].fd = VFD_CLOSED; + } + + /* + * Add the Vfd slot to the free list + */ + FreeVfd(file); + + /* + * Free the filename string + */ + free(VfdCache[file].fileName); } void FileUnlink(File file) { - int returnValue; - - DO_DB(elog(DEBUG, "FileUnlink: %d (%s)", - file, VfdCache[file].fileName)); - - if (!FileIsNotOpen(file)) { - - /* remove the file from the lru ring */ - Delete(file); - - /* if we did any writes, sync the file before closing */ - if (VfdCache[file].fdstate & FD_DIRTY) { - returnValue = fsync(VfdCache[file].fd); - Assert(returnValue != -1); - VfdCache[file].fdstate &= ~FD_DIRTY; - } - - /* close the file */ - returnValue = close(VfdCache[file].fd); - Assert(returnValue != -1); - - --nfile; - VfdCache[file].fd = VFD_CLOSED; - } - /* add the Vfd slot to the free list */ - FreeVfd(file); - - /* free the filename string */ - unlink(VfdCache[file].fileName); - free(VfdCache[file].fileName); + int returnValue; + + DO_DB(elog(DEBUG, "FileUnlink: %d (%s)", + file, VfdCache[file].fileName)); + + if (!FileIsNotOpen(file)) + { + + /* remove the file from the lru ring */ + Delete(file); + + /* if we did any writes, sync the file before closing */ + if (VfdCache[file].fdstate & FD_DIRTY) + { + returnValue = fsync(VfdCache[file].fd); + Assert(returnValue != -1); + VfdCache[file].fdstate &= ~FD_DIRTY; + } + + /* close the file */ + returnValue = close(VfdCache[file].fd); + Assert(returnValue != -1); + + --nfile; + VfdCache[file].fd = VFD_CLOSED; + } + /* add the Vfd slot to the free list */ + 
FreeVfd(file); + + /* free the filename string */ + unlink(VfdCache[file].fileName); + free(VfdCache[file].fileName); } int FileRead(File file, char *buffer, int amount) { - int returnCode; - - DO_DB(elog(DEBUG, "FileRead: %d (%s) %d %p", - file, VfdCache[file].fileName, amount, buffer)); - - FileAccess(file); - returnCode = read(VfdCache[file].fd, buffer, amount); - if (returnCode > 0) { - VfdCache[file].seekPos += returnCode; - } - - return returnCode; + int returnCode; + + DO_DB(elog(DEBUG, "FileRead: %d (%s) %d %p", + file, VfdCache[file].fileName, amount, buffer)); + + FileAccess(file); + returnCode = read(VfdCache[file].fd, buffer, amount); + if (returnCode > 0) + { + VfdCache[file].seekPos += returnCode; + } + + return returnCode; } int FileWrite(File file, char *buffer, int amount) { - int returnCode; - - DO_DB(elog(DEBUG, "FileWrite: %d (%s) %d %p", - file, VfdCache[file].fileName, amount, buffer)); - - FileAccess(file); - returnCode = write(VfdCache[file].fd, buffer, amount); - if (returnCode > 0) { /* changed by Boris with Mao's advice */ - VfdCache[file].seekPos += returnCode; - } - - /* record the write */ - VfdCache[file].fdstate |= FD_DIRTY; - - return returnCode; + int returnCode; + + DO_DB(elog(DEBUG, "FileWrite: %d (%s) %d %p", + file, VfdCache[file].fileName, amount, buffer)); + + FileAccess(file); + returnCode = write(VfdCache[file].fd, buffer, amount); + if (returnCode > 0) + { /* changed by Boris with Mao's advice */ + VfdCache[file].seekPos += returnCode; + } + + /* record the write */ + VfdCache[file].fdstate |= FD_DIRTY; + + return returnCode; } long FileSeek(File file, long offset, int whence) { - int returnCode; - - DO_DB(elog (DEBUG, "FileSeek: %d (%s) %ld %d", - file, VfdCache[file].fileName, offset, whence)); - - if (FileIsNotOpen(file)) { - switch(whence) { - case SEEK_SET: - VfdCache[file].seekPos = offset; - return offset; - case SEEK_CUR: - VfdCache[file].seekPos = VfdCache[file].seekPos +offset; - return VfdCache[file].seekPos; - case SEEK_END: - FileAccess(file); - returnCode = VfdCache[file].seekPos = - lseek(VfdCache[file].fd, offset, whence); - return returnCode; - default: - elog(WARN, "FileSeek: invalid whence: %d", whence); - break; - } - } else { - returnCode = VfdCache[file].seekPos = - lseek(VfdCache[file].fd, offset, whence); - return returnCode; - } - /*NOTREACHED*/ - return(-1L); + int returnCode; + + DO_DB(elog(DEBUG, "FileSeek: %d (%s) %ld %d", + file, VfdCache[file].fileName, offset, whence)); + + if (FileIsNotOpen(file)) + { + switch (whence) + { + case SEEK_SET: + VfdCache[file].seekPos = offset; + return offset; + case SEEK_CUR: + VfdCache[file].seekPos = VfdCache[file].seekPos + offset; + return VfdCache[file].seekPos; + case SEEK_END: + FileAccess(file); + returnCode = VfdCache[file].seekPos = + lseek(VfdCache[file].fd, offset, whence); + return returnCode; + default: + elog(WARN, "FileSeek: invalid whence: %d", whence); + break; + } + } + else + { + returnCode = VfdCache[file].seekPos = + lseek(VfdCache[file].fd, offset, whence); + return returnCode; + } + /* NOTREACHED */ + return (-1L); } /* @@ -741,58 +780,61 @@ FileSeek(File file, long offset, int whence) long FileTell(File file) { - DO_DB(elog(DEBUG, "FileTell %d (%s)", - file, VfdCache[file].fileName)); - return VfdCache[file].seekPos; + DO_DB(elog(DEBUG, "FileTell %d (%s)", + file, VfdCache[file].fileName)); + return VfdCache[file].seekPos; } + #endif int FileTruncate(File file, int offset) { - int returnCode; - - DO_DB(elog(DEBUG, "FileTruncate %d (%s)", - file, 
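FileSeek below has to work even when the underlying kernel fd is closed: SEEK_SET and SEEK_CUR are served purely from the cached seekPos, and only SEEK_END forces the file open, since the file length is not cached. A sketch of the closed-file branch (the enum constants are local stand-ins to keep the example self-contained):

    #include <stdio.h>

    enum { MY_SEEK_SET, MY_SEEK_CUR, MY_SEEK_END };

    /* seek against a closed virtual fd: update the remembered position only */
    static long
    cached_seek(long *seekPos, long offset, int whence)
    {
        switch (whence)
        {
            case MY_SEEK_SET:
                *seekPos = offset;
                return *seekPos;
            case MY_SEEK_CUR:
                *seekPos += offset;
                return *seekPos;
            case MY_SEEK_END:
                /* length unknown while closed: the real code reopens the
                 * file via FileAccess and calls lseek(fd, offset, SEEK_END) */
                return -1L;
            default:
                return -1L;
        }
    }

    int
    main(void)
    {
        long pos = 0;

        cached_seek(&pos, 100, MY_SEEK_SET);
        cached_seek(&pos, 24, MY_SEEK_CUR);
        printf("position now %ld\n", pos);  /* 124 */
        return 0;
    }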
VfdCache[file].fileName)); - - FileSync(file); - FileAccess(file); - returnCode = ftruncate(VfdCache[file].fd, offset); - return(returnCode); + int returnCode; + + DO_DB(elog(DEBUG, "FileTruncate %d (%s)", + file, VfdCache[file].fileName)); + + FileSync(file); + FileAccess(file); + returnCode = ftruncate(VfdCache[file].fd, offset); + return (returnCode); } int FileSync(File file) { - int returnCode; - - /* - * If the file isn't open, then we don't need to sync it; we - * always sync files when we close them. Also, if we haven't - * done any writes that we haven't already synced, we can ignore - * the request. - */ - - if (VfdCache[file].fd < 0 || !(VfdCache[file].fdstate & FD_DIRTY)) { - returnCode = 0; - } else { - returnCode = fsync(VfdCache[file].fd); - VfdCache[file].fdstate &= ~FD_DIRTY; - } - - return returnCode; + int returnCode; + + /* + * If the file isn't open, then we don't need to sync it; we always + * sync files when we close them. Also, if we haven't done any writes + * that we haven't already synced, we can ignore the request. + */ + + if (VfdCache[file].fd < 0 || !(VfdCache[file].fdstate & FD_DIRTY)) + { + returnCode = 0; + } + else + { + returnCode = fsync(VfdCache[file].fd); + VfdCache[file].fdstate &= ~FD_DIRTY; + } + + return returnCode; } int FileNameUnlink(char *filename) { - int retval; - char *fname; + int retval; + char *fname; - fname = filepath(filename); - retval = unlink(fname); - pfree(fname); - return(retval); + fname = filepath(filename); + retval = unlink(fname); + pfree(fname); + return (retval); } /* @@ -804,33 +846,36 @@ FileNameUnlink(char *filename) * allocatedFiles keeps track of how many have been allocated so we * can give a warning if there are too few left. */ -static int allocatedFiles = 0; +static int allocatedFiles = 0; -FILE * +FILE * AllocateFile(char *name, char *mode) { - FILE *file; - int fdleft; - - DO_DB(elog(DEBUG, "AllocateFile: Allocated %d.", allocatedFiles)); + FILE *file; + int fdleft; + + DO_DB(elog(DEBUG, "AllocateFile: Allocated %d.", allocatedFiles)); TryAgain: - if ((file = fopen(name, mode)) == NULL) { - if (errno == EMFILE || errno == ENFILE) { - DO_DB(elog(DEBUG, "AllocateFile: not enough descs, retry, er= %d", - errno)); - errno = 0; - AssertLruRoom(); - goto TryAgain; - } - } - else { - ++allocatedFiles; - fdleft = pg_nofile() - allocatedFiles; - if (fdleft < 6) - elog(NOTICE,"warning: few usable file descriptors left (%d)", fdleft); - } - return file; + if ((file = fopen(name, mode)) == NULL) + { + if (errno == EMFILE || errno == ENFILE) + { + DO_DB(elog(DEBUG, "AllocateFile: not enough descs, retry, er= %d", + errno)); + errno = 0; + AssertLruRoom(); + goto TryAgain; + } + } + else + { + ++allocatedFiles; + fdleft = pg_nofile() - allocatedFiles; + if (fdleft < 6) + elog(NOTICE, "warning: few usable file descriptors left (%d)", fdleft); + } + return file; } /* @@ -838,22 +883,24 @@ TryAgain: * AllocateFile()? 
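The point of FileSync above is that fsync can be skipped entirely when the file is closed or FD_DIRTY is clear, because every FileWrite sets the flag and every close syncs. A tiny sketch of that bookkeeping:

    #include <stdio.h>
    #include <unistd.h>

    #define FD_DIRTY (1 << 0)

    typedef struct { int fd; unsigned fdstate; } MiniVfd;

    static int
    mini_sync(MiniVfd *v)
    {
        int rc = 0;

        /* nothing to do if closed or no unsynced writes */
        if (v->fd >= 0 && (v->fdstate & FD_DIRTY))
        {
            rc = fsync(v->fd);
            v->fdstate &= ~FD_DIRTY;
        }
        return rc;
    }

    int
    main(void)
    {
        MiniVfd v = { -1, 0 };

        printf("sync on closed file: %d (no-op)\n", mini_sync(&v));
        return 0;
    }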
*/ void -FreeFile(FILE *file) +FreeFile(FILE * file) { - DO_DB(elog(DEBUG, "FreeFile: Allocated %d.", allocatedFiles)); + DO_DB(elog(DEBUG, "FreeFile: Allocated %d.", allocatedFiles)); - Assert(allocatedFiles > 0); - fclose(file); - --allocatedFiles; + Assert(allocatedFiles > 0); + fclose(file); + --allocatedFiles; } void closeAllVfds() { - int i; - Assert (FileIsNotOpen(0)); /* Make sure ring not corrupted */ - for (i=1; i<SizeVfdCache; i++) { - if (!FileIsNotOpen(i)) - LruDelete(i); - } + int i; + + Assert(FileIsNotOpen(0)); /* Make sure ring not corrupted */ + for (i = 1; i < SizeVfdCache; i++) + { + if (!FileIsNotOpen(i)) + LruDelete(i); + } } diff --git a/src/backend/storage/ipc/ipc.c b/src/backend/storage/ipc/ipc.c index a5573e89151..3dd6d2ec094 100644 --- a/src/backend/storage/ipc/ipc.c +++ b/src/backend/storage/ipc/ipc.c @@ -1,26 +1,26 @@ /*------------------------------------------------------------------------- * * ipc.c-- - * POSTGRES inter-process communication definitions. + * POSTGRES inter-process communication definitions. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.11 1997/08/19 21:32:54 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.12 1997/09/07 04:48:30 momjian Exp $ * * NOTES * - * Currently, semaphores are used (my understanding anyway) in two - * different ways: - * 1. as mutexes on machines that don't have test-and-set (eg. - * mips R3000). - * 2. for putting processes to sleep when waiting on a lock - * and waking them up when the lock is free. - * The number of semaphores in (1) is fixed and those are shared - * among all backends. In (2), there is 1 semaphore per process and those - * are not shared with anyone else. - * -ay 4/95 + * Currently, semaphores are used (my understanding anyway) in two + * different ways: + * 1. as mutexes on machines that don't have test-and-set (eg. + * mips R3000). + * 2. for putting processes to sleep when waiting on a lock + * and waking them up when the lock is free. + * The number of semaphores in (1) is fixed and those are shared + * among all backends. In (2), there is 1 semaphore per process and those + * are not shared with anyone else. 
+ * -ay 4/95 * *------------------------------------------------------------------------- */ @@ -44,94 +44,98 @@ #endif #if defined(bsd44) -int UsePrivateMemory = 1; +int UsePrivateMemory = 1; + #else -int UsePrivateMemory = 0; +int UsePrivateMemory = 0; + #endif -static void IpcMemoryDetach(int status, char *shmaddr); +static void IpcMemoryDetach(int status, char *shmaddr); /* ---------------------------------------------------------------- - * exit() handling stuff + * exit() handling stuff * ---------------------------------------------------------------- */ #define MAX_ON_EXITS 20 -static struct ONEXIT { - void (*function)(); - caddr_t arg; -} onexit_list[ MAX_ON_EXITS ]; +static struct ONEXIT +{ + void (*function) (); + caddr_t arg; +} onexit_list[MAX_ON_EXITS]; -static int onexit_index; -static void IpcConfigTip(void); +static int onexit_index; +static void IpcConfigTip(void); -typedef struct _PrivateMemStruct { - int id; - char *memptr; -} PrivateMem; +typedef struct _PrivateMemStruct +{ + int id; + char *memptr; +} PrivateMem; -PrivateMem IpcPrivateMem[16]; +PrivateMem IpcPrivateMem[16]; static int PrivateMemoryCreate(IpcMemoryKey memKey, - uint32 size) + uint32 size) { - static int memid = 0; - - UsePrivateMemory = 1; - - IpcPrivateMem[memid].id = memid; - IpcPrivateMem[memid].memptr = malloc(size); - if (IpcPrivateMem[memid].memptr == NULL) - elog(WARN, "PrivateMemoryCreate: not enough memory to malloc"); - memset(IpcPrivateMem[memid].memptr, 0, size); /* XXX PURIFY */ - - return (memid++); + static int memid = 0; + + UsePrivateMemory = 1; + + IpcPrivateMem[memid].id = memid; + IpcPrivateMem[memid].memptr = malloc(size); + if (IpcPrivateMem[memid].memptr == NULL) + elog(WARN, "PrivateMemoryCreate: not enough memory to malloc"); + memset(IpcPrivateMem[memid].memptr, 0, size); /* XXX PURIFY */ + + return (memid++); } -static char * +static char * PrivateMemoryAttach(IpcMemoryId memid) { - return ( IpcPrivateMem[memid].memptr ); + return (IpcPrivateMem[memid].memptr); } /* ---------------------------------------------------------------- - * exitpg + * exitpg * - * this function calls all the callbacks registered - * for it (to free resources) and then calls exit. - * This should be the only function to call exit(). - * -cim 2/6/90 + * this function calls all the callbacks registered + * for it (to free resources) and then calls exit. + * This should be the only function to call exit(). + * -cim 2/6/90 * ---------------------------------------------------------------- */ -static int exitpg_inprogress = 0; +static int exitpg_inprogress = 0; void exitpg(int code) { - int i; - - /* ---------------- - * if exitpg_inprocess is true, then it means that we - * are being invoked from within an on_exit() handler - * and so we return immediately to avoid recursion. - * ---------------- - */ - if (exitpg_inprogress) - return; - - exitpg_inprogress = 1; - - /* ---------------- - * call all the callbacks registered before calling exit(). - * ---------------- - */ - for (i = onexit_index - 1; i >= 0; --i) - (*onexit_list[i].function)(code, onexit_list[i].arg); - - exit(code); + int i; + + /* ---------------- + * if exitpg_inprocess is true, then it means that we + * are being invoked from within an on_exit() handler + * and so we return immediately to avoid recursion. + * ---------------- + */ + if (exitpg_inprogress) + return; + + exitpg_inprogress = 1; + + /* ---------------- + * call all the callbacks registered before calling exit(). 
+ * ---------------- + */ + for (i = onexit_index - 1; i >= 0; --i) + (*onexit_list[i].function) (code, onexit_list[i].arg); + + exit(code); } /* ------------------ @@ -143,591 +147,628 @@ exitpg(int code) void quasi_exitpg() { - int i; - - /* ---------------- - * if exitpg_inprocess is true, then it means that we - * are being invoked from within an on_exit() handler - * and so we return immediately to avoid recursion. - * ---------------- - */ - if (exitpg_inprogress) - return; - - exitpg_inprogress = 1; - - /* ---------------- - * call all the callbacks registered before calling exit(). - * ---------------- - */ - for (i = onexit_index - 1; i >= 0; --i) - (*onexit_list[i].function)(0, onexit_list[i].arg); - - onexit_index = 0; - exitpg_inprogress = 0; + int i; + + /* ---------------- + * if exitpg_inprocess is true, then it means that we + * are being invoked from within an on_exit() handler + * and so we return immediately to avoid recursion. + * ---------------- + */ + if (exitpg_inprogress) + return; + + exitpg_inprogress = 1; + + /* ---------------- + * call all the callbacks registered before calling exit(). + * ---------------- + */ + for (i = onexit_index - 1; i >= 0; --i) + (*onexit_list[i].function) (0, onexit_list[i].arg); + + onexit_index = 0; + exitpg_inprogress = 0; } /* ---------------------------------------------------------------- - * on_exitpg + * on_exitpg * - * this function adds a callback function to the list of - * functions invoked by exitpg(). -cim 2/6/90 + * this function adds a callback function to the list of + * functions invoked by exitpg(). -cim 2/6/90 * ---------------------------------------------------------------- */ int -on_exitpg(void (*function)(), caddr_t arg) + on_exitpg(void (*function) (), caddr_t arg) { - if (onexit_index >= MAX_ON_EXITS) - return(-1); - - onexit_list[ onexit_index ].function = function; - onexit_list[ onexit_index ].arg = arg; - - ++onexit_index; - - return(0); + if (onexit_index >= MAX_ON_EXITS) + return (-1); + + onexit_list[onexit_index].function = function; + onexit_list[onexit_index].arg = arg; + + ++onexit_index; + + return (0); } /****************************************************************************/ -/* IPCPrivateSemaphoreKill(status, semId) */ -/* */ +/* IPCPrivateSemaphoreKill(status, semId) */ +/* */ /****************************************************************************/ static void IPCPrivateSemaphoreKill(int status, - int semId) /* caddr_t */ + int semId) /* caddr_t */ { - union semun semun; - semctl(semId, 0, IPC_RMID, semun); + union semun semun; + + semctl(semId, 0, IPC_RMID, semun); } /****************************************************************************/ -/* IPCPrivateMemoryKill(status, shmId) */ -/* */ +/* IPCPrivateMemoryKill(status, shmId) */ +/* */ /****************************************************************************/ static void IPCPrivateMemoryKill(int status, - int shmId) /* caddr_t */ + int shmId) /* caddr_t */ { - if ( UsePrivateMemory ) { - /* free ( IpcPrivateMem[shmId].memptr ); */ - } else { - if (shmctl(shmId, IPC_RMID, (struct shmid_ds *) NULL) < 0) { - elog(NOTICE, "IPCPrivateMemoryKill: shmctl(%d, %d, 0) failed: %m", - shmId, IPC_RMID); + if (UsePrivateMemory) + { + /* free ( IpcPrivateMem[shmId].memptr ); */ + } + else + { + if (shmctl(shmId, IPC_RMID, (struct shmid_ds *) NULL) < 0) + { + elog(NOTICE, "IPCPrivateMemoryKill: shmctl(%d, %d, 0) failed: %m", + shmId, IPC_RMID); + } } - } } /****************************************************************************/ 
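exitpg and on_exitpg above amount to a small on-exit registry: callbacks are appended up to MAX_ON_EXITS and run in reverse (LIFO) order, with a re-entrancy guard so a handler that itself triggers an exit does not recurse. A sketch of the same pattern under hypothetical names (register_exit, my_exit):

    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_CALLBACKS 20

    static struct { void (*fn)(int, void *); void *arg; } cb_list[MAX_CALLBACKS];
    static int cb_count;
    static int exiting;                     /* re-entrancy guard */

    static int
    register_exit(void (*fn)(int, void *), void *arg)
    {
        if (cb_count >= MAX_CALLBACKS)
            return -1;
        cb_list[cb_count].fn = fn;
        cb_list[cb_count].arg = arg;
        cb_count++;
        return 0;
    }

    static void
    my_exit(int code)
    {
        int i;

        if (exiting)                        /* called from inside a callback */
            return;
        exiting = 1;
        for (i = cb_count - 1; i >= 0; i--) /* LIFO, newest first */
            cb_list[i].fn(code, cb_list[i].arg);
        exit(code);
    }

    static void
    bye(int code, void *arg)
    {
        printf("cleanup '%s' (code %d)\n", (char *) arg, code);
    }

    int
    main(void)
    {
        register_exit(bye, "first registered, runs last");
        register_exit(bye, "last registered, runs first");
        my_exit(0);
    }

Running callbacks newest-first mirrors resource acquisition order: things acquired last depend on things acquired first, so they are released first.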
-/* IpcSemaphoreCreate(semKey, semNum, permission, semStartValue) */ -/* */ -/* - returns a semaphore identifier: */ -/* */ +/* IpcSemaphoreCreate(semKey, semNum, permission, semStartValue) */ +/* */ +/* - returns a semaphore identifier: */ +/* */ /* if key doesn't exist: return a new id, status:= IpcSemIdNotExist */ -/* if key exists: return the old id, status:= IpcSemIdExist */ -/* if semNum > MAX : return # of argument, status:=IpcInvalidArgument */ -/* */ +/* if key exists: return the old id, status:= IpcSemIdExist */ +/* if semNum > MAX : return # of argument, status:=IpcInvalidArgument */ +/* */ /****************************************************************************/ /* * Note: - * XXX This should be split into two different calls. One should - * XXX be used to create a semaphore set. The other to "attach" a + * XXX This should be split into two different calls. One should + * XXX be used to create a semaphore set. The other to "attach" a * XXX existing set. It should be an error for the semaphore set * XXX to to already exist or for it not to, respectively. * - * Currently, the semaphore sets are "attached" and an error - * is detected only when a later shared memory attach fails. + * Currently, the semaphore sets are "attached" and an error + * is detected only when a later shared memory attach fails. */ IpcSemaphoreId IpcSemaphoreCreate(IpcSemaphoreKey semKey, - int semNum, - int permission, - int semStartValue, - int removeOnExit, - int *status) + int semNum, + int permission, + int semStartValue, + int removeOnExit, + int *status) { - int i; - int errStatus; - int semId; - u_short array[IPC_NMAXSEM]; - union semun semun; - - /* get a semaphore if non-existent */ - /* check arguments */ - if (semNum > IPC_NMAXSEM || semNum <= 0) { - *status = IpcInvalidArgument; - return(2); /* returns the number of the invalid argument */ - } - - semId = semget(semKey, 0, 0); - - if (semId == -1) { - *status = IpcSemIdNotExist; /* there doesn't exist a semaphore */ + int i; + int errStatus; + int semId; + u_short array[IPC_NMAXSEM]; + union semun semun; + + /* get a semaphore if non-existent */ + /* check arguments */ + if (semNum > IPC_NMAXSEM || semNum <= 0) + { + *status = IpcInvalidArgument; + return (2); /* returns the number of the invalid + * argument */ + } + + semId = semget(semKey, 0, 0); + + if (semId == -1) + { + *status = IpcSemIdNotExist; /* there doesn't exist a semaphore */ #ifdef DEBUG_IPC - fprintf(stderr,"calling semget with %d, %d , %d\n", - semKey, - semNum, - IPC_CREAT|permission ); + fprintf(stderr, "calling semget with %d, %d , %d\n", + semKey, + semNum, + IPC_CREAT | permission); #endif - semId = semget(semKey, semNum, IPC_CREAT|permission); + semId = semget(semKey, semNum, IPC_CREAT | permission); + + if (semId < 0) + { + perror("semget"); + IpcConfigTip(); + exitpg(3); + } + for (i = 0; i < semNum; i++) + { + array[i] = semStartValue; + } + semun.array = array; + errStatus = semctl(semId, 0, SETALL, semun); + if (errStatus == -1) + { + perror("semctl"); + IpcConfigTip(); + } + + if (removeOnExit) + on_exitpg(IPCPrivateSemaphoreKill, (caddr_t) semId); - if (semId < 0) { - perror("semget"); - IpcConfigTip(); - exitpg(3); - } - for (i = 0; i < semNum; i++) { - array[i] = semStartValue; } - semun.array = array; - errStatus = semctl(semId, 0, SETALL, semun); - if (errStatus == -1) { - perror("semctl"); - IpcConfigTip(); + else + { + /* there is a semaphore id for this key */ + *status = IpcSemIdExist; } - - if (removeOnExit) - on_exitpg(IPCPrivateSemaphoreKill, 
(caddr_t)semId); - - } else { - /* there is a semaphore id for this key */ - *status = IpcSemIdExist; - } - + #ifdef DEBUG_IPC - fprintf(stderr,"\nIpcSemaphoreCreate, status %d, returns %d\n", - *status, - semId ); - fflush(stdout); - fflush(stderr); + fprintf(stderr, "\nIpcSemaphoreCreate, status %d, returns %d\n", + *status, + semId); + fflush(stdout); + fflush(stderr); #endif - return(semId); + return (semId); } /****************************************************************************/ -/* IpcSemaphoreSet() - sets the initial value of the semaphore */ -/* */ -/* note: the xxx_return variables are only used for debugging. */ +/* IpcSemaphoreSet() - sets the initial value of the semaphore */ +/* */ +/* note: the xxx_return variables are only used for debugging. */ /****************************************************************************/ #ifdef NOT_USED -static int IpcSemaphoreSet_return; +static int IpcSemaphoreSet_return; void IpcSemaphoreSet(int semId, int semno, int value) { - int errStatus; - union semun semun; - - semun.val = value; - errStatus = semctl(semId, semno, SETVAL, semun); - IpcSemaphoreSet_return = errStatus; - - if (errStatus == -1) - { - perror("semctl"); - IpcConfigTip(); - } + int errStatus; + union semun semun; + + semun.val = value; + errStatus = semctl(semId, semno, SETVAL, semun); + IpcSemaphoreSet_return = errStatus; + + if (errStatus == -1) + { + perror("semctl"); + IpcConfigTip(); + } } + #endif /****************************************************************************/ -/* IpcSemaphoreKill(key) - removes a semaphore */ -/* */ +/* IpcSemaphoreKill(key) - removes a semaphore */ +/* */ /****************************************************************************/ void IpcSemaphoreKill(IpcSemaphoreKey key) { - int semId; - union semun semun; - - /* kill semaphore if existent */ - - semId = semget(key, 0, 0); - if (semId != -1) - semctl(semId, 0, IPC_RMID, semun); + int semId; + union semun semun; + + /* kill semaphore if existent */ + + semId = semget(key, 0, 0); + if (semId != -1) + semctl(semId, 0, IPC_RMID, semun); } /****************************************************************************/ -/* IpcSemaphoreLock(semId, sem, lock) - locks a semaphore */ -/* */ -/* note: the xxx_return variables are only used for debugging. */ +/* IpcSemaphoreLock(semId, sem, lock) - locks a semaphore */ +/* */ +/* note: the xxx_return variables are only used for debugging. */ /****************************************************************************/ -static int IpcSemaphoreLock_return; +static int IpcSemaphoreLock_return; void IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock) { - extern int errno; - int errStatus; - struct sembuf sops; - - sops.sem_op = lock; - sops.sem_flg = 0; - sops.sem_num = sem; - - /* ---------------- - * Note: if errStatus is -1 and errno == EINTR then it means we - * returned from the operation prematurely because we were - * sent a signal. So we try and lock the semaphore again. - * I am not certain this is correct, but the semantics aren't - * clear it fixes problems with parallel abort synchronization, - * namely that after processing an abort signal, the semaphore - * call returns with -1 (and errno == EINTR) before it should. 
- * -cim 3/28/90 - * ---------------- - */ - do { - errStatus = semop(semId, &sops, 1); - } while (errStatus == -1 && errno == EINTR); - - IpcSemaphoreLock_return = errStatus; - - if (errStatus == -1) { - perror("semop"); - IpcConfigTip(); - exitpg(255); - } + extern int errno; + int errStatus; + struct sembuf sops; + + sops.sem_op = lock; + sops.sem_flg = 0; + sops.sem_num = sem; + + /* ---------------- + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were + * sent a signal. So we try and lock the semaphore again. + * I am not certain this is correct, but the semantics aren't + * clear it fixes problems with parallel abort synchronization, + * namely that after processing an abort signal, the semaphore + * call returns with -1 (and errno == EINTR) before it should. + * -cim 3/28/90 + * ---------------- + */ + do + { + errStatus = semop(semId, &sops, 1); + } while (errStatus == -1 && errno == EINTR); + + IpcSemaphoreLock_return = errStatus; + + if (errStatus == -1) + { + perror("semop"); + IpcConfigTip(); + exitpg(255); + } } /****************************************************************************/ -/* IpcSemaphoreUnlock(semId, sem, lock) - unlocks a semaphore */ -/* */ -/* note: the xxx_return variables are only used for debugging. */ +/* IpcSemaphoreUnlock(semId, sem, lock) - unlocks a semaphore */ +/* */ +/* note: the xxx_return variables are only used for debugging. */ /****************************************************************************/ -static int IpcSemaphoreUnlock_return; +static int IpcSemaphoreUnlock_return; void IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock) { - extern int errno; - int errStatus; - struct sembuf sops; - - sops.sem_op = -lock; - sops.sem_flg = 0; - sops.sem_num = sem; - - - /* ---------------- - * Note: if errStatus is -1 and errno == EINTR then it means we - * returned from the operation prematurely because we were - * sent a signal. So we try and lock the semaphore again. - * I am not certain this is correct, but the semantics aren't - * clear it fixes problems with parallel abort synchronization, - * namely that after processing an abort signal, the semaphore - * call returns with -1 (and errno == EINTR) before it should. - * -cim 3/28/90 - * ---------------- - */ - do { - errStatus = semop(semId, &sops, 1); - } while (errStatus == -1 && errno == EINTR); - - IpcSemaphoreUnlock_return = errStatus; - - if (errStatus == -1) { - perror("semop"); - IpcConfigTip(); - exitpg(255); - } + extern int errno; + int errStatus; + struct sembuf sops; + + sops.sem_op = -lock; + sops.sem_flg = 0; + sops.sem_num = sem; + + + /* ---------------- + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were + * sent a signal. So we try and lock the semaphore again. + * I am not certain this is correct, but the semantics aren't + * clear it fixes problems with parallel abort synchronization, + * namely that after processing an abort signal, the semaphore + * call returns with -1 (and errno == EINTR) before it should. 
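
Editorial aside: the "-cim 3/28/90" note above (and its twin in the unlock path below) describes the standard EINTR idiom: semop may return -1 with errno == EINTR when a signal arrives mid-wait, and the only safe response is to retry the operation. The idiom, lifted out of the lock/unlock pair into one self-contained helper (pg_semop is a hypothetical name, not from the patch):

    #include <errno.h>
    #include <sys/types.h>
    #include <sys/ipc.h>
    #include <sys/sem.h>

    /* Apply one semaphore operation, retrying if a signal interrupts it. */
    static int
    pg_semop(int semId, int semNum, int delta)
    {
        struct sembuf sops;
        int           rc;

        sops.sem_num = semNum;
        sops.sem_op  = delta;           /* negative decrements (locks), positive unlocks */
        sops.sem_flg = 0;

        do
        {
            rc = semop(semId, &sops, 1);
        } while (rc == -1 && errno == EINTR);

        return rc;                      /* -1 with errno set on a real failure */
    }
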
+ * -cim 3/28/90 + * ---------------- + */ + do + { + errStatus = semop(semId, &sops, 1); + } while (errStatus == -1 && errno == EINTR); + + IpcSemaphoreUnlock_return = errStatus; + + if (errStatus == -1) + { + perror("semop"); + IpcConfigTip(); + exitpg(255); + } } int -IpcSemaphoreGetCount(IpcSemaphoreId semId, int sem) +IpcSemaphoreGetCount(IpcSemaphoreId semId, int sem) { - int semncnt; - union semun dummy; /* for Solaris */ - - semncnt = semctl(semId, sem, GETNCNT, dummy); - return semncnt; + int semncnt; + union semun dummy; /* for Solaris */ + + semncnt = semctl(semId, sem, GETNCNT, dummy); + return semncnt; } int -IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem) +IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem) { - int semval; - union semun dummy; /* for Solaris */ - - semval = semctl(semId, sem, GETVAL, dummy); - return semval; + int semval; + union semun dummy; /* for Solaris */ + + semval = semctl(semId, sem, GETVAL, dummy); + return semval; } /****************************************************************************/ -/* IpcMemoryCreate(memKey) */ -/* */ -/* - returns the memory identifier, if creation succeeds */ -/* returns IpcMemCreationFailed, if failure */ +/* IpcMemoryCreate(memKey) */ +/* */ +/* - returns the memory identifier, if creation succeeds */ +/* returns IpcMemCreationFailed, if failure */ /****************************************************************************/ IpcMemoryId IpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission) { - IpcMemoryId shmid; - - if (memKey == PrivateIPCKey) { - /* private */ - shmid = PrivateMemoryCreate(memKey, size); - }else { - shmid = shmget(memKey, size, IPC_CREAT|permission); - } - - if (shmid < 0) { - fprintf(stderr,"IpcMemoryCreate: memKey=%d , size=%d , permission=%d", - memKey, size , permission ); - perror("IpcMemoryCreate: shmget(..., create, ...) failed"); - IpcConfigTip(); - return(IpcMemCreationFailed); - } - - /* if (memKey == PrivateIPCKey) */ - on_exitpg(IPCPrivateMemoryKill, (caddr_t)shmid); - - return(shmid); + IpcMemoryId shmid; + + if (memKey == PrivateIPCKey) + { + /* private */ + shmid = PrivateMemoryCreate(memKey, size); + } + else + { + shmid = shmget(memKey, size, IPC_CREAT | permission); + } + + if (shmid < 0) + { + fprintf(stderr, "IpcMemoryCreate: memKey=%d , size=%d , permission=%d", + memKey, size, permission); + perror("IpcMemoryCreate: shmget(..., create, ...) 
failed"); + IpcConfigTip(); + return (IpcMemCreationFailed); + } + + /* if (memKey == PrivateIPCKey) */ + on_exitpg(IPCPrivateMemoryKill, (caddr_t) shmid); + + return (shmid); } /****************************************************************************/ -/* IpcMemoryIdGet(memKey, size) returns the shared memory Id */ -/* or IpcMemIdGetFailed */ +/* IpcMemoryIdGet(memKey, size) returns the shared memory Id */ +/* or IpcMemIdGetFailed */ /****************************************************************************/ IpcMemoryId IpcMemoryIdGet(IpcMemoryKey memKey, uint32 size) { - IpcMemoryId shmid; - - shmid = shmget(memKey, size, 0); - - if (shmid < 0) { - fprintf(stderr,"IpcMemoryIdGet: memKey=%d , size=%d , permission=%d", - memKey, size , 0 ); - perror("IpcMemoryIdGet: shmget() failed"); - IpcConfigTip(); - return(IpcMemIdGetFailed); - } - - return(shmid); + IpcMemoryId shmid; + + shmid = shmget(memKey, size, 0); + + if (shmid < 0) + { + fprintf(stderr, "IpcMemoryIdGet: memKey=%d , size=%d , permission=%d", + memKey, size, 0); + perror("IpcMemoryIdGet: shmget() failed"); + IpcConfigTip(); + return (IpcMemIdGetFailed); + } + + return (shmid); } /****************************************************************************/ -/* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */ -/* from a backend address space */ -/* (only called by backends running under the postmaster) */ +/* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */ +/* from a backend address space */ +/* (only called by backends running under the postmaster) */ /****************************************************************************/ static void IpcMemoryDetach(int status, char *shmaddr) { - if (shmdt(shmaddr) < 0) { - elog(NOTICE, "IpcMemoryDetach: shmdt(0x%x): %m", shmaddr); - } + if (shmdt(shmaddr) < 0) + { + elog(NOTICE, "IpcMemoryDetach: shmdt(0x%x): %m", shmaddr); + } } /****************************************************************************/ -/* IpcMemoryAttach(memId) returns the adress of shared memory */ -/* or IpcMemAttachFailed */ -/* */ -/* CALL IT: addr = (struct <MemoryStructure> *) IpcMemoryAttach(memId); */ -/* */ +/* IpcMemoryAttach(memId) returns the adress of shared memory */ +/* or IpcMemAttachFailed */ +/* */ +/* CALL IT: addr = (struct <MemoryStructure> *) IpcMemoryAttach(memId); */ +/* */ /****************************************************************************/ -char * +char * IpcMemoryAttach(IpcMemoryId memId) { - char *memAddress; - - if (UsePrivateMemory) { - memAddress = (char *) PrivateMemoryAttach(memId); - } else { - memAddress = (char *) shmat(memId, 0, 0); - } - - /* if ( *memAddress == -1) { XXX ??? */ - if ( memAddress == (char *)-1) { - perror("IpcMemoryAttach: shmat() failed"); - IpcConfigTip(); - return(IpcMemAttachFailed); - } - - if (!UsePrivateMemory) - on_exitpg(IpcMemoryDetach, (caddr_t) memAddress); - - return((char *) memAddress); + char *memAddress; + + if (UsePrivateMemory) + { + memAddress = (char *) PrivateMemoryAttach(memId); + } + else + { + memAddress = (char *) shmat(memId, 0, 0); + } + + /* if ( *memAddress == -1) { XXX ??? 
*/ + if (memAddress == (char *) -1) + { + perror("IpcMemoryAttach: shmat() failed"); + IpcConfigTip(); + return (IpcMemAttachFailed); + } + + if (!UsePrivateMemory) + on_exitpg(IpcMemoryDetach, (caddr_t) memAddress); + + return ((char *) memAddress); } /****************************************************************************/ -/* IpcMemoryKill(memKey) removes a shared memory segment */ -/* (only called by the postmaster and standalone backends) */ +/* IpcMemoryKill(memKey) removes a shared memory segment */ +/* (only called by the postmaster and standalone backends) */ /****************************************************************************/ void IpcMemoryKill(IpcMemoryKey memKey) -{ - IpcMemoryId shmid; - - if (!UsePrivateMemory && (shmid = shmget(memKey, 0, 0)) >= 0) { - if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0) { - elog(NOTICE, "IpcMemoryKill: shmctl(%d, %d, 0) failed: %m", - shmid, IPC_RMID); +{ + IpcMemoryId shmid; + + if (!UsePrivateMemory && (shmid = shmget(memKey, 0, 0)) >= 0) + { + if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0) + { + elog(NOTICE, "IpcMemoryKill: shmctl(%d, %d, 0) failed: %m", + shmid, IPC_RMID); + } } - } -} +} #ifdef HAS_TEST_AND_SET /* ------------------ - * use hardware locks to replace semaphores for sequent machines - * to avoid costs of swapping processes and to provide unlimited - * supply of locks. + * use hardware locks to replace semaphores for sequent machines + * to avoid costs of swapping processes and to provide unlimited + * supply of locks. * ------------------ */ -static SLock *SLockArray = NULL; -static SLock **FreeSLockPP; -static int *UnusedSLockIP; +static SLock *SLockArray = NULL; +static SLock **FreeSLockPP; +static int *UnusedSLockIP; static slock_t *SLockMemoryLock; static IpcMemoryId SLockMemoryId = -1; -struct ipcdummy { /* to get alignment/size right */ - SLock *free; - int unused; - slock_t memlock; - SLock slocks[NSLOCKS]; +struct ipcdummy +{ /* to get alignment/size right */ + SLock *free; + int unused; + slock_t memlock; + SLock slocks[NSLOCKS]; }; -static int SLockMemorySize = sizeof(struct ipcdummy); +static int SLockMemorySize = sizeof(struct ipcdummy); void CreateAndInitSLockMemory(IPCKey key) { - int id; - SLock *slckP; - - SLockMemoryId = IpcMemoryCreate(key, - SLockMemorySize, - 0700); - AttachSLockMemory(key); - *FreeSLockPP = NULL; - *UnusedSLockIP = (int)FIRSTFREELOCKID; - for (id=0; id<(int)FIRSTFREELOCKID; id++) { - slckP = &(SLockArray[id]); - S_INIT_LOCK(&(slckP->locklock)); - slckP->flag = NOLOCK; - slckP->nshlocks = 0; - S_INIT_LOCK(&(slckP->shlock)); - S_INIT_LOCK(&(slckP->exlock)); - S_INIT_LOCK(&(slckP->comlock)); - slckP->next = NULL; - } - return; + int id; + SLock *slckP; + + SLockMemoryId = IpcMemoryCreate(key, + SLockMemorySize, + 0700); + AttachSLockMemory(key); + *FreeSLockPP = NULL; + *UnusedSLockIP = (int) FIRSTFREELOCKID; + for (id = 0; id < (int) FIRSTFREELOCKID; id++) + { + slckP = &(SLockArray[id]); + S_INIT_LOCK(&(slckP->locklock)); + slckP->flag = NOLOCK; + slckP->nshlocks = 0; + S_INIT_LOCK(&(slckP->shlock)); + S_INIT_LOCK(&(slckP->exlock)); + S_INIT_LOCK(&(slckP->comlock)); + slckP->next = NULL; + } + return; } void AttachSLockMemory(IPCKey key) { - struct ipcdummy *slockM; - - if (SLockMemoryId == -1) - SLockMemoryId = IpcMemoryIdGet(key,SLockMemorySize); - if (SLockMemoryId == -1) - elog(FATAL, "SLockMemory not in shared memory"); - slockM = (struct ipcdummy *) IpcMemoryAttach(SLockMemoryId); - if (slockM == IpcMemAttachFailed) - elog(FATAL, 
"AttachSLockMemory: could not attach segment"); - FreeSLockPP = (SLock **) &(slockM->free); - UnusedSLockIP = (int *) &(slockM->unused); - SLockMemoryLock = (slock_t *) &(slockM->memlock); - S_INIT_LOCK(SLockMemoryLock); - SLockArray = (SLock *) &(slockM->slocks[0]); - return; + struct ipcdummy *slockM; + + if (SLockMemoryId == -1) + SLockMemoryId = IpcMemoryIdGet(key, SLockMemorySize); + if (SLockMemoryId == -1) + elog(FATAL, "SLockMemory not in shared memory"); + slockM = (struct ipcdummy *) IpcMemoryAttach(SLockMemoryId); + if (slockM == IpcMemAttachFailed) + elog(FATAL, "AttachSLockMemory: could not attach segment"); + FreeSLockPP = (SLock **) & (slockM->free); + UnusedSLockIP = (int *) &(slockM->unused); + SLockMemoryLock = (slock_t *) & (slockM->memlock); + S_INIT_LOCK(SLockMemoryLock); + SLockArray = (SLock *) & (slockM->slocks[0]); + return; } #ifdef LOCKDEBUG #define PRINT_LOCK(LOCK) printf("(locklock = %d, flag = %d, nshlocks = %d, \ shlock = %d, exlock =%d)\n", LOCK->locklock, \ - LOCK->flag, LOCK->nshlocks, LOCK->shlock, \ - LOCK->exlock) + LOCK->flag, LOCK->nshlocks, LOCK->shlock, \ + LOCK->exlock) #endif void ExclusiveLock(int lockid) { - SLock *slckP; - slckP = &(SLockArray[lockid]); + SLock *slckP; + + slckP = &(SLockArray[lockid]); #ifdef LOCKDEBUG - printf("ExclusiveLock(%d)\n", lockid); - printf("IN: "); - PRINT_LOCK(slckP); + printf("ExclusiveLock(%d)\n", lockid); + printf("IN: "); + PRINT_LOCK(slckP); #endif - ex_try_again: - S_LOCK(&(slckP->locklock)); - switch (slckP->flag) { - case NOLOCK: - slckP->flag = EXCLUSIVELOCK; - S_LOCK(&(slckP->exlock)); - S_LOCK(&(slckP->shlock)); - S_UNLOCK(&(slckP->locklock)); +ex_try_again: + S_LOCK(&(slckP->locklock)); + switch (slckP->flag) + { + case NOLOCK: + slckP->flag = EXCLUSIVELOCK; + S_LOCK(&(slckP->exlock)); + S_LOCK(&(slckP->shlock)); + S_UNLOCK(&(slckP->locklock)); #ifdef LOCKDEBUG - printf("OUT: "); - PRINT_LOCK(slckP); + printf("OUT: "); + PRINT_LOCK(slckP); #endif - return; - case SHAREDLOCK: - case EXCLUSIVELOCK: - S_UNLOCK(&(slckP->locklock)); - S_LOCK(&(slckP->exlock)); - S_UNLOCK(&(slckP->exlock)); - goto ex_try_again; - } + return; + case SHAREDLOCK: + case EXCLUSIVELOCK: + S_UNLOCK(&(slckP->locklock)); + S_LOCK(&(slckP->exlock)); + S_UNLOCK(&(slckP->exlock)); + goto ex_try_again; + } } void ExclusiveUnlock(int lockid) { - SLock *slckP; - - slckP = &(SLockArray[lockid]); + SLock *slckP; + + slckP = &(SLockArray[lockid]); #ifdef LOCKDEBUG - printf("ExclusiveUnlock(%d)\n", lockid); - printf("IN: "); - PRINT_LOCK(slckP); + printf("ExclusiveUnlock(%d)\n", lockid); + printf("IN: "); + PRINT_LOCK(slckP); #endif - S_LOCK(&(slckP->locklock)); - /* ------------- - * give favor to read processes - * ------------- - */ - slckP->flag = NOLOCK; - if (slckP->nshlocks > 0) { - while (slckP->nshlocks > 0) { - S_UNLOCK(&(slckP->shlock)); - S_LOCK(&(slckP->comlock)); + S_LOCK(&(slckP->locklock)); + /* ------------- + * give favor to read processes + * ------------- + */ + slckP->flag = NOLOCK; + if (slckP->nshlocks > 0) + { + while (slckP->nshlocks > 0) + { + S_UNLOCK(&(slckP->shlock)); + S_LOCK(&(slckP->comlock)); + } + S_UNLOCK(&(slckP->shlock)); } - S_UNLOCK(&(slckP->shlock)); - } - else { - S_UNLOCK(&(slckP->shlock)); - } - S_UNLOCK(&(slckP->exlock)); - S_UNLOCK(&(slckP->locklock)); + else + { + S_UNLOCK(&(slckP->shlock)); + } + S_UNLOCK(&(slckP->exlock)); + S_UNLOCK(&(slckP->locklock)); #ifdef LOCKDEBUG - printf("OUT: "); - PRINT_LOCK(slckP); + printf("OUT: "); + PRINT_LOCK(slckP); #endif - return; + return; } bool 
LockIsFree(int lockid) { - return(SLockArray[lockid].flag == NOLOCK); + return (SLockArray[lockid].flag == NOLOCK); } -#endif /* HAS_TEST_AND_SET */ +#endif /* HAS_TEST_AND_SET */ static void IpcConfigTip(void) { - fprintf(stderr,"This type of error is usually caused by improper\n"); - fprintf(stderr,"shared memory or System V IPC semaphore configuration.\n"); - fprintf(stderr,"See the FAQ for more detailed information\n"); + fprintf(stderr, "This type of error is usually caused by improper\n"); + fprintf(stderr, "shared memory or System V IPC semaphore configuration.\n"); + fprintf(stderr, "See the FAQ for more detailed information\n"); } diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index f949041f44d..4aad8e85f54 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * ipci.c-- - * POSTGRES inter-process communication initialization code. + * POSTGRES inter-process communication initialization code. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.5 1997/01/08 08:32:03 bryanh Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.6 1997/09/07 04:48:33 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -23,129 +23,131 @@ #include "storage/proc.h" #include "storage/smgr.h" #include "storage/lock.h" -#include "miscadmin.h" /* for DebugLvl */ +#include "miscadmin.h" /* for DebugLvl */ /* * SystemPortAddressCreateMemoryKey -- - * Returns a memory key given a port address. + * Returns a memory key given a port address. */ IPCKey SystemPortAddressCreateIPCKey(SystemPortAddress address) { - Assert(address < 32768); /* XXX */ - - return (SystemPortAddressGetIPCKey(address)); + Assert(address < 32768); /* XXX */ + + return (SystemPortAddressGetIPCKey(address)); } /* * CreateSharedMemoryAndSemaphores -- - * Creates and initializes shared memory and semaphores. + * Creates and initializes shared memory and semaphores. */ /************************************************** - + CreateSharedMemoryAndSemaphores is called exactly *ONCE* by the postmaster. It is *NEVER* called by the postgres backend - + 0) destroy any existing semaphores for both buffer and lock managers. 1) create the appropriate *SHARED* memory segments for the two resource managers. 
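
Editorial aside: the IpcMemory* routines reindented above wrap the usual System V shared-memory lifecycle: shmget to create, shmat to map (its failure value is the awkward (char *) -1 tested above), shmdt to unmap, shmctl IPC_RMID to remove. A compressed sketch of that lifecycle, independent of the patch; demo_shm, the 4096-byte size, and the 0600 permission are illustrative only.

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int
    demo_shm(key_t key)
    {
        int   shmid;
        char *base;

        /* IpcMemoryCreate: create the segment */
        shmid = shmget(key, 4096, IPC_CREAT | 0600);
        if (shmid < 0)
        {
            perror("shmget");
            return -1;
        }

        /* IpcMemoryAttach: map it; shmat returns (char *) -1 on failure */
        base = (char *) shmat(shmid, 0, 0);
        if (base == (char *) -1)
        {
            perror("shmat");
            return -1;
        }

        /* IpcMemoryDetach / IpcMemoryKill: unmap, then remove the id */
        shmdt(base);
        shmctl(shmid, IPC_RMID, (struct shmid_ds *) 0);
        return 0;
    }
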
- + **************************************************/ void CreateSharedMemoryAndSemaphores(IPCKey key) { - int size; - + int size; + #ifdef HAS_TEST_AND_SET - /* --------------- - * create shared memory for slocks - * -------------- - */ - CreateAndInitSLockMemory(IPCKeyGetSLockSharedMemoryKey(key)); + /* --------------- + * create shared memory for slocks + * -------------- + */ + CreateAndInitSLockMemory(IPCKeyGetSLockSharedMemoryKey(key)); #endif - /* ---------------- - * kill and create the buffer manager buffer pool (and semaphore) - * ---------------- - */ - CreateSpinlocks(IPCKeyGetSpinLockSemaphoreKey(key)); - size = BufferShmemSize() + LockShmemSize(); - + /* ---------------- + * kill and create the buffer manager buffer pool (and semaphore) + * ---------------- + */ + CreateSpinlocks(IPCKeyGetSpinLockSemaphoreKey(key)); + size = BufferShmemSize() + LockShmemSize(); + #ifdef MAIN_MEMORY - size += MMShmemSize(); -#endif /* MAIN_MEMORY */ - - if (DebugLvl > 1) { - fprintf(stderr, "binding ShmemCreate(key=%x, size=%d)\n", - IPCKeyGetBufferMemoryKey(key), size); - } - ShmemCreate(IPCKeyGetBufferMemoryKey(key), size); - ShmemBindingTabReset(); - InitShmem(key, size); - InitBufferPool(key); - - /* ---------------- - * do the lock table stuff - * ---------------- - */ - InitLocks(); - InitMultiLevelLockm(); - if (InitMultiLevelLockm() == INVALID_TABLEID) - elog(FATAL, "Couldn't create the lock table"); - - /* ---------------- - * do process table stuff - * ---------------- - */ - InitProcGlobal(key); - on_exitpg(ProcFreeAllSemaphores, 0); - - CreateSharedInvalidationState(key); + size += MMShmemSize(); +#endif /* MAIN_MEMORY */ + + if (DebugLvl > 1) + { + fprintf(stderr, "binding ShmemCreate(key=%x, size=%d)\n", + IPCKeyGetBufferMemoryKey(key), size); + } + ShmemCreate(IPCKeyGetBufferMemoryKey(key), size); + ShmemBindingTabReset(); + InitShmem(key, size); + InitBufferPool(key); + + /* ---------------- + * do the lock table stuff + * ---------------- + */ + InitLocks(); + InitMultiLevelLockm(); + if (InitMultiLevelLockm() == INVALID_TABLEID) + elog(FATAL, "Couldn't create the lock table"); + + /* ---------------- + * do process table stuff + * ---------------- + */ + InitProcGlobal(key); + on_exitpg(ProcFreeAllSemaphores, 0); + + CreateSharedInvalidationState(key); } /* * AttachSharedMemoryAndSemaphores -- - * Attachs existant shared memory and semaphores. + * Attachs existant shared memory and semaphores. 
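
Editorial aside: the create/attach split in this file follows one rule: the postmaster (or a standalone backend running with PrivateIPCKey) creates everything exactly once, and ordinary backends only attach. A hedged sketch of a backend's startup call using the routines in this hunk; postmaster_port is a placeholder parameter, not an identifier from the patch.

    /* Hypothetical backend startup under the conventions above. */
    void
    backend_ipc_startup(SystemPortAddress postmaster_port)
    {
        IPCKey key = SystemPortAddressCreateIPCKey(postmaster_port);

        /* Attaches to existing shared state; CreateSharedMemoryAndSemaphores
         * is called instead when key == PrivateIPCKey (standalone backend). */
        AttachSharedMemoryAndSemaphores(key);
    }
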
*/ void AttachSharedMemoryAndSemaphores(IPCKey key) { - int size; - - /* ---------------- - * create rather than attach if using private key - * ---------------- - */ - if (key == PrivateIPCKey) { - CreateSharedMemoryAndSemaphores(key); - return; - } - + int size; + + /* ---------------- + * create rather than attach if using private key + * ---------------- + */ + if (key == PrivateIPCKey) + { + CreateSharedMemoryAndSemaphores(key); + return; + } + #ifdef HAS_TEST_AND_SET - /* ---------------- - * attach the slock shared memory - * ---------------- - */ - AttachSLockMemory(IPCKeyGetSLockSharedMemoryKey(key)); + /* ---------------- + * attach the slock shared memory + * ---------------- + */ + AttachSLockMemory(IPCKeyGetSLockSharedMemoryKey(key)); #endif - /* ---------------- - * attach the buffer manager buffer pool (and semaphore) - * ---------------- - */ - size = BufferShmemSize() + LockShmemSize(); - InitShmem(key, size); - InitBufferPool(key); - - /* ---------------- - * initialize lock table stuff - * ---------------- - */ - InitLocks(); - if (InitMultiLevelLockm() == INVALID_TABLEID) - elog(FATAL, "Couldn't attach to the lock table"); - - AttachSharedInvalidationState(key); + /* ---------------- + * attach the buffer manager buffer pool (and semaphore) + * ---------------- + */ + size = BufferShmemSize() + LockShmemSize(); + InitShmem(key, size); + InitBufferPool(key); + + /* ---------------- + * initialize lock table stuff + * ---------------- + */ + InitLocks(); + if (InitMultiLevelLockm() == INVALID_TABLEID) + elog(FATAL, "Couldn't attach to the lock table"); + + AttachSharedInvalidationState(key); } diff --git a/src/backend/storage/ipc/s_lock.c b/src/backend/storage/ipc/s_lock.c index 146c2d7080a..70f0deb62c9 100644 --- a/src/backend/storage/ipc/s_lock.c +++ b/src/backend/storage/ipc/s_lock.c @@ -1,40 +1,40 @@ /*------------------------------------------------------------------------- * * s_lock.c-- - * This file contains the implementation (if any) for spinlocks. + * This file contains the implementation (if any) for spinlocks. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/s_lock.c,v 1.21 1997/09/05 18:10:54 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/s_lock.c,v 1.22 1997/09/07 04:48:35 momjian Exp $ * *------------------------------------------------------------------------- */ /* - * DESCRIPTION - * The following code fragment should be written (in assembly - * language) on machines that have a native test-and-set instruction: + * DESCRIPTION + * The following code fragment should be written (in assembly + * language) on machines that have a native test-and-set instruction: * - * void - * S_LOCK(char_address) - * char *char_address; - * { - * while (test_and_set(char_address)) - * ; - * } + * void + * S_LOCK(char_address) + * char *char_address; + * { + * while (test_and_set(char_address)) + * ; + * } * - * If this is not done, POSTGRES will default to using System V - * semaphores (and take a large performance hit -- around 40% of - * its time on a DS5000/240 is spent in semop(3)...). + * If this is not done, POSTGRES will default to using System V + * semaphores (and take a large performance hit -- around 40% of + * its time on a DS5000/240 is spent in semop(3)...). * - * NOTES - * AIX has a test-and-set but the recommended interface is the cs(3) - * system call. 
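
Editorial aside: the DESCRIPTION block above is the whole contract for this file: S_LOCK must spin on an atomic test-and-set, and each platform section below supplies that primitive in its own assembly. On a modern compiler the same contract can be written portably with GCC's __sync builtins; this is an illustration of the contract, not code from the patch, and my_slock_t stands in for the port-specific slock_t.

    typedef volatile char my_slock_t;   /* stand-in for the port-specific slock_t */

    static void
    my_s_lock(my_slock_t *lock)
    {
        /* __sync_lock_test_and_set atomically stores 1 and returns the old value */
        while (__sync_lock_test_and_set(lock, 1))
            ;                           /* spin until the old value was 0 */
    }

    static void
    my_s_unlock(my_slock_t *lock)
    {
        __sync_lock_release(lock);      /* atomically store 0 with release semantics */
    }
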
This provides an 8-instruction (plus system call - * overhead) uninterruptible compare-and-set operation. True - * spinlocks might be faster but using cs(3) still speeds up the - * regression test suite by about 25%. I don't have an assembler - * manual for POWER in any case. + * NOTES + * AIX has a test-and-set but the recommended interface is the cs(3) + * system call. This provides an 8-instruction (plus system call + * overhead) uninterruptible compare-and-set operation. True + * spinlocks might be faster but using cs(3) still speeds up the + * regression test suite by about 25%. I don't have an assembler + * manual for POWER in any case. * */ #include "postgres.h" @@ -50,71 +50,71 @@ * slock_t is defined as a struct mutex. */ void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { mutex_lock(lock); } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { mutex_unlock(lock); } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - mutex_init(lock); + mutex_init(lock); } /* S_LOCK_FREE should return 1 if lock is free; 0 if lock is locked */ int - S_LOCK_FREE(slock_t *lock) +S_LOCK_FREE(slock_t * lock) { /* For Mach, we have to delve inside the entrails of `struct mutex'. Ick! */ - return (lock->lock == 0); + return (lock->lock == 0); } -#endif /* next */ +#endif /* next */ #if defined(irix5) /* * SGI IRIX 5 - * slock_t is defined as a struct abilock_t, which has a single unsigned long + * slock_t is defined as a struct abilock_t, which has a single unsigned long * member. - * + * * This stuff may be supplemented in the future with Masato Kataoka's MIPS-II * assembly from his NECEWS SVR4 port, but we probably ought to retain this * for the R3000 chips out there. */ void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { /* spin_lock(lock); */ while (!acquire_lock(lock)) - ; + ; } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { release_lock(lock); } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - init_lock(lock); + init_lock(lock); } /* S_LOCK_FREE should return 1 if lock is free; 0 if lock is locked */ int -S_LOCK_FREE(slock_t *lock) +S_LOCK_FREE(slock_t * lock) { - return(stat_lock(lock)==UNLOCKED); + return (stat_lock(lock) == UNLOCKED); } -#endif /* irix5 */ +#endif /* irix5 */ /* @@ -127,62 +127,62 @@ S_LOCK_FREE(slock_t *lock) #if defined(__alpha__) || defined(__alpha) void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { - while (msem_lock(lock, MSEM_IF_NOWAIT) < 0) - ; + while (msem_lock(lock, MSEM_IF_NOWAIT) < 0) + ; } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { - msem_unlock(lock, 0); + msem_unlock(lock, 0); } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - msem_init(lock, MSEM_UNLOCKED); + msem_init(lock, MSEM_UNLOCKED); } int -S_LOCK_FREE(slock_t *lock) +S_LOCK_FREE(slock_t * lock) { - return(lock->msem_state ? 0 : 1); + return (lock->msem_state ? 
0 : 1); } -#endif /* alpha */ +#endif /* alpha */ /* * Solaris 2 */ #if defined(i386_solaris) || \ - defined(sparc_solaris) + defined(sparc_solaris) /* for xxxxx_solaris, this is defined in port/.../tas.s */ -static int tas(slock_t *lock); +static int tas(slock_t * lock); void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { - while (tas(lock)) - ; + while (tas(lock)) + ; } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { - *lock = 0; + *lock = 0; } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - S_UNLOCK(lock); + S_UNLOCK(lock); } -#endif /* i86pc_solaris || sparc_solaris */ +#endif /* i86pc_solaris || sparc_solaris */ /* * AIX (POWER) @@ -194,25 +194,25 @@ S_INIT_LOCK(slock_t *lock) #if defined(aix) void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { - while (cs((int *) lock, 0, 1)) - ; + while (cs((int *) lock, 0, 1)) + ; } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { - *lock = 0; + *lock = 0; } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - S_UNLOCK(lock); + S_UNLOCK(lock); } -#endif /* aix */ +#endif /* aix */ /* * HP-UX (PA-RISC) @@ -224,90 +224,90 @@ S_INIT_LOCK(slock_t *lock) #if defined(hpux) /* -* a "set" slock_t has a single word cleared. a "clear" slock_t has +* a "set" slock_t has a single word cleared. a "clear" slock_t has * all words set to non-zero. */ -static slock_t clear_lock = { -1, -1, -1, -1 }; +static slock_t clear_lock = {-1, -1, -1, -1}; -static int tas(slock_t *lock); +static int tas(slock_t * lock); void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { - while (tas(lock)) - ; + while (tas(lock)) + ; } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { - *lock = clear_lock; /* struct assignment */ + *lock = clear_lock; /* struct assignment */ } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - S_UNLOCK(lock); + S_UNLOCK(lock); } int -S_LOCK_FREE(slock_t *lock) +S_LOCK_FREE(slock_t * lock) { - register int *lock_word = (int *) (((long) lock + 15) & ~15); + register int *lock_word = (int *) (((long) lock + 15) & ~15); - return(*lock_word != 0); + return (*lock_word != 0); } -#endif /* hpux */ +#endif /* hpux */ /* * sun3 */ - + #if defined(sun3) -static int tas(slock_t *lock); +static int tas(slock_t * lock); -void -S_LOCK(slock_t *lock) +void +S_LOCK(slock_t * lock) { - while (tas(lock)); + while (tas(lock)); } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { - *lock = 0; + *lock = 0; } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - S_UNLOCK(lock); + S_UNLOCK(lock); } static int tas_dummy() { - asm("LLA0:"); - asm(" .data"); - asm(" .text"); - asm("|#PROC# 04"); - asm(" .globl _tas"); - asm("_tas:"); - asm("|#PROLOGUE# 1"); - asm(" movel sp@(0x4),a0"); - asm(" tas a0@"); - asm(" beq LLA1"); - asm(" moveq #-128,d0"); - asm(" rts"); - asm("LLA1:"); - asm(" moveq #0,d0"); - asm(" rts"); - asm(" .data"); -} - -#endif /* sun3 */ + asm("LLA0:"); + asm(" .data"); + asm(" .text"); + asm("|#PROC# 04"); + asm(" .globl _tas"); + asm("_tas:"); + asm("|#PROLOGUE# 1"); + asm(" movel sp@(0x4),a0"); + asm(" tas a0@"); + asm(" beq LLA1"); + asm(" moveq #-128,d0"); + asm(" rts"); + asm("LLA1:"); + asm(" moveq #0,d0"); + asm(" rts"); + asm(" .data"); +} + +#endif /* sun3 */ /* * sparc machines @@ -317,48 +317,48 @@ tas_dummy() /* if we're using -ansi w/ gcc, use __asm__ instead of asm */ #if defined(__STRICT_ANSI__) -#define asm(x) __asm__(x) -#endif +#define asm(x) __asm__(x) +#endif -static int tas(slock_t *lock); +static int tas(slock_t * lock); static int tas_dummy() { 
- asm(".seg \"data\""); - asm(".seg \"text\""); - asm(".global _tas"); - asm("_tas:"); - - /* - * Sparc atomic test and set (sparc calls it "atomic load-store") - */ - - asm("ldstub [%r8], %r8"); - - /* - * Did test and set actually do the set? - */ - - asm("tst %r8"); - - asm("be,a ReturnZero"); - - /* - * otherwise, just return. - */ - - asm("clr %r8"); - asm("mov 0x1, %r8"); - asm("ReturnZero:"); - asm("retl"); - asm("nop"); + asm(".seg \"data\""); + asm(".seg \"text\""); + asm(".global _tas"); + asm("_tas:"); + + /* + * Sparc atomic test and set (sparc calls it "atomic load-store") + */ + + asm("ldstub [%r8], %r8"); + + /* + * Did test and set actually do the set? + */ + + asm("tst %r8"); + + asm("be,a ReturnZero"); + + /* + * otherwise, just return. + */ + + asm("clr %r8"); + asm("mov 0x1, %r8"); + asm("ReturnZero:"); + asm("retl"); + asm("nop"); } void S_LOCK(unsigned char *addr) { - while (tas(addr)); + while (tas(addr)); } @@ -368,16 +368,16 @@ S_LOCK(unsigned char *addr) void S_UNLOCK(unsigned char *addr) { - *addr = 0; + *addr = 0; } void S_INIT_LOCK(unsigned char *addr) { - *addr = 0; + *addr = 0; } -#endif /* NEED_SPARC_TAS_ASM */ +#endif /* NEED_SPARC_TAS_ASM */ /* * i386 based things @@ -386,39 +386,41 @@ S_INIT_LOCK(unsigned char *addr) #if defined(NEED_I386_TAS_ASM) void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { - slock_t res; + slock_t res; - do{ - __asm__("xchgb %0,%1":"=q" (res),"=m" (*lock):"0" (0x1)); - }while(res != 0); + do + { +__asm__("xchgb %0,%1": "=q"(res), "=m"(*lock):"0"(0x1)); + } while (res != 0); } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { - *lock = 0; + *lock = 0; } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - S_UNLOCK(lock); + S_UNLOCK(lock); } -#endif /* NEED_I386_TAS_ASM */ +#endif /* NEED_I386_TAS_ASM */ #if defined(__alpha__) && defined(linux) void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { - slock_t res; + slock_t res; - do{ - __asm__(" ldq $0, %0 \n\ + do + { +__asm__(" ldq $0, %0 \n\ bne $0, already_set \n\ ldq_l $0, %0 \n\ bne $0, already_set \n\ @@ -430,56 +432,58 @@ S_LOCK(slock_t *lock) jmp $31, end \n\ stqc_fail: or $31, 1, $0 \n\ already_set: bis $0, $0, %1 \n\ - end: nop " : "=m" (*lock), "=r" (res) :: "0" ); - }while(res != 0); + end: nop ": "=m"(*lock), "=r"(res): :"0"); + } while (res != 0); } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { - __asm__("mb"); - *lock = 0; + __asm__("mb"); + *lock = 0; } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - S_UNLOCK(lock); + S_UNLOCK(lock); } -#endif /* defined(__alpha__) && defined(linux) */ +#endif /* defined(__alpha__) && defined(linux) */ #if defined(linux) && defined(sparc) - + void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { - slock_t res; + slock_t res; - do{ - __asm__("ldstub [%1], %0" - : "=&r" (res) - : "r" (lock)); - }while(!res != 0); + do + { + __asm__("ldstub [%1], %0" +: "=&r"(res) +: "r"(lock)); + } while (!res != 0); } void -S_UNLOCK(slock_t *lock) +S_UNLOCK(slock_t * lock) { - *lock = 0; + *lock = 0; } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - S_UNLOCK(lock); + S_UNLOCK(lock); } -#endif /* defined(linux) && defined(sparc) */ +#endif /* defined(linux) && defined(sparc) */ #if defined(linux) && defined(PPC) -static int tas_dummy() +static int +tas_dummy() { __asm__(" \n\ tas: \n\ @@ -496,26 +500,26 @@ success: \n\ blr \n\ "); } - + void -S_LOCK(slock_t *lock) +S_LOCK(slock_t * lock) { - while (tas(lock)) - ; + while (tas(lock)) + ; } void -S_UNLOCK(slock_t *lock) 
+S_UNLOCK(slock_t * lock) { - *lock = 0; + *lock = 0; } void -S_INIT_LOCK(slock_t *lock) +S_INIT_LOCK(slock_t * lock) { - S_UNLOCK(lock); + S_UNLOCK(lock); } -#endif /* defined(linux) && defined(PPC) */ +#endif /* defined(linux) && defined(PPC) */ -#endif /* HAS_TEST_AND_SET */ +#endif /* HAS_TEST_AND_SET */ diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index c839059ea9b..63848171a1f 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * shmem.c-- - * create shared memory and initialize shared memory data structures. + * create shared memory and initialize shared memory data structures. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.10 1997/08/12 22:53:56 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.11 1997/09/07 04:48:37 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -18,43 +18,43 @@ * allocating and binding to shared memory data structures. * * NOTES: - * (a) There are three kinds of shared memory data structures - * available to POSTGRES: fixed-size structures, queues and hash - * tables. Fixed-size structures contain things like global variables - * for a module and should never be allocated after the process - * initialization phase. Hash tables have a fixed maximum size, but - * their actual size can vary dynamically. When entries are added - * to the table, more space is allocated. Queues link data structures - * that have been allocated either as fixed size structures or as hash - * buckets. Each shared data structure has a string name to identify - * it (assigned in the module that declares it). + * (a) There are three kinds of shared memory data structures + * available to POSTGRES: fixed-size structures, queues and hash + * tables. Fixed-size structures contain things like global variables + * for a module and should never be allocated after the process + * initialization phase. Hash tables have a fixed maximum size, but + * their actual size can vary dynamically. When entries are added + * to the table, more space is allocated. Queues link data structures + * that have been allocated either as fixed size structures or as hash + * buckets. Each shared data structure has a string name to identify + * it (assigned in the module that declares it). * - * (b) During initialization, each module looks for its - * shared data structures in a hash table called the "Binding Table". - * If the data structure is not present, the caller can allocate - * a new one and initialize it. If the data structure is present, - * the caller "attaches" to the structure by initializing a pointer - * in the local address space. - * The binding table has two purposes: first, it gives us - * a simple model of how the world looks when a backend process - * initializes. If something is present in the binding table, - * it is initialized. If it is not, it is uninitialized. Second, - * the binding table allows us to allocate shared memory on demand - * instead of trying to preallocate structures and hard-wire the - * sizes and locations in header files. If you are using a lot - * of shared memory in a lot of different places (and changing - * things during development), this is important. 
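
Editorial aside: note (b) above describes a lookup-or-create protocol: a module probes the binding table by name, attaches if the structure exists, and allocates and initializes it otherwise. In practice a module drives this through ShmemInitStruct, whose reindented body appears further down in this hunk; a typical caller might look like the following, where MyModuleState, "My Module", and my_module_init are illustrative names, not from the patch.

    typedef struct { int counter; } MyModuleState;

    void
    my_module_init(void)
    {
        bool           found;
        MyModuleState *state;

        state = (MyModuleState *)
            ShmemInitStruct("My Module", sizeof(MyModuleState), &found);
        if (!state)
            elog(FATAL, "my_module_init: could not bind shared state");

        if (!found)
            state->counter = 0;         /* first process here: initialize */
        /* otherwise some earlier backend already initialized it */
    }
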
+ * (b) During initialization, each module looks for its + * shared data structures in a hash table called the "Binding Table". + * If the data structure is not present, the caller can allocate + * a new one and initialize it. If the data structure is present, + * the caller "attaches" to the structure by initializing a pointer + * in the local address space. + * The binding table has two purposes: first, it gives us + * a simple model of how the world looks when a backend process + * initializes. If something is present in the binding table, + * it is initialized. If it is not, it is uninitialized. Second, + * the binding table allows us to allocate shared memory on demand + * instead of trying to preallocate structures and hard-wire the + * sizes and locations in header files. If you are using a lot + * of shared memory in a lot of different places (and changing + * things during development), this is important. * - * (c) memory allocation model: shared memory can never be - * freed, once allocated. Each hash table has its own free list, - * so hash buckets can be reused when an item is deleted. However, - * if one hash table grows very large and then shrinks, its space - * cannot be redistributed to other tables. We could build a simple - * hash bucket garbage collector if need be. Right now, it seems - * unnecessary. + * (c) memory allocation model: shared memory can never be + * freed, once allocated. Each hash table has its own free list, + * so hash buckets can be reused when an item is deleted. However, + * if one hash table grows very large and then shrinks, its space + * cannot be redistributed to other tables. We could build a simple + * hash bucket garbage collector if need be. Right now, it seems + * unnecessary. * - * See InitSem() in sem.c for an example of how to use the - * binding table. + * See InitSem() in sem.c for an example of how to use the + * binding table. * */ #include <stdio.h> @@ -70,27 +70,23 @@ /* shared memory global variables */ -unsigned long ShmemBase = 0; /* start and end address of - * shared memory - */ -static unsigned long ShmemEnd = 0; -static unsigned long ShmemSize = 0; /* current size (and default) */ +unsigned long ShmemBase = 0; /* start and end address of shared memory */ +static unsigned long ShmemEnd = 0; +static unsigned long ShmemSize = 0; /* current size (and default) */ -SPINLOCK ShmemLock; /* lock for shared memory allocation */ +SPINLOCK ShmemLock; /* lock for shared memory allocation */ -SPINLOCK BindingLock; /* lock for binding table access */ +SPINLOCK BindingLock; /* lock for binding table access */ -static unsigned long *ShmemFreeStart = NULL; /* pointer to the OFFSET of - * first free shared memory - */ -static unsigned long *ShmemBindingTabOffset = NULL; /* start of the binding - * table (for bootstrap) - */ -static int ShmemBootstrap = FALSE; /* flag becomes true when shared mem - * is created by POSTMASTER - */ +static unsigned long *ShmemFreeStart = NULL; /* pointer to the OFFSET + * of first free shared + * memory */ +static unsigned long *ShmemBindingTabOffset = NULL; /* start of the binding + * table (for bootstrap) */ +static int ShmemBootstrap = FALSE; /* flag becomes true when shared + * mem is created by POSTMASTER */ -static HTAB *BindingTable = NULL; +static HTAB *BindingTable = NULL; /* --------------------- * ShmemBindingTabReset() - Resets the binding table to NULL.... 
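
Editorial aside: because every backend may map the segment at a different virtual address, shared structures are linked by offsets from ShmemBase rather than by raw pointers; the MAKE_OFFSET and MAKE_PTR macros used throughout this file convert between the two. Their effect is roughly the following sketch; the real definitions live in a header and are not part of this patch, so the MY_ names are deliberately hypothetical.

    /* Approximate behaviour of the offset macros used in this file. */
    #define MY_MAKE_OFFSET(ptr)  ((unsigned long) (ptr) - ShmemBase)
    #define MY_MAKE_PTR(off)     ((void *) (ShmemBase + (off)))
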
@@ -101,16 +97,16 @@ static HTAB *BindingTable = NULL; void ShmemBindingTabReset(void) { - BindingTable = (HTAB *)NULL; + BindingTable = (HTAB *) NULL; } /* - * CreateSharedRegion() -- + * CreateSharedRegion() -- * - * This routine is called once by the postmaster to - * initialize the shared buffer pool. Assume there is - * only one postmaster so no synchronization is necessary - * until after this routine completes successfully. + * This routine is called once by the postmaster to + * initialize the shared buffer pool. Assume there is + * only one postmaster so no synchronization is necessary + * until after this routine completes successfully. * * key is a unique identifier for the shmem region. * size is the size of the region. @@ -120,202 +116,220 @@ static IpcMemoryId ShmemId; void ShmemCreate(unsigned int key, unsigned int size) { - if (size) - ShmemSize = size; - /* create shared mem region */ - if ((ShmemId=IpcMemoryCreate(key,ShmemSize,IPCProtection)) - ==IpcMemCreationFailed) { - elog(FATAL,"ShmemCreate: cannot create region"); - exit(1); - } - - /* ShmemBootstrap is true if shared memory has been - * created, but not yet initialized. Only the - * postmaster/creator-of-all-things should have - * this flag set. - */ - ShmemBootstrap = TRUE; + if (size) + ShmemSize = size; + /* create shared mem region */ + if ((ShmemId = IpcMemoryCreate(key, ShmemSize, IPCProtection)) + == IpcMemCreationFailed) + { + elog(FATAL, "ShmemCreate: cannot create region"); + exit(1); + } + + /* + * ShmemBootstrap is true if shared memory has been created, but not + * yet initialized. Only the postmaster/creator-of-all-things should + * have this flag set. + */ + ShmemBootstrap = TRUE; } /* - * InitShmem() -- map region into process address space - * and initialize shared data structures. + * InitShmem() -- map region into process address space + * and initialize shared data structures. * */ int InitShmem(unsigned int key, unsigned int size) { - Pointer sharedRegion; - unsigned long currFreeSpace; - - HASHCTL info; - int hash_flags; - BindingEnt * result,item; - bool found; - IpcMemoryId shmid; - - /* if zero key, use default memory size */ - if (size) - ShmemSize = size; - - /* default key is 0 */ - - /* attach to shared memory region (SysV or BSD OS specific) */ - if (ShmemBootstrap && key == PrivateIPCKey) - /* if we are running backend alone */ - shmid = ShmemId; - else - shmid = IpcMemoryIdGet(IPCKeyGetBufferMemoryKey(key), ShmemSize); - sharedRegion = IpcMemoryAttach(shmid); - if (sharedRegion == NULL) { - elog(FATAL,"AttachSharedRegion: couldn't attach to shmem\n"); - return(FALSE); - } - - /* get pointers to the dimensions of shared memory */ - ShmemBase = (unsigned long) sharedRegion; - ShmemEnd = (unsigned long) sharedRegion + ShmemSize; - currFreeSpace = 0; - - /* First long in shared memory is the count of available space */ - ShmemFreeStart = (unsigned long *) ShmemBase; - /* next is a shmem pointer to the binding table */ - ShmemBindingTabOffset = ShmemFreeStart + 1; - - currFreeSpace += - sizeof(ShmemFreeStart) + sizeof(ShmemBindingTabOffset); - - /* bootstrap initialize spin locks so we can start to use the - * allocator and binding table. - */ - if (! InitSpinLocks(ShmemBootstrap, IPCKeyGetSpinLockSemaphoreKey(key))) { - return(FALSE); - } - - /* We have just allocated additional space for two spinlocks. 
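
Editorial aside: InitShmem below lays the segment out by hand: the first long counts the space handed out so far, the second holds the binding table's offset, and everything after that is carved out by ShmemAlloc. Viewed as a struct, the start of the segment looks roughly like this; it is an illustrative picture, not a declaration from the patch.

    struct shmem_header                 /* hypothetical view of the first bytes */
    {
        unsigned long freeSpaceCount;   /* what *ShmemFreeStart points at */
        unsigned long bindingTabOffset; /* what *ShmemBindingTabOffset points at */
        /* ... spinlock bootstrap area and ShmemAlloc'd structures follow ... */
    };
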
- * Now setup the global free space count - */ - if (ShmemBootstrap) { - *ShmemFreeStart = currFreeSpace; - } - - /* if ShmemFreeStart is NULL, then the allocator won't work */ - Assert(*ShmemFreeStart); - - /* create OR attach to the shared memory binding table */ - info.keysize = BTABLE_KEYSIZE; - info.datasize = BTABLE_DATASIZE; - hash_flags = (HASH_ELEM); - - /* This will acquire the binding table lock, but not release it. */ - BindingTable = ShmemInitHash("BindingTable", - BTABLE_SIZE,BTABLE_SIZE, - &info,hash_flags); - - if (! BindingTable) { - elog(FATAL,"InitShmem: couldn't initialize Binding Table"); - return(FALSE); - } - - /* Now, check the binding table for an entry to the binding - * table. If there is an entry there, someone else created - * the table. Otherwise, we did and we have to initialize it. - */ - memset(item.key, 0, BTABLE_KEYSIZE); - strncpy(item.key,"BindingTable",BTABLE_KEYSIZE); - - result = (BindingEnt *) - hash_search(BindingTable,(char *) &item,HASH_ENTER, &found); - - - if (! result ) { - elog(FATAL,"InitShmem: corrupted binding table"); - return(FALSE); - } - - if (! found) { - /* bootstrapping shmem: we have to initialize the - * binding table now. + Pointer sharedRegion; + unsigned long currFreeSpace; + + HASHCTL info; + int hash_flags; + BindingEnt *result, + item; + bool found; + IpcMemoryId shmid; + + /* if zero key, use default memory size */ + if (size) + ShmemSize = size; + + /* default key is 0 */ + + /* attach to shared memory region (SysV or BSD OS specific) */ + if (ShmemBootstrap && key == PrivateIPCKey) + /* if we are running backend alone */ + shmid = ShmemId; + else + shmid = IpcMemoryIdGet(IPCKeyGetBufferMemoryKey(key), ShmemSize); + sharedRegion = IpcMemoryAttach(shmid); + if (sharedRegion == NULL) + { + elog(FATAL, "AttachSharedRegion: couldn't attach to shmem\n"); + return (FALSE); + } + + /* get pointers to the dimensions of shared memory */ + ShmemBase = (unsigned long) sharedRegion; + ShmemEnd = (unsigned long) sharedRegion + ShmemSize; + currFreeSpace = 0; + + /* First long in shared memory is the count of available space */ + ShmemFreeStart = (unsigned long *) ShmemBase; + /* next is a shmem pointer to the binding table */ + ShmemBindingTabOffset = ShmemFreeStart + 1; + + currFreeSpace += + sizeof(ShmemFreeStart) + sizeof(ShmemBindingTabOffset); + + /* + * bootstrap initialize spin locks so we can start to use the + * allocator and binding table. */ - - Assert(ShmemBootstrap); - result->location = MAKE_OFFSET(BindingTable->hctl); - *ShmemBindingTabOffset = result->location; - result->size = BTABLE_SIZE; - - ShmemBootstrap = FALSE; - - } else { - Assert(! ShmemBootstrap); - } - /* now release the lock acquired in ShmemHashInit */ - SpinRelease (BindingLock); - - Assert (result->location == MAKE_OFFSET(BindingTable->hctl)); - - return(TRUE); + if (!InitSpinLocks(ShmemBootstrap, IPCKeyGetSpinLockSemaphoreKey(key))) + { + return (FALSE); + } + + /* + * We have just allocated additional space for two spinlocks. Now + * setup the global free space count + */ + if (ShmemBootstrap) + { + *ShmemFreeStart = currFreeSpace; + } + + /* if ShmemFreeStart is NULL, then the allocator won't work */ + Assert(*ShmemFreeStart); + + /* create OR attach to the shared memory binding table */ + info.keysize = BTABLE_KEYSIZE; + info.datasize = BTABLE_DATASIZE; + hash_flags = (HASH_ELEM); + + /* This will acquire the binding table lock, but not release it. 
*/ + BindingTable = ShmemInitHash("BindingTable", + BTABLE_SIZE, BTABLE_SIZE, + &info, hash_flags); + + if (!BindingTable) + { + elog(FATAL, "InitShmem: couldn't initialize Binding Table"); + return (FALSE); + } + + /* + * Now, check the binding table for an entry to the binding table. If + * there is an entry there, someone else created the table. + * Otherwise, we did and we have to initialize it. + */ + memset(item.key, 0, BTABLE_KEYSIZE); + strncpy(item.key, "BindingTable", BTABLE_KEYSIZE); + + result = (BindingEnt *) + hash_search(BindingTable, (char *) &item, HASH_ENTER, &found); + + + if (!result) + { + elog(FATAL, "InitShmem: corrupted binding table"); + return (FALSE); + } + + if (!found) + { + + /* + * bootstrapping shmem: we have to initialize the binding table + * now. + */ + + Assert(ShmemBootstrap); + result->location = MAKE_OFFSET(BindingTable->hctl); + *ShmemBindingTabOffset = result->location; + result->size = BTABLE_SIZE; + + ShmemBootstrap = FALSE; + + } + else + { + Assert(!ShmemBootstrap); + } + /* now release the lock acquired in ShmemHashInit */ + SpinRelease(BindingLock); + + Assert(result->location == MAKE_OFFSET(BindingTable->hctl)); + + return (TRUE); } /* * ShmemAlloc -- allocate word-aligned byte string from - * shared memory + * shared memory * * Assumes ShmemLock and ShmemFreeStart are initialized. * Returns: real pointer to memory or NULL if we are out - * of space. Has to return a real pointer in order - * to be compatable with malloc(). + * of space. Has to return a real pointer in order + * to be compatable with malloc(). */ -long * +long * ShmemAlloc(unsigned long size) { - unsigned long tmpFree; - long *newSpace; - - /* - * ensure space is word aligned. - * - * Word-alignment is not good enough. We have to be more - * conservative: doubles need 8-byte alignment. (We probably only need - * this on RISC platforms but this is not a big waste of space.) - * - ay 12/94 - */ - if (size % sizeof(double)) - size += sizeof(double) - (size % sizeof(double)); - - Assert(*ShmemFreeStart); - - SpinAcquire(ShmemLock); - - tmpFree = *ShmemFreeStart + size; - if (tmpFree <= ShmemSize) { - newSpace = (long *)MAKE_PTR(*ShmemFreeStart); - *ShmemFreeStart += size; - } else { - newSpace = NULL; - } - - SpinRelease(ShmemLock); - - if (! newSpace) { - elog(NOTICE,"ShmemAlloc: out of memory "); - } - return(newSpace); + unsigned long tmpFree; + long *newSpace; + + /* + * ensure space is word aligned. + * + * Word-alignment is not good enough. We have to be more conservative: + * doubles need 8-byte alignment. (We probably only need this on RISC + * platforms but this is not a big waste of space.) - ay 12/94 + */ + if (size % sizeof(double)) + size += sizeof(double) - (size % sizeof(double)); + + Assert(*ShmemFreeStart); + + SpinAcquire(ShmemLock); + + tmpFree = *ShmemFreeStart + size; + if (tmpFree <= ShmemSize) + { + newSpace = (long *) MAKE_PTR(*ShmemFreeStart); + *ShmemFreeStart += size; + } + else + { + newSpace = NULL; + } + + SpinRelease(ShmemLock); + + if (!newSpace) + { + elog(NOTICE, "ShmemAlloc: out of memory "); + } + return (newSpace); } /* - * ShmemIsValid -- test if an offset refers to valid shared memory - * + * ShmemIsValid -- test if an offset refers to valid shared memory + * * Returns TRUE if the pointer is valid. */ int ShmemIsValid(unsigned long addr) { - return ((addr<ShmemEnd) && (addr>=ShmemBase)); + return ((addr < ShmemEnd) && (addr >= ShmemBase)); } /* - * ShmemInitHash -- Create/Attach to and initialize - * shared memory hash table. 
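
Editorial aside: ShmemAlloc's rounding step above bumps every request up to a multiple of sizeof(double) so that doubles embedded in shared structures stay 8-byte aligned regardless of what was allocated before them. Concretely, with sizeof(double) == 8, a 20-byte request becomes 24:

    unsigned long size = 20;

    /* same arithmetic as ShmemAlloc above */
    if (size % sizeof(double))
        size += sizeof(double) - (size % sizeof(double));

    /* size is now 24 when sizeof(double) == 8: 20 + (8 - 20 % 8) */
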
+ * ShmemInitHash -- Create/Attach to and initialize + * shared memory hash table. * * Notes: * @@ -324,281 +338,308 @@ ShmemIsValid(unsigned long addr) * table at once. Use SpinAlloc() to create a spinlock * for the structure before creating the structure itself. */ -HTAB * -ShmemInitHash(char *name, /* table string name for binding */ - long init_size, /* initial size */ - long max_size, /* max size of the table */ - HASHCTL *infoP, /* info about key and bucket size */ - int hash_flags) /* info about infoP */ +HTAB * +ShmemInitHash(char *name, /* table string name for binding */ + long init_size, /* initial size */ + long max_size, /* max size of the table */ + HASHCTL * infoP, /* info about key and bucket size */ + int hash_flags) /* info about infoP */ { - bool found; - long * location; - - /* shared memory hash tables have a fixed max size so that the - * control structures don't try to grow. The segbase is for - * calculating pointer values. The shared memory allocator - * must be specified. - */ - infoP->segbase = (long *) ShmemBase; - infoP->alloc = ShmemAlloc; - infoP->max_size = max_size; - hash_flags |= HASH_SHARED_MEM; - - /* look it up in the binding table */ - location = - ShmemInitStruct(name,my_log2(max_size) + sizeof(HHDR),&found); - - /* binding table is corrupted. Let someone else give the - * error message since they have more information - */ - if (location == NULL) { - return(0); - } - - /* it already exists, attach to it rather than allocate and - * initialize new space - */ - if (found) { - hash_flags |= HASH_ATTACH; - } - - /* these structures were allocated or bound in ShmemInitStruct */ - /* control information and parameters */ - infoP->hctl = (long *) location; - /* directory for hash lookup */ - infoP->dir = (long *) (location + sizeof(HHDR)); - - return(hash_create(init_size, infoP, hash_flags));; + bool found; + long *location; + + /* + * shared memory hash tables have a fixed max size so that the control + * structures don't try to grow. The segbase is for calculating + * pointer values. The shared memory allocator must be specified. + */ + infoP->segbase = (long *) ShmemBase; + infoP->alloc = ShmemAlloc; + infoP->max_size = max_size; + hash_flags |= HASH_SHARED_MEM; + + /* look it up in the binding table */ + location = + ShmemInitStruct(name, my_log2(max_size) + sizeof(HHDR), &found); + + /* + * binding table is corrupted. Let someone else give the error + * message since they have more information + */ + if (location == NULL) + { + return (0); + } + + /* + * it already exists, attach to it rather than allocate and initialize + * new space + */ + if (found) + { + hash_flags |= HASH_ATTACH; + } + + /* these structures were allocated or bound in ShmemInitStruct */ + /* control information and parameters */ + infoP->hctl = (long *) location; + /* directory for hash lookup */ + infoP->dir = (long *) (location + sizeof(HHDR)); + + return (hash_create(init_size, infoP, hash_flags));; } /* * ShmemPIDLookup -- lookup process data structure using process id * * Returns: TRUE if no error. locationPtr is initialized if PID is - * found in the binding table. + * found in the binding table. * * NOTES: - * only information about success or failure is the value of - * locationPtr. + * only information about success or failure is the value of + * locationPtr. 
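
Editorial aside: ShmemInitHash above only fills in the shared-memory specifics (segbase, the ShmemAlloc allocator, a fixed maximum size) before delegating to hash_create; the caller supplies key and entry sizes via HASHCTL with HASH_ELEM, exactly as InitShmem does for the binding table. A hypothetical caller mirroring that pattern; make_pid_table, "PID Table", and the sizes are illustrative only (in this file PIDs actually live in the binding table itself, via ShmemPIDLookup below).

    HTAB *
    make_pid_table(void)
    {
        HASHCTL info;
        int     hash_flags;

        info.keysize  = sizeof(int);          /* key: a process id */
        info.datasize = sizeof(SHMEM_OFFSET); /* data: where its struct lives */
        hash_flags = HASH_ELEM;               /* key/data sizes are supplied */

        return ShmemInitHash("PID Table", 32, 32, &info, hash_flags);
    }
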
*/ bool -ShmemPIDLookup(int pid, SHMEM_OFFSET* locationPtr) +ShmemPIDLookup(int pid, SHMEM_OFFSET * locationPtr) { - BindingEnt * result,item; - bool found; - - Assert (BindingTable); - memset(item.key, 0, BTABLE_KEYSIZE); - sprintf(item.key,"PID %d",pid); - - SpinAcquire(BindingLock); - result = (BindingEnt *) - hash_search(BindingTable,(char *) &item, HASH_ENTER, &found); - - if (! result) { - + BindingEnt *result, + item; + bool found; + + Assert(BindingTable); + memset(item.key, 0, BTABLE_KEYSIZE); + sprintf(item.key, "PID %d", pid); + + SpinAcquire(BindingLock); + result = (BindingEnt *) + hash_search(BindingTable, (char *) &item, HASH_ENTER, &found); + + if (!result) + { + + SpinRelease(BindingLock); + elog(WARN, "ShmemInitPID: BindingTable corrupted"); + return (FALSE); + + } + + if (found) + { + *locationPtr = result->location; + } + else + { + result->location = *locationPtr; + } + SpinRelease(BindingLock); - elog(WARN,"ShmemInitPID: BindingTable corrupted"); - return(FALSE); - - } - - if (found) { - *locationPtr = result->location; - } else { - result->location = *locationPtr; - } - - SpinRelease(BindingLock); - return (TRUE); + return (TRUE); } /* * ShmemPIDDestroy -- destroy binding table entry for process - * using process id + * using process id * * Returns: offset of the process struct in shared memory or - * INVALID_OFFSET if not found. + * INVALID_OFFSET if not found. * * Side Effect: removes the entry from the binding table */ SHMEM_OFFSET ShmemPIDDestroy(int pid) { - BindingEnt * result,item; - bool found; - SHMEM_OFFSET location = 0; - - Assert(BindingTable); - - memset(item.key, 0, BTABLE_KEYSIZE); - sprintf(item.key,"PID %d",pid); - - SpinAcquire(BindingLock); - result = (BindingEnt *) - hash_search(BindingTable,(char *) &item, HASH_REMOVE, &found); - - if (found) - location = result->location; - SpinRelease(BindingLock); - - if (! result) { - - elog(WARN,"ShmemPIDDestroy: PID table corrupted"); - return(INVALID_OFFSET); - - } - - if (found) - return (location); - else { - return(INVALID_OFFSET); - } + BindingEnt *result, + item; + bool found; + SHMEM_OFFSET location = 0; + + Assert(BindingTable); + + memset(item.key, 0, BTABLE_KEYSIZE); + sprintf(item.key, "PID %d", pid); + + SpinAcquire(BindingLock); + result = (BindingEnt *) + hash_search(BindingTable, (char *) &item, HASH_REMOVE, &found); + + if (found) + location = result->location; + SpinRelease(BindingLock); + + if (!result) + { + + elog(WARN, "ShmemPIDDestroy: PID table corrupted"); + return (INVALID_OFFSET); + + } + + if (found) + return (location); + else + { + return (INVALID_OFFSET); + } } /* * ShmemInitStruct -- Create/attach to a structure in shared - * memory. + * memory. * - * This is called during initialization to find or allocate - * a data structure in shared memory. If no other processes - * have created the structure, this routine allocates space - * for it. If it exists already, a pointer to the existing - * table is returned. + * This is called during initialization to find or allocate + * a data structure in shared memory. If no other processes + * have created the structure, this routine allocates space + * for it. If it exists already, a pointer to the existing + * table is returned. * - * Returns: real pointer to the object. FoundPtr is TRUE if - * the object is already in the binding table (hence, already - * initialized). + * Returns: real pointer to the object. FoundPtr is TRUE if + * the object is already in the binding table (hence, already + * initialized). 
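
Editorial aside: ShmemPIDLookup above is deliberately symmetric: if the "PID n" key is already bound, the caller's locationPtr is overwritten with the stored offset; if not, the caller's value is stored under that key. One call therefore both registers and looks up a backend. A hedged usage fragment, where myProc is assumed to point into shared memory and is not a name from the patch:

    /* Register this backend's shared struct, or find an existing entry. */
    SHMEM_OFFSET location = MAKE_OFFSET(myProc);

    if (!ShmemPIDLookup((int) getpid(), &location))
        elog(WARN, "could not bind PID entry");  /* binding table corrupted */
    /* location now holds the offset actually bound to this PID */
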
*/ -long * -ShmemInitStruct(char *name, unsigned long size, bool *foundPtr) +long * +ShmemInitStruct(char *name, unsigned long size, bool * foundPtr) { - BindingEnt * result,item; - long * structPtr; - - strncpy(item.key,name,BTABLE_KEYSIZE); - item.location = BAD_LOCATION; - - SpinAcquire(BindingLock); - - if (! BindingTable) { - /* Assert() is a macro now. substitutes inside quotes. */ -#ifndef NO_ASSERT_CHECKING - char *strname = "BindingTable"; + BindingEnt *result, + item; + long *structPtr; + + strncpy(item.key, name, BTABLE_KEYSIZE); + item.location = BAD_LOCATION; + + SpinAcquire(BindingLock); + + if (!BindingTable) + { + /* Assert() is a macro now. substitutes inside quotes. */ +#ifndef NO_ASSERT_CHECKING + char *strname = "BindingTable"; + #endif - - /* If the binding table doesnt exist, we fake it. - * - * If we are creating the first binding table, then let - * shmemalloc() allocate the space for a new HTAB. Otherwise, - * find the old one and return that. Notice that the - * BindingLock is held until the binding table has been completely - * initialized. - */ - Assert (! strcmp(name,strname)) ; - if (ShmemBootstrap) { - /* in POSTMASTER/Single process */ - - *foundPtr = FALSE; - return((long *)ShmemAlloc(size)); - - } else { - Assert (ShmemBindingTabOffset); - - *foundPtr = TRUE; - return((long *)MAKE_PTR(*ShmemBindingTabOffset)); + + /* + * If the binding table doesnt exist, we fake it. + * + * If we are creating the first binding table, then let shmemalloc() + * allocate the space for a new HTAB. Otherwise, find the old one + * and return that. Notice that the BindingLock is held until the + * binding table has been completely initialized. + */ + Assert(!strcmp(name, strname)); + if (ShmemBootstrap) + { + /* in POSTMASTER/Single process */ + + *foundPtr = FALSE; + return ((long *) ShmemAlloc(size)); + + } + else + { + Assert(ShmemBindingTabOffset); + + *foundPtr = TRUE; + return ((long *) MAKE_PTR(*ShmemBindingTabOffset)); + } + + } - - - } else { - /* look it up in the bindint table */ - result = (BindingEnt *) - hash_search(BindingTable,(char *) &item,HASH_ENTER, foundPtr); - } - - if (! result) { - - SpinRelease(BindingLock); - - elog(WARN,"ShmemInitStruct: Binding Table corrupted"); - return(NULL); - - } else if (*foundPtr) { - /* - * Structure is in the binding table so someone else has allocated - * it already. The size better be the same as the size we are - * trying to initialize to or there is a name conflict (or worse). - */ - if (result->size != size) { - SpinRelease(BindingLock); - - elog(NOTICE,"ShmemInitStruct: BindingTable entry size is wrong"); - /* let caller print its message too */ - return(NULL); + else + { + /* look it up in the bindint table */ + result = (BindingEnt *) + hash_search(BindingTable, (char *) &item, HASH_ENTER, foundPtr); + } + + if (!result) + { + + SpinRelease(BindingLock); + + elog(WARN, "ShmemInitStruct: Binding Table corrupted"); + return (NULL); + } - structPtr = (long *)MAKE_PTR(result->location); - } else { - - /* It isn't in the table yet. allocate and initialize it */ - structPtr = ShmemAlloc((long)size); - if (! 
structPtr) { - /* out of memory */ - Assert (BindingTable); - hash_search(BindingTable,(char *) &item,HASH_REMOVE, foundPtr); - SpinRelease(BindingLock); - *foundPtr = FALSE; - - elog(NOTICE,"ShmemInitStruct: cannot allocate '%s'", - name); - return(NULL); - } - result->size = size; - result->location = MAKE_OFFSET(structPtr); - } - Assert (ShmemIsValid((unsigned long)structPtr)); - - SpinRelease(BindingLock); - return(structPtr); + else if (*foundPtr) + { + + /* + * Structure is in the binding table so someone else has allocated + * it already. The size better be the same as the size we are + * trying to initialize to or there is a name conflict (or worse). + */ + if (result->size != size) + { + SpinRelease(BindingLock); + + elog(NOTICE, "ShmemInitStruct: BindingTable entry size is wrong"); + /* let caller print its message too */ + return (NULL); + } + structPtr = (long *) MAKE_PTR(result->location); + } + else + { + + /* It isn't in the table yet. allocate and initialize it */ + structPtr = ShmemAlloc((long) size); + if (!structPtr) + { + /* out of memory */ + Assert(BindingTable); + hash_search(BindingTable, (char *) &item, HASH_REMOVE, foundPtr); + SpinRelease(BindingLock); + *foundPtr = FALSE; + + elog(NOTICE, "ShmemInitStruct: cannot allocate '%s'", + name); + return (NULL); + } + result->size = size; + result->location = MAKE_OFFSET(structPtr); + } + Assert(ShmemIsValid((unsigned long) structPtr)); + + SpinRelease(BindingLock); + return (structPtr); } /* * TransactionIdIsInProgress -- is given transaction running by some backend * - * Strange place for this func, but we have to lookup process data structures + * Strange place for this func, but we have to lookup process data structures * for all running backends. - vadim 11/26/96 */ bool -TransactionIdIsInProgress (TransactionId xid) +TransactionIdIsInProgress(TransactionId xid) { - BindingEnt *result; - PROC *proc; - - Assert (BindingTable); - - SpinAcquire(BindingLock); - - hash_seq ((HTAB *)NULL); - while ( (result = (BindingEnt *) hash_seq (BindingTable)) != NULL ) - { - if ( result == (BindingEnt *) TRUE ) - { - SpinRelease(BindingLock); - return (false); - } - if ( result->location == INVALID_OFFSET || - strncmp (result->key, "PID ", 4) != 0 ) - continue; - proc = (PROC *) MAKE_PTR (result->location); - if ( proc->xid == xid ) - { - SpinRelease(BindingLock); - return (true); + BindingEnt *result; + PROC *proc; + + Assert(BindingTable); + + SpinAcquire(BindingLock); + + hash_seq((HTAB *) NULL); + while ((result = (BindingEnt *) hash_seq(BindingTable)) != NULL) + { + if (result == (BindingEnt *) TRUE) + { + SpinRelease(BindingLock); + return (false); + } + if (result->location == INVALID_OFFSET || + strncmp(result->key, "PID ", 4) != 0) + continue; + proc = (PROC *) MAKE_PTR(result->location); + if (proc->xid == xid) + { + SpinRelease(BindingLock); + return (true); + } } - } - - SpinRelease(BindingLock); - elog (WARN,"TransactionIdIsInProgress: BindingTable corrupted"); - return (false); -} + SpinRelease(BindingLock); + elog(WARN, "TransactionIdIsInProgress: BindingTable corrupted"); + return (false); +} diff --git a/src/backend/storage/ipc/shmqueue.c b/src/backend/storage/ipc/shmqueue.c index f727b5719f5..8080fc70208 100644 --- a/src/backend/storage/ipc/shmqueue.c +++ b/src/backend/storage/ipc/shmqueue.c @@ -1,19 +1,19 @@ /*------------------------------------------------------------------------- * * shmqueue.c-- - * shared memory linked lists + * shared memory linked lists * * Copyright (c) 1994, Regents of the University of 
California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmqueue.c,v 1.3 1997/08/19 21:33:06 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmqueue.c,v 1.4 1997/09/07 04:48:42 momjian Exp $ * * NOTES * * Package for managing doubly-linked lists in shared memory. - * The only tricky thing is that SHM_QUEUE will usually be a field - * in a larger record. SHMQueueGetFirst has to return a pointer + * The only tricky thing is that SHM_QUEUE will usually be a field + * in a larger record. SHMQueueGetFirst has to return a pointer * to the record itself instead of a pointer to the SHMQueue field * of the record. It takes an extra pointer and does some extra * pointer arithmetic to do this correctly. @@ -22,178 +22,181 @@ * *------------------------------------------------------------------------- */ -#include <stdio.h> /* for sprintf() */ +#include <stdio.h> /* for sprintf() */ #include "postgres.h" -#include "storage/shmem.h" /* where the declarations go */ +#include "storage/shmem.h" /* where the declarations go */ /*#define SHMQUEUE_DEBUG*/ #ifdef SHMQUEUE_DEBUG -#define SHMQUEUE_DEBUG_DEL /* deletions */ -#define SHMQUEUE_DEBUG_HD /* head inserts */ -#define SHMQUEUE_DEBUG_TL /* tail inserts */ +#define SHMQUEUE_DEBUG_DEL /* deletions */ +#define SHMQUEUE_DEBUG_HD /* head inserts */ +#define SHMQUEUE_DEBUG_TL /* tail inserts */ #define SHMQUEUE_DEBUG_ELOG NOTICE -#endif /* SHMQUEUE_DEBUG */ +#endif /* SHMQUEUE_DEBUG */ /* * ShmemQueueInit -- make the head of a new queue point - * to itself + * to itself */ void -SHMQueueInit(SHM_QUEUE *queue) +SHMQueueInit(SHM_QUEUE * queue) { - Assert(SHM_PTR_VALID(queue)); - (queue)->prev = (queue)->next = MAKE_OFFSET(queue); + Assert(SHM_PTR_VALID(queue)); + (queue)->prev = (queue)->next = MAKE_OFFSET(queue); } /* * SHMQueueIsDetached -- TRUE if element is not currently - * in a queue. + * in a queue. 
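 *
 *	(Sketch: after
 *
 *		SHMQueueElemInit(&elem);
 *
 *	both links are INVALID_OFFSET and the element is detached; once
 *	it is queued with SHMQueueInsertTL, the links hold valid offsets
 *	again.)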
*/ #ifdef NOT_USED bool -SHMQueueIsDetached(SHM_QUEUE *queue) +SHMQueueIsDetached(SHM_QUEUE * queue) { - Assert(SHM_PTR_VALID(queue)); - return ((queue)->prev == INVALID_OFFSET); + Assert(SHM_PTR_VALID(queue)); + return ((queue)->prev == INVALID_OFFSET); } + #endif /* * SHMQueueElemInit -- clear an element's links */ void -SHMQueueElemInit(SHM_QUEUE *queue) +SHMQueueElemInit(SHM_QUEUE * queue) { - Assert(SHM_PTR_VALID(queue)); - (queue)->prev = (queue)->next = INVALID_OFFSET; + Assert(SHM_PTR_VALID(queue)); + (queue)->prev = (queue)->next = INVALID_OFFSET; } /* * SHMQueueDelete -- remove an element from the queue and - * close the links + * close the links */ void -SHMQueueDelete(SHM_QUEUE *queue) +SHMQueueDelete(SHM_QUEUE * queue) { - SHM_QUEUE *nextElem = (SHM_QUEUE *) MAKE_PTR((queue)->next); - SHM_QUEUE *prevElem = (SHM_QUEUE *) MAKE_PTR((queue)->prev); - - Assert(SHM_PTR_VALID(queue)); - Assert(SHM_PTR_VALID(nextElem)); - Assert(SHM_PTR_VALID(prevElem)); - + SHM_QUEUE *nextElem = (SHM_QUEUE *) MAKE_PTR((queue)->next); + SHM_QUEUE *prevElem = (SHM_QUEUE *) MAKE_PTR((queue)->prev); + + Assert(SHM_PTR_VALID(queue)); + Assert(SHM_PTR_VALID(nextElem)); + Assert(SHM_PTR_VALID(prevElem)); + #ifdef SHMQUEUE_DEBUG_DEL - dumpQ(queue, "in SHMQueueDelete: begin"); -#endif /* SHMQUEUE_DEBUG_DEL */ - - prevElem->next = (queue)->next; - nextElem->prev = (queue)->prev; - + dumpQ(queue, "in SHMQueueDelete: begin"); +#endif /* SHMQUEUE_DEBUG_DEL */ + + prevElem->next = (queue)->next; + nextElem->prev = (queue)->prev; + #ifdef SHMQUEUE_DEBUG_DEL - dumpQ((SHM_QUEUE *)MAKE_PTR(queue->prev), "in SHMQueueDelete: end"); -#endif /* SHMQUEUE_DEBUG_DEL */ + dumpQ((SHM_QUEUE *) MAKE_PTR(queue->prev), "in SHMQueueDelete: end"); +#endif /* SHMQUEUE_DEBUG_DEL */ } #ifdef SHMQUEUE_DEBUG void -dumpQ(SHM_QUEUE *q, char *s) +dumpQ(SHM_QUEUE * q, char *s) { - char elem[16]; - char buf[1024]; - SHM_QUEUE *start = q; - int count = 0; - - sprintf(buf, "q prevs: %x", MAKE_OFFSET(q)); - q = (SHM_QUEUE *)MAKE_PTR(q->prev); - while (q != start) + char elem[16]; + char buf[1024]; + SHM_QUEUE *start = q; + int count = 0; + + sprintf(buf, "q prevs: %x", MAKE_OFFSET(q)); + q = (SHM_QUEUE *) MAKE_PTR(q->prev); + while (q != start) { - sprintf(elem, "--->%x", MAKE_OFFSET(q)); - strcat(buf, elem); - q = (SHM_QUEUE *)MAKE_PTR(q->prev); - if (q->prev == MAKE_OFFSET(q)) - break; - if (count++ > 40) + sprintf(elem, "--->%x", MAKE_OFFSET(q)); + strcat(buf, elem); + q = (SHM_QUEUE *) MAKE_PTR(q->prev); + if (q->prev == MAKE_OFFSET(q)) + break; + if (count++ > 40) { - strcat(buf, "BAD PREV QUEUE!!"); - break; + strcat(buf, "BAD PREV QUEUE!!"); + break; } } - sprintf(elem, "--->%x", MAKE_OFFSET(q)); - strcat(buf, elem); - elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf); - - sprintf(buf, "q nexts: %x", MAKE_OFFSET(q)); - count = 0; - q = (SHM_QUEUE *)MAKE_PTR(q->next); - while (q != start) + sprintf(elem, "--->%x", MAKE_OFFSET(q)); + strcat(buf, elem); + elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf); + + sprintf(buf, "q nexts: %x", MAKE_OFFSET(q)); + count = 0; + q = (SHM_QUEUE *) MAKE_PTR(q->next); + while (q != start) { - sprintf(elem, "--->%x", MAKE_OFFSET(q)); - strcat(buf, elem); - q = (SHM_QUEUE *)MAKE_PTR(q->next); - if (q->next == MAKE_OFFSET(q)) - break; - if (count++ > 10) + sprintf(elem, "--->%x", MAKE_OFFSET(q)); + strcat(buf, elem); + q = (SHM_QUEUE *) MAKE_PTR(q->next); + if (q->next == MAKE_OFFSET(q)) + break; + if (count++ > 10) { - strcat(buf, "BAD NEXT QUEUE!!"); - break; + strcat(buf, "BAD NEXT QUEUE!!"); + break; } } - 
sprintf(elem, "--->%x", MAKE_OFFSET(q)); - strcat(buf, elem); - elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf); + sprintf(elem, "--->%x", MAKE_OFFSET(q)); + strcat(buf, elem); + elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf); } -#endif /* SHMQUEUE_DEBUG */ + +#endif /* SHMQUEUE_DEBUG */ /* * SHMQueueInsertHD -- put elem in queue between the queue head - * and its "prev" element. + * and its "prev" element. */ #ifdef NOT_USED void -SHMQueueInsertHD(SHM_QUEUE *queue, SHM_QUEUE *elem) +SHMQueueInsertHD(SHM_QUEUE * queue, SHM_QUEUE * elem) { - SHM_QUEUE *prevPtr = (SHM_QUEUE *) MAKE_PTR((queue)->prev); - SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem); - - Assert(SHM_PTR_VALID(queue)); - Assert(SHM_PTR_VALID(elem)); - + SHM_QUEUE *prevPtr = (SHM_QUEUE *) MAKE_PTR((queue)->prev); + SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem); + + Assert(SHM_PTR_VALID(queue)); + Assert(SHM_PTR_VALID(elem)); + #ifdef SHMQUEUE_DEBUG_HD - dumpQ(queue, "in SHMQueueInsertHD: begin"); -#endif /* SHMQUEUE_DEBUG_HD */ - - (elem)->next = prevPtr->next; - (elem)->prev = queue->prev; - (queue)->prev = elemOffset; - prevPtr->next = elemOffset; - + dumpQ(queue, "in SHMQueueInsertHD: begin"); +#endif /* SHMQUEUE_DEBUG_HD */ + + (elem)->next = prevPtr->next; + (elem)->prev = queue->prev; + (queue)->prev = elemOffset; + prevPtr->next = elemOffset; + #ifdef SHMQUEUE_DEBUG_HD - dumpQ(queue, "in SHMQueueInsertHD: end"); -#endif /* SHMQUEUE_DEBUG_HD */ + dumpQ(queue, "in SHMQueueInsertHD: end"); +#endif /* SHMQUEUE_DEBUG_HD */ } + #endif void -SHMQueueInsertTL(SHM_QUEUE *queue, SHM_QUEUE *elem) +SHMQueueInsertTL(SHM_QUEUE * queue, SHM_QUEUE * elem) { - SHM_QUEUE *nextPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next); - SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem); - - Assert(SHM_PTR_VALID(queue)); - Assert(SHM_PTR_VALID(elem)); - + SHM_QUEUE *nextPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next); + SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem); + + Assert(SHM_PTR_VALID(queue)); + Assert(SHM_PTR_VALID(elem)); + #ifdef SHMQUEUE_DEBUG_TL - dumpQ(queue, "in SHMQueueInsertTL: begin"); -#endif /* SHMQUEUE_DEBUG_TL */ - - (elem)->prev = nextPtr->prev; - (elem)->next = queue->next; - (queue)->next = elemOffset; - nextPtr->prev = elemOffset; - + dumpQ(queue, "in SHMQueueInsertTL: begin"); +#endif /* SHMQUEUE_DEBUG_TL */ + + (elem)->prev = nextPtr->prev; + (elem)->next = queue->next; + (queue)->next = elemOffset; + nextPtr->prev = elemOffset; + #ifdef SHMQUEUE_DEBUG_TL - dumpQ(queue, "in SHMQueueInsertTL: end"); -#endif /* SHMQUEUE_DEBUG_TL */ + dumpQ(queue, "in SHMQueueInsertTL: end"); +#endif /* SHMQUEUE_DEBUG_TL */ } /* @@ -203,52 +206,51 @@ SHMQueueInsertTL(SHM_QUEUE *queue, SHM_QUEUE *elem) * a larger structure, we want to return a pointer to the * whole structure rather than a pointer to its SHMQueue field. * I.E. struct { - * int stuff; - * SHMQueue elem; - * } ELEMType; + * int stuff; + * SHMQueue elem; + * } ELEMType; * when this element is in a queue (queue->next) is struct.elem. * nextQueue allows us to calculate the offset of the SHMQueue * field in the structure. * * call to SHMQueueFirst should take these parameters: * - * &(queueHead),&firstElem,&(firstElem->next) + * &(queueHead),&firstElem,&(firstElem->next) * * Note that firstElem may well be uninitialized. if firstElem * is initially K, &(firstElem->next) will be K+ the offset to * next. 
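 *
 *	Concretely (an illustrative sketch, with field names taken from
 *	the ELEMType example above):
 *
 *		ELEMType   *first;
 *
 *		SHMQueueFirst(&queueHead, (Pointer *) &first, &(first->elem));
 *
 *	even though first is uninitialized, &(first->elem) differs from
 *	first by exactly the offset of the queue field, so SHMQueueFirst
 *	can subtract that offset from the element's real address and
 *	return a pointer to the start of the enclosing ELEMType record.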
*/ void -SHMQueueFirst(SHM_QUEUE *queue, Pointer *nextPtrPtr, SHM_QUEUE *nextQueue) +SHMQueueFirst(SHM_QUEUE * queue, Pointer * nextPtrPtr, SHM_QUEUE * nextQueue) { - SHM_QUEUE *elemPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next); - - Assert(SHM_PTR_VALID(queue)); - *nextPtrPtr = (Pointer) (((unsigned long) *nextPtrPtr) + - ((unsigned long) elemPtr) - ((unsigned long) nextQueue)); - - /* - nextPtrPtr a ptr to a structure linked in the queue - nextQueue is the SHMQueue field of the structure - *nextPtrPtr - nextQueue is 0 minus the offset of the queue - field n the record - elemPtr + (*nextPtrPtr - nexQueue) is the start of the - structure containing elemPtr. - */ + SHM_QUEUE *elemPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next); + + Assert(SHM_PTR_VALID(queue)); + *nextPtrPtr = (Pointer) (((unsigned long) *nextPtrPtr) + + ((unsigned long) elemPtr) - ((unsigned long) nextQueue)); + + /* + * nextPtrPtr a ptr to a structure linked in the queue nextQueue is + * the SHMQueue field of the structure nextPtrPtr - nextQueue is 0 + * minus the offset of the queue field n the record elemPtr + + * (*nextPtrPtr - nexQueue) is the start of the structure containing + * elemPtr. + */ } /* * SHMQueueEmpty -- TRUE if queue head is only element, FALSE otherwise */ bool -SHMQueueEmpty(SHM_QUEUE *queue) +SHMQueueEmpty(SHM_QUEUE * queue) { - Assert(SHM_PTR_VALID(queue)); - - if (queue->prev == MAKE_OFFSET(queue)) + Assert(SHM_PTR_VALID(queue)); + + if (queue->prev == MAKE_OFFSET(queue)) { - Assert(queue->next = MAKE_OFFSET(queue)); - return(TRUE); + Assert(queue->next = MAKE_OFFSET(queue)); + return (TRUE); } - return(FALSE); + return (FALSE); } diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index 5f391669113..af16c8a7196 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -1,17 +1,17 @@ /*------------------------------------------------------------------------- * * sinval.c-- - * POSTGRES shared cache invalidation communication code. + * POSTGRES shared cache invalidation communication code. 
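 *	  (Backends register invalidation messages in the shared segment
 *	  managed by sinvaladt.c; every backend later reads the entries
 *	  it has not yet seen and invalidates its local caches
 *	  accordingly.)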
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.7 1997/08/12 22:53:58 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.8 1997/09/07 04:48:43 momjian Exp $ * *------------------------------------------------------------------------- */ -/* #define INVALIDDEBUG 1 */ +/* #define INVALIDDEBUG 1 */ #include <sys/types.h> @@ -22,150 +22,156 @@ #include "storage/sinvaladt.h" #include "storage/spin.h" -extern SISeg *shmInvalBuffer;/* the shared buffer segment, set by*/ - /* SISegmentAttach() */ -extern BackendId MyBackendId; -extern BackendTag MyBackendTag; +extern SISeg *shmInvalBuffer; /* the shared buffer segment, set by */ + + /* SISegmentAttach() */ +extern BackendId MyBackendId; +extern BackendTag MyBackendTag; SPINLOCK SInvalLock = (SPINLOCK) NULL; /****************************************************************************/ -/* CreateSharedInvalidationState(key) Create a buffer segment */ -/* */ -/* should be called only by the POSTMASTER */ +/* CreateSharedInvalidationState(key) Create a buffer segment */ +/* */ +/* should be called only by the POSTMASTER */ /****************************************************************************/ void CreateSharedInvalidationState(IPCKey key) { - int status; - - /* REMOVED - SISyncKill(IPCKeyGetSIBufferMemorySemaphoreKey(key)); - SISyncInit(IPCKeyGetSIBufferMemorySemaphoreKey(key)); - */ - - /* SInvalLock gets set in spin.c, during spinlock init */ - status = SISegmentInit(true, IPCKeyGetSIBufferMemoryBlock(key)); - - if (status == -1) { - elog(FATAL, "CreateSharedInvalidationState: failed segment init"); - } + int status; + + /* + * REMOVED SISyncKill(IPCKeyGetSIBufferMemorySemaphoreKey(key)); + * SISyncInit(IPCKeyGetSIBufferMemorySemaphoreKey(key)); + */ + + /* SInvalLock gets set in spin.c, during spinlock init */ + status = SISegmentInit(true, IPCKeyGetSIBufferMemoryBlock(key)); + + if (status == -1) + { + elog(FATAL, "CreateSharedInvalidationState: failed segment init"); + } } + /****************************************************************************/ -/* AttachSharedInvalidationState(key) Attach a buffer segment */ -/* */ -/* should be called only by the POSTMASTER */ +/* AttachSharedInvalidationState(key) Attach a buffer segment */ +/* */ +/* should be called only by the POSTMASTER */ /****************************************************************************/ void AttachSharedInvalidationState(IPCKey key) { - int status; - - if (key == PrivateIPCKey) { - CreateSharedInvalidationState(key); - return; - } - /* SInvalLock gets set in spin.c, during spinlock init */ - status = SISegmentInit(false, IPCKeyGetSIBufferMemoryBlock(key)); - - if (status == -1) { - elog(FATAL, "AttachSharedInvalidationState: failed segment init"); - } + int status; + + if (key == PrivateIPCKey) + { + CreateSharedInvalidationState(key); + return; + } + /* SInvalLock gets set in spin.c, during spinlock init */ + status = SISegmentInit(false, IPCKeyGetSIBufferMemoryBlock(key)); + + if (status == -1) + { + elog(FATAL, "AttachSharedInvalidationState: failed segment init"); + } } void InitSharedInvalidationState(void) { - SpinAcquire(SInvalLock); - if (!SIBackendInit(shmInvalBuffer)) + SpinAcquire(SInvalLock); + if (!SIBackendInit(shmInvalBuffer)) { - SpinRelease(SInvalLock); - elog(FATAL, "Backend cache invalidation initialization failed"); + SpinRelease(SInvalLock); + elog(FATAL, "Backend cache invalidation initialization failed"); } - 
SpinRelease(SInvalLock); + SpinRelease(SInvalLock); } /* * RegisterSharedInvalid -- - * Returns a new local cache invalidation state containing a new entry. + * Returns a new local cache invalidation state containing a new entry. * * Note: - * Assumes hash index is valid. - * Assumes item pointer is valid. + * Assumes hash index is valid. + * Assumes item pointer is valid. */ /****************************************************************************/ -/* RegisterSharedInvalid(cacheId, hashIndex, pointer) */ -/* */ -/* register a message in the buffer */ -/* should be called by a backend */ +/* RegisterSharedInvalid(cacheId, hashIndex, pointer) */ +/* */ +/* register a message in the buffer */ +/* should be called by a backend */ /****************************************************************************/ void -RegisterSharedInvalid(int cacheId, /* XXX */ - Index hashIndex, - ItemPointer pointer) +RegisterSharedInvalid(int cacheId, /* XXX */ + Index hashIndex, + ItemPointer pointer) { - SharedInvalidData newInvalid; - - /* - * This code has been hacked to accept two types of messages. This might - * be treated more generally in the future. - * - * (1) - * cacheId= system cache id - * hashIndex= system cache hash index for a (possibly) cached tuple - * pointer= pointer of (possibly) cached tuple - * - * (2) - * cacheId= special non-syscache id - * hashIndex= object id contained in (possibly) cached relation descriptor - * pointer= null - */ - - newInvalid.cacheId = cacheId; - newInvalid.hashIndex = hashIndex; - - if (ItemPointerIsValid(pointer)) { - ItemPointerCopy(pointer, &newInvalid.pointerData); - } else { - ItemPointerSetInvalid(&newInvalid.pointerData); - } - - SpinAcquire(SInvalLock); - if (!SISetDataEntry(shmInvalBuffer, &newInvalid)) { - /* buffer full */ - /* release a message, mark process cache states to be invalid */ - SISetProcStateInvalid(shmInvalBuffer); - - if (!SIDelDataEntry(shmInvalBuffer)) { - /* inconsistent buffer state -- shd never happen */ - SpinRelease(SInvalLock); - elog(FATAL, "RegisterSharedInvalid: inconsistent buffer state"); - } - - /* write again */ - SISetDataEntry(shmInvalBuffer, &newInvalid); - } - SpinRelease(SInvalLock); + SharedInvalidData newInvalid; + + /* + * This code has been hacked to accept two types of messages. This + * might be treated more generally in the future. + * + * (1) cacheId= system cache id hashIndex= system cache hash index for a + * (possibly) cached tuple pointer= pointer of (possibly) cached tuple + * + * (2) cacheId= special non-syscache id hashIndex= object id contained in + * (possibly) cached relation descriptor pointer= null + */ + + newInvalid.cacheId = cacheId; + newInvalid.hashIndex = hashIndex; + + if (ItemPointerIsValid(pointer)) + { + ItemPointerCopy(pointer, &newInvalid.pointerData); + } + else + { + ItemPointerSetInvalid(&newInvalid.pointerData); + } + + SpinAcquire(SInvalLock); + if (!SISetDataEntry(shmInvalBuffer, &newInvalid)) + { + /* buffer full */ + /* release a message, mark process cache states to be invalid */ + SISetProcStateInvalid(shmInvalBuffer); + + if (!SIDelDataEntry(shmInvalBuffer)) + { + /* inconsistent buffer state -- shd never happen */ + SpinRelease(SInvalLock); + elog(FATAL, "RegisterSharedInvalid: inconsistent buffer state"); + } + + /* write again */ + SISetDataEntry(shmInvalBuffer, &newInvalid); + } + SpinRelease(SInvalLock); } /* * InvalidateSharedInvalid -- - * Processes all entries in a shared cache invalidation state. + * Processes all entries in a shared cache invalidation state. 
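 *
 *	The callback signatures, as used by SIReadEntryData in
 *	sinvaladt.c (sketch):
 *
 *		void	invalFunction(int cacheId, Index hashIndex,
 *							  ItemPointer pointer);
 *		void	resetFunction(void);
 *
 *	invalFunction is applied to each message this backend has not yet
 *	read; resetFunction is called instead when the buffer overflowed
 *	and this backend's cache state was marked invalid.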
*/ /****************************************************************************/ -/* InvalidateSharedInvalid(invalFunction, resetFunction) */ -/* */ -/* invalidate a message in the buffer (read and clean up) */ -/* should be called by a backend */ +/* InvalidateSharedInvalid(invalFunction, resetFunction) */ +/* */ +/* invalidate a message in the buffer (read and clean up) */ +/* should be called by a backend */ /****************************************************************************/ void -InvalidateSharedInvalid(void (*invalFunction)(), - void (*resetFunction)()) + InvalidateSharedInvalid(void (*invalFunction) (), + void (*resetFunction) ()) { - SpinAcquire(SInvalLock); - SIReadEntryData(shmInvalBuffer, MyBackendId, - invalFunction, resetFunction); - - SIDelExpiredDataEntries(shmInvalBuffer); - SpinRelease(SInvalLock); + SpinAcquire(SInvalLock); + SIReadEntryData(shmInvalBuffer, MyBackendId, + invalFunction, resetFunction); + + SIDelExpiredDataEntries(shmInvalBuffer); + SpinRelease(SInvalLock); } diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index 9400e872617..43c4d7c0ac2 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * sinvaladt.c-- - * POSTGRES shared cache invalidation segment definitions. + * POSTGRES shared cache invalidation segment definitions. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.5 1997/08/12 22:54:01 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.6 1997/09/07 04:48:44 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -22,420 +22,445 @@ #include "utils/palloc.h" /* ---------------- - * global variable notes + * global variable notes * - * SharedInvalidationSemaphore + * SharedInvalidationSemaphore * - * shmInvalBuffer - * the shared buffer segment, set by SISegmentAttach() + * shmInvalBuffer + * the shared buffer segment, set by SISegmentAttach() * - * MyBackendId - * might be removed later, used only for - * debugging in debug routines (end of file) + * MyBackendId + * might be removed later, used only for + * debugging in debug routines (end of file) * - * SIDbId - * identification of buffer (disappears) + * SIDbId + * identification of buffer (disappears) * - * SIRelId \ - * SIDummyOid \ identification of buffer - * SIXidData / - * SIXid / + * SIRelId \ + * SIDummyOid \ identification of buffer + * SIXidData / + * SIXid / * - * XXX This file really needs to be cleaned up. We switched to using - * spinlocks to protect critical sections (as opposed to using fake - * relations and going through the lock manager) and some of the old - * cruft was 'ifdef'ed out, while other parts (now unused) are still - * compiled into the system. -mer 5/24/92 + * XXX This file really needs to be cleaned up. We switched to using + * spinlocks to protect critical sections (as opposed to using fake + * relations and going through the lock manager) and some of the old + * cruft was 'ifdef'ed out, while other parts (now unused) are still + * compiled into the system. 
-mer 5/24/92 * ---------------- */ #ifdef HAS_TEST_AND_SET -int SharedInvalidationLockId; +int SharedInvalidationLockId; + #else IpcSemaphoreId SharedInvalidationSemaphore; + #endif -SISeg *shmInvalBuffer; +SISeg *shmInvalBuffer; extern BackendId MyBackendId; -static void CleanupInvalidationState(int status, SISeg *segInOutP); -static BackendId SIAssignBackendId(SISeg *segInOutP, BackendTag backendTag); -static int SIGetNumEntries(SISeg *segP); +static void CleanupInvalidationState(int status, SISeg * segInOutP); +static BackendId SIAssignBackendId(SISeg * segInOutP, BackendTag backendTag); +static int SIGetNumEntries(SISeg * segP); /************************************************************************/ /* SISetActiveProcess(segP, backendId) set the backend status active */ -/* should be called only by the postmaster when creating a backend */ +/* should be called only by the postmaster when creating a backend */ /************************************************************************/ /* XXX I suspect that the segP parameter is extraneous. -hirohama */ static void -SISetActiveProcess(SISeg *segInOutP, BackendId backendId) +SISetActiveProcess(SISeg * segInOutP, BackendId backendId) { - /* mark all messages as read */ - - /* Assert(segP->procState[backendId - 1].tag == MyBackendTag); */ - - segInOutP->procState[backendId - 1].resetState = false; - segInOutP->procState[backendId - 1].limit = SIGetNumEntries(segInOutP); + /* mark all messages as read */ + + /* Assert(segP->procState[backendId - 1].tag == MyBackendTag); */ + + segInOutP->procState[backendId - 1].resetState = false; + segInOutP->procState[backendId - 1].limit = SIGetNumEntries(segInOutP); } /****************************************************************************/ -/* SIBackendInit() initializes a backend to operate on the buffer */ +/* SIBackendInit() initializes a backend to operate on the buffer */ /****************************************************************************/ int -SIBackendInit(SISeg *segInOutP) -{ - LRelId LtCreateRelId(); - TransactionId LMITransactionIdCopy(); - - Assert(MyBackendTag > 0); - - MyBackendId = SIAssignBackendId(segInOutP, MyBackendTag); - if (MyBackendId == InvalidBackendTag) - return 0; - +SIBackendInit(SISeg * segInOutP) +{ + LRelId LtCreateRelId(); + TransactionId LMITransactionIdCopy(); + + Assert(MyBackendTag > 0); + + MyBackendId = SIAssignBackendId(segInOutP, MyBackendTag); + if (MyBackendId == InvalidBackendTag) + return 0; + #ifdef INVALIDDEBUG - elog(DEBUG, "SIBackendInit: backend tag %d; backend id %d.", - MyBackendTag, MyBackendId); -#endif /* INVALIDDEBUG */ - - SISetActiveProcess(segInOutP, MyBackendId); - on_exitpg(CleanupInvalidationState, (caddr_t)segInOutP); - return 1; + elog(DEBUG, "SIBackendInit: backend tag %d; backend id %d.", + MyBackendTag, MyBackendId); +#endif /* INVALIDDEBUG */ + + SISetActiveProcess(segInOutP, MyBackendId); + on_exitpg(CleanupInvalidationState, (caddr_t) segInOutP); + return 1; } /* ---------------- - * SIAssignBackendId + * SIAssignBackendId * ---------------- */ -static BackendId -SIAssignBackendId(SISeg *segInOutP, BackendTag backendTag) -{ - Index index; - ProcState *stateP; - - stateP = NULL; - - for (index = 0; index < MaxBackendId; index += 1) { - if (segInOutP->procState[index].tag == InvalidBackendTag || - segInOutP->procState[index].tag == backendTag) - { - stateP = &segInOutP->procState[index]; - break; - } - - if (!PointerIsValid(stateP) || - (segInOutP->procState[index].resetState && - (!stateP->resetState || - stateP->tag < 
backendTag)) || - (!stateP->resetState && - (segInOutP->procState[index].limit < - stateP->limit || - stateP->tag < backendTag))) - { - stateP = &segInOutP->procState[index]; - } - } - - /* verify that all "procState" entries checked for matching tags */ - - for (index += 1; index < MaxBackendId; index += 1) { - if (segInOutP->procState[index].tag == backendTag) { - elog (FATAL, "SIAssignBackendId: tag %d found twice", - backendTag); +static BackendId +SIAssignBackendId(SISeg * segInOutP, BackendTag backendTag) +{ + Index index; + ProcState *stateP; + + stateP = NULL; + + for (index = 0; index < MaxBackendId; index += 1) + { + if (segInOutP->procState[index].tag == InvalidBackendTag || + segInOutP->procState[index].tag == backendTag) + { + stateP = &segInOutP->procState[index]; + break; + } + + if (!PointerIsValid(stateP) || + (segInOutP->procState[index].resetState && + (!stateP->resetState || + stateP->tag < backendTag)) || + (!stateP->resetState && + (segInOutP->procState[index].limit < + stateP->limit || + stateP->tag < backendTag))) + { + stateP = &segInOutP->procState[index]; + } + } + + /* verify that all "procState" entries checked for matching tags */ + + for (index += 1; index < MaxBackendId; index += 1) + { + if (segInOutP->procState[index].tag == backendTag) + { + elog(FATAL, "SIAssignBackendId: tag %d found twice", + backendTag); + } } - } - - if (stateP->tag != InvalidBackendTag) { - if (stateP->tag == backendTag) { - elog(NOTICE, "SIAssignBackendId: reusing tag %d", - backendTag); - } else { - elog(NOTICE, - "SIAssignBackendId: discarding tag %d", - stateP->tag); - return InvalidBackendTag; + + if (stateP->tag != InvalidBackendTag) + { + if (stateP->tag == backendTag) + { + elog(NOTICE, "SIAssignBackendId: reusing tag %d", + backendTag); + } + else + { + elog(NOTICE, + "SIAssignBackendId: discarding tag %d", + stateP->tag); + return InvalidBackendTag; + } } - } - - stateP->tag = backendTag; - - return (1 + stateP - &segInOutP->procState[0]); + + stateP->tag = backendTag; + + return (1 + stateP - &segInOutP->procState[0]); } /************************************************************************/ -/* The following function should be called only by the postmaster !! */ +/* The following function should be called only by the postmaster !! */ /************************************************************************/ /************************************************************************/ -/* SISetDeadProcess(segP, backendId) set the backend status DEAD */ -/* should be called only by the postmaster when a backend died */ +/* SISetDeadProcess(segP, backendId) set the backend status DEAD */ +/* should be called only by the postmaster when a backend died */ /************************************************************************/ static void -SISetDeadProcess(SISeg *segP, int backendId) +SISetDeadProcess(SISeg * segP, int backendId) { - /* XXX call me.... */ - - segP->procState[backendId - 1].resetState = false; - segP->procState[backendId - 1].limit = -1; - segP->procState[backendId - 1].tag = InvalidBackendTag; + /* XXX call me.... */ + + segP->procState[backendId - 1].resetState = false; + segP->procState[backendId - 1].limit = -1; + segP->procState[backendId - 1].tag = InvalidBackendTag; } /* * CleanupInvalidationState -- * Note: - * This is a temporary hack. ExitBackend should call this instead - * of exit (via on_exitpg). + * This is a temporary hack. ExitBackend should call this instead + * of exit (via on_exitpg). 
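 *
 *	It is registered in SIBackendInit above:
 *
 *		on_exitpg(CleanupInvalidationState, (caddr_t) segInOutP);
 *
 *	so a terminating backend marks its procState slot dead (via
 *	SISetDeadProcess) rather than leaving a stale entry behind.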
*/ static void -CleanupInvalidationState(int status, /* XXX */ - SISeg *segInOutP) /* XXX style */ +CleanupInvalidationState(int status, /* XXX */ + SISeg * segInOutP) /* XXX style */ { - Assert(PointerIsValid(segInOutP)); - - SISetDeadProcess(segInOutP, MyBackendId); + Assert(PointerIsValid(segInOutP)); + + SISetDeadProcess(segInOutP, MyBackendId); } /************************************************************************/ -/* SIComputeSize() - retuns the size of a buffer segment */ +/* SIComputeSize() - retuns the size of a buffer segment */ /************************************************************************/ static SISegOffsets * SIComputeSize(int *segSize) { - int A, B, a, b, totalSize; - SISegOffsets *oP; - - A = 0; - a = SizeSISeg; /* offset to first data entry */ - b = SizeOfOneSISegEntry * MAXNUMMESSAGES; - B = A + a + b; - totalSize = B - A; - *segSize = totalSize; - - oP = (SISegOffsets *) palloc(sizeof(SISegOffsets)); - oP->startSegment = A; - oP->offsetToFirstEntry = a; /* relatiove to A */ - oP->offsetToEndOfSegemnt = totalSize; /* relative to A */ - return(oP); + int A, + B, + a, + b, + totalSize; + SISegOffsets *oP; + + A = 0; + a = SizeSISeg; /* offset to first data entry */ + b = SizeOfOneSISegEntry * MAXNUMMESSAGES; + B = A + a + b; + totalSize = B - A; + *segSize = totalSize; + + oP = (SISegOffsets *) palloc(sizeof(SISegOffsets)); + oP->startSegment = A; + oP->offsetToFirstEntry = a; /* relatiove to A */ + oP->offsetToEndOfSegemnt = totalSize; /* relative to A */ + return (oP); } /************************************************************************/ -/* SISetStartEntrySection(segP, offset) - sets the offset */ +/* SISetStartEntrySection(segP, offset) - sets the offset */ /************************************************************************/ static void -SISetStartEntrySection(SISeg *segP, Offset offset) +SISetStartEntrySection(SISeg * segP, Offset offset) { - segP->startEntrySection = offset; + segP->startEntrySection = offset; } /************************************************************************/ -/* SIGetStartEntrySection(segP) - returnss the offset */ +/* SIGetStartEntrySection(segP) - returnss the offset */ /************************************************************************/ -static Offset -SIGetStartEntrySection(SISeg *segP) +static Offset +SIGetStartEntrySection(SISeg * segP) { - return(segP->startEntrySection); + return (segP->startEntrySection); } /************************************************************************/ -/* SISetEndEntrySection(segP, offset) - sets the offset */ +/* SISetEndEntrySection(segP, offset) - sets the offset */ /************************************************************************/ static void -SISetEndEntrySection(SISeg *segP, Offset offset) +SISetEndEntrySection(SISeg * segP, Offset offset) { - segP->endEntrySection = offset; + segP->endEntrySection = offset; } /************************************************************************/ -/* SISetEndEntryChain(segP, offset) - sets the offset */ +/* SISetEndEntryChain(segP, offset) - sets the offset */ /************************************************************************/ static void -SISetEndEntryChain(SISeg *segP, Offset offset) +SISetEndEntryChain(SISeg * segP, Offset offset) { - segP->endEntryChain = offset; + segP->endEntryChain = offset; } /************************************************************************/ -/* SIGetEndEntryChain(segP) - returnss the offset */ +/* SIGetEndEntryChain(segP) - returnss the offset */ 
/************************************************************************/ -static Offset -SIGetEndEntryChain(SISeg *segP) +static Offset +SIGetEndEntryChain(SISeg * segP) { - return(segP->endEntryChain); + return (segP->endEntryChain); } /************************************************************************/ -/* SISetStartEntryChain(segP, offset) - sets the offset */ +/* SISetStartEntryChain(segP, offset) - sets the offset */ /************************************************************************/ static void -SISetStartEntryChain(SISeg *segP, Offset offset) +SISetStartEntryChain(SISeg * segP, Offset offset) { - segP->startEntryChain = offset; + segP->startEntryChain = offset; } /************************************************************************/ -/* SIGetStartEntryChain(segP) - returns the offset */ +/* SIGetStartEntryChain(segP) - returns the offset */ /************************************************************************/ -static Offset -SIGetStartEntryChain(SISeg *segP) +static Offset +SIGetStartEntryChain(SISeg * segP) { - return(segP->startEntryChain); + return (segP->startEntryChain); } /************************************************************************/ -/* SISetNumEntries(segP, num) sets the current nuber of entries */ +/* SISetNumEntries(segP, num) sets the current nuber of entries */ /************************************************************************/ -static bool -SISetNumEntries(SISeg *segP, int num) +static bool +SISetNumEntries(SISeg * segP, int num) { - if ( num <= MAXNUMMESSAGES) { - segP->numEntries = num; - return(true); - } else { - return(false); /* table full */ - } + if (num <= MAXNUMMESSAGES) + { + segP->numEntries = num; + return (true); + } + else + { + return (false); /* table full */ + } } /************************************************************************/ -/* SIGetNumEntries(segP) - returns the current nuber of entries */ +/* SIGetNumEntries(segP) - returns the current nuber of entries */ /************************************************************************/ static int -SIGetNumEntries(SISeg *segP) +SIGetNumEntries(SISeg * segP) { - return(segP->numEntries); + return (segP->numEntries); } /************************************************************************/ -/* SISetMaxNumEntries(segP, num) sets the maximal number of entries */ +/* SISetMaxNumEntries(segP, num) sets the maximal number of entries */ /************************************************************************/ -static bool -SISetMaxNumEntries(SISeg *segP, int num) +static bool +SISetMaxNumEntries(SISeg * segP, int num) { - if ( num <= MAXNUMMESSAGES) { - segP->maxNumEntries = num; - return(true); - } else { - return(false); /* wrong number */ - } + if (num <= MAXNUMMESSAGES) + { + segP->maxNumEntries = num; + return (true); + } + else + { + return (false); /* wrong number */ + } } /************************************************************************/ -/* SIGetProcStateLimit(segP, i) returns the limit of read messages */ +/* SIGetProcStateLimit(segP, i) returns the limit of read messages */ /************************************************************************/ static int -SIGetProcStateLimit(SISeg *segP, int i) +SIGetProcStateLimit(SISeg * segP, int i) { - return(segP->procState[i].limit); + return (segP->procState[i].limit); } /************************************************************************/ -/* SIIncNumEntries(segP, num) increments the current nuber of entries */ +/* SIIncNumEntries(segP, num) increments the current nuber of entries */ 
/************************************************************************/ -static bool -SIIncNumEntries(SISeg *segP, int num) +static bool +SIIncNumEntries(SISeg * segP, int num) { - if ((segP->numEntries + num) <= MAXNUMMESSAGES) { - segP->numEntries = segP->numEntries + num; - return(true); - } else { - return(false); /* table full */ - } + if ((segP->numEntries + num) <= MAXNUMMESSAGES) + { + segP->numEntries = segP->numEntries + num; + return (true); + } + else + { + return (false); /* table full */ + } } /************************************************************************/ -/* SIDecNumEntries(segP, num) decrements the current nuber of entries */ +/* SIDecNumEntries(segP, num) decrements the current nuber of entries */ /************************************************************************/ -static bool -SIDecNumEntries(SISeg *segP, int num) +static bool +SIDecNumEntries(SISeg * segP, int num) { - if ((segP->numEntries - num) >= 0) { - segP->numEntries = segP->numEntries - num; - return(true); - } else { - return(false); /* not enough entries in table */ - } + if ((segP->numEntries - num) >= 0) + { + segP->numEntries = segP->numEntries - num; + return (true); + } + else + { + return (false); /* not enough entries in table */ + } } /************************************************************************/ -/* SISetStartFreeSpace(segP, offset) - sets the offset */ +/* SISetStartFreeSpace(segP, offset) - sets the offset */ /************************************************************************/ static void -SISetStartFreeSpace(SISeg *segP, Offset offset) +SISetStartFreeSpace(SISeg * segP, Offset offset) { - segP->startFreeSpace = offset; + segP->startFreeSpace = offset; } /************************************************************************/ -/* SIGetStartFreeSpace(segP) - returns the offset */ +/* SIGetStartFreeSpace(segP) - returns the offset */ /************************************************************************/ -static Offset -SIGetStartFreeSpace(SISeg *segP) +static Offset +SIGetStartFreeSpace(SISeg * segP) { - return(segP->startFreeSpace); + return (segP->startFreeSpace); } /************************************************************************/ -/* SIGetFirstDataEntry(segP) returns first data entry */ +/* SIGetFirstDataEntry(segP) returns first data entry */ /************************************************************************/ static SISegEntry * -SIGetFirstDataEntry(SISeg *segP) +SIGetFirstDataEntry(SISeg * segP) { - SISegEntry *eP; - Offset startChain; - - startChain = SIGetStartEntryChain(segP); - - if (startChain == InvalidOffset) - return(NULL); - - eP = (SISegEntry *) ((Pointer) segP + - SIGetStartEntrySection(segP) + - startChain ); - return(eP); + SISegEntry *eP; + Offset startChain; + + startChain = SIGetStartEntryChain(segP); + + if (startChain == InvalidOffset) + return (NULL); + + eP = (SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + startChain); + return (eP); } /************************************************************************/ -/* SIGetLastDataEntry(segP) returns last data entry in the chain */ +/* SIGetLastDataEntry(segP) returns last data entry in the chain */ /************************************************************************/ static SISegEntry * -SIGetLastDataEntry(SISeg *segP) +SIGetLastDataEntry(SISeg * segP) { - SISegEntry *eP; - Offset endChain; - - endChain = SIGetEndEntryChain(segP); - - if (endChain == InvalidOffset) - return(NULL); - - eP = (SISegEntry *) ((Pointer) segP + - 
SIGetStartEntrySection(segP) + - endChain ); - return(eP); + SISegEntry *eP; + Offset endChain; + + endChain = SIGetEndEntryChain(segP); + + if (endChain == InvalidOffset) + return (NULL); + + eP = (SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + endChain); + return (eP); } /************************************************************************/ -/* SIGetNextDataEntry(segP, offset) returns next data entry */ +/* SIGetNextDataEntry(segP, offset) returns next data entry */ /************************************************************************/ static SISegEntry * -SIGetNextDataEntry(SISeg *segP, Offset offset) +SIGetNextDataEntry(SISeg * segP, Offset offset) { - SISegEntry *eP; - - if (offset == InvalidOffset) - return(NULL); - - eP = (SISegEntry *) ((Pointer) segP + - SIGetStartEntrySection(segP) + - offset); - return(eP); + SISegEntry *eP; + + if (offset == InvalidOffset) + return (NULL); + + eP = (SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + offset); + return (eP); } @@ -443,352 +468,396 @@ SIGetNextDataEntry(SISeg *segP, Offset offset) /* SIGetNthDataEntry(segP, n) returns the n-th data entry in chain */ /************************************************************************/ static SISegEntry * -SIGetNthDataEntry(SISeg *segP, - int n) /* must range from 1 to MaxMessages */ +SIGetNthDataEntry(SISeg * segP, + int n) /* must range from 1 to MaxMessages */ { - SISegEntry *eP; - int i; - - if (n <= 0) return(NULL); - - eP = SIGetFirstDataEntry(segP); - for (i = 1; i < n; i++) { - /* skip one and get the next */ - eP = SIGetNextDataEntry(segP, eP->next); - } - - return(eP); + SISegEntry *eP; + int i; + + if (n <= 0) + return (NULL); + + eP = SIGetFirstDataEntry(segP); + for (i = 1; i < n; i++) + { + /* skip one and get the next */ + eP = SIGetNextDataEntry(segP, eP->next); + } + + return (eP); } /************************************************************************/ -/* SIEntryOffset(segP, entryP) returns the offset for an pointer */ +/* SIEntryOffset(segP, entryP) returns the offset for an pointer */ /************************************************************************/ -static Offset -SIEntryOffset(SISeg *segP, SISegEntry *entryP) +static Offset +SIEntryOffset(SISeg * segP, SISegEntry * entryP) { - /* relative to B !! */ - return ((Offset) ((Pointer) entryP - - (Pointer) segP - - SIGetStartEntrySection(segP) )); + /* relative to B !! 
*/ + return ((Offset) ((Pointer) entryP - + (Pointer) segP - + SIGetStartEntrySection(segP))); } /************************************************************************/ -/* SISetDataEntry(segP, data) - sets a message in the segemnt */ +/* SISetDataEntry(segP, data) - sets a message in the segemnt */ /************************************************************************/ bool -SISetDataEntry(SISeg *segP, SharedInvalidData *data) -{ - Offset offsetToNewData; - SISegEntry *eP, *lastP; - - if (!SIIncNumEntries(segP, 1)) - return(false); /* no space */ - - /* get a free entry */ - offsetToNewData = SIGetStartFreeSpace(segP); - eP = SIGetNextDataEntry(segP, offsetToNewData); /* it's a free one */ - SISetStartFreeSpace(segP, eP->next); - /* fill it up */ - eP->entryData = *data; - eP->isfree = false; - eP->next = InvalidOffset; - - /* handle insertion point at the end of the chain !!*/ - lastP = SIGetLastDataEntry(segP); - if (lastP == NULL) { - /* there is no chain, insert the first entry */ - SISetStartEntryChain(segP, SIEntryOffset(segP, eP)); - } else { - /* there is a last entry in the chain */ - lastP->next = SIEntryOffset(segP, eP); - } - SISetEndEntryChain(segP, SIEntryOffset(segP, eP)); - return(true); -} - - -/************************************************************************/ -/* SIDecProcLimit(segP, num) decrements all process limits */ +SISetDataEntry(SISeg * segP, SharedInvalidData * data) +{ + Offset offsetToNewData; + SISegEntry *eP, + *lastP; + + if (!SIIncNumEntries(segP, 1)) + return (false); /* no space */ + + /* get a free entry */ + offsetToNewData = SIGetStartFreeSpace(segP); + eP = SIGetNextDataEntry(segP, offsetToNewData); /* it's a free one */ + SISetStartFreeSpace(segP, eP->next); + /* fill it up */ + eP->entryData = *data; + eP->isfree = false; + eP->next = InvalidOffset; + + /* handle insertion point at the end of the chain !! 
*/ + lastP = SIGetLastDataEntry(segP); + if (lastP == NULL) + { + /* there is no chain, insert the first entry */ + SISetStartEntryChain(segP, SIEntryOffset(segP, eP)); + } + else + { + /* there is a last entry in the chain */ + lastP->next = SIEntryOffset(segP, eP); + } + SISetEndEntryChain(segP, SIEntryOffset(segP, eP)); + return (true); +} + + +/************************************************************************/ +/* SIDecProcLimit(segP, num) decrements all process limits */ /************************************************************************/ static void -SIDecProcLimit(SISeg *segP, int num) -{ - int i; - for (i=0; i < MaxBackendId; i++) { - /* decrement only, if there is a limit > 0 */ - if (segP->procState[i].limit > 0) { - segP->procState[i].limit = segP->procState[i].limit - num; - if (segP->procState[i].limit < 0) { - /* limit was not high enough, reset to zero */ - /* negative means it's a dead backend */ - segP->procState[i].limit = 0; - } - } - } +SIDecProcLimit(SISeg * segP, int num) +{ + int i; + + for (i = 0; i < MaxBackendId; i++) + { + /* decrement only, if there is a limit > 0 */ + if (segP->procState[i].limit > 0) + { + segP->procState[i].limit = segP->procState[i].limit - num; + if (segP->procState[i].limit < 0) + { + /* limit was not high enough, reset to zero */ + /* negative means it's a dead backend */ + segP->procState[i].limit = 0; + } + } + } } /************************************************************************/ -/* SIDelDataEntry(segP) - free the FIRST entry */ +/* SIDelDataEntry(segP) - free the FIRST entry */ /************************************************************************/ bool -SIDelDataEntry(SISeg *segP) +SIDelDataEntry(SISeg * segP) { - SISegEntry *e1P; - - if (!SIDecNumEntries(segP, 1)) { - /* no entries in buffer */ - return(false); - } - - e1P = SIGetFirstDataEntry(segP); - SISetStartEntryChain(segP, e1P->next); - if (SIGetStartEntryChain(segP) == InvalidOffset) { - /* it was the last entry */ - SISetEndEntryChain(segP, InvalidOffset); - } - /* free the entry */ - e1P->isfree = true; - e1P->next = SIGetStartFreeSpace(segP); - SISetStartFreeSpace(segP, SIEntryOffset(segP, e1P)); - SIDecProcLimit(segP, 1); - return(true); + SISegEntry *e1P; + + if (!SIDecNumEntries(segP, 1)) + { + /* no entries in buffer */ + return (false); + } + + e1P = SIGetFirstDataEntry(segP); + SISetStartEntryChain(segP, e1P->next); + if (SIGetStartEntryChain(segP) == InvalidOffset) + { + /* it was the last entry */ + SISetEndEntryChain(segP, InvalidOffset); + } + /* free the entry */ + e1P->isfree = true; + e1P->next = SIGetStartFreeSpace(segP); + SISetStartFreeSpace(segP, SIEntryOffset(segP, e1P)); + SIDecProcLimit(segP, 1); + return (true); } /************************************************************************/ -/* SISetProcStateInvalid(segP) checks and marks a backends state as */ -/* invalid */ +/* SISetProcStateInvalid(segP) checks and marks a backends state as */ +/* invalid */ /************************************************************************/ void -SISetProcStateInvalid(SISeg *segP) -{ - int i; - - for (i=0; i < MaxBackendId; i++) { - if (segP->procState[i].limit == 0) { - /* backend i didn't read any message */ - segP->procState[i].resetState = true; - /*XXX signal backend that it has to reset its internal cache ? 
*/ - } - } +SISetProcStateInvalid(SISeg * segP) +{ + int i; + + for (i = 0; i < MaxBackendId; i++) + { + if (segP->procState[i].limit == 0) + { + /* backend i didn't read any message */ + segP->procState[i].resetState = true; + + /* + * XXX signal backend that it has to reset its internal cache + * ? + */ + } + } } /************************************************************************/ -/* SIReadEntryData(segP, backendId, function) */ -/* - marks messages to be read by id */ -/* and executes function */ +/* SIReadEntryData(segP, backendId, function) */ +/* - marks messages to be read by id */ +/* and executes function */ /************************************************************************/ void -SIReadEntryData(SISeg *segP, - int backendId, - void (*invalFunction)(), - void (*resetFunction)()) -{ - int i = 0; - SISegEntry *data; - - Assert(segP->procState[backendId - 1].tag == MyBackendTag); - - if (!segP->procState[backendId - 1].resetState) { - /* invalidate data, but only those, you have not seen yet !!*/ - /* therefore skip read messages */ - data = SIGetNthDataEntry(segP, - SIGetProcStateLimit(segP, backendId - 1) + 1); - while (data != NULL) { - i++; - segP->procState[backendId - 1].limit++; /* one more message read */ - invalFunction(data->entryData.cacheId, - data->entryData.hashIndex, - &data->entryData.pointerData); - data = SIGetNextDataEntry(segP, data->next); - } - /* SIDelExpiredDataEntries(segP); */ - } else { - /*backend must not read messages, its own state has to be reset */ - elog(NOTICE, "SIMarkEntryData: cache state reset"); - resetFunction(); /* XXXX call it here, parameters? */ - - /* new valid state--mark all messages "read" */ - segP->procState[backendId - 1].resetState = false; - segP->procState[backendId - 1].limit = SIGetNumEntries(segP); - } - /* check whether we can remove dead messages */ - if (i > MAXNUMMESSAGES) { - elog(FATAL, "SIReadEntryData: Invalid segment state"); - } +SIReadEntryData(SISeg * segP, + int backendId, + void (*invalFunction) (), + void (*resetFunction) ()) +{ + int i = 0; + SISegEntry *data; + + Assert(segP->procState[backendId - 1].tag == MyBackendTag); + + if (!segP->procState[backendId - 1].resetState) + { + /* invalidate data, but only those, you have not seen yet !! */ + /* therefore skip read messages */ + data = SIGetNthDataEntry(segP, + SIGetProcStateLimit(segP, backendId - 1) + 1); + while (data != NULL) + { + i++; + segP->procState[backendId - 1].limit++; /* one more message read */ + invalFunction(data->entryData.cacheId, + data->entryData.hashIndex, + &data->entryData.pointerData); + data = SIGetNextDataEntry(segP, data->next); + } + /* SIDelExpiredDataEntries(segP); */ + } + else + { + /* backend must not read messages, its own state has to be reset */ + elog(NOTICE, "SIMarkEntryData: cache state reset"); + resetFunction(); /* XXXX call it here, parameters? 
*/ + + /* new valid state--mark all messages "read" */ + segP->procState[backendId - 1].resetState = false; + segP->procState[backendId - 1].limit = SIGetNumEntries(segP); + } + /* check whether we can remove dead messages */ + if (i > MAXNUMMESSAGES) + { + elog(FATAL, "SIReadEntryData: Invalid segment state"); + } } /************************************************************************/ -/* SIDelExpiredDataEntries (segP) - removes irrelevant messages */ +/* SIDelExpiredDataEntries (segP) - removes irrelevant messages */ /************************************************************************/ void -SIDelExpiredDataEntries(SISeg *segP) -{ - int min, i, h; - - min = 9999999; - for (i = 0; i < MaxBackendId; i++) { - h = SIGetProcStateLimit(segP, i); - if (h >= 0) { /* backend active */ - if (h < min ) min = h; - } - } - if (min != 9999999) { - /* we can remove min messages */ - for (i = 1; i <= min; i++) { - /* this adjusts also the state limits!*/ - if (!SIDelDataEntry(segP)) { - elog(FATAL, "SIDelExpiredDataEntries: Invalid segment state"); - } - } - } +SIDelExpiredDataEntries(SISeg * segP) +{ + int min, + i, + h; + + min = 9999999; + for (i = 0; i < MaxBackendId; i++) + { + h = SIGetProcStateLimit(segP, i); + if (h >= 0) + { /* backend active */ + if (h < min) + min = h; + } + } + if (min != 9999999) + { + /* we can remove min messages */ + for (i = 1; i <= min; i++) + { + /* this adjusts also the state limits! */ + if (!SIDelDataEntry(segP)) + { + elog(FATAL, "SIDelExpiredDataEntries: Invalid segment state"); + } + } + } } /************************************************************************/ -/* SISegInit(segP) - initializes the segment */ +/* SISegInit(segP) - initializes the segment */ /************************************************************************/ static void -SISegInit(SISeg *segP) -{ - SISegOffsets *oP; - int segSize, i; - SISegEntry *eP; - - oP = SIComputeSize(&segSize); - /* set sempahore ids in the segment */ - /* XXX */ - SISetStartEntrySection(segP, oP->offsetToFirstEntry); - SISetEndEntrySection(segP, oP->offsetToEndOfSegemnt); - SISetStartFreeSpace(segP, 0); - SISetStartEntryChain(segP, InvalidOffset); - SISetEndEntryChain(segP, InvalidOffset); - SISetNumEntries(segP, 0); - SISetMaxNumEntries(segP, MAXNUMMESSAGES); - for (i = 0; i < MaxBackendId; i++) { - segP->procState[i].limit = -1; /* no backend active !!*/ - segP->procState[i].resetState = false; - segP->procState[i].tag = InvalidBackendTag; - } - /* construct a chain of free entries */ - for (i = 1; i < MAXNUMMESSAGES; i++) { - eP = (SISegEntry *) ((Pointer) segP + - SIGetStartEntrySection(segP) + - (i - 1) * sizeof(SISegEntry)); - eP->isfree = true; - eP->next = i * sizeof(SISegEntry); /* relative to B */ - } - /* handle the last free entry separate */ - eP = (SISegEntry *) ((Pointer) segP + - SIGetStartEntrySection(segP) + - (MAXNUMMESSAGES - 1) * sizeof(SISegEntry)); - eP->isfree = true; - eP->next = InvalidOffset; /* it's the end of the chain !! 
*/ - /* - * Be tidy - */ - pfree(oP); - -} - - - -/************************************************************************/ -/* SISegmentKill(key) - kill any segment */ +SISegInit(SISeg * segP) +{ + SISegOffsets *oP; + int segSize, + i; + SISegEntry *eP; + + oP = SIComputeSize(&segSize); + /* set semaphore ids in the segment */ + /* XXX */ + SISetStartEntrySection(segP, oP->offsetToFirstEntry); + SISetEndEntrySection(segP, oP->offsetToEndOfSegemnt); + SISetStartFreeSpace(segP, 0); + SISetStartEntryChain(segP, InvalidOffset); + SISetEndEntryChain(segP, InvalidOffset); + SISetNumEntries(segP, 0); + SISetMaxNumEntries(segP, MAXNUMMESSAGES); + for (i = 0; i < MaxBackendId; i++) + { + segP->procState[i].limit = -1; /* no backend active !! */ + segP->procState[i].resetState = false; + segP->procState[i].tag = InvalidBackendTag; + } + /* construct a chain of free entries */ + for (i = 1; i < MAXNUMMESSAGES; i++) + { + eP = (SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + (i - 1) * sizeof(SISegEntry)); + eP->isfree = true; + eP->next = i * sizeof(SISegEntry); /* relative to B */ + } + /* handle the last free entry separately */ + eP = (SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + (MAXNUMMESSAGES - 1) * sizeof(SISegEntry)); + eP->isfree = true; + eP->next = InvalidOffset; /* it's the end of the chain !! */ + + /* + * Be tidy + */ + pfree(oP); + +} + + + +/************************************************************************/ +/* SISegmentKill(key) - kill any segment */ /************************************************************************/ static void -SISegmentKill(int key) /* the corresponding key for the segment */ -{ - IpcMemoryKill(key); -} +SISegmentKill(int key) /* the corresponding key for the segment */ +{ + IpcMemoryKill(key); +} /************************************************************************/ -/* SISegmentGet(key, size) - get a shared segment of size <size> */ -/* returns a segment id */ +/* SISegmentGet(key, size) - get a shared segment of size <size> */ +/* returns a segment id */ /************************************************************************/ -static IpcMemoryId -SISegmentGet(int key, /* the corresponding key for the segment */ - int size, /* size of segment in bytes */ - bool create) +static IpcMemoryId +SISegmentGet(int key, /* the corresponding key for the segment */ + int size, /* size of segment in bytes */ + bool create) { - IpcMemoryId shmid; - - if (create) { - shmid = IpcMemoryCreate(key, size, IPCProtection); - } else { - shmid = IpcMemoryIdGet(key, size); - } - return(shmid); + IpcMemoryId shmid; + + if (create) + { + shmid = IpcMemoryCreate(key, size, IPCProtection); + } + else + { + shmid = IpcMemoryIdGet(key, size); + } + return (shmid); } /************************************************************************/ -/* SISegmentAttach(shmid) - attach a shared segment with id shmid */ +/* SISegmentAttach(shmid) - attach a shared segment with id shmid */ /************************************************************************/ static void SISegmentAttach(IpcMemoryId shmid) { - shmInvalBuffer = (struct SISeg *) IpcMemoryAttach(shmid); - if (shmInvalBuffer == IpcMemAttachFailed) { - /* XXX use validity function */ - elog(NOTICE, "SISegmentAttach: Could not attach segment"); - elog(FATAL, "SISegmentAttach: %m"); - } + shmInvalBuffer = (struct SISeg *) IpcMemoryAttach(shmid); + if (shmInvalBuffer == IpcMemAttachFailed) + { + /* XXX use validity function */ + elog(NOTICE, "SISegmentAttach: Could not attach 
segment"); + elog(FATAL, "SISegmentAttach: %m"); + } } /************************************************************************/ -/* SISegmentInit(killExistingSegment, key) initialize segment */ +/* SISegmentInit(killExistingSegment, key) initialize segment */ /************************************************************************/ int SISegmentInit(bool killExistingSegment, IPCKey key) -{ - SISegOffsets *oP; - int segSize; - IpcMemoryId shmId; - bool create; - - if (killExistingSegment) { - /* Kill existing segment */ - /* set semaphore */ - SISegmentKill(key); - - /* Get a shared segment */ - - oP = SIComputeSize(&segSize); - /* - * Be tidy - */ - pfree(oP); - - create = true; - shmId = SISegmentGet(key,segSize, create); - if (shmId < 0) { - perror("SISegmentGet: failed"); - return(-1); /* an error */ - } - - /* Attach the shared cache invalidation segment */ - /* sets the global variable shmInvalBuffer */ - SISegmentAttach(shmId); - - /* Init shared memory table */ - SISegInit(shmInvalBuffer); - } else { - /* use an existing segment */ - create = false; - shmId = SISegmentGet(key, 0, create); - if (shmId < 0) { - perror("SISegmentGet: getting an existent segment failed"); - return(-1); /* an error */ - } - /* Attach the shared cache invalidation segment */ - SISegmentAttach(shmId); - } - return(1); -} +{ + SISegOffsets *oP; + int segSize; + IpcMemoryId shmId; + bool create; + + if (killExistingSegment) + { + /* Kill existing segment */ + /* set semaphore */ + SISegmentKill(key); + + /* Get a shared segment */ + + oP = SIComputeSize(&segSize); + + /* + * Be tidy + */ + pfree(oP); + create = true; + shmId = SISegmentGet(key, segSize, create); + if (shmId < 0) + { + perror("SISegmentGet: failed"); + return (-1); /* an error */ + } + + /* Attach the shared cache invalidation segment */ + /* sets the global variable shmInvalBuffer */ + SISegmentAttach(shmId); + + /* Init shared memory table */ + SISegInit(shmInvalBuffer); + } + else + { + /* use an existing segment */ + create = false; + shmId = SISegmentGet(key, 0, create); + if (shmId < 0) + { + perror("SISegmentGet: getting an existent segment failed"); + return (-1); /* an error */ + } + /* Attach the shared cache invalidation segment */ + SISegmentAttach(shmId); + } + return (1); +} diff --git a/src/backend/storage/ipc/spin.c b/src/backend/storage/ipc/spin.c index b50d5d9500f..e93d5894a58 100644 --- a/src/backend/storage/ipc/spin.c +++ b/src/backend/storage/ipc/spin.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * spin.c-- - * routines for managing spin locks + * routines for managing spin locks * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/spin.c,v 1.6 1997/08/21 13:43:46 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/spin.c,v 1.7 1997/09/07 04:48:45 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -21,8 +21,8 @@ * term semaphores separately anyway. * * NOTE: These routines are not supposed to be widely used in Postgres. - * They are preserved solely for the purpose of porting Mark Sullivan's - * buffer manager to Postgres. + * They are preserved solely for the purpose of porting Mark Sullivan's + * buffer manager to Postgres. 
*/ #include <errno.h> #include "postgres.h" @@ -43,61 +43,62 @@ IpcSemaphoreId SpinLockId; bool CreateSpinlocks(IPCKey key) -{ - /* the spin lock shared memory must have been created by now */ - return(TRUE); +{ + /* the spin lock shared memory must have been created by now */ + return (TRUE); } bool InitSpinLocks(int init, IPCKey key) { - extern SPINLOCK ShmemLock; - extern SPINLOCK BindingLock; - extern SPINLOCK BufMgrLock; - extern SPINLOCK LockMgrLock; - extern SPINLOCK ProcStructLock; - extern SPINLOCK SInvalLock; - extern SPINLOCK OidGenLockId; - + extern SPINLOCK ShmemLock; + extern SPINLOCK BindingLock; + extern SPINLOCK BufMgrLock; + extern SPINLOCK LockMgrLock; + extern SPINLOCK ProcStructLock; + extern SPINLOCK SInvalLock; + extern SPINLOCK OidGenLockId; + #ifdef MAIN_MEMORY - extern SPINLOCK MMCacheLock; -#endif /* SONY_JUKEBOX */ - - /* These six spinlocks have fixed location is shmem */ - ShmemLock = (SPINLOCK) SHMEMLOCKID; - BindingLock = (SPINLOCK) BINDINGLOCKID; - BufMgrLock = (SPINLOCK) BUFMGRLOCKID; - LockMgrLock = (SPINLOCK) LOCKMGRLOCKID; - ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID; - SInvalLock = (SPINLOCK) SINVALLOCKID; - OidGenLockId = (SPINLOCK) OIDGENLOCKID; - + extern SPINLOCK MMCacheLock; + +#endif /* SONY_JUKEBOX */ + + /* These six spinlocks have fixed location is shmem */ + ShmemLock = (SPINLOCK) SHMEMLOCKID; + BindingLock = (SPINLOCK) BINDINGLOCKID; + BufMgrLock = (SPINLOCK) BUFMGRLOCKID; + LockMgrLock = (SPINLOCK) LOCKMGRLOCKID; + ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID; + SInvalLock = (SPINLOCK) SINVALLOCKID; + OidGenLockId = (SPINLOCK) OIDGENLOCKID; + #ifdef MAIN_MEMORY - MMCacheLock = (SPINLOCK) MMCACHELOCKID; -#endif /* MAIN_MEMORY */ - - return(TRUE); + MMCacheLock = (SPINLOCK) MMCACHELOCKID; +#endif /* MAIN_MEMORY */ + + return (TRUE); } void SpinAcquire(SPINLOCK lock) { - ExclusiveLock(lock); - PROC_INCR_SLOCK(lock); + ExclusiveLock(lock); + PROC_INCR_SLOCK(lock); } void SpinRelease(SPINLOCK lock) { - PROC_DECR_SLOCK(lock); - ExclusiveUnlock(lock); + PROC_DECR_SLOCK(lock); + ExclusiveUnlock(lock); } -#else /* HAS_TEST_AND_SET */ +#else /* HAS_TEST_AND_SET */ /* Spinlocks are implemented using SysV semaphores */ -static bool AttachSpinLocks(IPCKey key); -static bool SpinIsLocked(SPINLOCK lock); +static bool AttachSpinLocks(IPCKey key); +static bool SpinIsLocked(SPINLOCK lock); /* * SpinAcquire -- try to grab a spinlock @@ -107,86 +108,91 @@ static bool SpinIsLocked(SPINLOCK lock); void SpinAcquire(SPINLOCK lock) { - IpcSemaphoreLock(SpinLockId, lock, IpcExclusiveLock); - PROC_INCR_SLOCK(lock); + IpcSemaphoreLock(SpinLockId, lock, IpcExclusiveLock); + PROC_INCR_SLOCK(lock); } /* * SpinRelease -- release a spin lock - * + * * FAILS if the semaphore is corrupted */ void SpinRelease(SPINLOCK lock) { - Assert(SpinIsLocked(lock)) + Assert(SpinIsLocked(lock)) PROC_DECR_SLOCK(lock); - IpcSemaphoreUnlock(SpinLockId, lock, IpcExclusiveLock); + IpcSemaphoreUnlock(SpinLockId, lock, IpcExclusiveLock); } -static bool +static bool SpinIsLocked(SPINLOCK lock) { - int semval; - - semval = IpcSemaphoreGetValue(SpinLockId, lock); - return(semval < IpcSemaphoreDefaultStartValue); + int semval; + + semval = IpcSemaphoreGetValue(SpinLockId, lock); + return (semval < IpcSemaphoreDefaultStartValue); } /* * CreateSpinlocks -- Create a sysV semaphore array for - * the spinlocks + * the spinlocks * */ bool CreateSpinlocks(IPCKey key) { - - int status; - IpcSemaphoreId semid; - semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection, - IpcSemaphoreDefaultStartValue, 1, 
&status); - if (status == IpcSemIdExist) { - IpcSemaphoreKill(key); - elog(NOTICE,"Destroying old spinlock semaphore"); - semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection, - IpcSemaphoreDefaultStartValue, 1, &status); - } - - if (semid >= 0) { - SpinLockId = semid; - return(TRUE); - } - /* cannot create spinlocks */ - elog(FATAL,"CreateSpinlocks: cannot create spin locks"); - return(FALSE); + + int status; + IpcSemaphoreId semid; + + semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection, + IpcSemaphoreDefaultStartValue, 1, &status); + if (status == IpcSemIdExist) + { + IpcSemaphoreKill(key); + elog(NOTICE, "Destroying old spinlock semaphore"); + semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection, + IpcSemaphoreDefaultStartValue, 1, &status); + } + + if (semid >= 0) + { + SpinLockId = semid; + return (TRUE); + } + /* cannot create spinlocks */ + elog(FATAL, "CreateSpinlocks: cannot create spin locks"); + return (FALSE); } /* * Attach to existing spinlock set */ -static bool +static bool AttachSpinLocks(IPCKey key) { - IpcSemaphoreId id; - - id = semget (key, MAX_SPINS, 0); - if (id < 0) { - if (errno == EEXIST) { - /* key is the name of someone else's semaphore */ - elog (FATAL,"AttachSpinlocks: SPIN_KEY belongs to someone else"); + IpcSemaphoreId id; + + id = semget(key, MAX_SPINS, 0); + if (id < 0) + { + if (errno == EEXIST) + { + /* key is the name of someone else's semaphore */ + elog(FATAL, "AttachSpinlocks: SPIN_KEY belongs to someone else"); + } + /* cannot create spinlocks */ + elog(FATAL, "AttachSpinlocks: cannot create spin locks"); + return (FALSE); } - /* cannot create spinlocks */ - elog(FATAL,"AttachSpinlocks: cannot create spin locks"); - return(FALSE); - } - SpinLockId = id; - return(TRUE); + SpinLockId = id; + return (TRUE); } /* * InitSpinLocks -- Spinlock bootstrapping - * + * * We need several spinlocks for bootstrapping: * BindingLock (for the shmem binding table) and * ShmemLock (for the shmem allocator), BufMgrLock (for buffer @@ -199,41 +205,47 @@ AttachSpinLocks(IPCKey key) bool InitSpinLocks(int init, IPCKey key) { - extern SPINLOCK ShmemLock; - extern SPINLOCK BindingLock; - extern SPINLOCK BufMgrLock; - extern SPINLOCK LockMgrLock; - extern SPINLOCK ProcStructLock; - extern SPINLOCK SInvalLock; - extern SPINLOCK OidGenLockId; - + extern SPINLOCK ShmemLock; + extern SPINLOCK BindingLock; + extern SPINLOCK BufMgrLock; + extern SPINLOCK LockMgrLock; + extern SPINLOCK ProcStructLock; + extern SPINLOCK SInvalLock; + extern SPINLOCK OidGenLockId; + #ifdef MAIN_MEMORY - extern SPINLOCK MMCacheLock; -#endif /* MAIN_MEMORY */ - - if (!init || key != IPC_PRIVATE) { - /* if bootstrap and key is IPC_PRIVATE, it means that we are running - * backend by itself. no need to attach spinlocks - */ - if (! AttachSpinLocks(key)) { - elog(FATAL,"InitSpinLocks: couldnt attach spin locks"); - return(FALSE); + extern SPINLOCK MMCacheLock; + +#endif /* MAIN_MEMORY */ + + if (!init || key != IPC_PRIVATE) + { + + /* + * if bootstrap and key is IPC_PRIVATE, it means that we are + * running backend by itself. 
no need to attach spinlocks + */ + if (!AttachSpinLocks(key)) + { + elog(FATAL, "InitSpinLocks: couldnt attach spin locks"); + return (FALSE); + } } - } - - /* These five (or six) spinlocks have fixed location is shmem */ - ShmemLock = (SPINLOCK) SHMEMLOCKID; - BindingLock = (SPINLOCK) BINDINGLOCKID; - BufMgrLock = (SPINLOCK) BUFMGRLOCKID; - LockMgrLock = (SPINLOCK) LOCKMGRLOCKID; - ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID; - SInvalLock = (SPINLOCK) SINVALLOCKID; - OidGenLockId = (SPINLOCK) OIDGENLOCKID; - + + /* These five (or six) spinlocks have fixed location is shmem */ + ShmemLock = (SPINLOCK) SHMEMLOCKID; + BindingLock = (SPINLOCK) BINDINGLOCKID; + BufMgrLock = (SPINLOCK) BUFMGRLOCKID; + LockMgrLock = (SPINLOCK) LOCKMGRLOCKID; + ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID; + SInvalLock = (SPINLOCK) SINVALLOCKID; + OidGenLockId = (SPINLOCK) OIDGENLOCKID; + #ifdef MAIN_MEMORY - MMCacheLock = (SPINLOCK) MMCACHELOCKID; -#endif /* MAIN_MEMORY */ - - return(TRUE); + MMCacheLock = (SPINLOCK) MMCACHELOCKID; +#endif /* MAIN_MEMORY */ + + return (TRUE); } -#endif /* HAS_TEST_AND_SET */ + +#endif /* HAS_TEST_AND_SET */ diff --git a/src/backend/storage/large_object/inv_api.c b/src/backend/storage/large_object/inv_api.c index ddf69a6527e..dfde8f469c5 100644 --- a/src/backend/storage/large_object/inv_api.c +++ b/src/backend/storage/large_object/inv_api.c @@ -1,19 +1,19 @@ /*------------------------------------------------------------------------- * * inv_api.c-- - * routines for manipulating inversion fs large objects. This file - * contains the user-level large object application interface routines. + * routines for manipulating inversion fs large objects. This file + * contains the user-level large object application interface routines. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/large_object/inv_api.c,v 1.13 1997/08/19 21:33:10 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/large_object/inv_api.c,v 1.14 1997/09/07 04:48:46 momjian Exp $ * *------------------------------------------------------------------------- */ #include <sys/types.h> -#include <stdio.h> /* for sprintf() */ +#include <stdio.h> /* for sprintf() */ #include <string.h> #include <sys/file.h> #include <sys/stat.h> @@ -28,11 +28,11 @@ #include "access/xact.h" #include "access/nbtree.h" #include "access/tupdesc.h" -#include "catalog/index.h" /* for index_create() */ +#include "catalog/index.h" /* for index_create() */ #include "catalog/catalog.h" /* for newoid() */ -#include "catalog/pg_am.h" /* for BTREE_AM_OID */ +#include "catalog/pg_am.h" /* for BTREE_AM_OID */ #include "catalog/pg_opclass.h" /* for INT4_OPS_OID */ -#include "catalog/pg_proc.h" /* for INT4GE_PROC_OID */ +#include "catalog/pg_proc.h" /* for INT4GE_PROC_OID */ #include "storage/itemptr.h" #include "storage/bufpage.h" #include "storage/bufmgr.h" @@ -43,226 +43,241 @@ #include "storage/large_object.h" #include "storage/lmgr.h" #include "utils/syscache.h" -#include "utils/builtins.h" /* for namestrcpy() */ +#include "utils/builtins.h" /* for namestrcpy() */ #include "catalog/heap.h" #include "nodes/pg_list.h" /* - * Warning, Will Robinson... In order to pack data into an inversion - * file as densely as possible, we violate the class abstraction here. - * When we're appending a new tuple to the end of the table, we check - * the last page to see how much data we can put on it. If it's more - * than IMINBLK, we write enough to fill the page. 
This limits external - * fragmentation. In no case can we write more than IMAXBLK, since - * the 8K postgres page size less overhead leaves only this much space - * for data. + * Warning, Will Robinson... In order to pack data into an inversion + * file as densely as possible, we violate the class abstraction here. + * When we're appending a new tuple to the end of the table, we check + * the last page to see how much data we can put on it. If it's more + * than IMINBLK, we write enough to fill the page. This limits external + * fragmentation. In no case can we write more than IMAXBLK, since + * the 8K postgres page size less overhead leaves only this much space + * for data. */ -#define IFREESPC(p) (PageGetFreeSpace(p) - sizeof(HeapTupleData) - sizeof(struct varlena) - sizeof(int32)) -#define IMAXBLK 8092 -#define IMINBLK 512 +#define IFREESPC(p) (PageGetFreeSpace(p) - sizeof(HeapTupleData) - sizeof(struct varlena) - sizeof(int32)) +#define IMAXBLK 8092 +#define IMINBLK 512 /* non-export function prototypes */ -static HeapTuple inv_newtuple(LargeObjectDesc *obj_desc, Buffer buffer, - Page page, char *dbuf, int nwrite); -static HeapTuple inv_fetchtup(LargeObjectDesc *obj_desc, Buffer *bufP); -static int inv_wrnew(LargeObjectDesc *obj_desc, char *buf, int nbytes); -static int inv_wrold(LargeObjectDesc *obj_desc, char *dbuf, int nbytes, - HeapTuple htup, Buffer buffer); -static void inv_indextup(LargeObjectDesc *obj_desc, HeapTuple htup); -static int _inv_getsize(Relation hreln, TupleDesc hdesc, Relation ireln); +static HeapTuple +inv_newtuple(LargeObjectDesc * obj_desc, Buffer buffer, + Page page, char *dbuf, int nwrite); +static HeapTuple inv_fetchtup(LargeObjectDesc * obj_desc, Buffer * bufP); +static int inv_wrnew(LargeObjectDesc * obj_desc, char *buf, int nbytes); +static int +inv_wrold(LargeObjectDesc * obj_desc, char *dbuf, int nbytes, + HeapTuple htup, Buffer buffer); +static void inv_indextup(LargeObjectDesc * obj_desc, HeapTuple htup); +static int _inv_getsize(Relation hreln, TupleDesc hdesc, Relation ireln); /* - * inv_create -- create a new large object. + * inv_create -- create a new large object. * - * Arguments: - * flags -- storage manager to use, archive mode, etc. + * Arguments: + * flags -- storage manager to use, archive mode, etc. * - * Returns: - * large object descriptor, appropriately filled in. + * Returns: + * large object descriptor, appropriately filled in. */ LargeObjectDesc * inv_create(int flags) { - int file_oid; - LargeObjectDesc *retval; - Relation r; - Relation indr; - int smgr; - char archchar; - TupleDesc tupdesc; - AttrNumber attNums[1]; - Oid classObjectId[1]; - char objname[NAMEDATALEN]; - char indname[NAMEDATALEN]; - - /* parse flags */ - smgr = flags & INV_SMGRMASK; - if (flags & INV_ARCHIVE) - archchar = 'h'; - else - archchar = 'n'; - - /* add one here since the pg_class tuple created - will have the next oid and we want to have the relation name - to correspond to the tuple OID */ - file_oid = newoid()+1; - - /* come up with some table names */ - sprintf(objname, "xinv%d", file_oid); - sprintf(indname, "xinx%d", file_oid); - - if (SearchSysCacheTuple(RELNAME, PointerGetDatum(objname), - 0,0,0) != NULL) { - elog(WARN, - "internal error: %s already exists -- cannot create large obj", - objname); - } - if (SearchSysCacheTuple(RELNAME, PointerGetDatum(indname), - 0,0,0) != NULL) { - elog(WARN, - "internal error: %s already exists -- cannot create large obj", - indname); - } - - /* this is pretty painful... 
want a tuple descriptor */ - tupdesc = CreateTemplateTupleDesc(2); - TupleDescInitEntry(tupdesc, (AttrNumber) 1, - "olastbye", - "int4", - 0, false); - TupleDescInitEntry(tupdesc, (AttrNumber) 2, - "odata", - "bytea", - 0, false); - /* - * First create the table to hold the inversion large object. It - * will be located on whatever storage manager the user requested. - */ - - heap_create(objname, - objname, - (int) archchar, smgr, - tupdesc); - - /* make the relation visible in this transaction */ - CommandCounterIncrement(); - r = heap_openr(objname); - - if (!RelationIsValid(r)) { - elog(WARN, "cannot create large object on %s under inversion", - smgrout(smgr)); - } - - /* - * Now create a btree index on the relation's olastbyte attribute to - * make seeks go faster. The hardwired constants are embarassing - * to me, and are symptomatic of the pressure under which this code - * was written. - * - * ok, mao, let's put in some symbolic constants - jolly - */ - - attNums[0] = 1; - classObjectId[0] = INT4_OPS_OID; - index_create(objname, indname, NULL, NULL, BTREE_AM_OID, - 1, &attNums[0], &classObjectId[0], - 0, (Datum) NULL, NULL, FALSE, FALSE); - - /* make the index visible in this transaction */ - CommandCounterIncrement(); - indr = index_openr(indname); - - if (!RelationIsValid(indr)) { - elog(WARN, "cannot create index for large obj on %s under inversion", - smgrout(smgr)); - } - - retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); - - retval->heap_r = r; - retval->index_r = indr; - retval->iscan = (IndexScanDesc) NULL; - retval->hdesc = RelationGetTupleDescriptor(r); - retval->idesc = RelationGetTupleDescriptor(indr); - retval->offset = retval->lowbyte = - retval->highbyte = 0; - ItemPointerSetInvalid(&(retval->htid)); - - if (flags & INV_WRITE) { - RelationSetLockForWrite(r); - retval->flags = IFS_WRLOCK|IFS_RDLOCK; - } else if (flags & INV_READ) { - RelationSetLockForRead(r); - retval->flags = IFS_RDLOCK; - } - retval->flags |= IFS_ATEOF; - - return(retval); + int file_oid; + LargeObjectDesc *retval; + Relation r; + Relation indr; + int smgr; + char archchar; + TupleDesc tupdesc; + AttrNumber attNums[1]; + Oid classObjectId[1]; + char objname[NAMEDATALEN]; + char indname[NAMEDATALEN]; + + /* parse flags */ + smgr = flags & INV_SMGRMASK; + if (flags & INV_ARCHIVE) + archchar = 'h'; + else + archchar = 'n'; + + /* + * add one here since the pg_class tuple created will have the next + * oid and we want to have the relation name to correspond to the + * tuple OID + */ + file_oid = newoid() + 1; + + /* come up with some table names */ + sprintf(objname, "xinv%d", file_oid); + sprintf(indname, "xinx%d", file_oid); + + if (SearchSysCacheTuple(RELNAME, PointerGetDatum(objname), + 0, 0, 0) != NULL) + { + elog(WARN, + "internal error: %s already exists -- cannot create large obj", + objname); + } + if (SearchSysCacheTuple(RELNAME, PointerGetDatum(indname), + 0, 0, 0) != NULL) + { + elog(WARN, + "internal error: %s already exists -- cannot create large obj", + indname); + } + + /* this is pretty painful... want a tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(2); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, + "olastbye", + "int4", + 0, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, + "odata", + "bytea", + 0, false); + + /* + * First create the table to hold the inversion large object. It will + * be located on whatever storage manager the user requested. 
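*/

/*
 * A small sketch (not from the patch) of what one xinv tuple holds and
 * why: the olastbyte attribute plus the data length pin down exactly
 * which byte range of the large object the tuple covers, which is what
 * lets the b-tree index on olastbyte answer seeks. DemoInvBlock is a
 * hypothetical flattened stand-in for the (olastbyte, odata) pair built
 * in the tuple descriptor above.
 */
#include <stdio.h>

typedef struct DemoInvBlock
{
    int         olastbyte;              /* file offset of last byte held */
    int         datalen;                /* number of data bytes held */
} DemoInvBlock;

static int
demo_firstbyte(const DemoInvBlock * blk)
{
    /* the first covered offset follows from the two fields */
    return blk->olastbyte - blk->datalen + 1;
}

int
main(void)
{
    DemoInvBlock blk = {1023, 512};     /* bytes 512..1023 of the object */

    printf("block covers [%d, %d]\n", demo_firstbyte(&blk), blk.olastbyte);
    return 0;
}

/* end of sketch -- the patched source resumes below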
+ */ + + heap_create(objname, + objname, + (int) archchar, smgr, + tupdesc); + + /* make the relation visible in this transaction */ + CommandCounterIncrement(); + r = heap_openr(objname); + + if (!RelationIsValid(r)) + { + elog(WARN, "cannot create large object on %s under inversion", + smgrout(smgr)); + } + + /* + * Now create a btree index on the relation's olastbyte attribute to + * make seeks go faster. The hardwired constants are embarassing to + * me, and are symptomatic of the pressure under which this code was + * written. + * + * ok, mao, let's put in some symbolic constants - jolly + */ + + attNums[0] = 1; + classObjectId[0] = INT4_OPS_OID; + index_create(objname, indname, NULL, NULL, BTREE_AM_OID, + 1, &attNums[0], &classObjectId[0], + 0, (Datum) NULL, NULL, FALSE, FALSE); + + /* make the index visible in this transaction */ + CommandCounterIncrement(); + indr = index_openr(indname); + + if (!RelationIsValid(indr)) + { + elog(WARN, "cannot create index for large obj on %s under inversion", + smgrout(smgr)); + } + + retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); + + retval->heap_r = r; + retval->index_r = indr; + retval->iscan = (IndexScanDesc) NULL; + retval->hdesc = RelationGetTupleDescriptor(r); + retval->idesc = RelationGetTupleDescriptor(indr); + retval->offset = retval->lowbyte = + retval->highbyte = 0; + ItemPointerSetInvalid(&(retval->htid)); + + if (flags & INV_WRITE) + { + RelationSetLockForWrite(r); + retval->flags = IFS_WRLOCK | IFS_RDLOCK; + } + else if (flags & INV_READ) + { + RelationSetLockForRead(r); + retval->flags = IFS_RDLOCK; + } + retval->flags |= IFS_ATEOF; + + return (retval); } LargeObjectDesc * inv_open(Oid lobjId, int flags) { - LargeObjectDesc *retval; - Relation r; - char *indname; - Relation indrel; - - r = heap_open(lobjId); - - if (!RelationIsValid(r)) - return ((LargeObjectDesc *) NULL); - - indname = pstrdup((r->rd_rel->relname).data); - - /* - * hack hack hack... we know that the fourth character of the relation - * name is a 'v', and that the fourth character of the index name is an - * 'x', and that they're otherwise identical. - */ - indname[3] = 'x'; - indrel = index_openr(indname); - - if (!RelationIsValid(indrel)) - return ((LargeObjectDesc *) NULL); - - retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); - - retval->heap_r = r; - retval->index_r = indrel; - retval->iscan = (IndexScanDesc) NULL; - retval->hdesc = RelationGetTupleDescriptor(r); - retval->idesc = RelationGetTupleDescriptor(indrel); - retval->offset = retval->lowbyte = retval->highbyte = 0; - ItemPointerSetInvalid(&(retval->htid)); - - if (flags & INV_WRITE) { - RelationSetLockForWrite(r); - retval->flags = IFS_WRLOCK|IFS_RDLOCK; - } else if (flags & INV_READ) { - RelationSetLockForRead(r); - retval->flags = IFS_RDLOCK; - } - - return(retval); + LargeObjectDesc *retval; + Relation r; + char *indname; + Relation indrel; + + r = heap_open(lobjId); + + if (!RelationIsValid(r)) + return ((LargeObjectDesc *) NULL); + + indname = pstrdup((r->rd_rel->relname).data); + + /* + * hack hack hack... we know that the fourth character of the + * relation name is a 'v', and that the fourth character of the index + * name is an 'x', and that they're otherwise identical. 
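*/

/*
 * A short sketch (not from the patch) of the naming trick used here:
 * the heap relation is "xinv<oid>" and the index "xinx<oid>", so either
 * name can be derived from the other by flipping the fourth character.
 * The NAMEDATALEN value below is illustrative only.
 */
#include <stdio.h>
#include <string.h>

#define NAMEDATALEN 32

static void
demo_index_name(const char *heapname, char *indname)
{
    strncpy(indname, heapname, NAMEDATALEN - 1);
    indname[NAMEDATALEN - 1] = '\0';
    indname[3] = 'x';                   /* the 'v' -> 'x' flip */
}

int
main(void)
{
    char        heapname[NAMEDATALEN];
    char        indname[NAMEDATALEN];

    sprintf(heapname, "xinv%d", 170144);        /* as inv_create() does */
    demo_index_name(heapname, indname);
    printf("heap: %s  index: %s\n", heapname, indname);
    return 0;
}

/* end of sketch -- the patched source resumes below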
+ */ + indname[3] = 'x'; + indrel = index_openr(indname); + + if (!RelationIsValid(indrel)) + return ((LargeObjectDesc *) NULL); + + retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); + + retval->heap_r = r; + retval->index_r = indrel; + retval->iscan = (IndexScanDesc) NULL; + retval->hdesc = RelationGetTupleDescriptor(r); + retval->idesc = RelationGetTupleDescriptor(indrel); + retval->offset = retval->lowbyte = retval->highbyte = 0; + ItemPointerSetInvalid(&(retval->htid)); + + if (flags & INV_WRITE) + { + RelationSetLockForWrite(r); + retval->flags = IFS_WRLOCK | IFS_RDLOCK; + } + else if (flags & INV_READ) + { + RelationSetLockForRead(r); + retval->flags = IFS_RDLOCK; + } + + return (retval); } /* * Closes an existing large object descriptor. */ void -inv_close(LargeObjectDesc *obj_desc) +inv_close(LargeObjectDesc * obj_desc) { - Assert(PointerIsValid(obj_desc)); + Assert(PointerIsValid(obj_desc)); - if (obj_desc->iscan != (IndexScanDesc) NULL) - index_endscan(obj_desc->iscan); + if (obj_desc->iscan != (IndexScanDesc) NULL) + index_endscan(obj_desc->iscan); - heap_close(obj_desc->heap_r); - index_close(obj_desc->index_r); + heap_close(obj_desc->heap_r); + index_close(obj_desc->index_r); - pfree(obj_desc); + pfree(obj_desc); } /* @@ -273,897 +288,941 @@ inv_close(LargeObjectDesc *obj_desc) int inv_destroy(Oid lobjId) { - Relation r; + Relation r; - r = (Relation) RelationIdGetRelation(lobjId); - if (!RelationIsValid(r) || r->rd_rel->relkind == RELKIND_INDEX) - return -1; + r = (Relation) RelationIdGetRelation(lobjId); + if (!RelationIsValid(r) || r->rd_rel->relkind == RELKIND_INDEX) + return -1; - heap_destroy(r->rd_rel->relname.data); - return 1; + heap_destroy(r->rd_rel->relname.data); + return 1; } /* - * inv_stat() -- do a stat on an inversion file. + * inv_stat() -- do a stat on an inversion file. * - * For the time being, this is an insanely expensive operation. In - * order to find the size of the file, we seek to the last block in - * it and compute the size from that. We scan pg_class to determine - * the file's owner and create time. We don't maintain mod time or - * access time, yet. + * For the time being, this is an insanely expensive operation. In + * order to find the size of the file, we seek to the last block in + * it and compute the size from that. We scan pg_class to determine + * the file's owner and create time. We don't maintain mod time or + * access time, yet. * - * These fields aren't stored in a table anywhere because they're - * updated so frequently, and postgres only appends tuples at the - * end of relations. Once clustering works, we should fix this. + * These fields aren't stored in a table anywhere because they're + * updated so frequently, and postgres only appends tuples at the + * end of relations. Once clustering works, we should fix this. 
*/ #ifdef NOT_USED int -inv_stat(LargeObjectDesc *obj_desc, struct pgstat *stbuf) +inv_stat(LargeObjectDesc * obj_desc, struct pgstat * stbuf) { - Assert(PointerIsValid(obj_desc)); - Assert(stbuf != NULL); - - /* need read lock for stat */ - if (!(obj_desc->flags & IFS_RDLOCK)) { - RelationSetLockForRead(obj_desc->heap_r); - obj_desc->flags |= IFS_RDLOCK; - } + Assert(PointerIsValid(obj_desc)); + Assert(stbuf != NULL); + + /* need read lock for stat */ + if (!(obj_desc->flags & IFS_RDLOCK)) + { + RelationSetLockForRead(obj_desc->heap_r); + obj_desc->flags |= IFS_RDLOCK; + } - stbuf->st_ino = obj_desc->heap_r->rd_id; + stbuf->st_ino = obj_desc->heap_r->rd_id; #if 1 - stbuf->st_mode = (S_IFREG | 0666); /* IFREG|rw-rw-rw- */ + stbuf->st_mode = (S_IFREG | 0666); /* IFREG|rw-rw-rw- */ #else - stbuf->st_mode = 100666; /* IFREG|rw-rw-rw- */ + stbuf->st_mode = 100666; /* IFREG|rw-rw-rw- */ #endif - stbuf->st_size = _inv_getsize(obj_desc->heap_r, - obj_desc->hdesc, - obj_desc->index_r); + stbuf->st_size = _inv_getsize(obj_desc->heap_r, + obj_desc->hdesc, + obj_desc->index_r); - stbuf->st_uid = obj_desc->heap_r->rd_rel->relowner; + stbuf->st_uid = obj_desc->heap_r->rd_rel->relowner; - /* we have no good way of computing access times right now */ - stbuf->st_atime_s = stbuf->st_mtime_s = stbuf->st_ctime_s = 0; + /* we have no good way of computing access times right now */ + stbuf->st_atime_s = stbuf->st_mtime_s = stbuf->st_ctime_s = 0; - return (0); + return (0); } + #endif int -inv_seek(LargeObjectDesc *obj_desc, int offset, int whence) +inv_seek(LargeObjectDesc * obj_desc, int offset, int whence) { - int oldOffset; - Datum d; - ScanKeyData skey; - - Assert(PointerIsValid(obj_desc)); - - if (whence == SEEK_CUR) { - offset += obj_desc->offset; /* calculate absolute position */ - return (inv_seek(obj_desc, offset, SEEK_SET)); - } - - /* - * if you seek past the end (offset > 0) I have - * no clue what happens :-( B.L. 9/1/93 - */ - if (whence == SEEK_END) { - /* need read lock for getsize */ - if (!(obj_desc->flags & IFS_RDLOCK)) { - RelationSetLockForRead(obj_desc->heap_r); - obj_desc->flags |= IFS_RDLOCK; - } - offset += _inv_getsize(obj_desc->heap_r, - obj_desc->hdesc, - obj_desc->index_r ); - return (inv_seek(obj_desc, offset, SEEK_SET)); - } - - /* - * Whenever we do a seek, we turn off the EOF flag bit to force - * ourselves to check for real on the next read. - */ - - obj_desc->flags &= ~IFS_ATEOF; - oldOffset = obj_desc->offset; - obj_desc->offset = offset; - - /* try to avoid doing any work, if we can manage it */ - if (offset >= obj_desc->lowbyte - && offset <= obj_desc->highbyte - && oldOffset <= obj_desc->highbyte - && obj_desc->iscan != (IndexScanDesc) NULL) - return (offset); - - /* - * To do a seek on an inversion file, we start an index scan that - * will bring us to the right place. Each tuple in an inversion file - * stores the offset of the last byte that appears on it, and we have - * an index on this. 
- */ - - - /* right now, just assume that the operation is SEEK_SET */ - if (obj_desc->iscan != (IndexScanDesc) NULL) { - d = Int32GetDatum(offset); - btmovescan(obj_desc->iscan, d); - } else { - - ScanKeyEntryInitialize(&skey, 0x0, 1, INT4GE_PROC_OID, - Int32GetDatum(offset)); - - obj_desc->iscan = index_beginscan(obj_desc->index_r, - (bool) 0, (uint16) 1, - &skey); - } - - return (offset); -} + int oldOffset; + Datum d; + ScanKeyData skey; -int -inv_tell(LargeObjectDesc *obj_desc) -{ - Assert(PointerIsValid(obj_desc)); + Assert(PointerIsValid(obj_desc)); - return (obj_desc->offset); -} + if (whence == SEEK_CUR) + { + offset += obj_desc->offset; /* calculate absolute position */ + return (inv_seek(obj_desc, offset, SEEK_SET)); + } -int -inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes) -{ - HeapTuple htup; - Buffer b; - int nread; - int off; - int ncopy; - Datum d; - struct varlena *fsblock; - bool isNull; - - Assert(PointerIsValid(obj_desc)); - Assert(buf != NULL); - - /* if we're already at EOF, we don't need to do any work here */ - if (obj_desc->flags & IFS_ATEOF) - return (0); + /* + * if you seek past the end (offset > 0) I have no clue what happens + * :-( B.L. 9/1/93 + */ + if (whence == SEEK_END) + { + /* need read lock for getsize */ + if (!(obj_desc->flags & IFS_RDLOCK)) + { + RelationSetLockForRead(obj_desc->heap_r); + obj_desc->flags |= IFS_RDLOCK; + } + offset += _inv_getsize(obj_desc->heap_r, + obj_desc->hdesc, + obj_desc->index_r); + return (inv_seek(obj_desc, offset, SEEK_SET)); + } - /* make sure we obey two-phase locking */ - if (!(obj_desc->flags & IFS_RDLOCK)) { - RelationSetLockForRead(obj_desc->heap_r); - obj_desc->flags |= IFS_RDLOCK; - } + /* + * Whenever we do a seek, we turn off the EOF flag bit to force + * ourselves to check for real on the next read. + */ - nread = 0; + obj_desc->flags &= ~IFS_ATEOF; + oldOffset = obj_desc->offset; + obj_desc->offset = offset; - /* fetch a block at a time */ - while (nread < nbytes) { + /* try to avoid doing any work, if we can manage it */ + if (offset >= obj_desc->lowbyte + && offset <= obj_desc->highbyte + && oldOffset <= obj_desc->highbyte + && obj_desc->iscan != (IndexScanDesc) NULL) + return (offset); + + /* + * To do a seek on an inversion file, we start an index scan that will + * bring us to the right place. Each tuple in an inversion file + * stores the offset of the last byte that appears on it, and we have + * an index on this. 
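*/

/*
 * A standalone sketch (not from the patch) of how inv_seek() reduces
 * SEEK_CUR and SEEK_END to an absolute SEEK_SET position before doing
 * any real work; 'cur' is the current offset and 'size' the object size
 * that the real code obtains via _inv_getsize(). The DEMO_SEEK_*
 * constants are local stand-ins for the usual whence values.
 */
#include <stdio.h>

#define DEMO_SEEK_SET 0
#define DEMO_SEEK_CUR 1
#define DEMO_SEEK_END 2

static int
demo_absolute_offset(int whence, int offset, int cur, int size)
{
    switch (whence)
    {
        case DEMO_SEEK_CUR:
            return cur + offset;        /* relative to current position */
        case DEMO_SEEK_END:
            return size + offset;       /* relative to end of object */
        default:
            return offset;              /* already absolute */
    }
}

int
main(void)
{
    /* a 1000-byte object currently positioned at byte 200 */
    printf("%d\n", demo_absolute_offset(DEMO_SEEK_CUR, 50, 200, 1000));
    printf("%d\n", demo_absolute_offset(DEMO_SEEK_END, -10, 200, 1000));
    return 0;
}

/* end of sketch -- the patched source resumes below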
+ */ - /* fetch an inversion file system block */ - htup = inv_fetchtup(obj_desc, &b); - if (!HeapTupleIsValid(htup)) { - obj_desc->flags |= IFS_ATEOF; - break; + /* right now, just assume that the operation is SEEK_SET */ + if (obj_desc->iscan != (IndexScanDesc) NULL) + { + d = Int32GetDatum(offset); + btmovescan(obj_desc->iscan, d); } + else + { - /* copy the data from this block into the buffer */ - d = (Datum) heap_getattr(htup, b, 2, obj_desc->hdesc, &isNull); - fsblock = (struct varlena *) DatumGetPointer(d); + ScanKeyEntryInitialize(&skey, 0x0, 1, INT4GE_PROC_OID, + Int32GetDatum(offset)); - off = obj_desc->offset - obj_desc->lowbyte; - ncopy = obj_desc->highbyte - obj_desc->offset + 1; - if (ncopy > (nbytes - nread)) - ncopy = (nbytes - nread); - memmove(buf, &(fsblock->vl_dat[off]), ncopy); + obj_desc->iscan = index_beginscan(obj_desc->index_r, + (bool) 0, (uint16) 1, + &skey); + } - /* be a good citizen */ - ReleaseBuffer(b); + return (offset); +} - /* move pointers past the amount we just read */ - buf += ncopy; - nread += ncopy; - obj_desc->offset += ncopy; - } +int +inv_tell(LargeObjectDesc * obj_desc) +{ + Assert(PointerIsValid(obj_desc)); - /* that's it */ - return (nread); + return (obj_desc->offset); } int -inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes) +inv_read(LargeObjectDesc * obj_desc, char *buf, int nbytes) { - HeapTuple htup; - Buffer b; - int nwritten; - int tuplen; + HeapTuple htup; + Buffer b; + int nread; + int off; + int ncopy; + Datum d; + struct varlena *fsblock; + bool isNull; + + Assert(PointerIsValid(obj_desc)); + Assert(buf != NULL); + + /* if we're already at EOF, we don't need to do any work here */ + if (obj_desc->flags & IFS_ATEOF) + return (0); + + /* make sure we obey two-phase locking */ + if (!(obj_desc->flags & IFS_RDLOCK)) + { + RelationSetLockForRead(obj_desc->heap_r); + obj_desc->flags |= IFS_RDLOCK; + } + + nread = 0; + + /* fetch a block at a time */ + while (nread < nbytes) + { + + /* fetch an inversion file system block */ + htup = inv_fetchtup(obj_desc, &b); - Assert(PointerIsValid(obj_desc)); - Assert(buf != NULL); + if (!HeapTupleIsValid(htup)) + { + obj_desc->flags |= IFS_ATEOF; + break; + } - /* - * Make sure we obey two-phase locking. A write lock entitles you - * to read the relation, as well. - */ + /* copy the data from this block into the buffer */ + d = (Datum) heap_getattr(htup, b, 2, obj_desc->hdesc, &isNull); + fsblock = (struct varlena *) DatumGetPointer(d); - if (!(obj_desc->flags & IFS_WRLOCK)) { - RelationSetLockForRead(obj_desc->heap_r); - obj_desc->flags |= (IFS_WRLOCK|IFS_RDLOCK); - } + off = obj_desc->offset - obj_desc->lowbyte; + ncopy = obj_desc->highbyte - obj_desc->offset + 1; + if (ncopy > (nbytes - nread)) + ncopy = (nbytes - nread); + memmove(buf, &(fsblock->vl_dat[off]), ncopy); - nwritten = 0; + /* be a good citizen */ + ReleaseBuffer(b); - /* write a block at a time */ - while (nwritten < nbytes) { + /* move pointers past the amount we just read */ + buf += ncopy; + nread += ncopy; + obj_desc->offset += ncopy; + } + + /* that's it */ + return (nread); +} + +int +inv_write(LargeObjectDesc * obj_desc, char *buf, int nbytes) +{ + HeapTuple htup; + Buffer b; + int nwritten; + int tuplen; + + Assert(PointerIsValid(obj_desc)); + Assert(buf != NULL); /* - * Fetch the current inversion file system block. If the - * class storing the inversion file is empty, we don't want - * to do an index lookup, since index lookups choke on empty - * files (should be fixed someday). 
+ * Make sure we obey two-phase locking. A write lock entitles you to + * read the relation, as well. */ - if ((obj_desc->flags & IFS_ATEOF) - || obj_desc->heap_r->rd_nblocks == 0) - htup = (HeapTuple) NULL; - else - htup = inv_fetchtup(obj_desc, &b); - - /* either append or replace a block, as required */ - if (!HeapTupleIsValid(htup)) { - tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); - } else { - if (obj_desc->offset > obj_desc->highbyte) - tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); - else - tuplen = inv_wrold(obj_desc, buf, nbytes - nwritten, htup, b); + if (!(obj_desc->flags & IFS_WRLOCK)) + { + RelationSetLockForRead(obj_desc->heap_r); + obj_desc->flags |= (IFS_WRLOCK | IFS_RDLOCK); } - /* move pointers past the amount we just wrote */ - buf += tuplen; - nwritten += tuplen; - obj_desc->offset += tuplen; - } + nwritten = 0; + + /* write a block at a time */ + while (nwritten < nbytes) + { + + /* + * Fetch the current inversion file system block. If the class + * storing the inversion file is empty, we don't want to do an + * index lookup, since index lookups choke on empty files (should + * be fixed someday). + */ + + if ((obj_desc->flags & IFS_ATEOF) + || obj_desc->heap_r->rd_nblocks == 0) + htup = (HeapTuple) NULL; + else + htup = inv_fetchtup(obj_desc, &b); + + /* either append or replace a block, as required */ + if (!HeapTupleIsValid(htup)) + { + tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); + } + else + { + if (obj_desc->offset > obj_desc->highbyte) + tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); + else + tuplen = inv_wrold(obj_desc, buf, nbytes - nwritten, htup, b); + } + + /* move pointers past the amount we just wrote */ + buf += tuplen; + nwritten += tuplen; + obj_desc->offset += tuplen; + } - /* that's it */ - return (nwritten); + /* that's it */ + return (nwritten); } /* - * inv_fetchtup -- Fetch an inversion file system block. + * inv_fetchtup -- Fetch an inversion file system block. * - * This routine finds the file system block containing the offset - * recorded in the obj_desc structure. Later, we need to think about - * the effects of non-functional updates (can you rewrite the same - * block twice in a single transaction?), but for now, we won't bother. + * This routine finds the file system block containing the offset + * recorded in the obj_desc structure. Later, we need to think about + * the effects of non-functional updates (can you rewrite the same + * block twice in a single transaction?), but for now, we won't bother. * - * Parameters: - * obj_desc -- the object descriptor. - * bufP -- pointer to a buffer in the buffer cache; caller - * must free this. + * Parameters: + * obj_desc -- the object descriptor. + * bufP -- pointer to a buffer in the buffer cache; caller + * must free this. * - * Returns: - * A heap tuple containing the desired block, or NULL if no - * such tuple exists. + * Returns: + * A heap tuple containing the desired block, or NULL if no + * such tuple exists. */ -static HeapTuple -inv_fetchtup(LargeObjectDesc *obj_desc, Buffer *bufP) +static HeapTuple +inv_fetchtup(LargeObjectDesc * obj_desc, Buffer * bufP) { - HeapTuple htup; - RetrieveIndexResult res; - Datum d; - int firstbyte, lastbyte; - struct varlena *fsblock; - bool isNull; - - /* - * If we've exhausted the current block, we need to get the next one. - * When we support time travel and non-functional updates, we will - * need to loop over the blocks, rather than just have an 'if', in - * order to find the one we're really interested in. 
- */ - - if (obj_desc->offset > obj_desc->highbyte - || obj_desc->offset < obj_desc->lowbyte - || !ItemPointerIsValid(&(obj_desc->htid))) { - - /* initialize scan key if not done */ - if (obj_desc->iscan==(IndexScanDesc)NULL) { - ScanKeyData skey; - - ScanKeyEntryInitialize(&skey, 0x0, 1, INT4GE_PROC_OID, - Int32GetDatum(0)); - obj_desc->iscan = - index_beginscan(obj_desc->index_r, - (bool) 0, (uint16) 1, - &skey); - } + HeapTuple htup; + RetrieveIndexResult res; + Datum d; + int firstbyte, + lastbyte; + struct varlena *fsblock; + bool isNull; - do { - res = index_getnext(obj_desc->iscan, ForwardScanDirection); + /* + * If we've exhausted the current block, we need to get the next one. + * When we support time travel and non-functional updates, we will + * need to loop over the blocks, rather than just have an 'if', in + * order to find the one we're really interested in. + */ + + if (obj_desc->offset > obj_desc->highbyte + || obj_desc->offset < obj_desc->lowbyte + || !ItemPointerIsValid(&(obj_desc->htid))) + { - if (res == (RetrieveIndexResult) NULL) { - ItemPointerSetInvalid(&(obj_desc->htid)); - return ((HeapTuple) NULL); - } + /* initialize scan key if not done */ + if (obj_desc->iscan == (IndexScanDesc) NULL) + { + ScanKeyData skey; - /* - * For time travel, we need to use the actual time qual here, - * rather that NowTimeQual. We currently have no way to pass - * a time qual in. - */ + ScanKeyEntryInitialize(&skey, 0x0, 1, INT4GE_PROC_OID, + Int32GetDatum(0)); + obj_desc->iscan = + index_beginscan(obj_desc->index_r, + (bool) 0, (uint16) 1, + &skey); + } - htup = heap_fetch(obj_desc->heap_r, NowTimeQual, - &(res->heap_iptr), bufP); + do + { + res = index_getnext(obj_desc->iscan, ForwardScanDirection); - } while (htup == (HeapTuple) NULL); + if (res == (RetrieveIndexResult) NULL) + { + ItemPointerSetInvalid(&(obj_desc->htid)); + return ((HeapTuple) NULL); + } - /* remember this tid -- we may need it for later reads/writes */ - ItemPointerCopy(&(res->heap_iptr), &(obj_desc->htid)); + /* + * For time travel, we need to use the actual time qual here, + * rather that NowTimeQual. We currently have no way to pass + * a time qual in. + */ - } else { - htup = heap_fetch(obj_desc->heap_r, NowTimeQual, - &(obj_desc->htid), bufP); - } + htup = heap_fetch(obj_desc->heap_r, NowTimeQual, + &(res->heap_iptr), bufP); - /* - * By here, we have the heap tuple we're interested in. We cache - * the upper and lower bounds for this block in the object descriptor - * and return the tuple. - */ + } while (htup == (HeapTuple) NULL); + + /* remember this tid -- we may need it for later reads/writes */ + ItemPointerCopy(&(res->heap_iptr), &(obj_desc->htid)); + + } + else + { + htup = heap_fetch(obj_desc->heap_r, NowTimeQual, + &(obj_desc->htid), bufP); + } + + /* + * By here, we have the heap tuple we're interested in. We cache the + * upper and lower bounds for this block in the object descriptor and + * return the tuple. 
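*/

/*
 * A small sketch (not from the patch) of the guard inv_fetchtup() uses
 * above: the descriptor caches the byte range [lowbyte, highbyte] of the
 * last block fetched, and only when the current offset falls outside it
 * (or no heap TID is cached) does the index need to be consulted again.
 * DemoDesc is a hypothetical stand-in for the LargeObjectDesc fields
 * involved.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct DemoDesc
{
    int         lowbyte;                /* first byte of cached block */
    int         highbyte;               /* last byte of cached block */
    bool        htid_valid;             /* cached heap TID usable? */
    int         offset;                 /* current logical position */
} DemoDesc;

static bool
demo_need_index_fetch(const DemoDesc * d)
{
    return d->offset > d->highbyte
        || d->offset < d->lowbyte
        || !d->htid_valid;
}

int
main(void)
{
    DemoDesc    d = {512, 1023, true, 700};

    printf("fetch via index? %s\n", demo_need_index_fetch(&d) ? "yes" : "no");
    d.offset = 2048;                    /* past the cached block */
    printf("fetch via index? %s\n", demo_need_index_fetch(&d) ? "yes" : "no");
    return 0;
}

/* end of sketch -- the patched source resumes below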
+ */ - d = (Datum)heap_getattr(htup, *bufP, 1, obj_desc->hdesc, &isNull); - lastbyte = (int32) DatumGetInt32(d); - d = (Datum)heap_getattr(htup, *bufP, 2, obj_desc->hdesc, &isNull); - fsblock = (struct varlena *) DatumGetPointer(d); + d = (Datum) heap_getattr(htup, *bufP, 1, obj_desc->hdesc, &isNull); + lastbyte = (int32) DatumGetInt32(d); + d = (Datum) heap_getattr(htup, *bufP, 2, obj_desc->hdesc, &isNull); + fsblock = (struct varlena *) DatumGetPointer(d); - /* order of + and - is important -- these are unsigned quantites near 0 */ - firstbyte = (lastbyte + 1 + sizeof(fsblock->vl_len)) - fsblock->vl_len; + /* + * order of + and - is important -- these are unsigned quantites near + * 0 + */ + firstbyte = (lastbyte + 1 + sizeof(fsblock->vl_len)) - fsblock->vl_len; - obj_desc->lowbyte = firstbyte; - obj_desc->highbyte = lastbyte; + obj_desc->lowbyte = firstbyte; + obj_desc->highbyte = lastbyte; - /* done */ - return (htup); + /* done */ + return (htup); } /* - * inv_wrnew() -- append a new filesystem block tuple to the inversion - * file. + * inv_wrnew() -- append a new filesystem block tuple to the inversion + * file. * - * In response to an inv_write, we append one or more file system - * blocks to the class containing the large object. We violate the - * class abstraction here in order to pack things as densely as we - * are able. We examine the last page in the relation, and write - * just enough to fill it, assuming that it has above a certain - * threshold of space available. If the space available is less than - * the threshold, we allocate a new page by writing a big tuple. + * In response to an inv_write, we append one or more file system + * blocks to the class containing the large object. We violate the + * class abstraction here in order to pack things as densely as we + * are able. We examine the last page in the relation, and write + * just enough to fill it, assuming that it has above a certain + * threshold of space available. If the space available is less than + * the threshold, we allocate a new page by writing a big tuple. * - * By the time we get here, we know all the parameters passed in - * are valid, and that we hold the appropriate lock on the heap - * relation. + * By the time we get here, we know all the parameters passed in + * are valid, and that we hold the appropriate lock on the heap + * relation. * - * Parameters: - * obj_desc: large object descriptor for which to append block. - * buf: buffer containing data to write. - * nbytes: amount to write + * Parameters: + * obj_desc: large object descriptor for which to append block. + * buf: buffer containing data to write. + * nbytes: amount to write * - * Returns: - * number of bytes actually written to the new tuple. + * Returns: + * number of bytes actually written to the new tuple. */ static int -inv_wrnew(LargeObjectDesc *obj_desc, char *buf, int nbytes) +inv_wrnew(LargeObjectDesc * obj_desc, char *buf, int nbytes) { - Relation hr; - HeapTuple ntup; - Buffer buffer; - Page page; - int nblocks; - int nwritten; - - hr = obj_desc->heap_r; - - /* - * Get the last block in the relation. If there's no data in the - * relation at all, then we just get a new block. Otherwise, we - * check the last block to see whether it has room to accept some - * or all of the data that the user wants to write. If it doesn't, - * then we allocate a new block. 
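*/

/*
 * A standalone sketch (not from the patch) of the write-size policy
 * described above: fill the last page when it still has a useful amount
 * of room, otherwise start a fresh page and write at most one maximal
 * block. 'freespc' plays the role of IFREESPC(page); the DEMO_*
 * constants copy the IMINBLK/IMAXBLK values from this file.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_IMINBLK 512
#define DEMO_IMAXBLK 8092

static int
demo_write_size(int freespc, int nbytes, bool * new_page)
{
    *new_page = false;
    if (freespc >= nbytes)
        return nbytes;                  /* everything fits here */
    if (freespc >= DEMO_IMINBLK)
        return freespc;                 /* fill the page; caller loops */
    *new_page = true;                   /* too little room to bother */
    return nbytes > DEMO_IMAXBLK ? DEMO_IMAXBLK : nbytes;
}

int
main(void)
{
    bool        fresh;
    int         n = demo_write_size(300, 4000, &fresh);

    printf("write %d bytes on a %s page\n", n,
           fresh ? "new" : "partly full");
    return 0;
}

/* end of sketch -- the patched source resumes below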
- */ - - nblocks = RelationGetNumberOfBlocks(hr); - - if (nblocks > 0) - buffer = ReadBuffer(hr, nblocks - 1); - else - buffer = ReadBuffer(hr, P_NEW); - - page = BufferGetPage(buffer); - - /* - * If the last page is too small to hold all the data, and it's too - * small to hold IMINBLK, then we allocate a new page. If it will - * hold at least IMINBLK, but less than all the data requested, then - * we write IMINBLK here. The caller is responsible for noticing that - * less than the requested number of bytes were written, and calling - * this routine again. - */ - - nwritten = IFREESPC(page); - if (nwritten < nbytes) { - if (nwritten < IMINBLK) { - ReleaseBuffer(buffer); - buffer = ReadBuffer(hr, P_NEW); - page = BufferGetPage(buffer); - PageInit(page, BufferGetPageSize(buffer), 0); - if (nbytes > IMAXBLK) - nwritten = IMAXBLK; - else + Relation hr; + HeapTuple ntup; + Buffer buffer; + Page page; + int nblocks; + int nwritten; + + hr = obj_desc->heap_r; + + /* + * Get the last block in the relation. If there's no data in the + * relation at all, then we just get a new block. Otherwise, we check + * the last block to see whether it has room to accept some or all of + * the data that the user wants to write. If it doesn't, then we + * allocate a new block. + */ + + nblocks = RelationGetNumberOfBlocks(hr); + + if (nblocks > 0) + buffer = ReadBuffer(hr, nblocks - 1); + else + buffer = ReadBuffer(hr, P_NEW); + + page = BufferGetPage(buffer); + + /* + * If the last page is too small to hold all the data, and it's too + * small to hold IMINBLK, then we allocate a new page. If it will + * hold at least IMINBLK, but less than all the data requested, then + * we write IMINBLK here. The caller is responsible for noticing that + * less than the requested number of bytes were written, and calling + * this routine again. + */ + + nwritten = IFREESPC(page); + if (nwritten < nbytes) + { + if (nwritten < IMINBLK) + { + ReleaseBuffer(buffer); + buffer = ReadBuffer(hr, P_NEW); + page = BufferGetPage(buffer); + PageInit(page, BufferGetPageSize(buffer), 0); + if (nbytes > IMAXBLK) + nwritten = IMAXBLK; + else + nwritten = nbytes; + } + } + else + { nwritten = nbytes; } - } else { - nwritten = nbytes; - } - /* - * Insert a new file system block tuple, index it, and write it out. - */ + /* + * Insert a new file system block tuple, index it, and write it out. + */ - ntup = inv_newtuple(obj_desc, buffer, page, buf, nwritten); - inv_indextup(obj_desc, ntup); + ntup = inv_newtuple(obj_desc, buffer, page, buf, nwritten); + inv_indextup(obj_desc, ntup); - /* new tuple is inserted */ - WriteBuffer(buffer); + /* new tuple is inserted */ + WriteBuffer(buffer); - return (nwritten); + return (nwritten); } static int -inv_wrold(LargeObjectDesc *obj_desc, - char *dbuf, - int nbytes, - HeapTuple htup, - Buffer buffer) +inv_wrold(LargeObjectDesc * obj_desc, + char *dbuf, + int nbytes, + HeapTuple htup, + Buffer buffer) { - Relation hr; - HeapTuple ntup; - Buffer newbuf; - Page page; - Page newpage; - int tupbytes; - Datum d; - struct varlena *fsblock; - int nwritten, nblocks, freespc; - bool isNull; - int keep_offset; - - /* - * Since we're using a no-overwrite storage manager, the way we - * overwrite blocks is to mark the old block invalid and append - * a new block. First mark the old block invalid. This violates - * the tuple abstraction. - */ - - TransactionIdStore(GetCurrentTransactionId(), &(htup->t_xmax)); - htup->t_cmax = GetCurrentCommandId(); - - /* - * If we're overwriting the entire block, we're lucky. 
All we need - * to do is to insert a new block. - */ - - if (obj_desc->offset == obj_desc->lowbyte - && obj_desc->lowbyte + nbytes >= obj_desc->highbyte) { - WriteBuffer(buffer); - return (inv_wrnew(obj_desc, dbuf, nbytes)); - } - - /* - * By here, we need to overwrite part of the data in the current - * tuple. In order to reduce the degree to which we fragment blocks, - * we guarantee that no block will be broken up due to an overwrite. - * This means that we need to allocate a tuple on a new page, if - * there's not room for the replacement on this one. - */ - - newbuf = buffer; - page = BufferGetPage(buffer); - newpage = BufferGetPage(newbuf); - hr = obj_desc->heap_r; - freespc = IFREESPC(page); - d = (Datum)heap_getattr(htup, buffer, 2, obj_desc->hdesc, &isNull); - fsblock = (struct varlena *) DatumGetPointer(d); - tupbytes = fsblock->vl_len - sizeof(fsblock->vl_len); - - if (freespc < tupbytes) { + Relation hr; + HeapTuple ntup; + Buffer newbuf; + Page page; + Page newpage; + int tupbytes; + Datum d; + struct varlena *fsblock; + int nwritten, + nblocks, + freespc; + bool isNull; + int keep_offset; /* - * First see if there's enough space on the last page of the - * table to put this tuple. + * Since we're using a no-overwrite storage manager, the way we + * overwrite blocks is to mark the old block invalid and append a new + * block. First mark the old block invalid. This violates the tuple + * abstraction. */ - nblocks = RelationGetNumberOfBlocks(hr); + TransactionIdStore(GetCurrentTransactionId(), &(htup->t_xmax)); + htup->t_cmax = GetCurrentCommandId(); - if (nblocks > 0) - newbuf = ReadBuffer(hr, nblocks - 1); - else - newbuf = ReadBuffer(hr, P_NEW); + /* + * If we're overwriting the entire block, we're lucky. All we need to + * do is to insert a new block. + */ - newpage = BufferGetPage(newbuf); - freespc = IFREESPC(newpage); + if (obj_desc->offset == obj_desc->lowbyte + && obj_desc->lowbyte + nbytes >= obj_desc->highbyte) + { + WriteBuffer(buffer); + return (inv_wrnew(obj_desc, dbuf, nbytes)); + } /* - * If there's no room on the last page, allocate a new last - * page for the table, and put it there. + * By here, we need to overwrite part of the data in the current + * tuple. In order to reduce the degree to which we fragment blocks, + * we guarantee that no block will be broken up due to an overwrite. + * This means that we need to allocate a tuple on a new page, if + * there's not room for the replacement on this one. */ - if (freespc < tupbytes) { - ReleaseBuffer(newbuf); - newbuf = ReadBuffer(hr, P_NEW); - newpage = BufferGetPage(newbuf); - PageInit(newpage, BufferGetPageSize(newbuf), 0); + newbuf = buffer; + page = BufferGetPage(buffer); + newpage = BufferGetPage(newbuf); + hr = obj_desc->heap_r; + freespc = IFREESPC(page); + d = (Datum) heap_getattr(htup, buffer, 2, obj_desc->hdesc, &isNull); + fsblock = (struct varlena *) DatumGetPointer(d); + tupbytes = fsblock->vl_len - sizeof(fsblock->vl_len); + + if (freespc < tupbytes) + { + + /* + * First see if there's enough space on the last page of the table + * to put this tuple. + */ + + nblocks = RelationGetNumberOfBlocks(hr); + + if (nblocks > 0) + newbuf = ReadBuffer(hr, nblocks - 1); + else + newbuf = ReadBuffer(hr, P_NEW); + + newpage = BufferGetPage(newbuf); + freespc = IFREESPC(newpage); + + /* + * If there's no room on the last page, allocate a new last page + * for the table, and put it there. 
+ */ + + if (freespc < tupbytes) + { + ReleaseBuffer(newbuf); + newbuf = ReadBuffer(hr, P_NEW); + newpage = BufferGetPage(newbuf); + PageInit(newpage, BufferGetPageSize(newbuf), 0); + } } - } - - nwritten = nbytes; - if (nwritten > obj_desc->highbyte - obj_desc->offset + 1) - nwritten = obj_desc->highbyte - obj_desc->offset + 1; - memmove(VARDATA(fsblock)+ (obj_desc->offset - obj_desc->lowbyte), - dbuf,nwritten); - /* we are rewriting the entire old block, therefore - we reset offset to the lowbyte of the original block - before jumping into inv_newtuple() */ - keep_offset = obj_desc->offset; - obj_desc->offset = obj_desc->lowbyte; - ntup = inv_newtuple(obj_desc, newbuf, newpage, VARDATA(fsblock), - tupbytes); - /* after we are done, we restore to the true offset */ - obj_desc->offset = keep_offset; - - /* - * By here, we have a page (newpage) that's guaranteed to have - * enough space on it to put the new tuple. Call inv_newtuple - * to do the work. Passing NULL as a buffer to inv_newtuple() - * keeps it from copying any data into the new tuple. When it - * returns, the tuple is ready to receive data from the old - * tuple and the user's data buffer. - */ -/* - ntup = inv_newtuple(obj_desc, newbuf, newpage, (char *) NULL, tupbytes); - dptr = ((char *) ntup) + ntup->t_hoff - sizeof(ntup->t_bits) + sizeof(int4) - + sizeof(fsblock->vl_len); - if (obj_desc->offset > obj_desc->lowbyte) { - memmove(dptr, - &(fsblock->vl_dat[0]), - obj_desc->offset - obj_desc->lowbyte); - dptr += obj_desc->offset - obj_desc->lowbyte; - } + nwritten = nbytes; + if (nwritten > obj_desc->highbyte - obj_desc->offset + 1) + nwritten = obj_desc->highbyte - obj_desc->offset + 1; + memmove(VARDATA(fsblock) + (obj_desc->offset - obj_desc->lowbyte), + dbuf, nwritten); + + /* + * we are rewriting the entire old block, therefore we reset offset to + * the lowbyte of the original block before jumping into + * inv_newtuple() + */ + keep_offset = obj_desc->offset; + obj_desc->offset = obj_desc->lowbyte; + ntup = inv_newtuple(obj_desc, newbuf, newpage, VARDATA(fsblock), + tupbytes); + /* after we are done, we restore to the true offset */ + obj_desc->offset = keep_offset; + + /* + * By here, we have a page (newpage) that's guaranteed to have enough + * space on it to put the new tuple. Call inv_newtuple to do the + * work. Passing NULL as a buffer to inv_newtuple() keeps it from + * copying any data into the new tuple. When it returns, the tuple is + * ready to receive data from the old tuple and the user's data + * buffer. 
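*/

/*
 * A small sketch (not from the patch) of the merge that inv_wrold()
 * performs above with memmove() into VARDATA(fsblock): the caller's
 * bytes replace the middle of the old block image, the untouched prefix
 * and suffix survive, and the whole image is then re-stored as a fresh
 * tuple (no-overwrite storage). demo_overwrite() is a hypothetical
 * stand-in; [low, high] is the block's byte range.
 */
#include <stdio.h>
#include <string.h>

static int
demo_overwrite(char *block, int low, int high,
               int offset, const char *dbuf, int nbytes)
{
    int         n = nbytes;

    if (n > high - offset + 1)          /* clip at the block boundary */
        n = high - offset + 1;
    memmove(block + (offset - low), dbuf, n);
    return n;                           /* caller advances and loops */
}

int
main(void)
{
    char        block[16];

    memcpy(block, "aaaaaaaaaaaaaaa", 16);       /* block bytes 0..14 */
    demo_overwrite(block, 0, 14, 5, "BBBB", 4);
    printf("%s\n", block);              /* prints aaaaaBBBBaaaaaa */
    return 0;
}

/* end of sketch -- the patched source resumes below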
+ */ +/* + ntup = inv_newtuple(obj_desc, newbuf, newpage, (char *) NULL, tupbytes); + dptr = ((char *) ntup) + ntup->t_hoff - sizeof(ntup->t_bits) + sizeof(int4) + + sizeof(fsblock->vl_len); + + if (obj_desc->offset > obj_desc->lowbyte) { + memmove(dptr, + &(fsblock->vl_dat[0]), + obj_desc->offset - obj_desc->lowbyte); + dptr += obj_desc->offset - obj_desc->lowbyte; + } - nwritten = nbytes; - if (nwritten > obj_desc->highbyte - obj_desc->offset + 1) - nwritten = obj_desc->highbyte - obj_desc->offset + 1; + nwritten = nbytes; + if (nwritten > obj_desc->highbyte - obj_desc->offset + 1) + nwritten = obj_desc->highbyte - obj_desc->offset + 1; - memmove(dptr, dbuf, nwritten); - dptr += nwritten; + memmove(dptr, dbuf, nwritten); + dptr += nwritten; - if (obj_desc->offset + nwritten < obj_desc->highbyte + 1) { + if (obj_desc->offset + nwritten < obj_desc->highbyte + 1) { */ /* - loc = (obj_desc->highbyte - obj_desc->offset) - + nwritten; - sz = obj_desc->highbyte - (obj_desc->lowbyte + loc); + loc = (obj_desc->highbyte - obj_desc->offset) + + nwritten; + sz = obj_desc->highbyte - (obj_desc->lowbyte + loc); - what's going on here?? - jolly + what's going on here?? - jolly */ /* - sz = (obj_desc->highbyte + 1) - (obj_desc->offset + nwritten); - memmove(&(fsblock->vl_dat[0]), dptr, sz); - } + sz = (obj_desc->highbyte + 1) - (obj_desc->offset + nwritten); + memmove(&(fsblock->vl_dat[0]), dptr, sz); + } */ - /* index the new tuple */ - inv_indextup(obj_desc, ntup); + /* index the new tuple */ + inv_indextup(obj_desc, ntup); - /* move the scandesc forward so we don't reread the newly inserted - tuple on the next index scan */ - if (obj_desc->iscan) - index_getnext(obj_desc->iscan, ForwardScanDirection); + /* + * move the scandesc forward so we don't reread the newly inserted + * tuple on the next index scan + */ + if (obj_desc->iscan) + index_getnext(obj_desc->iscan, ForwardScanDirection); - /* - * Okay, by here, a tuple for the new block is correctly placed, - * indexed, and filled. Write the changed pages out. - */ + /* + * Okay, by here, a tuple for the new block is correctly placed, + * indexed, and filled. Write the changed pages out. + */ - WriteBuffer(buffer); - if (newbuf != buffer) - WriteBuffer(newbuf); + WriteBuffer(buffer); + if (newbuf != buffer) + WriteBuffer(newbuf); - /* done */ - return (nwritten); + /* done */ + return (nwritten); } -static HeapTuple -inv_newtuple(LargeObjectDesc *obj_desc, - Buffer buffer, - Page page, - char *dbuf, - int nwrite) +static HeapTuple +inv_newtuple(LargeObjectDesc * obj_desc, + Buffer buffer, + Page page, + char *dbuf, + int nwrite) { - HeapTuple ntup; - PageHeader ph; - int tupsize; - int hoff; - Offset lower; - Offset upper; - ItemId itemId; - OffsetNumber off; - OffsetNumber limit; - char *attptr; - - /* compute tuple size -- no nulls */ - hoff = sizeof(HeapTupleData) - sizeof(ntup->t_bits); - - /* add in olastbyte, varlena.vl_len, varlena.vl_dat */ - tupsize = hoff + (2 * sizeof(int32)) + nwrite; - tupsize = LONGALIGN(tupsize); - - /* - * Allocate the tuple on the page, violating the page abstraction. - * This code was swiped from PageAddItem(). 
- */ - - ph = (PageHeader) page; - limit = OffsetNumberNext(PageGetMaxOffsetNumber(page)); - - /* look for "recyclable" (unused & deallocated) ItemId */ - for (off = FirstOffsetNumber; off < limit; off = OffsetNumberNext(off)) { + HeapTuple ntup; + PageHeader ph; + int tupsize; + int hoff; + Offset lower; + Offset upper; + ItemId itemId; + OffsetNumber off; + OffsetNumber limit; + char *attptr; + + /* compute tuple size -- no nulls */ + hoff = sizeof(HeapTupleData) - sizeof(ntup->t_bits); + + /* add in olastbyte, varlena.vl_len, varlena.vl_dat */ + tupsize = hoff + (2 * sizeof(int32)) + nwrite; + tupsize = LONGALIGN(tupsize); + + /* + * Allocate the tuple on the page, violating the page abstraction. + * This code was swiped from PageAddItem(). + */ + + ph = (PageHeader) page; + limit = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + + /* look for "recyclable" (unused & deallocated) ItemId */ + for (off = FirstOffsetNumber; off < limit; off = OffsetNumberNext(off)) + { + itemId = &ph->pd_linp[off - 1]; + if ((((*itemId).lp_flags & LP_USED) == 0) && + ((*itemId).lp_len == 0)) + break; + } + + if (off > limit) + lower = (Offset) (((char *) (&ph->pd_linp[off])) - ((char *) page)); + else if (off == limit) + lower = ph->pd_lower + sizeof(ItemIdData); + else + lower = ph->pd_lower; + + upper = ph->pd_upper - tupsize; + itemId = &ph->pd_linp[off - 1]; - if ((((*itemId).lp_flags & LP_USED) == 0) && - ((*itemId).lp_len == 0)) - break; - } - - if (off > limit) - lower = (Offset) (((char *) (&ph->pd_linp[off])) - ((char *) page)); - else if (off == limit) - lower = ph->pd_lower + sizeof (ItemIdData); - else - lower = ph->pd_lower; - - upper = ph->pd_upper - tupsize; - - itemId = &ph->pd_linp[off - 1]; - (*itemId).lp_off = upper; - (*itemId).lp_len = tupsize; - (*itemId).lp_flags = LP_USED; - ph->pd_lower = lower; - ph->pd_upper = upper; - - ntup = (HeapTuple) ((char *) page + upper); - - /* - * Tuple is now allocated on the page. Next, fill in the tuple - * header. This block of code violates the tuple abstraction. - */ - - ntup->t_len = tupsize; - ItemPointerSet(&(ntup->t_ctid), BufferGetBlockNumber(buffer), off); - ItemPointerSetInvalid(&(ntup->t_chain)); - LastOidProcessed = ntup->t_oid = newoid(); - TransactionIdStore(GetCurrentTransactionId(), &(ntup->t_xmin)); - ntup->t_cmin = GetCurrentCommandId(); - StoreInvalidTransactionId(&(ntup->t_xmax)); - ntup->t_cmax = 0; - ntup->t_tmin = INVALID_ABSTIME; - ntup->t_tmax = CURRENT_ABSTIME; - ntup->t_natts = 2; - ntup->t_hoff = hoff; - ntup->t_vtype = 0; - ntup->t_infomask = 0x0; - - /* if a NULL is passed in, avoid the calculations below */ - if (dbuf == NULL) - return ntup; - - /* - * Finally, copy the user's data buffer into the tuple. This violates - * the tuple and class abstractions. - */ - - attptr = ((char *) ntup) + hoff; - *((int32 *) attptr) = obj_desc->offset + nwrite - 1; - attptr += sizeof(int32); - - /* - ** mer fixed disk layout of varlenas to get rid of the need for this. - ** - ** *((int32 *) attptr) = nwrite + sizeof(int32); - ** attptr += sizeof(int32); - */ - - *((int32 *) attptr) = nwrite + sizeof(int32); - attptr += sizeof(int32); - - /* - * If a data buffer was passed in, then copy the data from the buffer - * to the tuple. Some callers (eg, inv_wrold()) may not pass in a - * buffer, since they have to copy part of the old tuple data and - * part of the user's new data into the new tuple. 
- */ - - if (dbuf != (char *) NULL) - memmove(attptr, dbuf, nwrite); - - /* keep track of boundary of current tuple */ - obj_desc->lowbyte = obj_desc->offset; - obj_desc->highbyte = obj_desc->offset + nwrite - 1; - - /* new tuple is filled -- return it */ - return (ntup); + (*itemId).lp_off = upper; + (*itemId).lp_len = tupsize; + (*itemId).lp_flags = LP_USED; + ph->pd_lower = lower; + ph->pd_upper = upper; + + ntup = (HeapTuple) ((char *) page + upper); + + /* + * Tuple is now allocated on the page. Next, fill in the tuple + * header. This block of code violates the tuple abstraction. + */ + + ntup->t_len = tupsize; + ItemPointerSet(&(ntup->t_ctid), BufferGetBlockNumber(buffer), off); + ItemPointerSetInvalid(&(ntup->t_chain)); + LastOidProcessed = ntup->t_oid = newoid(); + TransactionIdStore(GetCurrentTransactionId(), &(ntup->t_xmin)); + ntup->t_cmin = GetCurrentCommandId(); + StoreInvalidTransactionId(&(ntup->t_xmax)); + ntup->t_cmax = 0; + ntup->t_tmin = INVALID_ABSTIME; + ntup->t_tmax = CURRENT_ABSTIME; + ntup->t_natts = 2; + ntup->t_hoff = hoff; + ntup->t_vtype = 0; + ntup->t_infomask = 0x0; + + /* if a NULL is passed in, avoid the calculations below */ + if (dbuf == NULL) + return ntup; + + /* + * Finally, copy the user's data buffer into the tuple. This violates + * the tuple and class abstractions. + */ + + attptr = ((char *) ntup) + hoff; + *((int32 *) attptr) = obj_desc->offset + nwrite - 1; + attptr += sizeof(int32); + + /* + * * mer fixed disk layout of varlenas to get rid of the need for + * this. * + * + * *((int32 *) attptr) = nwrite + sizeof(int32); * attptr += + * sizeof(int32); + */ + + *((int32 *) attptr) = nwrite + sizeof(int32); + attptr += sizeof(int32); + + /* + * If a data buffer was passed in, then copy the data from the buffer + * to the tuple. Some callers (eg, inv_wrold()) may not pass in a + * buffer, since they have to copy part of the old tuple data and part + * of the user's new data into the new tuple. 
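+	 * (Strictly, the early return above already guarantees that dbuf is
+	 * non-NULL by this point, so the test below is only defensive.)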
+ */ + + if (dbuf != (char *) NULL) + memmove(attptr, dbuf, nwrite); + + /* keep track of boundary of current tuple */ + obj_desc->lowbyte = obj_desc->offset; + obj_desc->highbyte = obj_desc->offset + nwrite - 1; + + /* new tuple is filled -- return it */ + return (ntup); } static void -inv_indextup(LargeObjectDesc *obj_desc, HeapTuple htup) +inv_indextup(LargeObjectDesc * obj_desc, HeapTuple htup) { - InsertIndexResult res; - Datum v[1]; - char n[1]; + InsertIndexResult res; + Datum v[1]; + char n[1]; - n[0] = ' '; - v[0] = Int32GetDatum(obj_desc->highbyte); - res = index_insert(obj_desc->index_r, &v[0], &n[0], - &(htup->t_ctid), obj_desc->heap_r); + n[0] = ' '; + v[0] = Int32GetDatum(obj_desc->highbyte); + res = index_insert(obj_desc->index_r, &v[0], &n[0], + &(htup->t_ctid), obj_desc->heap_r); - if (res) - pfree(res); + if (res) + pfree(res); } /* static void DumpPage(Page page, int blkno) { - ItemId lp; - HeapTuple tup; - int flags, i, nline; - ItemPointerData pointerData; + ItemId lp; + HeapTuple tup; + int flags, i, nline; + ItemPointerData pointerData; + + printf("\t[subblock=%d]:lower=%d:upper=%d:special=%d\n", 0, + ((PageHeader)page)->pd_lower, ((PageHeader)page)->pd_upper, + ((PageHeader)page)->pd_special); - printf("\t[subblock=%d]:lower=%d:upper=%d:special=%d\n", 0, - ((PageHeader)page)->pd_lower, ((PageHeader)page)->pd_upper, - ((PageHeader)page)->pd_special); + printf("\t:MaxOffsetNumber=%d\n", + (int16) PageGetMaxOffsetNumber(page)); - printf("\t:MaxOffsetNumber=%d\n", - (int16) PageGetMaxOffsetNumber(page)); - - nline = (int16) PageGetMaxOffsetNumber(page); + nline = (int16) PageGetMaxOffsetNumber(page); { - int i; - char *cp; + int i; + char *cp; - i = PageGetSpecialSize(page); - cp = PageGetSpecialPointer(page); + i = PageGetSpecialSize(page); + cp = PageGetSpecialPointer(page); - printf("\t:SpecialData="); + printf("\t:SpecialData="); - while (i > 0) { - printf(" 0x%02x", *cp); - cp += 1; - i -= 1; - } - printf("\n"); + while (i > 0) { + printf(" 0x%02x", *cp); + cp += 1; + i -= 1; + } + printf("\n"); } - for (i = 0; i < nline; i++) { - lp = ((PageHeader)page)->pd_linp + i; - flags = (*lp).lp_flags; - ItemPointerSet(&pointerData, blkno, 1 + i); - printf("%s:off=%d:flags=0x%x:len=%d", - ItemPointerFormExternal(&pointerData), (*lp).lp_off, - flags, (*lp).lp_len); + for (i = 0; i < nline; i++) { + lp = ((PageHeader)page)->pd_linp + i; + flags = (*lp).lp_flags; + ItemPointerSet(&pointerData, blkno, 1 + i); + printf("%s:off=%d:flags=0x%x:len=%d", + ItemPointerFormExternal(&pointerData), (*lp).lp_off, + flags, (*lp).lp_len); - if (flags & LP_USED) { - HeapTupleData htdata; + if (flags & LP_USED) { + HeapTupleData htdata; - printf(":USED"); + printf(":USED"); - memmove((char *) &htdata, - (char *) &((char *)page)[(*lp).lp_off], - sizeof(htdata)); + memmove((char *) &htdata, + (char *) &((char *)page)[(*lp).lp_off], + sizeof(htdata)); - tup = &htdata; + tup = &htdata; - printf("\n\t:ctid=%s:oid=%d", - ItemPointerFormExternal(&tup->t_ctid), - tup->t_oid); - printf(":natts=%d:thoff=%d:vtype=`%c' (0x%02x):", - tup->t_natts, - tup->t_hoff, tup->t_vtype, tup->t_vtype); + printf("\n\t:ctid=%s:oid=%d", + ItemPointerFormExternal(&tup->t_ctid), + tup->t_oid); + printf(":natts=%d:thoff=%d:vtype=`%c' (0x%02x):", + tup->t_natts, + tup->t_hoff, tup->t_vtype, tup->t_vtype); - printf("\n\t:tmin=%d:cmin=%u:", - tup->t_tmin, tup->t_cmin); + printf("\n\t:tmin=%d:cmin=%u:", + tup->t_tmin, tup->t_cmin); - printf("xmin=%u:", tup->t_xmin); + printf("xmin=%u:", tup->t_xmin); - 
printf("\n\t:tmax=%d:cmax=%u:", - tup->t_tmax, tup->t_cmax); + printf("\n\t:tmax=%d:cmax=%u:", + tup->t_tmax, tup->t_cmax); - printf("xmax=%u:", tup->t_xmax); + printf("xmax=%u:", tup->t_xmax); - printf("\n\t:chain=%s:\n", - ItemPointerFormExternal(&tup->t_chain)); - } else - putchar('\n'); - } + printf("\n\t:chain=%s:\n", + ItemPointerFormExternal(&tup->t_chain)); + } else + putchar('\n'); + } } static char* ItemPointerFormExternal(ItemPointer pointer) { - static char itemPointerString[32]; - - if (!ItemPointerIsValid(pointer)) { - memmove(itemPointerString, "<-,-,->", sizeof "<-,-,->"); - } else { - sprintf(itemPointerString, "<%u,%u>", - ItemPointerGetBlockNumber(pointer), - ItemPointerGetOffsetNumber(pointer)); - } + static char itemPointerString[32]; + + if (!ItemPointerIsValid(pointer)) { + memmove(itemPointerString, "<-,-,->", sizeof "<-,-,->"); + } else { + sprintf(itemPointerString, "<%u,%u>", + ItemPointerGetBlockNumber(pointer), + ItemPointerGetOffsetNumber(pointer)); + } - return (itemPointerString); + return (itemPointerString); } */ static int _inv_getsize(Relation hreln, TupleDesc hdesc, Relation ireln) { - IndexScanDesc iscan; - RetrieveIndexResult res; - Buffer buf; - HeapTuple htup; - Datum d; - long size; - bool isNull; + IndexScanDesc iscan; + RetrieveIndexResult res; + Buffer buf; + HeapTuple htup; + Datum d; + long size; + bool isNull; - /* scan backwards from end */ - iscan = index_beginscan(ireln, (bool) 1, 0, (ScanKey) NULL); + /* scan backwards from end */ + iscan = index_beginscan(ireln, (bool) 1, 0, (ScanKey) NULL); - buf = InvalidBuffer; + buf = InvalidBuffer; - do { - res = index_getnext(iscan, BackwardScanDirection); + do + { + res = index_getnext(iscan, BackwardScanDirection); - /* - * If there are no more index tuples, then the relation is empty, - * so the file's size is zero. - */ + /* + * If there are no more index tuples, then the relation is empty, + * so the file's size is zero. + */ - if (res == (RetrieveIndexResult) NULL) { - index_endscan(iscan); - return (0); - } + if (res == (RetrieveIndexResult) NULL) + { + index_endscan(iscan); + return (0); + } - /* - * For time travel, we need to use the actual time qual here, - * rather that NowTimeQual. We currently have no way to pass - * a time qual in. - */ + /* + * For time travel, we need to use the actual time qual here, + * rather that NowTimeQual. We currently have no way to pass a + * time qual in. 
+ */ - if (buf != InvalidBuffer) - ReleaseBuffer(buf); + if (buf != InvalidBuffer) + ReleaseBuffer(buf); - htup = heap_fetch(hreln, NowTimeQual, &(res->heap_iptr), &buf); + htup = heap_fetch(hreln, NowTimeQual, &(res->heap_iptr), &buf); - } while (!HeapTupleIsValid(htup)); + } while (!HeapTupleIsValid(htup)); - /* don't need the index scan anymore */ - index_endscan(iscan); + /* don't need the index scan anymore */ + index_endscan(iscan); - /* get olastbyte attribute */ - d = (Datum) heap_getattr(htup, buf, 1, hdesc, &isNull); - size = DatumGetInt32(d) + 1; + /* get olastbyte attribute */ + d = (Datum) heap_getattr(htup, buf, 1, hdesc, &isNull); + size = DatumGetInt32(d) + 1; - /* wei hates it if you forget to do this */ - ReleaseBuffer(buf); + /* wei hates it if you forget to do this */ + ReleaseBuffer(buf); - return (size); + return (size); } diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 0f34f500596..555303fa14e 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -1,22 +1,22 @@ /*------------------------------------------------------------------------- * * lmgr.c-- - * POSTGRES lock manager code + * POSTGRES lock manager code * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lmgr.c,v 1.5 1997/08/19 21:33:15 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lmgr.c,v 1.6 1997/09/07 04:48:51 momjian Exp $ * *------------------------------------------------------------------------- */ -/* #define LOCKDEBUGALL 1 */ +/* #define LOCKDEBUGALL 1 */ /* #define LOCKDEBUG 1 */ #ifdef LOCKDEBUGALL -#define LOCKDEBUG 1 -#endif /* LOCKDEBUGALL */ +#define LOCKDEBUG 1 +#endif /* LOCKDEBUGALL */ #include <string.h> @@ -46,239 +46,246 @@ #include "nodes/memnodes.h" #include "storage/bufmgr.h" -#include "access/transam.h" /* for AmiTransactionId */ +#include "access/transam.h" /* for AmiTransactionId */ -static void LRelIdAssign(LRelId *lRelId, Oid dbId, Oid relId); +static void LRelIdAssign(LRelId * lRelId, Oid dbId, Oid relId); /* ---------------- - * + * * ---------------- */ -#define MaxRetries 4 /* XXX about 1/4 minute--a hack */ +#define MaxRetries 4 /* XXX about 1/4 minute--a hack */ #define IntentReadRelationLock 0x0100 -#define ReadRelationLock 0x0200 -#define IntentWriteRelationLock 0x0400 -#define WriteRelationLock 0x0800 -#define IntentReadPageLock 0x1000 -#define ReadTupleLock 0x2000 +#define ReadRelationLock 0x0200 +#define IntentWriteRelationLock 0x0400 +#define WriteRelationLock 0x0800 +#define IntentReadPageLock 0x1000 +#define ReadTupleLock 0x2000 -#define TupleLevelLockCountMask 0x000f +#define TupleLevelLockCountMask 0x000f -#define TupleLevelLockLimit 10 +#define TupleLevelLockLimit 10 -extern Oid MyDatabaseId; +extern Oid MyDatabaseId; static LRelId VariableRelationLRelId = { - RelOid_pg_variable, - InvalidOid + RelOid_pg_variable, + InvalidOid }; /* ---------------- - * RelationGetLRelId + * RelationGetLRelId * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_10 \ elog(NOTICE, "RelationGetLRelId(%s) invalid lockInfo", \ - RelationGetRelationName(relation)); + RelationGetRelationName(relation)); #else #define LOCKDEBUG_10 -#endif /* LOCKDEBUG */ - +#endif /* LOCKDEBUG */ + /* * RelationGetLRelId -- - * Returns "lock" relation identifier for a relation. + * Returns "lock" relation identifier for a relation. 
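+ *	(An LRelId is simply the <dbId, relId> pair under which lock
+ *	requests for the relation are tagged.)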
*/ LRelId RelationGetLRelId(Relation relation) { - LockInfo linfo; - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - linfo = (LockInfo) relation->lockInfo; - - /* ---------------- - * initialize lock info if necessary - * ---------------- - */ - if (! LockInfoIsValid(linfo)) { - LOCKDEBUG_10; - RelationInitLockInfo(relation); + LockInfo linfo; + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); linfo = (LockInfo) relation->lockInfo; - } - - /* ---------------- - * XXX hack to prevent problems during - * VARIABLE relation initialization - * ---------------- - */ - if (strcmp(RelationGetRelationName(relation)->data, - VariableRelationName) == 0) { - return (VariableRelationLRelId); - } - - return (linfo->lRelId); + + /* ---------------- + * initialize lock info if necessary + * ---------------- + */ + if (!LockInfoIsValid(linfo)) + { + LOCKDEBUG_10; + RelationInitLockInfo(relation); + linfo = (LockInfo) relation->lockInfo; + } + + /* ---------------- + * XXX hack to prevent problems during + * VARIABLE relation initialization + * ---------------- + */ + if (strcmp(RelationGetRelationName(relation)->data, + VariableRelationName) == 0) + { + return (VariableRelationLRelId); + } + + return (linfo->lRelId); } /* * LRelIdGetDatabaseId -- - * Returns database identifier for a "lock" relation identifier. + * Returns database identifier for a "lock" relation identifier. */ /* ---------------- - * LRelIdGetDatabaseId + * LRelIdGetDatabaseId * * Note: The argument may not be correct, if it is not used soon - * after it is created. + * after it is created. * ---------------- */ #ifdef NOT_USED Oid LRelIdGetDatabaseId(LRelId lRelId) { - return (lRelId.dbId); + return (lRelId.dbId); } + #endif /* * LRelIdGetRelationId -- - * Returns relation identifier for a "lock" relation identifier. + * Returns relation identifier for a "lock" relation identifier. */ -Oid +Oid LRelIdGetRelationId(LRelId lRelId) { - return (lRelId.relId); + return (lRelId.relId); } /* * DatabaseIdIsMyDatabaseId -- - * True iff database object identifier is valid in my present database. + * True iff database object identifier is valid in my present database. */ #ifdef NOT_USED bool DatabaseIdIsMyDatabaseId(Oid databaseId) { - return (bool) - (!OidIsValid(databaseId) || databaseId == MyDatabaseId); + return (bool) + (!OidIsValid(databaseId) || databaseId == MyDatabaseId); } + #endif /* * LRelIdContainsMyDatabaseId -- - * True iff "lock" relation identifier is valid in my present database. + * True iff "lock" relation identifier is valid in my present database. */ #ifdef NOT_USED bool LRelIdContainsMyDatabaseId(LRelId lRelId) { - return (bool) - (!OidIsValid(lRelId.dbId) || lRelId.dbId == MyDatabaseId); + return (bool) + (!OidIsValid(lRelId.dbId) || lRelId.dbId == MyDatabaseId); } + #endif /* * RelationInitLockInfo -- - * Initializes the lock information in a relation descriptor. + * Initializes the lock information in a relation descriptor. */ /* ---------------- - * RelationInitLockInfo + * RelationInitLockInfo * - * XXX processingVariable is a hack to prevent problems during - * VARIABLE relation initialization. + * XXX processingVariable is a hack to prevent problems during + * VARIABLE relation initialization. 
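+ *	(see the VariableRelationName special cases in the body below)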
* ---------------- */ void RelationInitLockInfo(Relation relation) { - LockInfo info; - char *relname; - Oid relationid; - bool processingVariable; - extern Oid MyDatabaseId; /* XXX use include */ - extern GlobalMemory CacheCxt; - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - Assert(OidIsValid(RelationGetRelationId(relation))); - - /* ---------------- - * get information from relation descriptor - * ---------------- - */ - info = (LockInfo) relation->lockInfo; - relname = (char *) RelationGetRelationName(relation); - relationid = RelationGetRelationId(relation); - processingVariable = (strcmp(relname, VariableRelationName) == 0); - - /* ---------------- - * create a new lockinfo if not already done - * ---------------- - */ - if (! PointerIsValid(info)) + LockInfo info; + char *relname; + Oid relationid; + bool processingVariable; + extern Oid MyDatabaseId; /* XXX use include */ + extern GlobalMemory CacheCxt; + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + Assert(OidIsValid(RelationGetRelationId(relation))); + + /* ---------------- + * get information from relation descriptor + * ---------------- + */ + info = (LockInfo) relation->lockInfo; + relname = (char *) RelationGetRelationName(relation); + relationid = RelationGetRelationId(relation); + processingVariable = (strcmp(relname, VariableRelationName) == 0); + + /* ---------------- + * create a new lockinfo if not already done + * ---------------- + */ + if (!PointerIsValid(info)) { - MemoryContext oldcxt; - - oldcxt = MemoryContextSwitchTo((MemoryContext)CacheCxt); - info = (LockInfo)palloc(sizeof(LockInfoData)); - MemoryContextSwitchTo(oldcxt); + MemoryContext oldcxt; + + oldcxt = MemoryContextSwitchTo((MemoryContext) CacheCxt); + info = (LockInfo) palloc(sizeof(LockInfoData)); + MemoryContextSwitchTo(oldcxt); } - else if (processingVariable) { - if (IsTransactionState()) { - TransactionIdStore(GetCurrentTransactionId(), - &info->transactionIdData); + else if (processingVariable) + { + if (IsTransactionState()) + { + TransactionIdStore(GetCurrentTransactionId(), + &info->transactionIdData); + } + info->flags = 0x0; + return; /* prevent an infinite loop--still true? */ } - info->flags = 0x0; - return; /* prevent an infinite loop--still true? */ - } - else if (info->initialized) + else if (info->initialized) { - /* ------------ - * If we've already initialized we're done. - * ------------ - */ - return; + /* ------------ + * If we've already initialized we're done. 
+ * ------------ + */ + return; } - - /* ---------------- - * initialize lockinfo.dbId and .relId appropriately - * ---------------- - */ - if (IsSharedSystemRelationName(relname)) - LRelIdAssign(&info->lRelId, InvalidOid, relationid); - else - LRelIdAssign(&info->lRelId, MyDatabaseId, relationid); - - /* ---------------- - * store the transaction id in the lockInfo field - * ---------------- - */ - if (processingVariable) - TransactionIdStore(AmiTransactionId, - &info->transactionIdData); - else if (IsTransactionState()) - TransactionIdStore(GetCurrentTransactionId(), - &info->transactionIdData); - else - StoreInvalidTransactionId(&(info->transactionIdData)); - - /* ---------------- - * initialize rest of lockinfo - * ---------------- - */ - info->flags = 0x0; - info->initialized = (bool)true; - relation->lockInfo = (Pointer) info; + + /* ---------------- + * initialize lockinfo.dbId and .relId appropriately + * ---------------- + */ + if (IsSharedSystemRelationName(relname)) + LRelIdAssign(&info->lRelId, InvalidOid, relationid); + else + LRelIdAssign(&info->lRelId, MyDatabaseId, relationid); + + /* ---------------- + * store the transaction id in the lockInfo field + * ---------------- + */ + if (processingVariable) + TransactionIdStore(AmiTransactionId, + &info->transactionIdData); + else if (IsTransactionState()) + TransactionIdStore(GetCurrentTransactionId(), + &info->transactionIdData); + else + StoreInvalidTransactionId(&(info->transactionIdData)); + + /* ---------------- + * initialize rest of lockinfo + * ---------------- + */ + info->flags = 0x0; + info->initialized = (bool) true; + relation->lockInfo = (Pointer) info; } /* ---------------- - * RelationDiscardLockInfo + * RelationDiscardLockInfo * ---------------- */ #ifdef LOCKDEBUG @@ -286,456 +293,464 @@ RelationInitLockInfo(Relation relation) elog(DEBUG, "DiscardLockInfo: NULL relation->lockInfo") #else #define LOCKDEBUG_20 -#endif /* LOCKDEBUG */ - +#endif /* LOCKDEBUG */ + /* * RelationDiscardLockInfo -- - * Discards the lock information in a relation descriptor. + * Discards the lock information in a relation descriptor. */ #ifdef NOT_USED void RelationDiscardLockInfo(Relation relation) { - if (! LockInfoIsValid(relation->lockInfo)) { - LOCKDEBUG_20; - return; - } - - pfree(relation->lockInfo); - relation->lockInfo = NULL; + if (!LockInfoIsValid(relation->lockInfo)) + { + LOCKDEBUG_20; + return; + } + + pfree(relation->lockInfo); + relation->lockInfo = NULL; } + #endif /* * RelationSetLockForDescriptorOpen -- - * Sets read locks for a relation descriptor. + * Sets read locks for a relation descriptor. */ #ifdef LOCKDEBUGALL #define LOCKDEBUGALL_30 \ elog(DEBUG, "RelationSetLockForDescriptorOpen(%s[%d,%d]) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId) + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId) #else #define LOCKDEBUGALL_30 -#endif /* LOCKDEBUGALL*/ - +#endif /* LOCKDEBUGALL */ + void RelationSetLockForDescriptorOpen(Relation relation) { - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - LOCKDEBUGALL_30; - - /* ---------------- - * read lock catalog tuples which compose the relation descriptor - * XXX race condition? XXX For now, do nothing. 
- * ---------------- - */ + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + LOCKDEBUGALL_30; + + /* ---------------- + * read lock catalog tuples which compose the relation descriptor + * XXX race condition? XXX For now, do nothing. + * ---------------- + */ } /* ---------------- - * RelationSetLockForRead + * RelationSetLockForRead * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_40 \ elog(DEBUG, "RelationSetLockForRead(%s[%d,%d]) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId) + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId) #else #define LOCKDEBUG_40 -#endif /* LOCKDEBUG*/ - +#endif /* LOCKDEBUG */ + /* * RelationSetLockForRead -- - * Sets relation level read lock. + * Sets relation level read lock. */ void RelationSetLockForRead(Relation relation) { - LockInfo linfo; - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - LOCKDEBUG_40; - - /* ---------------- - * If we don't have lock info on the reln just go ahead and - * lock it without trying to short circuit the lock manager. - * ---------------- - */ - if (!LockInfoIsValid(relation->lockInfo)) + LockInfo linfo; + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + LOCKDEBUG_40; + + /* ---------------- + * If we don't have lock info on the reln just go ahead and + * lock it without trying to short circuit the lock manager. + * ---------------- + */ + if (!LockInfoIsValid(relation->lockInfo)) { - RelationInitLockInfo(relation); - linfo = (LockInfo) relation->lockInfo; - linfo->flags |= ReadRelationLock; - MultiLockReln(linfo, READ_LOCK); - return; + RelationInitLockInfo(relation); + linfo = (LockInfo) relation->lockInfo; + linfo->flags |= ReadRelationLock; + MultiLockReln(linfo, READ_LOCK); + return; } - else - linfo = (LockInfo) relation->lockInfo; - - MultiLockReln(linfo, READ_LOCK); + else + linfo = (LockInfo) relation->lockInfo; + + MultiLockReln(linfo, READ_LOCK); } /* ---------------- - * RelationUnsetLockForRead + * RelationUnsetLockForRead * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_50 \ elog(DEBUG, "RelationUnsetLockForRead(%s[%d,%d]) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId) + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId) #else #define LOCKDEBUG_50 -#endif /* LOCKDEBUG*/ - +#endif /* LOCKDEBUG */ + /* * RelationUnsetLockForRead -- - * Unsets relation level read lock. + * Unsets relation level read lock. */ void RelationUnsetLockForRead(Relation relation) { - LockInfo linfo; - - /* ---------------- - * sanity check - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - linfo = (LockInfo) relation->lockInfo; - - /* ---------------- - * If we don't have lock info on the reln just go ahead and - * release it. - * ---------------- - */ - if (!LockInfoIsValid(linfo)) + LockInfo linfo; + + /* ---------------- + * sanity check + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + linfo = (LockInfo) relation->lockInfo; + + /* ---------------- + * If we don't have lock info on the reln just go ahead and + * release it. 
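+	 * (In fact the code below treats missing lock info as an error and
+	 * raises elog(WARN) rather than silently releasing.)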
+ * ---------------- + */ + if (!LockInfoIsValid(linfo)) { - elog(WARN, - "Releasing a lock on %s with invalid lock information", - RelationGetRelationName(relation)); + elog(WARN, + "Releasing a lock on %s with invalid lock information", + RelationGetRelationName(relation)); } - - MultiReleaseReln(linfo, READ_LOCK); + + MultiReleaseReln(linfo, READ_LOCK); } /* ---------------- - * RelationSetLockForWrite(relation) + * RelationSetLockForWrite(relation) * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_60 \ elog(DEBUG, "RelationSetLockForWrite(%s[%d,%d]) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId) + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId) #else #define LOCKDEBUG_60 -#endif /* LOCKDEBUG*/ - +#endif /* LOCKDEBUG */ + /* * RelationSetLockForWrite -- - * Sets relation level write lock. + * Sets relation level write lock. */ void RelationSetLockForWrite(Relation relation) { - LockInfo linfo; - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - LOCKDEBUG_60; - - /* ---------------- - * If we don't have lock info on the reln just go ahead and - * lock it without trying to short circuit the lock manager. - * ---------------- - */ - if (!LockInfoIsValid(relation->lockInfo)) + LockInfo linfo; + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + LOCKDEBUG_60; + + /* ---------------- + * If we don't have lock info on the reln just go ahead and + * lock it without trying to short circuit the lock manager. + * ---------------- + */ + if (!LockInfoIsValid(relation->lockInfo)) { - RelationInitLockInfo(relation); - linfo = (LockInfo) relation->lockInfo; - linfo->flags |= WriteRelationLock; - MultiLockReln(linfo, WRITE_LOCK); - return; + RelationInitLockInfo(relation); + linfo = (LockInfo) relation->lockInfo; + linfo->flags |= WriteRelationLock; + MultiLockReln(linfo, WRITE_LOCK); + return; } - else - linfo = (LockInfo) relation->lockInfo; - - MultiLockReln(linfo, WRITE_LOCK); + else + linfo = (LockInfo) relation->lockInfo; + + MultiLockReln(linfo, WRITE_LOCK); } /* ---------------- - * RelationUnsetLockForWrite + * RelationUnsetLockForWrite * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_70 \ elog(DEBUG, "RelationUnsetLockForWrite(%s[%d,%d]) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId); + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId); #else #define LOCKDEBUG_70 -#endif /* LOCKDEBUG */ - +#endif /* LOCKDEBUG */ + /* * RelationUnsetLockForWrite -- - * Unsets relation level write lock. + * Unsets relation level write lock. 
*/ void RelationUnsetLockForWrite(Relation relation) { - LockInfo linfo; - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) { - return; - } - - linfo = (LockInfo) relation->lockInfo; - - if (!LockInfoIsValid(linfo)) + LockInfo linfo; + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + { + return; + } + + linfo = (LockInfo) relation->lockInfo; + + if (!LockInfoIsValid(linfo)) { - elog(WARN, - "Releasing a lock on %s with invalid lock information", - RelationGetRelationName(relation)); + elog(WARN, + "Releasing a lock on %s with invalid lock information", + RelationGetRelationName(relation)); } - - MultiReleaseReln(linfo, WRITE_LOCK); + + MultiReleaseReln(linfo, WRITE_LOCK); } /* ---------------- - * RelationSetLockForTupleRead + * RelationSetLockForTupleRead * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_80 \ elog(DEBUG, "RelationSetLockForTupleRead(%s[%d,%d], 0x%x) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, \ - itemPointer) + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, \ + itemPointer) #define LOCKDEBUG_81 \ - elog(DEBUG, "RelationSetLockForTupleRead() escalating"); + elog(DEBUG, "RelationSetLockForTupleRead() escalating"); #else #define LOCKDEBUG_80 #define LOCKDEBUG_81 -#endif /* LOCKDEBUG */ - +#endif /* LOCKDEBUG */ + /* * RelationSetLockForTupleRead -- - * Sets tuple level read lock. + * Sets tuple level read lock. */ #ifdef NOT_USED void RelationSetLockForTupleRead(Relation relation, ItemPointer itemPointer) { - LockInfo linfo; - TransactionId curXact; - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - LOCKDEBUG_80; - - /* --------------------- - * If our lock info is invalid don't bother trying to short circuit - * the lock manager. - * --------------------- - */ - if (!LockInfoIsValid(relation->lockInfo)) + LockInfo linfo; + TransactionId curXact; + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + LOCKDEBUG_80; + + /* --------------------- + * If our lock info is invalid don't bother trying to short circuit + * the lock manager. 
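+	 * (With valid lock info, the checks below can skip the lock manager
+	 * entirely when this transaction already holds a relation-level
+	 * read lock.)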
+ * --------------------- + */ + if (!LockInfoIsValid(relation->lockInfo)) { - RelationInitLockInfo(relation); - linfo = (LockInfo) relation->lockInfo; - linfo->flags |= - IntentReadRelationLock | - IntentReadPageLock | + RelationInitLockInfo(relation); + linfo = (LockInfo) relation->lockInfo; + linfo->flags |= + IntentReadRelationLock | + IntentReadPageLock | ReadTupleLock; - MultiLockTuple(linfo, itemPointer, READ_LOCK); - return; + MultiLockTuple(linfo, itemPointer, READ_LOCK); + return; } - else - linfo = (LockInfo) relation->lockInfo; - - /* ---------------- - * no need to set a lower granularity lock - * ---------------- - */ - curXact = GetCurrentTransactionId(); - if ((linfo->flags & ReadRelationLock) && - TransactionIdEquals(curXact, linfo->transactionIdData)) + else + linfo = (LockInfo) relation->lockInfo; + + /* ---------------- + * no need to set a lower granularity lock + * ---------------- + */ + curXact = GetCurrentTransactionId(); + if ((linfo->flags & ReadRelationLock) && + TransactionIdEquals(curXact, linfo->transactionIdData)) { - return; + return; } - - /* ---------------- - * If we don't already have a tuple lock this transaction - * ---------------- - */ - if (!( (linfo->flags & ReadTupleLock) && - TransactionIdEquals(curXact, linfo->transactionIdData) )) { - - linfo->flags |= - IntentReadRelationLock | - IntentReadPageLock | - ReadTupleLock; - - /* clear count */ - linfo->flags &= ~TupleLevelLockCountMask; - - } else { - if (TupleLevelLockLimit == (TupleLevelLockCountMask & - linfo->flags)) { - LOCKDEBUG_81; - - /* escalate */ - MultiLockReln(linfo, READ_LOCK); - - /* clear count */ - linfo->flags &= ~TupleLevelLockCountMask; - return; + + /* ---------------- + * If we don't already have a tuple lock this transaction + * ---------------- + */ + if (!((linfo->flags & ReadTupleLock) && + TransactionIdEquals(curXact, linfo->transactionIdData))) + { + + linfo->flags |= + IntentReadRelationLock | + IntentReadPageLock | + ReadTupleLock; + + /* clear count */ + linfo->flags &= ~TupleLevelLockCountMask; + + } + else + { + if (TupleLevelLockLimit == (TupleLevelLockCountMask & + linfo->flags)) + { + LOCKDEBUG_81; + + /* escalate */ + MultiLockReln(linfo, READ_LOCK); + + /* clear count */ + linfo->flags &= ~TupleLevelLockCountMask; + return; + } + + /* increment count */ + linfo->flags = + (linfo->flags & ~TupleLevelLockCountMask) | + (1 + (TupleLevelLockCountMask & linfo->flags)); } - - /* increment count */ - linfo->flags = - (linfo->flags & ~TupleLevelLockCountMask) | - (1 + (TupleLevelLockCountMask & linfo->flags)); - } - - TransactionIdStore(curXact, &linfo->transactionIdData); - - /* ---------------- - * Lock the tuple. - * ---------------- - */ - MultiLockTuple(linfo, itemPointer, READ_LOCK); + + TransactionIdStore(curXact, &linfo->transactionIdData); + + /* ---------------- + * Lock the tuple. 
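+	 * (By now the low bits of linfo->flags carry this transaction's
+	 * tuple-lock count; e.g. 0x3109 is the three intent/read bits plus
+	 * a count of nine, and a count reaching TupleLevelLockLimit (10)
+	 * takes the escalation branch above instead.)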
+ * ---------------- + */ + MultiLockTuple(linfo, itemPointer, READ_LOCK); } + #endif /* ---------------- - * RelationSetLockForReadPage + * RelationSetLockForReadPage * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_90 \ elog(DEBUG, "RelationSetLockForReadPage(%s[%d,%d], @%d) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page); + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page); #else #define LOCKDEBUG_90 -#endif /* LOCKDEBUG*/ - +#endif /* LOCKDEBUG */ + /* ---------------- - * RelationSetLockForWritePage + * RelationSetLockForWritePage * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_100 \ elog(DEBUG, "RelationSetLockForWritePage(%s[%d,%d], @%d) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page); + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page); #else #define LOCKDEBUG_100 -#endif /* LOCKDEBUG */ - +#endif /* LOCKDEBUG */ + /* * RelationSetLockForWritePage -- - * Sets write lock on a page. + * Sets write lock on a page. */ -void +void RelationSetLockForWritePage(Relation relation, - ItemPointer itemPointer) + ItemPointer itemPointer) { - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - /* --------------- - * Make sure linfo is initialized - * --------------- - */ - if (!LockInfoIsValid(relation->lockInfo)) - RelationInitLockInfo(relation); - - /* ---------------- - * attempt to set lock - * ---------------- - */ - MultiLockPage((LockInfo) relation->lockInfo, itemPointer, WRITE_LOCK); + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + /* --------------- + * Make sure linfo is initialized + * --------------- + */ + if (!LockInfoIsValid(relation->lockInfo)) + RelationInitLockInfo(relation); + + /* ---------------- + * attempt to set lock + * ---------------- + */ + MultiLockPage((LockInfo) relation->lockInfo, itemPointer, WRITE_LOCK); } /* ---------------- - * RelationUnsetLockForReadPage + * RelationUnsetLockForReadPage * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_110 \ elog(DEBUG, "RelationUnsetLockForReadPage(%s[%d,%d], @%d) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page) + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page) #else #define LOCKDEBUG_110 -#endif /* LOCKDEBUG */ - +#endif /* LOCKDEBUG */ + /* ---------------- - * RelationUnsetLockForWritePage + * RelationUnsetLockForWritePage * ---------------- */ #ifdef LOCKDEBUG #define LOCKDEBUG_120 \ elog(DEBUG, "RelationUnsetLockForWritePage(%s[%d,%d], @%d) called", \ - RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page) + RelationGetRelationName(relation), lRelId.dbId, lRelId.relId, page) #else #define LOCKDEBUG_120 -#endif /* LOCKDEBUG */ - +#endif /* LOCKDEBUG */ + /* - * Set a single level write page lock. Assumes that you already + * Set a single level write page lock. Assumes that you already * have a write intent lock on the relation. 
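+ * (That intent lock is presumably taken via RelationSetWIntentLock,
+ * defined later in this file.)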
*/ void RelationSetSingleWLockPage(Relation relation, - ItemPointer itemPointer) + ItemPointer itemPointer) { - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - RelationInitLockInfo(relation); - - SingleLockPage((LockInfo)relation->lockInfo, itemPointer, WRITE_LOCK, !UNLOCK); + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + RelationInitLockInfo(relation); + + SingleLockPage((LockInfo) relation->lockInfo, itemPointer, WRITE_LOCK, !UNLOCK); } /* @@ -743,23 +758,23 @@ RelationSetSingleWLockPage(Relation relation, */ void RelationUnsetSingleWLockPage(Relation relation, - ItemPointer itemPointer) + ItemPointer itemPointer) { - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - elog(WARN, - "Releasing a lock on %s with invalid lock information", - RelationGetRelationName(relation)); - - SingleLockPage((LockInfo)relation->lockInfo, itemPointer, WRITE_LOCK, UNLOCK); + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + elog(WARN, + "Releasing a lock on %s with invalid lock information", + RelationGetRelationName(relation)); + + SingleLockPage((LockInfo) relation->lockInfo, itemPointer, WRITE_LOCK, UNLOCK); } /* @@ -768,45 +783,45 @@ RelationUnsetSingleWLockPage(Relation relation, */ void RelationSetSingleRLockPage(Relation relation, - ItemPointer itemPointer) + ItemPointer itemPointer) { - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - RelationInitLockInfo(relation); - - SingleLockPage((LockInfo)relation->lockInfo, itemPointer, READ_LOCK, !UNLOCK); + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + RelationInitLockInfo(relation); + + SingleLockPage((LockInfo) relation->lockInfo, itemPointer, READ_LOCK, !UNLOCK); } -/* +/* * Unset a single level read page lock. 
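+ * (read-lock counterpart of RelationUnsetSingleWLockPage above)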
*/ void RelationUnsetSingleRLockPage(Relation relation, - ItemPointer itemPointer) + ItemPointer itemPointer) { - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - elog(WARN, - "Releasing a lock on %s with invalid lock information", - RelationGetRelationName(relation)); - - SingleLockPage((LockInfo)relation->lockInfo, itemPointer, READ_LOCK, UNLOCK); + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + elog(WARN, + "Releasing a lock on %s with invalid lock information", + RelationGetRelationName(relation)); + + SingleLockPage((LockInfo) relation->lockInfo, itemPointer, READ_LOCK, UNLOCK); } /* @@ -821,18 +836,18 @@ RelationUnsetSingleRLockPage(Relation relation, void RelationSetRIntentLock(Relation relation) { - /* ----------------- - * Sanity check - * ----------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - RelationInitLockInfo(relation); - - SingleLockReln((LockInfo)relation->lockInfo, READ_LOCK+INTENT, !UNLOCK); + /* ----------------- + * Sanity check + * ----------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + RelationInitLockInfo(relation); + + SingleLockReln((LockInfo) relation->lockInfo, READ_LOCK + INTENT, !UNLOCK); } /* @@ -841,18 +856,18 @@ RelationSetRIntentLock(Relation relation) void RelationUnsetRIntentLock(Relation relation) { - /* ----------------- - * Sanity check - * ----------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - RelationInitLockInfo(relation); - - SingleLockReln((LockInfo)relation->lockInfo, READ_LOCK+INTENT, UNLOCK); + /* ----------------- + * Sanity check + * ----------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + RelationInitLockInfo(relation); + + SingleLockReln((LockInfo) relation->lockInfo, READ_LOCK + INTENT, UNLOCK); } /* @@ -862,18 +877,18 @@ RelationUnsetRIntentLock(Relation relation) void RelationSetWIntentLock(Relation relation) { - /* ----------------- - * Sanity check - * ----------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - RelationInitLockInfo(relation); - - SingleLockReln((LockInfo)relation->lockInfo, WRITE_LOCK+INTENT, !UNLOCK); + /* ----------------- + * Sanity check + * ----------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + RelationInitLockInfo(relation); + + SingleLockReln((LockInfo) relation->lockInfo, WRITE_LOCK + INTENT, !UNLOCK); } /* @@ -882,69 +897,71 @@ RelationSetWIntentLock(Relation relation) void RelationUnsetWIntentLock(Relation relation) { - /* ----------------- - * Sanity check - * ----------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - RelationInitLockInfo(relation); - - SingleLockReln((LockInfo)relation->lockInfo, WRITE_LOCK+INTENT, UNLOCK); + /* ----------------- + * Sanity check + * ----------------- + */ + Assert(RelationIsValid(relation)); + if 
(LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + RelationInitLockInfo(relation); + + SingleLockReln((LockInfo) relation->lockInfo, WRITE_LOCK + INTENT, UNLOCK); } /* * Extend locks are used primarily in tertiary storage devices such as - * a WORM disk jukebox. Sometimes need exclusive access to extend a + * a WORM disk jukebox. Sometimes need exclusive access to extend a * file by a block. */ #ifdef NOT_USED void RelationSetLockForExtend(Relation relation) { - /* ----------------- - * Sanity check - * ----------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - RelationInitLockInfo(relation); - - MultiLockReln((LockInfo) relation->lockInfo, EXTEND_LOCK); + /* ----------------- + * Sanity check + * ----------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + RelationInitLockInfo(relation); + + MultiLockReln((LockInfo) relation->lockInfo, EXTEND_LOCK); } + #endif #ifdef NOT_USED void RelationUnsetLockForExtend(Relation relation) { - /* ----------------- - * Sanity check - * ----------------- - */ - Assert(RelationIsValid(relation)); - if (LockingDisabled()) - return; - - if (!LockInfoIsValid(relation->lockInfo)) - RelationInitLockInfo(relation); - - MultiReleaseReln((LockInfo) relation->lockInfo, EXTEND_LOCK); + /* ----------------- + * Sanity check + * ----------------- + */ + Assert(RelationIsValid(relation)); + if (LockingDisabled()) + return; + + if (!LockInfoIsValid(relation->lockInfo)) + RelationInitLockInfo(relation); + + MultiReleaseReln((LockInfo) relation->lockInfo, EXTEND_LOCK); } + #endif -/* +/* * Create an LRelid --- Why not just pass in a pointer to the storage? */ static void -LRelIdAssign(LRelId *lRelId, Oid dbId, Oid relId) -{ - lRelId->dbId = dbId; - lRelId->relId = relId; +LRelIdAssign(LRelId * lRelId, Oid dbId, Oid relId) +{ + lRelId->dbId = dbId; + lRelId->relId = relId; } diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 15ede2e0ed9..7e592945f1b 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -1,37 +1,37 @@ /*------------------------------------------------------------------------- * * lock.c-- - * simple lock acquisition + * simple lock acquisition * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.11 1997/08/19 21:33:19 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.12 1997/09/07 04:48:58 momjian Exp $ * * NOTES - * Outside modules can create a lock table and acquire/release - * locks. A lock table is a shared memory hash table. When - * a process tries to acquire a lock of a type that conflicts - * with existing locks, it is put to sleep using the routines - * in storage/lmgr/proc.c. + * Outside modules can create a lock table and acquire/release + * locks. A lock table is a shared memory hash table. When + * a process tries to acquire a lock of a type that conflicts + * with existing locks, it is put to sleep using the routines + * in storage/lmgr/proc.c. * - * Interface: + * Interface: * - * LockAcquire(), LockRelease(), LockTabInit(). + * LockAcquire(), LockRelease(), LockTabInit(). * - * LockReplace() is called only within this module and by the - * lkchain module. It releases a lock without looking - * the lock up in the lock table. 
+ * LockReplace() is called only within this module and by the + * lkchain module. It releases a lock without looking + * the lock up in the lock table. * - * NOTE: This module is used to define new lock tables. The - * multi-level lock table (multi.c) used by the heap - * access methods calls these routines. See multi.c for - * examples showing how to use this interface. + * NOTE: This module is used to define new lock tables. The + * multi-level lock table (multi.c) used by the heap + * access methods calls these routines. See multi.c for + * examples showing how to use this interface. * *------------------------------------------------------------------------- */ -#include <stdio.h> /* for sprintf() */ +#include <stdio.h> /* for sprintf() */ #include <string.h> #include <sys/types.h> #include <unistd.h> @@ -48,8 +48,9 @@ #include "access/xact.h" #include "access/transam.h" -static int WaitOnLock(LOCKTAB *ltable, LockTableId tableId, LOCK *lock, - LOCKT lockt); +static int +WaitOnLock(LOCKTAB * ltable, LockTableId tableId, LOCK * lock, + LOCKT lockt); /*#define LOCK_MGR_DEBUG*/ @@ -60,84 +61,85 @@ static int WaitOnLock(LOCKTAB *ltable, LockTableId tableId, LOCK *lock, #define LOCK_DUMP_AUX(where,lock,type) #define XID_PRINT(where,xidentP) -#else /* LOCK_MGR_DEBUG */ - -int lockDebug = 0; -unsigned int lock_debug_oid_min = BootstrapObjectIdData; -static char *lock_types[] = { - "NONE", - "WRITE", - "READ", - "WRITE INTENT", - "READ INTENT", - "EXTEND" +#else /* LOCK_MGR_DEBUG */ + +int lockDebug = 0; +unsigned int lock_debug_oid_min = BootstrapObjectIdData; +static char *lock_types[] = { + "NONE", + "WRITE", + "READ", + "WRITE INTENT", + "READ INTENT", + "EXTEND" }; #define LOCK_PRINT(where,tag,type)\ - if ((lockDebug >= 1) && (tag->relId >= lock_debug_oid_min)) \ - elog(DEBUG, \ - "%s: pid (%d) rel (%d) dbid (%d) tid (%d,%d) type (%s)",where, \ - getpid(),\ - tag->relId, tag->dbId, \ - ((tag->tupleId.ip_blkid.bi_hi<<16)+\ - tag->tupleId.ip_blkid.bi_lo),\ - tag->tupleId.ip_posid, \ - lock_types[type]) + if ((lockDebug >= 1) && (tag->relId >= lock_debug_oid_min)) \ + elog(DEBUG, \ + "%s: pid (%d) rel (%d) dbid (%d) tid (%d,%d) type (%s)",where, \ + getpid(),\ + tag->relId, tag->dbId, \ + ((tag->tupleId.ip_blkid.bi_hi<<16)+\ + tag->tupleId.ip_blkid.bi_lo),\ + tag->tupleId.ip_posid, \ + lock_types[type]) #define LOCK_DUMP(where,lock,type)\ - if ((lockDebug >= 1) && (lock->tag.relId >= lock_debug_oid_min)) \ - LOCK_DUMP_AUX(where,lock,type) + if ((lockDebug >= 1) && (lock->tag.relId >= lock_debug_oid_min)) \ + LOCK_DUMP_AUX(where,lock,type) #define LOCK_DUMP_AUX(where,lock,type)\ - elog(DEBUG, \ - "%s: pid (%d) rel (%d) dbid (%d) tid (%d,%d) nHolding (%d) "\ - "holders (%d,%d,%d,%d,%d) type (%s)",where, \ - getpid(),\ - lock->tag.relId, lock->tag.dbId, \ - ((lock->tag.tupleId.ip_blkid.bi_hi<<16)+\ - lock->tag.tupleId.ip_blkid.bi_lo),\ - lock->tag.tupleId.ip_posid, \ - lock->nHolding,\ - lock->holders[1],\ - lock->holders[2],\ - lock->holders[3],\ - lock->holders[4],\ - lock->holders[5],\ - lock_types[type]) + elog(DEBUG, \ + "%s: pid (%d) rel (%d) dbid (%d) tid (%d,%d) nHolding (%d) "\ + "holders (%d,%d,%d,%d,%d) type (%s)",where, \ + getpid(),\ + lock->tag.relId, lock->tag.dbId, \ + ((lock->tag.tupleId.ip_blkid.bi_hi<<16)+\ + lock->tag.tupleId.ip_blkid.bi_lo),\ + lock->tag.tupleId.ip_posid, \ + lock->nHolding,\ + lock->holders[1],\ + lock->holders[2],\ + lock->holders[3],\ + lock->holders[4],\ + lock->holders[5],\ + lock_types[type]) #define XID_PRINT(where,xidentP)\ - if ((lockDebug >= 2) 
&& \ - (((LOCK *)MAKE_PTR(xidentP->tag.lock))->tag.relId \ - >= lock_debug_oid_min)) \ - elog(DEBUG,\ - "%s: pid (%d) xid (%d) pid (%d) lock (%x) nHolding (%d) "\ - "holders (%d,%d,%d,%d,%d)",\ - where,\ - getpid(),\ - xidentP->tag.xid,\ - xidentP->tag.pid,\ - xidentP->tag.lock,\ - xidentP->nHolding,\ - xidentP->holders[1],\ - xidentP->holders[2],\ - xidentP->holders[3],\ - xidentP->holders[4],\ - xidentP->holders[5]) - -#endif /* LOCK_MGR_DEBUG */ - -SPINLOCK LockMgrLock; /* in Shmem or created in CreateSpinlocks() */ + if ((lockDebug >= 2) && \ + (((LOCK *)MAKE_PTR(xidentP->tag.lock))->tag.relId \ + >= lock_debug_oid_min)) \ + elog(DEBUG,\ + "%s: pid (%d) xid (%d) pid (%d) lock (%x) nHolding (%d) "\ + "holders (%d,%d,%d,%d,%d)",\ + where,\ + getpid(),\ + xidentP->tag.xid,\ + xidentP->tag.pid,\ + xidentP->tag.lock,\ + xidentP->nHolding,\ + xidentP->holders[1],\ + xidentP->holders[2],\ + xidentP->holders[3],\ + xidentP->holders[4],\ + xidentP->holders[5]) + +#endif /* LOCK_MGR_DEBUG */ + +SPINLOCK LockMgrLock; /* in Shmem or created in + * CreateSpinlocks() */ /* This is to simplify/speed up some bit arithmetic */ -static MASK BITS_OFF[MAX_LOCKTYPES]; -static MASK BITS_ON[MAX_LOCKTYPES]; +static MASK BITS_OFF[MAX_LOCKTYPES]; +static MASK BITS_ON[MAX_LOCKTYPES]; /* ----------------- * XXX Want to move this to this file * ----------------- */ -static bool LockingIsDisabled; +static bool LockingIsDisabled; /* ------------------- * map from tableId to the lock table structure @@ -149,28 +151,28 @@ static LOCKTAB *AllTables[MAX_TABLES]; * no zero-th table * ------------------- */ -static int NumTables = 1; +static int NumTables = 1; /* ------------------- * InitLocks -- Init the lock module. Create a private data - * structure for constructing conflict masks. + * structure for constructing conflict masks. * ------------------- */ void InitLocks() { - int i; - int bit; - - bit = 1; - /* ------------------- - * remember 0th locktype is invalid - * ------------------- - */ - for (i=0;i<MAX_LOCKTYPES;i++,bit <<= 1) + int i; + int bit; + + bit = 1; + /* ------------------- + * remember 0th locktype is invalid + * ------------------- + */ + for (i = 0; i < MAX_LOCKTYPES; i++, bit <<= 1) { - BITS_ON[i] = bit; - BITS_OFF[i] = ~bit; + BITS_ON[i] = bit; + BITS_OFF[i] = ~bit; } } @@ -181,30 +183,30 @@ InitLocks() void LockDisable(int status) { - LockingIsDisabled = status; + LockingIsDisabled = status; } /* * LockTypeInit -- initialize the lock table's lock type - * structures + * structures * * Notes: just copying. Should only be called once. */ static void -LockTypeInit(LOCKTAB *ltable, - MASK *conflictsP, - int *prioP, - int ntypes) +LockTypeInit(LOCKTAB * ltable, + MASK * conflictsP, + int *prioP, + int ntypes) { - int i; - - ltable->ctl->nLockTypes = ntypes; - ntypes++; - for (i=0;i<ntypes;i++,prioP++,conflictsP++) + int i; + + ltable->ctl->nLockTypes = ntypes; + ntypes++; + for (i = 0; i < ntypes; i++, prioP++, conflictsP++) { - ltable->ctl->conflictTab[i] = *conflictsP; - ltable->ctl->prio[i] = *prioP; + ltable->ctl->conflictTab[i] = *conflictsP; + ltable->ctl->prio[i] = *prioP; } } @@ -212,873 +214,900 @@ LockTypeInit(LOCKTAB *ltable, * LockTabInit -- initialize a lock table structure * * Notes: - * (a) a lock table has four separate entries in the binding - * table. This is because every shared hash table and spinlock - * has its name stored in the binding table at its creation. It - * is wasteful, in this case, but not much space is involved. 
+ * (a) a lock table has four separate entries in the binding + * table. This is because every shared hash table and spinlock + * has its name stored in the binding table at its creation. It + * is wasteful, in this case, but not much space is involved. * */ LockTableId LockTabInit(char *tabName, - MASK *conflictsP, - int *prioP, - int ntypes) + MASK * conflictsP, + int *prioP, + int ntypes) { - LOCKTAB *ltable; - char *shmemName; - HASHCTL info; - int hash_flags; - bool found; - int status = TRUE; - - if (ntypes > MAX_LOCKTYPES) + LOCKTAB *ltable; + char *shmemName; + HASHCTL info; + int hash_flags; + bool found; + int status = TRUE; + + if (ntypes > MAX_LOCKTYPES) { - elog(NOTICE,"LockTabInit: too many lock types %d greater than %d", - ntypes,MAX_LOCKTYPES); - return(INVALID_TABLEID); + elog(NOTICE, "LockTabInit: too many lock types %d greater than %d", + ntypes, MAX_LOCKTYPES); + return (INVALID_TABLEID); } - - if (NumTables > MAX_TABLES) + + if (NumTables > MAX_TABLES) { - elog(NOTICE, - "LockTabInit: system limit of MAX_TABLES (%d) lock tables", - MAX_TABLES); - return(INVALID_TABLEID); + elog(NOTICE, + "LockTabInit: system limit of MAX_TABLES (%d) lock tables", + MAX_TABLES); + return (INVALID_TABLEID); } - - /* allocate a string for the binding table lookup */ - shmemName = (char *) palloc((unsigned)(strlen(tabName)+32)); - if (! shmemName) + + /* allocate a string for the binding table lookup */ + shmemName = (char *) palloc((unsigned) (strlen(tabName) + 32)); + if (!shmemName) { - elog(NOTICE,"LockTabInit: couldn't malloc string %s \n",tabName); - return(INVALID_TABLEID); + elog(NOTICE, "LockTabInit: couldn't malloc string %s \n", tabName); + return (INVALID_TABLEID); } - - /* each lock table has a non-shared header */ - ltable = (LOCKTAB *) palloc((unsigned) sizeof(LOCKTAB)); - if (! ltable) + + /* each lock table has a non-shared header */ + ltable = (LOCKTAB *) palloc((unsigned) sizeof(LOCKTAB)); + if (!ltable) { - elog(NOTICE,"LockTabInit: couldn't malloc lock table %s\n",tabName); - pfree (shmemName); - return(INVALID_TABLEID); + elog(NOTICE, "LockTabInit: couldn't malloc lock table %s\n", tabName); + pfree(shmemName); + return (INVALID_TABLEID); } - - /* ------------------------ - * find/acquire the spinlock for the table - * ------------------------ - */ - SpinAcquire(LockMgrLock); - - - /* ----------------------- - * allocate a control structure from shared memory or attach to it - * if it already exists. - * ----------------------- - */ - sprintf(shmemName,"%s (ctl)",tabName); - ltable->ctl = (LOCKCTL *) - ShmemInitStruct(shmemName,(unsigned)sizeof(LOCKCTL),&found); - - if (! ltable->ctl) + + /* ------------------------ + * find/acquire the spinlock for the table + * ------------------------ + */ + SpinAcquire(LockMgrLock); + + + /* ----------------------- + * allocate a control structure from shared memory or attach to it + * if it already exists. + * ----------------------- + */ + sprintf(shmemName, "%s (ctl)", tabName); + ltable->ctl = (LOCKCTL *) + ShmemInitStruct(shmemName, (unsigned) sizeof(LOCKCTL), &found); + + if (!ltable->ctl) { - elog(FATAL,"LockTabInit: couldn't initialize %s",tabName); - status = FALSE; + elog(FATAL, "LockTabInit: couldn't initialize %s", tabName); + status = FALSE; } - - /* ---------------- - * we're first - initialize - * ---------------- - */ - if (! 
found) + + /* ---------------- + * we're first - initialize + * ---------------- + */ + if (!found) { - memset(ltable->ctl, 0, sizeof(LOCKCTL)); - ltable->ctl->masterLock = LockMgrLock; - ltable->ctl->tableId = NumTables; + memset(ltable->ctl, 0, sizeof(LOCKCTL)); + ltable->ctl->masterLock = LockMgrLock; + ltable->ctl->tableId = NumTables; } - - /* -------------------- - * other modules refer to the lock table by a tableId - * -------------------- - */ - AllTables[NumTables] = ltable; - NumTables++; - Assert(NumTables <= MAX_TABLES); - - /* ---------------------- - * allocate a hash table for the lock tags. This is used - * to find the different locks. - * ---------------------- - */ - info.keysize = sizeof(LOCKTAG); - info.datasize = sizeof(LOCK); - info.hash = tag_hash; - hash_flags = (HASH_ELEM | HASH_FUNCTION); - - sprintf(shmemName,"%s (lock hash)",tabName); - ltable->lockHash = (HTAB *) ShmemInitHash(shmemName, - INIT_TABLE_SIZE,MAX_TABLE_SIZE, - &info,hash_flags); - - Assert( ltable->lockHash->hash == tag_hash); - if (! ltable->lockHash) + + /* -------------------- + * other modules refer to the lock table by a tableId + * -------------------- + */ + AllTables[NumTables] = ltable; + NumTables++; + Assert(NumTables <= MAX_TABLES); + + /* ---------------------- + * allocate a hash table for the lock tags. This is used + * to find the different locks. + * ---------------------- + */ + info.keysize = sizeof(LOCKTAG); + info.datasize = sizeof(LOCK); + info.hash = tag_hash; + hash_flags = (HASH_ELEM | HASH_FUNCTION); + + sprintf(shmemName, "%s (lock hash)", tabName); + ltable->lockHash = (HTAB *) ShmemInitHash(shmemName, + INIT_TABLE_SIZE, MAX_TABLE_SIZE, + &info, hash_flags); + + Assert(ltable->lockHash->hash == tag_hash); + if (!ltable->lockHash) { - elog(FATAL,"LockTabInit: couldn't initialize %s",tabName); - status = FALSE; + elog(FATAL, "LockTabInit: couldn't initialize %s", tabName); + status = FALSE; } - - /* ------------------------- - * allocate an xid table. When different transactions hold - * the same lock, additional information must be saved (locks per tx). - * ------------------------- - */ - info.keysize = XID_TAGSIZE; - info.datasize = sizeof(XIDLookupEnt); - info.hash = tag_hash; - hash_flags = (HASH_ELEM | HASH_FUNCTION); - - sprintf(shmemName,"%s (xid hash)",tabName); - ltable->xidHash = (HTAB *) ShmemInitHash(shmemName, - INIT_TABLE_SIZE,MAX_TABLE_SIZE, - &info,hash_flags); - - if (! ltable->xidHash) + + /* ------------------------- + * allocate an xid table. When different transactions hold + * the same lock, additional information must be saved (locks per tx). 
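The xid table being allocated here keys each entry on the pair (transaction id, lock offset), so two transactions holding the same lock get independent hold counts. A small sketch of that keying idea, using a flat array as a stand-in for the shared hash table; XidEnt and find_or_add are illustrative names, not backend API:

#include <stdio.h>
#include <string.h>

typedef struct
{
    long xid;       /* transaction id                             */
    long lock;      /* offset of the LOCK entry in shared memory  */
    int  nHolding;  /* how many holds this xact has on this lock  */
} XidEnt;

static XidEnt table[16];
static int    nused = 0;

/* find the entry for (xid, lock), creating a zeroed one if missing */
static XidEnt *
find_or_add(long xid, long lock)
{
    int i;

    for (i = 0; i < nused; i++)
        if (table[i].xid == xid && table[i].lock == lock)
            return &table[i];
    memset(&table[nused], 0, sizeof(XidEnt));
    table[nused].xid = xid;
    table[nused].lock = lock;
    return &table[nused++];
}

int
main(void)
{
    /* two transactions take the same lock: two distinct entries */
    find_or_add(100, 4096)->nHolding++;
    find_or_add(200, 4096)->nHolding++;
    find_or_add(100, 4096)->nHolding++;     /* re-grant to xact 100 */

    printf("xact 100 holds %d, xact 200 holds %d\n",
           find_or_add(100, 4096)->nHolding,
           find_or_add(200, 4096)->nHolding);       /* 2 and 1 */
    return 0;
}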
+ * ------------------------- + */ + info.keysize = XID_TAGSIZE; + info.datasize = sizeof(XIDLookupEnt); + info.hash = tag_hash; + hash_flags = (HASH_ELEM | HASH_FUNCTION); + + sprintf(shmemName, "%s (xid hash)", tabName); + ltable->xidHash = (HTAB *) ShmemInitHash(shmemName, + INIT_TABLE_SIZE, MAX_TABLE_SIZE, + &info, hash_flags); + + if (!ltable->xidHash) { - elog(FATAL,"LockTabInit: couldn't initialize %s",tabName); - status = FALSE; + elog(FATAL, "LockTabInit: couldn't initialize %s", tabName); + status = FALSE; } - - /* init ctl data structures */ - LockTypeInit(ltable, conflictsP, prioP, ntypes); - - SpinRelease(LockMgrLock); - - pfree (shmemName); - - if (status) - return(ltable->ctl->tableId); - else - return(INVALID_TABLEID); + + /* init ctl data structures */ + LockTypeInit(ltable, conflictsP, prioP, ntypes); + + SpinRelease(LockMgrLock); + + pfree(shmemName); + + if (status) + return (ltable->ctl->tableId); + else + return (INVALID_TABLEID); } /* * LockTabRename -- allocate another tableId to the same - * lock table. + * lock table. * * NOTES: Both the lock module and the lock chain (lchain.c) - * module use table id's to distinguish between different - * kinds of locks. Short term and long term locks look - * the same to the lock table, but are handled differently - * by the lock chain manager. This function allows the - * client to use different tableIds when acquiring/releasing - * short term and long term locks. + * module use table id's to distinguish between different + * kinds of locks. Short term and long term locks look + * the same to the lock table, but are handled differently + * by the lock chain manager. This function allows the + * client to use different tableIds when acquiring/releasing + * short term and long term locks. */ #ifdef NOT_USED LockTableId LockTabRename(LockTableId tableId) { - LockTableId newTableId; - - if (NumTables >= MAX_TABLES) + LockTableId newTableId; + + if (NumTables >= MAX_TABLES) { - return(INVALID_TABLEID); + return (INVALID_TABLEID); } - if (AllTables[tableId] == INVALID_TABLEID) + if (AllTables[tableId] == INVALID_TABLEID) { - return(INVALID_TABLEID); + return (INVALID_TABLEID); } - - /* other modules refer to the lock table by a tableId */ - newTableId = NumTables; - NumTables++; - - AllTables[newTableId] = AllTables[tableId]; - return(newTableId); + + /* other modules refer to the lock table by a tableId */ + newTableId = NumTables; + NumTables++; + + AllTables[newTableId] = AllTables[tableId]; + return (newTableId); } + #endif /* * LockAcquire -- Check for lock conflicts, sleep if conflict found, - * set lock if/when no conflicts. + * set lock if/when no conflicts. * * Returns: TRUE if parameters are correct, FALSE otherwise. * * Side Effects: The lock is always acquired. No way to abort - * a lock acquisition other than aborting the transaction. - * Lock is recorded in the lkchain. + * a lock acquisition other than aborting the transaction. + * Lock is recorded in the lkchain. #ifdef USER_LOCKS - * Note on User Locks: - * User locks are handled totally on the application side as - * long term cooperative locks which extend beyond the normal - * transaction boundaries. Their purpose is to indicate to an - * application that someone is `working' on an item. So it is - * possible to put an user lock on a tuple's oid, retrieve the - * tuple, work on it for an hour and then update it and remove - * the lock. 
While the lock is active other clients can still - * read and write the tuple but they can be aware that it has - * been locked at the application level by someone. - * User locks use lock tags made of an uint16 and an uint32, for - * example 0 and a tuple oid, or any other arbitrary pair of - * numbers following a convention established by the application. - * In this sense tags don't refer to tuples or database entities. - * User locks and normal locks are completely orthogonal and - * they don't interfere with each other, so it is possible - * to acquire a normal lock on an user-locked tuple or user-lock - * a tuple for which a normal write lock already exists. - * User locks are always non blocking, therefore they are never - * acquired if already held by another process. They must be - * released explicitly by the application but they are released - * automatically when a backend terminates. - * They are indicated by a dummy tableId 0 which doesn't have - * any table allocated but uses the normal lock table, and are - * distinguished from normal locks for the following differences: + * Note on User Locks: + * User locks are handled totally on the application side as + * long term cooperative locks which extend beyond the normal + * transaction boundaries. Their purpose is to indicate to an + * application that someone is `working' on an item. So it is + * possible to put an user lock on a tuple's oid, retrieve the + * tuple, work on it for an hour and then update it and remove + * the lock. While the lock is active other clients can still + * read and write the tuple but they can be aware that it has + * been locked at the application level by someone. + * User locks use lock tags made of an uint16 and an uint32, for + * example 0 and a tuple oid, or any other arbitrary pair of + * numbers following a convention established by the application. + * In this sense tags don't refer to tuples or database entities. + * User locks and normal locks are completely orthogonal and + * they don't interfere with each other, so it is possible + * to acquire a normal lock on an user-locked tuple or user-lock + * a tuple for which a normal write lock already exists. + * User locks are always non blocking, therefore they are never + * acquired if already held by another process. They must be + * released explicitly by the application but they are released + * automatically when a backend terminates. + * They are indicated by a dummy tableId 0 which doesn't have + * any table allocated but uses the normal lock table, and are + * distinguished from normal locks for the following differences: * - * normal lock user lock + * normal lock user lock * - * tableId 1 0 - * tag.relId rel oid 0 - * tag.ItemPointerData.ip_blkid block id lock id2 - * tag.ItemPointerData.ip_posid tuple offset lock id1 - * xid.pid 0 backend pid - * xid.xid current xid 0 - * persistence transaction user or backend + * tableId 1 0 + * tag.relId rel oid 0 + * tag.ItemPointerData.ip_blkid block id lock id2 + * tag.ItemPointerData.ip_posid tuple offset lock id1 + * xid.pid 0 backend pid + * xid.xid current xid 0 + * persistence transaction user or backend * - * The lockt parameter can have the same values for normal locks - * although probably only WRITE_LOCK can have some practical use. + * The lockt parameter can have the same values for normal locks + * although probably only WRITE_LOCK can have some practical use. 
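Per the field table above, a user lock smuggles two application-chosen numbers into the tuple-pointer part of the tag and moves identity from transaction id to backend pid. A sketch of packing and unpacking such a tag; the struct layout below is a simplified stand-in for ItemPointerData/LOCKTAG, not the real definitions:

#include <stdio.h>

/* illustrative stand-ins for the BlockId/LOCKTAG fields */
typedef struct
{
    unsigned short bi_hi;       /* high 16 bits of block id / lock id2 */
    unsigned short bi_lo;       /* low 16 bits                         */
} BlockIdSketch;

typedef struct
{
    long           relId;       /* 0 marks a user lock                 */
    long           dbId;
    BlockIdSketch  blkid;       /* user lock: id2                      */
    unsigned short posid;       /* user lock: id1                      */
} LockTagSketch;

int
main(void)
{
    unsigned int   id2 = 70000;     /* e.g. a tuple oid */
    unsigned short id1 = 0;
    LockTagSketch  tag = {0};

    tag.relId = 0;                  /* user lock, no relation involved */
    tag.blkid.bi_hi = (unsigned short) (id2 >> 16);
    tag.blkid.bi_lo = (unsigned short) (id2 & 0xFFFF);
    tag.posid = id1;

    /* reassemble id2 the way the debug macros in this file do */
    printf("id2 = %u\n",
           (unsigned) ((tag.blkid.bi_hi << 16) + tag.blkid.bi_lo));
    return 0;
}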
* - * DZ - 4 Oct 1996 + * DZ - 4 Oct 1996 #endif */ bool -LockAcquire(LockTableId tableId, LOCKTAG *lockName, LOCKT lockt) +LockAcquire(LockTableId tableId, LOCKTAG * lockName, LOCKT lockt) { - XIDLookupEnt *result,item; - HTAB *xidTable; - bool found; - LOCK *lock = NULL; - SPINLOCK masterLock; - LOCKTAB *ltable; - int status; - TransactionId myXid; - + XIDLookupEnt *result, + item; + HTAB *xidTable; + bool found; + LOCK *lock = NULL; + SPINLOCK masterLock; + LOCKTAB *ltable; + int status; + TransactionId myXid; + #ifdef USER_LOCKS - int is_user_lock; + int is_user_lock; - is_user_lock = (tableId == 0); - if (is_user_lock) { - tableId = 1; + is_user_lock = (tableId == 0); + if (is_user_lock) + { + tableId = 1; #ifdef USER_LOCKS_DEBUG - elog(NOTICE,"LockAcquire: user lock tag [%u,%u] %d", - lockName->tupleId.ip_posid, - ((lockName->tupleId.ip_blkid.bi_hi<<16)+ - lockName->tupleId.ip_blkid.bi_lo), - lockt); + elog(NOTICE, "LockAcquire: user lock tag [%u,%u] %d", + lockName->tupleId.ip_posid, + ((lockName->tupleId.ip_blkid.bi_hi << 16) + + lockName->tupleId.ip_blkid.bi_lo), + lockt); #endif - } + } #endif - Assert (tableId < NumTables); - ltable = AllTables[tableId]; - if (!ltable) + Assert(tableId < NumTables); + ltable = AllTables[tableId]; + if (!ltable) { - elog(NOTICE,"LockAcquire: bad lock table %d",tableId); - return (FALSE); + elog(NOTICE, "LockAcquire: bad lock table %d", tableId); + return (FALSE); } - - if (LockingIsDisabled) + + if (LockingIsDisabled) { - return(TRUE); + return (TRUE); } - - LOCK_PRINT("Acquire",lockName,lockt); - masterLock = ltable->ctl->masterLock; - - SpinAcquire(masterLock); - - Assert( ltable->lockHash->hash == tag_hash); - lock = (LOCK *)hash_search(ltable->lockHash,(Pointer)lockName,HASH_ENTER,&found); - - if (! lock) + + LOCK_PRINT("Acquire", lockName, lockt); + masterLock = ltable->ctl->masterLock; + + SpinAcquire(masterLock); + + Assert(ltable->lockHash->hash == tag_hash); + lock = (LOCK *) hash_search(ltable->lockHash, (Pointer) lockName, HASH_ENTER, &found); + + if (!lock) { - SpinRelease(masterLock); - elog(FATAL,"LockAcquire: lock table %d is corrupted",tableId); - return(FALSE); + SpinRelease(masterLock); + elog(FATAL, "LockAcquire: lock table %d is corrupted", tableId); + return (FALSE); } - - /* -------------------- - * if there was nothing else there, complete initialization - * -------------------- - */ - if (! found) + + /* -------------------- + * if there was nothing else there, complete initialization + * -------------------- + */ + if (!found) { - lock->mask = 0; - ProcQueueInit(&(lock->waitProcs)); - memset((char *)lock->holders, 0, sizeof(int)*MAX_LOCKTYPES); - memset((char *)lock->activeHolders, 0, sizeof(int)*MAX_LOCKTYPES); - lock->nHolding = 0; - lock->nActive = 0; - - Assert(BlockIdEquals(&(lock->tag.tupleId.ip_blkid), - &(lockName->tupleId.ip_blkid))); - + lock->mask = 0; + ProcQueueInit(&(lock->waitProcs)); + memset((char *) lock->holders, 0, sizeof(int) * MAX_LOCKTYPES); + memset((char *) lock->activeHolders, 0, sizeof(int) * MAX_LOCKTYPES); + lock->nHolding = 0; + lock->nActive = 0; + + Assert(BlockIdEquals(&(lock->tag.tupleId.ip_blkid), + &(lockName->tupleId.ip_blkid))); + } - - /* ------------------ - * add an element to the lock queue so that we can clear the - * locks at end of transaction. 
- * ------------------ - */ - xidTable = ltable->xidHash; - myXid = GetCurrentTransactionId(); - - /* ------------------ - * Zero out all of the tag bytes (this clears the padding bytes for long - * word alignment and ensures hashing consistency). - * ------------------ - */ - memset(&item, 0, XID_TAGSIZE); - TransactionIdStore(myXid, &item.tag.xid); - item.tag.lock = MAKE_OFFSET(lock); + + /* ------------------ + * add an element to the lock queue so that we can clear the + * locks at end of transaction. + * ------------------ + */ + xidTable = ltable->xidHash; + myXid = GetCurrentTransactionId(); + + /* ------------------ + * Zero out all of the tag bytes (this clears the padding bytes for long + * word alignment and ensures hashing consistency). + * ------------------ + */ + memset(&item, 0, XID_TAGSIZE); + TransactionIdStore(myXid, &item.tag.xid); + item.tag.lock = MAKE_OFFSET(lock); #if 0 - item.tag.pid = MyPid; + item.tag.pid = MyPid; #endif - + #ifdef USER_LOCKS - if (is_user_lock) { - item.tag.pid = getpid(); - item.tag.xid = myXid = 0; + if (is_user_lock) + { + item.tag.pid = getpid(); + item.tag.xid = myXid = 0; #ifdef USER_LOCKS_DEBUG - elog(NOTICE,"LockAcquire: user lock xid [%d,%d,%d]", - item.tag.lock, item.tag.pid, item.tag.xid); + elog(NOTICE, "LockAcquire: user lock xid [%d,%d,%d]", + item.tag.lock, item.tag.pid, item.tag.xid); #endif - } + } #endif - result = (XIDLookupEnt *)hash_search(xidTable, (Pointer)&item, HASH_ENTER, &found); - if (!result) + result = (XIDLookupEnt *) hash_search(xidTable, (Pointer) & item, HASH_ENTER, &found); + if (!result) { - elog(NOTICE,"LockAcquire: xid table corrupted"); - return(STATUS_ERROR); + elog(NOTICE, "LockAcquire: xid table corrupted"); + return (STATUS_ERROR); } - if (!found) + if (!found) { - XID_PRINT("LockAcquire: queueing XidEnt", result); - ProcAddLock(&result->queue); - result->nHolding = 0; - memset((char *)result->holders, 0, sizeof(int)*MAX_LOCKTYPES); + XID_PRINT("LockAcquire: queueing XidEnt", result); + ProcAddLock(&result->queue); + result->nHolding = 0; + memset((char *) result->holders, 0, sizeof(int) * MAX_LOCKTYPES); } - - /* ---------------- - * lock->nholding tells us how many processes have _tried_ to - * acquire this lock, Regardless of whether they succeeded or - * failed in doing so. - * ---------------- - */ - lock->nHolding++; - lock->holders[lockt]++; - - /* -------------------- - * If I'm the only one holding a lock, then there - * cannot be a conflict. Need to subtract one from the - * lock's count since we just bumped the count up by 1 - * above. - * -------------------- - */ - if (result->nHolding == lock->nActive) + + /* ---------------- + * lock->nholding tells us how many processes have _tried_ to + * acquire this lock, Regardless of whether they succeeded or + * failed in doing so. + * ---------------- + */ + lock->nHolding++; + lock->holders[lockt]++; + + /* -------------------- + * If I'm the only one holding a lock, then there + * cannot be a conflict. Need to subtract one from the + * lock's count since we just bumped the count up by 1 + * above. 
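The first test LockAcquire makes after bumping the counters is the cheap one: nHolding counts every attempt, granted or not, while nActive counts only granted holds, so if my granted holds equal the lock's total granted holds, nobody else is active and the lock can be granted without consulting the conflict table at all. A trivial sketch of that shortcut:

#include <stdio.h>

/* returns 1 when the requesting transaction is the only active holder,
 * so the lock can be granted without any conflict-table lookup */
static int
sole_holder_fast_path(int myActiveHolds, int lockActiveTotal)
{
    return myActiveHolds == lockActiveTotal;
}

int
main(void)
{
    /* I already hold the lock twice and nobody else holds it */
    printf("%d\n", sole_holder_fast_path(2, 2));    /* 1: grant at once */

    /* someone else holds it too: fall through to conflict resolution */
    printf("%d\n", sole_holder_fast_path(2, 3));    /* 0 */
    return 0;
}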
+ * -------------------- + */ + if (result->nHolding == lock->nActive) { - result->holders[lockt]++; - result->nHolding++; - GrantLock(lock, lockt); - SpinRelease(masterLock); - return(TRUE); + result->holders[lockt]++; + result->nHolding++; + GrantLock(lock, lockt); + SpinRelease(masterLock); + return (TRUE); } - - Assert(result->nHolding <= lock->nActive); - - status = LockResolveConflicts(ltable, lock, lockt, myXid); - - if (status == STATUS_OK) + + Assert(result->nHolding <= lock->nActive); + + status = LockResolveConflicts(ltable, lock, lockt, myXid); + + if (status == STATUS_OK) { - GrantLock(lock, lockt); + GrantLock(lock, lockt); } - else if (status == STATUS_FOUND) + else if (status == STATUS_FOUND) { #ifdef USER_LOCKS - /* - * User locks are non blocking. If we can't acquire a lock - * remove the xid entry and return FALSE without waiting. - */ - if (is_user_lock) { - if (!result->nHolding) { - SHMQueueDelete(&result->queue); - hash_search(xidTable, (Pointer)&item, HASH_REMOVE, &found); - } - lock->nHolding--; - lock->holders[lockt]--; - SpinRelease(masterLock); + + /* + * User locks are non blocking. If we can't acquire a lock remove + * the xid entry and return FALSE without waiting. + */ + if (is_user_lock) + { + if (!result->nHolding) + { + SHMQueueDelete(&result->queue); + hash_search(xidTable, (Pointer) & item, HASH_REMOVE, &found); + } + lock->nHolding--; + lock->holders[lockt]--; + SpinRelease(masterLock); #ifdef USER_LOCKS_DEBUG - elog(NOTICE,"LockAcquire: user lock failed"); + elog(NOTICE, "LockAcquire: user lock failed"); #endif - return(FALSE); - } + return (FALSE); + } #endif - status = WaitOnLock(ltable, tableId, lock, lockt); - XID_PRINT("Someone granted me the lock", result); + status = WaitOnLock(ltable, tableId, lock, lockt); + XID_PRINT("Someone granted me the lock", result); } - - SpinRelease(masterLock); - - return(status == STATUS_OK); + + SpinRelease(masterLock); + + return (status == STATUS_OK); } /* ---------------------------- * LockResolveConflicts -- test for lock conflicts * * NOTES: - * Here's what makes this complicated: one transaction's + * Here's what makes this complicated: one transaction's * locks don't conflict with one another. When many processes * hold locks, each has to subtract off the other's locks when * determining whether or not any new lock acquired conflicts with * the old ones. * - * For example, if I am already holding a WRITE_INTENT lock, - * there will not be a conflict with my own READ_LOCK. If I - * don't consider the intent lock when checking for conflicts, - * I find no conflict. + * For example, if I am already holding a WRITE_INTENT lock, + * there will not be a conflict with my own READ_LOCK. If I + * don't consider the intent lock when checking for conflicts, + * I find no conflict. * ---------------------------- */ int -LockResolveConflicts(LOCKTAB *ltable, - LOCK *lock, - LOCKT lockt, - TransactionId xid) +LockResolveConflicts(LOCKTAB * ltable, + LOCK * lock, + LOCKT lockt, + TransactionId xid) { - XIDLookupEnt *result,item; - int *myHolders; - int nLockTypes; - HTAB *xidTable; - bool found; - int bitmask; - int i,tmpMask; - - nLockTypes = ltable->ctl->nLockTypes; - xidTable = ltable->xidHash; - - /* --------------------- - * read my own statistics from the xid table. If there - * isn't an entry, then we'll just add one. - * - * Zero out the tag, this clears the padding bytes for long - * word alignment and ensures hashing consistency. 
- * ------------------ - */ - memset(&item, 0, XID_TAGSIZE); - TransactionIdStore(xid, &item.tag.xid); - item.tag.lock = MAKE_OFFSET(lock); + XIDLookupEnt *result, + item; + int *myHolders; + int nLockTypes; + HTAB *xidTable; + bool found; + int bitmask; + int i, + tmpMask; + + nLockTypes = ltable->ctl->nLockTypes; + xidTable = ltable->xidHash; + + /* --------------------- + * read my own statistics from the xid table. If there + * isn't an entry, then we'll just add one. + * + * Zero out the tag, this clears the padding bytes for long + * word alignment and ensures hashing consistency. + * ------------------ + */ + memset(&item, 0, XID_TAGSIZE); + TransactionIdStore(xid, &item.tag.xid); + item.tag.lock = MAKE_OFFSET(lock); #if 0 - item.tag.pid = pid; + item.tag.pid = pid; #endif - - if (! (result = (XIDLookupEnt *) - hash_search(xidTable, (Pointer)&item, HASH_ENTER, &found))) + + if (!(result = (XIDLookupEnt *) + hash_search(xidTable, (Pointer) & item, HASH_ENTER, &found))) { - elog(NOTICE,"LockResolveConflicts: xid table corrupted"); - return(STATUS_ERROR); + elog(NOTICE, "LockResolveConflicts: xid table corrupted"); + return (STATUS_ERROR); } - myHolders = result->holders; - - if (! found) + myHolders = result->holders; + + if (!found) { - /* --------------- - * we're not holding any type of lock yet. Clear - * the lock stats. - * --------------- - */ - memset(result->holders, 0, nLockTypes * sizeof(*(lock->holders))); - result->nHolding = 0; + /* --------------- + * we're not holding any type of lock yet. Clear + * the lock stats. + * --------------- + */ + memset(result->holders, 0, nLockTypes * sizeof(*(lock->holders))); + result->nHolding = 0; } - - /* ---------------------------- - * first check for global conflicts: If no locks conflict - * with mine, then I get the lock. - * - * Checking for conflict: lock->mask represents the types of - * currently held locks. conflictTable[lockt] has a bit - * set for each type of lock that conflicts with mine. Bitwise - * compare tells if there is a conflict. - * ---------------------------- - */ - if (! (ltable->ctl->conflictTab[lockt] & lock->mask)) + + /* ---------------------------- + * first check for global conflicts: If no locks conflict + * with mine, then I get the lock. + * + * Checking for conflict: lock->mask represents the types of + * currently held locks. conflictTable[lockt] has a bit + * set for each type of lock that conflicts with mine. Bitwise + * compare tells if there is a conflict. + * ---------------------------- + */ + if (!(ltable->ctl->conflictTab[lockt] & lock->mask)) { - - result->holders[lockt]++; - result->nHolding++; - - XID_PRINT("Conflict Resolved: updated xid entry stats", result); - - return(STATUS_OK); + + result->holders[lockt]++; + result->nHolding++; + + XID_PRINT("Conflict Resolved: updated xid entry stats", result); + + return (STATUS_OK); } - - /* ------------------------ - * Rats. Something conflicts. But it could still be my own - * lock. We have to construct a conflict mask - * that does not reflect our own locks. - * ------------------------ - */ - bitmask = 0; - tmpMask = 2; - for (i=1;i<=nLockTypes;i++, tmpMask <<= 1) + + /* ------------------------ + * Rats. Something conflicts. But it could still be my own + * lock. We have to construct a conflict mask + * that does not reflect our own locks. 
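This subtraction is the heart of LockResolveConflicts: rebuild a holders mask that counts only other processes, then retest it against the conflict table. A self-contained sketch of the same arithmetic; the toy conflict table below is illustrative, not the backend's:

#include <stdio.h>

#define NTYPES 5    /* lock types 1..NTYPES; slot 0 unused */

/* conflictTab[t] has a bit set for every type that conflicts with t */
static int
conflicts_with_others(const int conflictTab[],
                      const int activeHolders[],    /* all granted holds */
                      const int myHolders[],        /* my granted holds  */
                      int lockt)
{
    int bitmask = 0;
    int tmpMask = 2;                /* bit for type 1 is 1 << 1 */
    int i;

    /* build a mask describing only OTHER processes' holds */
    for (i = 1; i <= NTYPES; i++, tmpMask <<= 1)
        if (activeHolders[i] - myHolders[i])
            bitmask |= tmpMask;

    return (conflictTab[lockt] & bitmask) != 0;
}

int
main(void)
{
    /* toy table: type 1 (WRITE) conflicts with types 1 and 2 (READ) */
    int conflictTab[NTYPES + 1] = {0, (1 << 1) | (1 << 2), (1 << 1), 0, 0, 0};
    int active[NTYPES + 1] = {0, 0, 1, 0, 0, 0};    /* one READ granted  */
    int mine[NTYPES + 1]   = {0, 0, 1, 0, 0, 0};    /* ...and it is mine */

    /* my own READ must not block my WRITE request */
    printf("conflict = %d\n",
           conflicts_with_others(conflictTab, active, mine, 1));   /* 0 */

    active[2] = 2;              /* now someone else also holds a READ */
    printf("conflict = %d\n",
           conflicts_with_others(conflictTab, active, mine, 1));   /* 1 */
    return 0;
}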
+ * ------------------------ + */ + bitmask = 0; + tmpMask = 2; + for (i = 1; i <= nLockTypes; i++, tmpMask <<= 1) { - if (lock->activeHolders[i] - myHolders[i]) + if (lock->activeHolders[i] - myHolders[i]) { - bitmask |= tmpMask; + bitmask |= tmpMask; } } - - /* ------------------------ - * now check again for conflicts. 'bitmask' describes the types - * of locks held by other processes. If one of these - * conflicts with the kind of lock that I want, there is a - * conflict and I have to sleep. - * ------------------------ - */ - if (! (ltable->ctl->conflictTab[lockt] & bitmask)) + + /* ------------------------ + * now check again for conflicts. 'bitmask' describes the types + * of locks held by other processes. If one of these + * conflicts with the kind of lock that I want, there is a + * conflict and I have to sleep. + * ------------------------ + */ + if (!(ltable->ctl->conflictTab[lockt] & bitmask)) { - - /* no conflict. Get the lock and go on */ - - result->holders[lockt]++; - result->nHolding++; - - XID_PRINT("Conflict Resolved: updated xid entry stats", result); - - return(STATUS_OK); - + + /* no conflict. Get the lock and go on */ + + result->holders[lockt]++; + result->nHolding++; + + XID_PRINT("Conflict Resolved: updated xid entry stats", result); + + return (STATUS_OK); + } - - return(STATUS_FOUND); + + return (STATUS_FOUND); } static int -WaitOnLock(LOCKTAB *ltable, LockTableId tableId, LOCK *lock, LOCKT lockt) +WaitOnLock(LOCKTAB * ltable, LockTableId tableId, LOCK * lock, LOCKT lockt) { - PROC_QUEUE *waitQueue = &(lock->waitProcs); - - int prio = ltable->ctl->prio[lockt]; - - /* the waitqueue is ordered by priority. I insert myself - * according to the priority of the lock I am acquiring. - * - * SYNC NOTE: I am assuming that the lock table spinlock - * is sufficient synchronization for this queue. That - * will not be true if/when people can be deleted from - * the queue by a SIGINT or something. - */ - LOCK_DUMP_AUX("WaitOnLock: sleeping on lock", lock, lockt); - if (ProcSleep(waitQueue, - ltable->ctl->masterLock, - lockt, - prio, - lock) != NO_ERROR) + PROC_QUEUE *waitQueue = &(lock->waitProcs); + + int prio = ltable->ctl->prio[lockt]; + + /* + * the waitqueue is ordered by priority. I insert myself according to + * the priority of the lock I am acquiring. + * + * SYNC NOTE: I am assuming that the lock table spinlock is sufficient + * synchronization for this queue. That will not be true if/when + * people can be deleted from the queue by a SIGINT or something. + */ + LOCK_DUMP_AUX("WaitOnLock: sleeping on lock", lock, lockt); + if (ProcSleep(waitQueue, + ltable->ctl->masterLock, + lockt, + prio, + lock) != NO_ERROR) { - /* ------------------- - * This could have happend as a result of a deadlock, see HandleDeadLock() - * Decrement the lock nHolding and holders fields as we are no longer - * waiting on this lock. - * ------------------- - */ - lock->nHolding--; - lock->holders[lockt]--; - LOCK_DUMP_AUX("WaitOnLock: aborting on lock", lock, lockt); - SpinRelease(ltable->ctl->masterLock); - elog(WARN,"WaitOnLock: error on wakeup - Aborting this transaction"); + /* ------------------- + * This could have happend as a result of a deadlock, see HandleDeadLock() + * Decrement the lock nHolding and holders fields as we are no longer + * waiting on this lock. 
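WaitOnLock, shown in this hunk, inserts the sleeper into the lock's wait queue ordered by the priority of the requested lock type, so higher-priority requests (writes, in multi.c's priority table) overtake queued readers while requests of equal priority stay FIFO. A sketch of priority insertion into a linked queue; Waiter and the list layout are illustrative stand-ins for the shared PROC_QUEUE:

#include <stdio.h>
#include <stdlib.h>

typedef struct Waiter
{
    int            pid;
    int            prio;        /* priority of the requested lock type */
    struct Waiter *next;
} Waiter;

/* insert after all waiters of equal or higher priority */
static void
enqueue(Waiter **head, int pid, int prio)
{
    Waiter  *w = malloc(sizeof(Waiter));
    Waiter **p = head;

    w->pid = pid;
    w->prio = prio;
    while (*p && (*p)->prio >= prio)
        p = &(*p)->next;
    w->next = *p;
    *p = w;
}

int
main(void)
{
    Waiter *q = NULL, *w;

    enqueue(&q, 11, 1);         /* reader                            */
    enqueue(&q, 22, 2);         /* writer: jumps ahead of the reader */
    enqueue(&q, 33, 1);         /* reader: stays behind both         */

    for (w = q; w; w = w->next)
        printf("pid %d (prio %d)\n", w->pid, w->prio);
    /* order printed: 22, 11, 33 */
    return 0;
}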
+ * ------------------- + */ + lock->nHolding--; + lock->holders[lockt]--; + LOCK_DUMP_AUX("WaitOnLock: aborting on lock", lock, lockt); + SpinRelease(ltable->ctl->masterLock); + elog(WARN, "WaitOnLock: error on wakeup - Aborting this transaction"); } - - LOCK_DUMP_AUX("WaitOnLock: wakeup on lock", lock, lockt); - return(STATUS_OK); + + LOCK_DUMP_AUX("WaitOnLock: wakeup on lock", lock, lockt); + return (STATUS_OK); } /* * LockRelease -- look up 'lockName' in lock table 'tableId' and - * release it. + * release it. * * Side Effects: if the lock no longer conflicts with the highest - * priority waiting process, that process is granted the lock - * and awoken. (We have to grant the lock here to avoid a - * race between the waking process and any new process to - * come along and request the lock). + * priority waiting process, that process is granted the lock + * and awoken. (We have to grant the lock here to avoid a + * race between the waking process and any new process to + * come along and request the lock). */ bool -LockRelease(LockTableId tableId, LOCKTAG *lockName, LOCKT lockt) +LockRelease(LockTableId tableId, LOCKTAG * lockName, LOCKT lockt) { - LOCK *lock = NULL; - SPINLOCK masterLock; - bool found; - LOCKTAB *ltable; - XIDLookupEnt *result,item; - HTAB *xidTable; - bool wakeupNeeded = true; - + LOCK *lock = NULL; + SPINLOCK masterLock; + bool found; + LOCKTAB *ltable; + XIDLookupEnt *result, + item; + HTAB *xidTable; + bool wakeupNeeded = true; + #ifdef USER_LOCKS - int is_user_lock; + int is_user_lock; - is_user_lock = (tableId == 0); - if (is_user_lock) { - tableId = 1; + is_user_lock = (tableId == 0); + if (is_user_lock) + { + tableId = 1; #ifdef USER_LOCKS_DEBUG - elog(NOTICE,"LockRelease: user lock tag [%u,%u] %d", - lockName->tupleId.ip_posid, - ((lockName->tupleId.ip_blkid.bi_hi<<16)+ - lockName->tupleId.ip_blkid.bi_lo), - lockt); + elog(NOTICE, "LockRelease: user lock tag [%u,%u] %d", + lockName->tupleId.ip_posid, + ((lockName->tupleId.ip_blkid.bi_hi << 16) + + lockName->tupleId.ip_blkid.bi_lo), + lockt); #endif - } + } #endif - Assert (tableId < NumTables); - ltable = AllTables[tableId]; - if (!ltable) { - elog(NOTICE, "ltable is null in LockRelease"); - return (FALSE); - } - - if (LockingIsDisabled) + Assert(tableId < NumTables); + ltable = AllTables[tableId]; + if (!ltable) + { + elog(NOTICE, "ltable is null in LockRelease"); + return (FALSE); + } + + if (LockingIsDisabled) { - return(TRUE); + return (TRUE); } - - LOCK_PRINT("Release",lockName,lockt); - - masterLock = ltable->ctl->masterLock; - xidTable = ltable->xidHash; - - SpinAcquire(masterLock); - - Assert( ltable->lockHash->hash == tag_hash); - lock = (LOCK *) - hash_search(ltable->lockHash,(Pointer)lockName,HASH_FIND_SAVE,&found); - + + LOCK_PRINT("Release", lockName, lockt); + + masterLock = ltable->ctl->masterLock; + xidTable = ltable->xidHash; + + SpinAcquire(masterLock); + + Assert(ltable->lockHash->hash == tag_hash); + lock = (LOCK *) + hash_search(ltable->lockHash, (Pointer) lockName, HASH_FIND_SAVE, &found); + #ifdef USER_LOCKS - /* - * If the entry is not found hash_search returns TRUE - * instead of NULL, so we must check it explicitly. - */ - if ((is_user_lock) && (lock == (LOCK *)TRUE)) { - SpinRelease(masterLock); - elog(NOTICE,"LockRelease: there are no locks with this tag"); - return(FALSE); - } + + /* + * If the entry is not found hash_search returns TRUE instead of NULL, + * so we must check it explicitly. 
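The quirk recorded in this comment is worth a defensive pattern of its own: when the entry is missing, the lookup can hand back the value TRUE (1) rather than NULL, so a bare NULL check passes on a garbage pointer. A sketch with a stand-in lookup function; the sentinel behavior is modeled on the comment above, not taken from the hash code itself:

#include <stdio.h>

#define TRUE_SENTINEL ((void *) 1)  /* what the quirky lookup yields on a miss */

/* stand-in lookup: returns the entry, or TRUE_SENTINEL when not found */
static void *
lookup(int key)
{
    static int entry = 42;

    return (key == 7) ? (void *) &entry : TRUE_SENTINEL;
}

static int
release(int key)
{
    void *lock = lookup(key);

    /* a NULL test alone is not enough; the miss sentinel is non-NULL */
    if (lock == NULL || lock == TRUE_SENTINEL)
    {
        printf("no lock with this tag\n");
        return 0;
    }
    printf("found entry %d\n", *(int *) lock);
    return 1;
}

int
main(void)
{
    release(7);     /* found entry 42         */
    release(8);     /* no lock with this tag  */
    return 0;
}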
+ */ + if ((is_user_lock) && (lock == (LOCK *) TRUE)) + { + SpinRelease(masterLock); + elog(NOTICE, "LockRelease: there are no locks with this tag"); + return (FALSE); + } #endif - /* let the caller print its own error message, too. - * Do not elog(WARN). - */ - if (! lock) + /* + * let the caller print its own error message, too. Do not elog(WARN). + */ + if (!lock) { - SpinRelease(masterLock); - elog(NOTICE,"LockRelease: locktable corrupted"); - return(FALSE); + SpinRelease(masterLock); + elog(NOTICE, "LockRelease: locktable corrupted"); + return (FALSE); } - - if (! found) + + if (!found) { - SpinRelease(masterLock); - elog(NOTICE,"LockRelease: locktable lookup failed, no lock"); - return(FALSE); + SpinRelease(masterLock); + elog(NOTICE, "LockRelease: locktable lookup failed, no lock"); + return (FALSE); } - - Assert(lock->nHolding > 0); - + + Assert(lock->nHolding > 0); + #ifdef USER_LOCKS - /* - * If this is an user lock it can be removed only after - * checking that it was acquired by the current process, - * so this code is skipped and executed later. - */ - if (!is_user_lock) { -#endif - /* - * fix the general lock stats - */ - lock->nHolding--; - lock->holders[lockt]--; - lock->nActive--; - lock->activeHolders[lockt]--; - - Assert(lock->nActive >= 0); - - if (! lock->nHolding) + + /* + * If this is an user lock it can be removed only after checking that + * it was acquired by the current process, so this code is skipped and + * executed later. + */ + if (!is_user_lock) { - /* ------------------ - * if there's no one waiting in the queue, - * we just released the last lock. - * Delete it from the lock table. - * ------------------ - */ - Assert( ltable->lockHash->hash == tag_hash); - lock = (LOCK *) hash_search(ltable->lockHash, - (Pointer) &(lock->tag), - HASH_REMOVE_SAVED, - &found); - Assert(lock && found); - wakeupNeeded = false; - } +#endif + + /* + * fix the general lock stats + */ + lock->nHolding--; + lock->holders[lockt]--; + lock->nActive--; + lock->activeHolders[lockt]--; + + Assert(lock->nActive >= 0); + + if (!lock->nHolding) + { + /* ------------------ + * if there's no one waiting in the queue, + * we just released the last lock. + * Delete it from the lock table. + * ------------------ + */ + Assert(ltable->lockHash->hash == tag_hash); + lock = (LOCK *) hash_search(ltable->lockHash, + (Pointer) & (lock->tag), + HASH_REMOVE_SAVED, + &found); + Assert(lock && found); + wakeupNeeded = false; + } #ifdef USER_LOCKS - } + } #endif - - /* ------------------ - * Zero out all of the tag bytes (this clears the padding bytes for long - * word alignment and ensures hashing consistency). - * ------------------ - */ - memset(&item, 0, XID_TAGSIZE); - - TransactionIdStore(GetCurrentTransactionId(), &item.tag.xid); - item.tag.lock = MAKE_OFFSET(lock); + + /* ------------------ + * Zero out all of the tag bytes (this clears the padding bytes for long + * word alignment and ensures hashing consistency). 
+ * ------------------ + */ + memset(&item, 0, XID_TAGSIZE); + + TransactionIdStore(GetCurrentTransactionId(), &item.tag.xid); + item.tag.lock = MAKE_OFFSET(lock); #if 0 - item.tag.pid = MyPid; + item.tag.pid = MyPid; #endif - + #ifdef USER_LOCKS - if (is_user_lock) { - item.tag.pid = getpid(); - item.tag.xid = 0; + if (is_user_lock) + { + item.tag.pid = getpid(); + item.tag.xid = 0; #ifdef USER_LOCKS_DEBUG - elog(NOTICE,"LockRelease: user lock xid [%d,%d,%d]", - item.tag.lock, item.tag.pid, item.tag.xid); + elog(NOTICE, "LockRelease: user lock xid [%d,%d,%d]", + item.tag.lock, item.tag.pid, item.tag.xid); #endif - } + } #endif - if (! ( result = (XIDLookupEnt *) hash_search(xidTable, - (Pointer)&item, - HASH_FIND_SAVE, - &found) ) - || !found) + if (!(result = (XIDLookupEnt *) hash_search(xidTable, + (Pointer) & item, + HASH_FIND_SAVE, + &found)) + || !found) { - SpinRelease(masterLock); + SpinRelease(masterLock); #ifdef USER_LOCKS - if ((is_user_lock) && (result)) { - elog(NOTICE,"LockRelease: you don't have a lock on this tag"); - } else { - elog(NOTICE,"LockRelease: find xid, table corrupted"); - } + if ((is_user_lock) && (result)) + { + elog(NOTICE, "LockRelease: you don't have a lock on this tag"); + } + else + { + elog(NOTICE, "LockRelease: find xid, table corrupted"); + } #else - elog(NOTICE,"LockReplace: xid table corrupted"); + elog(NOTICE, "LockReplace: xid table corrupted"); #endif - return(FALSE); + return (FALSE); } - /* - * now check to see if I have any private locks. If I do, - * decrement the counts associated with them. - */ - result->holders[lockt]--; - result->nHolding--; - - XID_PRINT("LockRelease updated xid stats", result); - - /* - * If this was my last hold on this lock, delete my entry - * in the XID table. - */ - if (! result->nHolding) + + /* + * now check to see if I have any private locks. If I do, decrement + * the counts associated with them. + */ + result->holders[lockt]--; + result->nHolding--; + + XID_PRINT("LockRelease updated xid stats", result); + + /* + * If this was my last hold on this lock, delete my entry in the XID + * table. + */ + if (!result->nHolding) { #ifdef USER_LOCKS - if (result->queue.prev == INVALID_OFFSET) { - elog(NOTICE,"LockRelease: xid.prev == INVALID_OFFSET"); - } - if (result->queue.next == INVALID_OFFSET) { - elog(NOTICE,"LockRelease: xid.next == INVALID_OFFSET"); - } + if (result->queue.prev == INVALID_OFFSET) + { + elog(NOTICE, "LockRelease: xid.prev == INVALID_OFFSET"); + } + if (result->queue.next == INVALID_OFFSET) + { + elog(NOTICE, "LockRelease: xid.next == INVALID_OFFSET"); + } #endif - if (result->queue.next != INVALID_OFFSET) - SHMQueueDelete(&result->queue); - if (! (result = (XIDLookupEnt *) - hash_search(xidTable, (Pointer)&item, HASH_REMOVE_SAVED, &found)) || - ! found) + if (result->queue.next != INVALID_OFFSET) + SHMQueueDelete(&result->queue); + if (!(result = (XIDLookupEnt *) + hash_search(xidTable, (Pointer) & item, HASH_REMOVE_SAVED, &found)) || + !found) { - SpinRelease(masterLock); + SpinRelease(masterLock); #ifdef USER_LOCKS - elog(NOTICE,"LockRelease: remove xid, table corrupted"); + elog(NOTICE, "LockRelease: remove xid, table corrupted"); #else - elog(NOTICE,"LockReplace: xid table corrupted"); + elog(NOTICE, "LockReplace: xid table corrupted"); #endif - return(FALSE); + return (FALSE); } } - + #ifdef USER_LOCKS - /* - * If this is an user lock remove it now, after the - * corresponding xid entry has been found and deleted. 
- */ - if (is_user_lock) { - /* - * fix the general lock stats - */ - lock->nHolding--; - lock->holders[lockt]--; - lock->nActive--; - lock->activeHolders[lockt]--; - - Assert(lock->nActive >= 0); - - if (! lock->nHolding) + + /* + * If this is an user lock remove it now, after the corresponding xid + * entry has been found and deleted. + */ + if (is_user_lock) { - /* ------------------ - * if there's no one waiting in the queue, - * we just released the last lock. - * Delete it from the lock table. - * ------------------ - */ - Assert( ltable->lockHash->hash == tag_hash); - lock = (LOCK *) hash_search(ltable->lockHash, - (Pointer) &(lock->tag), - HASH_REMOVE, - &found); - Assert(lock && found); - wakeupNeeded = false; + + /* + * fix the general lock stats + */ + lock->nHolding--; + lock->holders[lockt]--; + lock->nActive--; + lock->activeHolders[lockt]--; + + Assert(lock->nActive >= 0); + + if (!lock->nHolding) + { + /* ------------------ + * if there's no one waiting in the queue, + * we just released the last lock. + * Delete it from the lock table. + * ------------------ + */ + Assert(ltable->lockHash->hash == tag_hash); + lock = (LOCK *) hash_search(ltable->lockHash, + (Pointer) & (lock->tag), + HASH_REMOVE, + &found); + Assert(lock && found); + wakeupNeeded = false; + } } - } #endif - /* -------------------------- - * If there are still active locks of the type I just released, no one - * should be woken up. Whoever is asleep will still conflict - * with the remaining locks. - * -------------------------- - */ - if (! (lock->activeHolders[lockt])) + /* -------------------------- + * If there are still active locks of the type I just released, no one + * should be woken up. Whoever is asleep will still conflict + * with the remaining locks. + * -------------------------- + */ + if (!(lock->activeHolders[lockt])) { - /* change the conflict mask. No more of this lock type. */ - lock->mask &= BITS_OFF[lockt]; + /* change the conflict mask. No more of this lock type. */ + lock->mask &= BITS_OFF[lockt]; } - - if (wakeupNeeded) + + if (wakeupNeeded) { - /* -------------------------- - * Wake the first waiting process and grant him the lock if it - * doesn't conflict. The woken process must record the lock - * himself. - * -------------------------- - */ - ProcLockWakeup(&(lock->waitProcs), (char *) ltable, (char *) lock); + /* -------------------------- + * Wake the first waiting process and grant him the lock if it + * doesn't conflict. The woken process must record the lock + * himself. + * -------------------------- + */ + ProcLockWakeup(&(lock->waitProcs), (char *) ltable, (char *) lock); } - - SpinRelease(masterLock); - return(TRUE); + + SpinRelease(masterLock); + return (TRUE); } /* * GrantLock -- udpate the lock data structure to show - * the new lock holder. + * the new lock holder. */ void -GrantLock(LOCK *lock, LOCKT lockt) +GrantLock(LOCK * lock, LOCKT lockt) { - lock->nActive++; - lock->activeHolders[lockt]++; - lock->mask |= BITS_ON[lockt]; + lock->nActive++; + lock->activeHolders[lockt]++; + lock->mask |= BITS_ON[lockt]; } #ifdef USER_LOCKS @@ -1086,265 +1115,281 @@ GrantLock(LOCK *lock, LOCKT lockt) * LockReleaseAll -- Release all locks in a process lock queue. * * Note: This code is a little complicated by the presence in the - * same queue of user locks which can't be removed from the - * normal lock queue at the end of a transaction. They must - * however be removed when the backend exits. 
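Because of this mixing, LockReleaseAll has to walk one queue containing two kinds of entries and release only the kind it was asked about: transaction locks (xid set, pid zero) at end of transaction, user locks (pid set, xid zero) at backend exit. A sketch of that filter; the field names are illustrative:

#include <stdio.h>

typedef struct
{
    int pid;        /* nonzero only for user locks        */
    int xid;        /* nonzero only for transaction locks */
} QTagSketch;

/* 1 if this queue entry should be released in this pass */
static int
should_release(const QTagSketch *t, int releasing_user_locks, int my_pid)
{
    if (releasing_user_locks)
        return t->pid == my_pid && t->xid == 0;     /* my user locks only */
    return t->pid == 0 && t->xid != 0;              /* normal locks only  */
}

int
main(void)
{
    QTagSketch entries[] = {{0, 500}, {1234, 0}, {0, 501}};
    int i;

    for (i = 0; i < 3; i++)
        printf("entry %d: xact-pass=%d exit-pass=%d\n", i,
               should_release(&entries[i], 0, 1234),
               should_release(&entries[i], 1, 1234));
    return 0;
}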
- * A dummy tableId 0 is used to indicate that we are releasing - * the user locks, from the code added to ProcKill(). + * same queue of user locks which can't be removed from the + * normal lock queue at the end of a transaction. They must + * however be removed when the backend exits. + * A dummy tableId 0 is used to indicate that we are releasing + * the user locks, from the code added to ProcKill(). */ #endif bool -LockReleaseAll(LockTableId tableId, SHM_QUEUE *lockQueue) +LockReleaseAll(LockTableId tableId, SHM_QUEUE * lockQueue) { - PROC_QUEUE *waitQueue; - int done; - XIDLookupEnt *xidLook = NULL; - XIDLookupEnt *tmp = NULL; - SHMEM_OFFSET end = MAKE_OFFSET(lockQueue); - SPINLOCK masterLock; - LOCKTAB *ltable; - int i,nLockTypes; - LOCK *lock; - bool found; - + PROC_QUEUE *waitQueue; + int done; + XIDLookupEnt *xidLook = NULL; + XIDLookupEnt *tmp = NULL; + SHMEM_OFFSET end = MAKE_OFFSET(lockQueue); + SPINLOCK masterLock; + LOCKTAB *ltable; + int i, + nLockTypes; + LOCK *lock; + bool found; + #ifdef USER_LOCKS - int is_user_lock_table, my_pid, count, nskip; + int is_user_lock_table, + my_pid, + count, + nskip; - is_user_lock_table = (tableId == 0); - my_pid = getpid(); + is_user_lock_table = (tableId == 0); + my_pid = getpid(); #ifdef USER_LOCKS_DEBUG - elog(NOTICE,"LockReleaseAll: tableId=%d, pid=%d", tableId, my_pid); + elog(NOTICE, "LockReleaseAll: tableId=%d, pid=%d", tableId, my_pid); #endif - if (is_user_lock_table) { - tableId = 1; - } + if (is_user_lock_table) + { + tableId = 1; + } #endif - Assert (tableId < NumTables); - ltable = AllTables[tableId]; - if (!ltable) - return (FALSE); - - nLockTypes = ltable->ctl->nLockTypes; - masterLock = ltable->ctl->masterLock; - - if (SHMQueueEmpty(lockQueue)) - return TRUE; - + Assert(tableId < NumTables); + ltable = AllTables[tableId]; + if (!ltable) + return (FALSE); + + nLockTypes = ltable->ctl->nLockTypes; + masterLock = ltable->ctl->masterLock; + + if (SHMQueueEmpty(lockQueue)) + return TRUE; + #ifdef USER_LOCKS - SpinAcquire(masterLock); + SpinAcquire(masterLock); #endif - SHMQueueFirst(lockQueue,(Pointer*)&xidLook,&xidLook->queue); - - XID_PRINT("LockReleaseAll", xidLook); - + SHMQueueFirst(lockQueue, (Pointer *) & xidLook, &xidLook->queue); + + XID_PRINT("LockReleaseAll", xidLook); + #ifndef USER_LOCKS - SpinAcquire(masterLock); + SpinAcquire(masterLock); #else - count = nskip = 0; + count = nskip = 0; #endif - for (;;) + for (;;) { - /* --------------------------- - * XXX Here we assume the shared memory queue is circular and - * that we know its internal structure. Should have some sort of - * macros to allow one to walk it. mer 20 July 1991 - * --------------------------- - */ - done = (xidLook->queue.next == end); - lock = (LOCK *) MAKE_PTR(xidLook->tag.lock); - - LOCK_PRINT("ReleaseAll",(&lock->tag),0); - + /* --------------------------- + * XXX Here we assume the shared memory queue is circular and + * that we know its internal structure. Should have some sort of + * macros to allow one to walk it. mer 20 July 1991 + * --------------------------- + */ + done = (xidLook->queue.next == end); + lock = (LOCK *) MAKE_PTR(xidLook->tag.lock); + + LOCK_PRINT("ReleaseAll", (&lock->tag), 0); + #ifdef USER_LOCKS - /* - * Sometimes the queue appears to be messed up. 
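The loop that follows defends against exactly this by counting iterations and giving up after 2000 rather than spinning forever on a corrupted circular chain. A standalone sketch of the guard:

#include <stdio.h>

typedef struct Node
{
    int          val;
    struct Node *next;
} Node;

/* walk a circular list, bailing out if it never returns to the head */
static int
walk(Node *head, int limit)
{
    Node *n = head;
    int   count = 0;

    do
    {
        printf("visit %d\n", n->val);
        if (++count > limit)
        {
            printf("loop detected, giving up\n");
            return -1;
        }
        n = n->next;
    } while (n != head);
    return count;
}

int
main(void)
{
    Node a, b, c;

    a.val = 1; b.val = 2; c.val = 3;
    a.next = &b; b.next = &c; c.next = &a;  /* healthy circular list   */
    walk(&a, 2000);

    c.next = &b;                            /* corrupt: b<->c subcycle */
    walk(&a, 5);                            /* guard fires             */
    return 0;
}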
- */ - if (count++ > 2000) { - elog(NOTICE,"LockReleaseAll: xid loop detected, giving up"); - nskip = 0; - break; - } - if (is_user_lock_table) { - if ((xidLook->tag.pid == 0) || (xidLook->tag.xid != 0)) { -#ifdef USER_LOCKS_DEBUG - elog(NOTICE,"LockReleaseAll: skip normal lock [%d,%d,%d]", - xidLook->tag.lock,xidLook->tag.pid,xidLook->tag.xid); -#endif - nskip++; - goto next_item; + + /* + * Sometimes the queue appears to be messed up. + */ + if (count++ > 2000) + { + elog(NOTICE, "LockReleaseAll: xid loop detected, giving up"); + nskip = 0; + break; } - if (xidLook->tag.pid != my_pid) { - /* This should never happen */ + if (is_user_lock_table) + { + if ((xidLook->tag.pid == 0) || (xidLook->tag.xid != 0)) + { #ifdef USER_LOCKS_DEBUG - elog(NOTICE, - "LockReleaseAll: skip other pid [%u,%u] [%d,%d,%d]", - lock->tag.tupleId.ip_posid, - ((lock->tag.tupleId.ip_blkid.bi_hi<<16)+ - lock->tag.tupleId.ip_blkid.bi_lo), - xidLook->tag.lock,xidLook->tag.pid,xidLook->tag.xid); + elog(NOTICE, "LockReleaseAll: skip normal lock [%d,%d,%d]", + xidLook->tag.lock, xidLook->tag.pid, xidLook->tag.xid); #endif - nskip++; - goto next_item; - } + nskip++; + goto next_item; + } + if (xidLook->tag.pid != my_pid) + { + /* This should never happen */ #ifdef USER_LOCKS_DEBUG - elog(NOTICE, - "LockReleaseAll: release user lock [%u,%u] [%d,%d,%d]", - lock->tag.tupleId.ip_posid, - ((lock->tag.tupleId.ip_blkid.bi_hi<<16)+ - lock->tag.tupleId.ip_blkid.bi_lo), - xidLook->tag.lock,xidLook->tag.pid,xidLook->tag.xid); + elog(NOTICE, + "LockReleaseAll: skip other pid [%u,%u] [%d,%d,%d]", + lock->tag.tupleId.ip_posid, + ((lock->tag.tupleId.ip_blkid.bi_hi << 16) + + lock->tag.tupleId.ip_blkid.bi_lo), + xidLook->tag.lock, xidLook->tag.pid, xidLook->tag.xid); #endif - } else { - if ((xidLook->tag.pid != 0) || (xidLook->tag.xid == 0)) { + nskip++; + goto next_item; + } #ifdef USER_LOCKS_DEBUG - elog(NOTICE, - "LockReleaseAll: skip user lock [%u,%u] [%d,%d,%d]", - lock->tag.tupleId.ip_posid, - ((lock->tag.tupleId.ip_blkid.bi_hi<<16)+ - lock->tag.tupleId.ip_blkid.bi_lo), - xidLook->tag.lock,xidLook->tag.pid,xidLook->tag.xid); + elog(NOTICE, + "LockReleaseAll: release user lock [%u,%u] [%d,%d,%d]", + lock->tag.tupleId.ip_posid, + ((lock->tag.tupleId.ip_blkid.bi_hi << 16) + + lock->tag.tupleId.ip_blkid.bi_lo), + xidLook->tag.lock, xidLook->tag.pid, xidLook->tag.xid); #endif - nskip++; - goto next_item; } + else + { + if ((xidLook->tag.pid != 0) || (xidLook->tag.xid == 0)) + { #ifdef USER_LOCKS_DEBUG - elog(NOTICE,"LockReleaseAll: release normal lock [%d,%d,%d]", - xidLook->tag.lock,xidLook->tag.pid,xidLook->tag.xid); + elog(NOTICE, + "LockReleaseAll: skip user lock [%u,%u] [%d,%d,%d]", + lock->tag.tupleId.ip_posid, + ((lock->tag.tupleId.ip_blkid.bi_hi << 16) + + lock->tag.tupleId.ip_blkid.bi_lo), + xidLook->tag.lock, xidLook->tag.pid, xidLook->tag.xid); #endif - } + nskip++; + goto next_item; + } +#ifdef USER_LOCKS_DEBUG + elog(NOTICE, "LockReleaseAll: release normal lock [%d,%d,%d]", + xidLook->tag.lock, xidLook->tag.pid, xidLook->tag.xid); +#endif + } #endif - /* ------------------ - * fix the general lock stats - * ------------------ - */ - if (lock->nHolding != xidLook->nHolding) + /* ------------------ + * fix the general lock stats + * ------------------ + */ + if (lock->nHolding != xidLook->nHolding) { - lock->nHolding -= xidLook->nHolding; - lock->nActive -= xidLook->nHolding; - Assert(lock->nActive >= 0); - for (i=1; i<=nLockTypes; i++) + lock->nHolding -= xidLook->nHolding; + lock->nActive -= xidLook->nHolding; + 
Assert(lock->nActive >= 0); + for (i = 1; i <= nLockTypes; i++) { - lock->holders[i] -= xidLook->holders[i]; - lock->activeHolders[i] -= xidLook->holders[i]; - if (! lock->activeHolders[i]) - lock->mask &= BITS_OFF[i]; + lock->holders[i] -= xidLook->holders[i]; + lock->activeHolders[i] -= xidLook->holders[i]; + if (!lock->activeHolders[i]) + lock->mask &= BITS_OFF[i]; } } - else + else { - /* -------------- - * set nHolding to zero so that we can garbage collect the lock - * down below... - * -------------- - */ - lock->nHolding = 0; + /* -------------- + * set nHolding to zero so that we can garbage collect the lock + * down below... + * -------------- + */ + lock->nHolding = 0; } - /* ---------------- - * always remove the xidLookup entry, we're done with it now - * ---------------- - */ + /* ---------------- + * always remove the xidLookup entry, we're done with it now + * ---------------- + */ #ifdef USER_LOCKS - SHMQueueDelete(&xidLook->queue); + SHMQueueDelete(&xidLook->queue); #endif - if ((! hash_search(ltable->xidHash, (Pointer)xidLook, HASH_REMOVE, &found)) - || !found) + if ((!hash_search(ltable->xidHash, (Pointer) xidLook, HASH_REMOVE, &found)) + || !found) { - SpinRelease(masterLock); + SpinRelease(masterLock); #ifdef USER_LOCKS - elog(NOTICE,"LockReleaseAll: xid table corrupted"); + elog(NOTICE, "LockReleaseAll: xid table corrupted"); #else - elog(NOTICE,"LockReplace: xid table corrupted"); + elog(NOTICE, "LockReplace: xid table corrupted"); #endif - return(FALSE); + return (FALSE); } - - if (! lock->nHolding) + + if (!lock->nHolding) { - /* -------------------- - * if there's no one waiting in the queue, we've just released - * the last lock. - * -------------------- - */ - - Assert( ltable->lockHash->hash == tag_hash); - lock = (LOCK *) - hash_search(ltable->lockHash,(Pointer)&(lock->tag),HASH_REMOVE, &found); - if ((! lock) || (!found)) + /* -------------------- + * if there's no one waiting in the queue, we've just released + * the last lock. + * -------------------- + */ + + Assert(ltable->lockHash->hash == tag_hash); + lock = (LOCK *) + hash_search(ltable->lockHash, (Pointer) & (lock->tag), HASH_REMOVE, &found); + if ((!lock) || (!found)) { - SpinRelease(masterLock); + SpinRelease(masterLock); #ifdef USER_LOCKS - elog(NOTICE,"LockReleaseAll: cannot remove lock from HTAB"); + elog(NOTICE, "LockReleaseAll: cannot remove lock from HTAB"); #else - elog(NOTICE,"LockReplace: cannot remove lock from HTAB"); + elog(NOTICE, "LockReplace: cannot remove lock from HTAB"); #endif - return(FALSE); + return (FALSE); } } - else + else { - /* -------------------- - * Wake the first waiting process and grant him the lock if it - * doesn't conflict. The woken process must record the lock - * him/herself. - * -------------------- - */ - waitQueue = &(lock->waitProcs); - ProcLockWakeup(waitQueue, (char *) ltable, (char *) lock); + /* -------------------- + * Wake the first waiting process and grant him the lock if it + * doesn't conflict. The woken process must record the lock + * him/herself. 
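LockShmemSize, a little further down, has to estimate shared-memory use before the hash tables exist, so it mirrors the hash package's sizing rule: round the expected entry count up to a power-of-two bucket count, then round buckets up to directory segments. A sketch of that rounding; my_log2 here is a local reimplementation for illustration, and the fill factor and segment size are assumed defaults:

#include <stdio.h>

#define DEF_FFACTOR 1
#define DEF_SEGSIZE 256

/* ceil(log2(n)), for n >= 1 */
static int
my_log2(long n)
{
    int  i = 0;
    long limit;

    for (limit = 1; limit < n; limit <<= 1)
        i++;
    return i;
}

int
main(void)
{
    long nents = 300;   /* expected number of lock entries */
    int  nbuckets = 1 << my_log2((nents - 1) / DEF_FFACTOR + 1);
    int  nsegs = 1 << my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);

    printf("%ld entries -> %d buckets in %d segment(s)\n",
           nents, nbuckets, nsegs);     /* 300 -> 512 buckets, 2 segments */
    return 0;
}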
+ * -------------------- + */ + waitQueue = &(lock->waitProcs); + ProcLockWakeup(waitQueue, (char *) ltable, (char *) lock); } - + #ifdef USER_LOCKS - next_item: +next_item: #endif - if (done) - break; - SHMQueueFirst(&xidLook->queue,(Pointer*)&tmp,&tmp->queue); - xidLook = tmp; + if (done) + break; + SHMQueueFirst(&xidLook->queue, (Pointer *) & tmp, &tmp->queue); + xidLook = tmp; } - SpinRelease(masterLock); + SpinRelease(masterLock); #ifdef USER_LOCKS - /* - * Reinitialize the queue only if nothing has been left in. - */ - if (nskip == 0) + + /* + * Reinitialize the queue only if nothing has been left in. + */ + if (nskip == 0) #endif - SHMQueueInit(lockQueue); - return TRUE; + SHMQueueInit(lockQueue); + return TRUE; } int LockShmemSize() { - int size = 0; - int nLockBuckets, nLockSegs; - int nXidBuckets, nXidSegs; - - nLockBuckets = 1 << (int)my_log2((NLOCKENTS - 1) / DEF_FFACTOR + 1); - nLockSegs = 1 << (int)my_log2((nLockBuckets - 1) / DEF_SEGSIZE + 1); - - nXidBuckets = 1 << (int)my_log2((NLOCKS_PER_XACT-1) / DEF_FFACTOR + 1); - nXidSegs = 1 << (int)my_log2((nLockBuckets - 1) / DEF_SEGSIZE + 1); - - size += MAXALIGN(NBACKENDS * sizeof(PROC)); /* each MyProc */ - size += MAXALIGN(NBACKENDS * sizeof(LOCKCTL)); /* each ltable->ctl */ - size += MAXALIGN(sizeof(PROC_HDR)); /* ProcGlobal */ - - size += MAXALIGN(my_log2(NLOCKENTS) * sizeof(void *)); - size += MAXALIGN(sizeof(HHDR)); - size += nLockSegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); - size += NLOCKENTS * /* XXX not multiple of BUCKET_ALLOC_INCR? */ - (MAXALIGN(sizeof(BUCKET_INDEX)) + - MAXALIGN(sizeof(LOCK))); /* contains hash key */ - - size += MAXALIGN(my_log2(NBACKENDS) * sizeof(void *)); - size += MAXALIGN(sizeof(HHDR)); - size += nXidSegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); - size += NBACKENDS * /* XXX not multiple of BUCKET_ALLOC_INCR? */ - (MAXALIGN(sizeof(BUCKET_INDEX)) + - MAXALIGN(sizeof(XIDLookupEnt))); /* contains hash key */ - - return size; + int size = 0; + int nLockBuckets, + nLockSegs; + int nXidBuckets, + nXidSegs; + + nLockBuckets = 1 << (int) my_log2((NLOCKENTS - 1) / DEF_FFACTOR + 1); + nLockSegs = 1 << (int) my_log2((nLockBuckets - 1) / DEF_SEGSIZE + 1); + + nXidBuckets = 1 << (int) my_log2((NLOCKS_PER_XACT - 1) / DEF_FFACTOR + 1); + nXidSegs = 1 << (int) my_log2((nLockBuckets - 1) / DEF_SEGSIZE + 1); + + size += MAXALIGN(NBACKENDS * sizeof(PROC)); /* each MyProc */ + size += MAXALIGN(NBACKENDS * sizeof(LOCKCTL)); /* each ltable->ctl */ + size += MAXALIGN(sizeof(PROC_HDR)); /* ProcGlobal */ + + size += MAXALIGN(my_log2(NLOCKENTS) * sizeof(void *)); + size += MAXALIGN(sizeof(HHDR)); + size += nLockSegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); + size += NLOCKENTS * /* XXX not multiple of BUCKET_ALLOC_INCR? */ + (MAXALIGN(sizeof(BUCKET_INDEX)) + + MAXALIGN(sizeof(LOCK))); /* contains hash key */ + + size += MAXALIGN(my_log2(NBACKENDS) * sizeof(void *)); + size += MAXALIGN(sizeof(HHDR)); + size += nXidSegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); + size += NBACKENDS * /* XXX not multiple of BUCKET_ALLOC_INCR? 
*/ + (MAXALIGN(sizeof(BUCKET_INDEX)) + + MAXALIGN(sizeof(XIDLookupEnt))); /* contains hash key */ + + return size; } /* ----------------- @@ -1354,7 +1399,7 @@ LockShmemSize() bool LockingDisabled() { - return LockingIsDisabled; + return LockingIsDisabled; } #ifdef DEADLOCK_DEBUG @@ -1364,67 +1409,71 @@ LockingDisabled() void DumpLocks() { - SHMEM_OFFSET location; - PROC *proc; - SHM_QUEUE *lockQueue; - int done; - XIDLookupEnt *xidLook = NULL; - XIDLookupEnt *tmp = NULL; - SHMEM_OFFSET end; - SPINLOCK masterLock; - int nLockTypes; - LOCK *lock; - int pid, count; - int tableId = 1; - LOCKTAB *ltable; - - pid = getpid(); - ShmemPIDLookup(pid,&location); - if (location == INVALID_OFFSET) - return; - proc = (PROC *) MAKE_PTR(location); - if (proc != MyProc) - return; - lockQueue = &proc->lockQueue; - - Assert (tableId < NumTables); - ltable = AllTables[tableId]; - if (!ltable) - return; - - nLockTypes = ltable->ctl->nLockTypes; - masterLock = ltable->ctl->masterLock; - - if (SHMQueueEmpty(lockQueue)) - return; - - SHMQueueFirst(lockQueue,(Pointer*)&xidLook,&xidLook->queue); - end = MAKE_OFFSET(lockQueue); - - LOCK_DUMP("DumpLocks", MyProc->waitLock, 0); - XID_PRINT("DumpLocks", xidLook); - - for (count=0;;) { - /* --------------------------- - * XXX Here we assume the shared memory queue is circular and - * that we know its internal structure. Should have some sort of - * macros to allow one to walk it. mer 20 July 1991 - * --------------------------- - */ - done = (xidLook->queue.next == end); - lock = (LOCK *) MAKE_PTR(xidLook->tag.lock); - - LOCK_DUMP("DumpLocks",lock,0); - - if (count++ > 2000) { - elog(NOTICE,"DumpLocks: xid loop detected, giving up"); - break; + SHMEM_OFFSET location; + PROC *proc; + SHM_QUEUE *lockQueue; + int done; + XIDLookupEnt *xidLook = NULL; + XIDLookupEnt *tmp = NULL; + SHMEM_OFFSET end; + SPINLOCK masterLock; + int nLockTypes; + LOCK *lock; + int pid, + count; + int tableId = 1; + LOCKTAB *ltable; + + pid = getpid(); + ShmemPIDLookup(pid, &location); + if (location == INVALID_OFFSET) + return; + proc = (PROC *) MAKE_PTR(location); + if (proc != MyProc) + return; + lockQueue = &proc->lockQueue; + + Assert(tableId < NumTables); + ltable = AllTables[tableId]; + if (!ltable) + return; + + nLockTypes = ltable->ctl->nLockTypes; + masterLock = ltable->ctl->masterLock; + + if (SHMQueueEmpty(lockQueue)) + return; + + SHMQueueFirst(lockQueue, (Pointer *) & xidLook, &xidLook->queue); + end = MAKE_OFFSET(lockQueue); + + LOCK_DUMP("DumpLocks", MyProc->waitLock, 0); + XID_PRINT("DumpLocks", xidLook); + + for (count = 0;;) + { + /* --------------------------- + * XXX Here we assume the shared memory queue is circular and + * that we know its internal structure. Should have some sort of + * macros to allow one to walk it. 
mer 20 July 1991 + * --------------------------- + */ + done = (xidLook->queue.next == end); + lock = (LOCK *) MAKE_PTR(xidLook->tag.lock); + + LOCK_DUMP("DumpLocks", lock, 0); + + if (count++ > 2000) + { + elog(NOTICE, "DumpLocks: xid loop detected, giving up"); + break; + } + + if (done) + break; + SHMQueueFirst(&xidLook->queue, (Pointer *) & tmp, &tmp->queue); + xidLook = tmp; } - - if (done) - break; - SHMQueueFirst(&xidLook->queue,(Pointer*)&tmp,&tmp->queue); - xidLook = tmp; - } } + #endif diff --git a/src/backend/storage/lmgr/multi.c b/src/backend/storage/lmgr/multi.c index 9cd3a36b48c..d5466fce04c 100644 --- a/src/backend/storage/lmgr/multi.c +++ b/src/backend/storage/lmgr/multi.c @@ -1,22 +1,22 @@ /*------------------------------------------------------------------------- * * multi.c-- - * multi level lock table manager + * multi level lock table manager * - * Standard multi-level lock manager as per the Gray paper - * (at least, that is what it is supposed to be). We implement - * three levels -- RELN, PAGE, TUPLE. Tuple is actually TID - * a physical record pointer. It isn't an object id. + * Standard multi-level lock manager as per the Gray paper + * (at least, that is what it is supposed to be). We implement + * three levels -- RELN, PAGE, TUPLE. Tuple is actually TID + * a physical record pointer. It isn't an object id. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.4 1997/08/19 21:33:25 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.5 1997/09/07 04:49:02 momjian Exp $ * * NOTES: - * (1) The lock.c module assumes that the caller here is doing - * two phase locking. + * (1) The lock.c module assumes that the caller here is doing + * two phase locking. * *------------------------------------------------------------------------- */ @@ -27,53 +27,59 @@ #include "storage/multilev.h" #include "utils/rel.h" -#include "miscadmin.h" /* MyDatabaseId */ +#include "miscadmin.h" /* MyDatabaseId */ -static bool MultiAcquire(LockTableId tableId, LOCKTAG *tag, LOCKT lockt, +static bool +MultiAcquire(LockTableId tableId, LOCKTAG * tag, LOCKT lockt, LOCK_LEVEL level); -static bool MultiRelease(LockTableId tableId, LOCKTAG *tag, LOCKT lockt, +static bool +MultiRelease(LockTableId tableId, LOCKTAG * tag, LOCKT lockt, LOCK_LEVEL level); /* * INTENT indicates to higher level that a lower level lock has been - * set. For example, a write lock on a tuple conflicts with a write - * lock on a relation. This conflict is detected as a WRITE_INTENT/ + * set. For example, a write lock on a tuple conflicts with a write + * lock on a relation. This conflict is detected as a WRITE_INTENT/ * WRITE conflict between the tuple's intent lock and the relation's * write lock. 
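/*
 * How a conflict table like MultiConflicts is read, in miniature: each
 * entry is a bitmask of lock types that conflict with the requested
 * type, so a WRITE_INTENT held on a relation shows up when someone
 * requests a plain WRITE.  The names below are illustrative stand-ins,
 * not the lock.c interface.
 */
static int
sk_conflicts(const int *conflictTable, int requested, int heldTypeMask)
{
	/* non-zero when any held lock type is incompatible with 'requested' */
	return (conflictTable[requested] & heldTypeMask) != 0;
}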
*/ -static int MultiConflicts[] = { - (int)NULL, - /* All reads and writes at any level conflict with a write lock */ - (1 << WRITE_LOCK)|(1 << WRITE_INTENT)|(1 << READ_LOCK)|(1 << READ_INTENT), - /* read locks conflict with write locks at curr and lower levels */ - (1 << WRITE_LOCK)| (1 << WRITE_INTENT), - /* write intent locks */ - (1 << READ_LOCK) | (1 << WRITE_LOCK), - /* read intent locks*/ - (1 << WRITE_LOCK), - /* extend locks for archive storage manager conflict only w/extend locks */ - (1 << EXTEND_LOCK) +static int MultiConflicts[] = { + (int) NULL, + /* All reads and writes at any level conflict with a write lock */ + (1 << WRITE_LOCK) | (1 << WRITE_INTENT) | (1 << READ_LOCK) | (1 << READ_INTENT), + /* read locks conflict with write locks at curr and lower levels */ + (1 << WRITE_LOCK) | (1 << WRITE_INTENT), + /* write intent locks */ + (1 << READ_LOCK) | (1 << WRITE_LOCK), + /* read intent locks */ + (1 << WRITE_LOCK), + + /* + * extend locks for archive storage manager conflict only w/extend + * locks + */ + (1 << EXTEND_LOCK) }; /* * write locks have higher priority than read locks and extend locks. May * want to treat INTENT locks differently. */ -static int MultiPrios[] = { - (int)NULL, - 2, - 1, - 2, - 1, - 1 +static int MultiPrios[] = { + (int) NULL, + 2, + 1, + 2, + 1, + 1 }; -/* +/* * Lock table identifier for this lock table. The multi-level * lock table is ONE lock table, not three. */ -LockTableId MultiTableId = (LockTableId)NULL; -LockTableId ShortTermTableId = (LockTableId)NULL; +LockTableId MultiTableId = (LockTableId) NULL; +LockTableId ShortTermTableId = (LockTableId) NULL; /* * Create the lock table described by MultiConflicts and Multiprio. @@ -81,30 +87,31 @@ LockTableId ShortTermTableId = (LockTableId)NULL; LockTableId InitMultiLevelLockm() { - int tableId; - - /* ----------------------- - * If we're already initialized just return the table id. - * ----------------------- - */ - if (MultiTableId) + int tableId; + + /* ----------------------- + * If we're already initialized just return the table id. + * ----------------------- + */ + if (MultiTableId) + return MultiTableId; + + tableId = LockTabInit("LockTable", MultiConflicts, MultiPrios, 5); + MultiTableId = tableId; + if (!(MultiTableId)) + { + elog(WARN, "InitMultiLockm: couldnt initialize lock table"); + } + /* ----------------------- + * No short term lock table for now. -Jeff 15 July 1991 + * + * ShortTermTableId = LockTabRename(tableId); + * if (! (ShortTermTableId)) { + * elog(WARN,"InitMultiLockm: couldnt rename lock table"); + * } + * ----------------------- + */ return MultiTableId; - - tableId = LockTabInit("LockTable", MultiConflicts, MultiPrios, 5); - MultiTableId = tableId; - if (! (MultiTableId)) { - elog(WARN,"InitMultiLockm: couldnt initialize lock table"); - } - /* ----------------------- - * No short term lock table for now. -Jeff 15 July 1991 - * - * ShortTermTableId = LockTabRename(tableId); - * if (! (ShortTermTableId)) { - * elog(WARN,"InitMultiLockm: couldnt rename lock table"); - * } - * ----------------------- - */ - return MultiTableId; } /* @@ -115,16 +122,16 @@ InitMultiLevelLockm() bool MultiLockReln(LockInfo linfo, LOCKT lockt) { - LOCKTAG tag; - - /* LOCKTAG has two bytes of padding, unfortunately. The - * hash function will return miss if the padding bytes aren't - * zero'd. 
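/*
 * Why every caller memsets the LOCKTAG first -- a self-contained
 * sketch, not the real LOCKTAG layout: a struct used as a hash key is
 * hashed as raw bytes, so compiler-inserted padding must be zeroed or
 * two logically equal keys can hash differently.
 */
#include <string.h>

struct sk_tag
{
	unsigned int relId;			/* 4 bytes */
	short		pad_demo;		/* 2 bytes + 2 padding bytes on most ABIs */
	unsigned int blkno;
};

static void
sk_tag_set(struct sk_tag *tag, unsigned int relId, unsigned int blkno)
{
	memset(tag, 0, sizeof(*tag));	/* zero the padding before hashing */
	tag->relId = relId;
	tag->blkno = blkno;
}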
- */ - memset(&tag,0,sizeof(tag)); - tag.relId = linfo->lRelId.relId; - tag.dbId = linfo->lRelId.dbId; - return(MultiAcquire(MultiTableId, &tag, lockt, RELN_LEVEL)); + LOCKTAG tag; + + /* + * LOCKTAG has two bytes of padding, unfortunately. The hash function + * will return miss if the padding bytes aren't zero'd. + */ + memset(&tag, 0, sizeof(tag)); + tag.relId = linfo->lRelId.relId; + tag.dbId = linfo->lRelId.dbId; + return (MultiAcquire(MultiTableId, &tag, lockt, RELN_LEVEL)); } /* @@ -133,25 +140,25 @@ MultiLockReln(LockInfo linfo, LOCKT lockt) * Returns: TRUE if lock is set, FALSE otherwise. * * Side Effects: causes intention level locks to be set - * at the page and relation level. + * at the page and relation level. */ bool MultiLockTuple(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt) { - LOCKTAG tag; - - /* LOCKTAG has two bytes of padding, unfortunately. The - * hash function will return miss if the padding bytes aren't - * zero'd. - */ - memset(&tag,0,sizeof(tag)); - - tag.relId = linfo->lRelId.relId; - tag.dbId = linfo->lRelId.dbId; - - /* not locking any valid Tuple, just the page */ - tag.tupleId = *tidPtr; - return(MultiAcquire(MultiTableId, &tag, lockt, TUPLE_LEVEL)); + LOCKTAG tag; + + /* + * LOCKTAG has two bytes of padding, unfortunately. The hash function + * will return miss if the padding bytes aren't zero'd. + */ + memset(&tag, 0, sizeof(tag)); + + tag.relId = linfo->lRelId.relId; + tag.dbId = linfo->lRelId.dbId; + + /* not locking any valid Tuple, just the page */ + tag.tupleId = *tidPtr; + return (MultiAcquire(MultiTableId, &tag, lockt, TUPLE_LEVEL)); } /* @@ -160,28 +167,28 @@ MultiLockTuple(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt) bool MultiLockPage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt) { - LOCKTAG tag; - - /* LOCKTAG has two bytes of padding, unfortunately. The - * hash function will return miss if the padding bytes aren't - * zero'd. - */ - memset(&tag,0,sizeof(tag)); - - - /* ---------------------------- - * Now we want to set the page offset to be invalid - * and lock the block. There is some confusion here as to what - * a page is. In Postgres a page is an 8k block, however this - * block may be partitioned into many subpages which are sometimes - * also called pages. The term is overloaded, so don't be fooled - * when we say lock the page we mean the 8k block. -Jeff 16 July 1991 - * ---------------------------- - */ - tag.relId = linfo->lRelId.relId; - tag.dbId = linfo->lRelId.dbId; - BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid)); - return(MultiAcquire(MultiTableId, &tag, lockt, PAGE_LEVEL)); + LOCKTAG tag; + + /* + * LOCKTAG has two bytes of padding, unfortunately. The hash function + * will return miss if the padding bytes aren't zero'd. + */ + memset(&tag, 0, sizeof(tag)); + + + /* ---------------------------- + * Now we want to set the page offset to be invalid + * and lock the block. There is some confusion here as to what + * a page is. In Postgres a page is an 8k block, however this + * block may be partitioned into many subpages which are sometimes + * also called pages. The term is overloaded, so don't be fooled + * when we say lock the page we mean the 8k block. 
-Jeff 16 July 1991 + * ---------------------------- + */ + tag.relId = linfo->lRelId.relId; + tag.dbId = linfo->lRelId.dbId; + BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid)); + return (MultiAcquire(MultiTableId, &tag, lockt, PAGE_LEVEL)); } /* @@ -190,102 +197,110 @@ MultiLockPage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt) * Returns: TRUE if lock is set, FALSE if not * Side Effects: */ -static bool +static bool MultiAcquire(LockTableId tableId, - LOCKTAG *tag, - LOCKT lockt, - LOCK_LEVEL level) + LOCKTAG * tag, + LOCKT lockt, + LOCK_LEVEL level) { - LOCKT locks[N_LEVELS]; - int i,status; - LOCKTAG xxTag, *tmpTag = &xxTag; - int retStatus = TRUE; - - /* - * Three levels implemented. If we set a low level (e.g. Tuple) - * lock, we must set INTENT locks on the higher levels. The - * intent lock detects conflicts between the low level lock - * and an existing high level lock. For example, setting a - * write lock on a tuple in a relation is disallowed if there - * is an existing read lock on the entire relation. The - * write lock would set a WRITE + INTENT lock on the relation - * and that lock would conflict with the read. - */ - switch (level) { - case RELN_LEVEL: - locks[0] = lockt; - locks[1] = NO_LOCK; - locks[2] = NO_LOCK; - break; - case PAGE_LEVEL: - locks[0] = lockt + INTENT; - locks[1] = lockt; - locks[2] = NO_LOCK; - break; - case TUPLE_LEVEL: - locks[0] = lockt + INTENT; - locks[1] = lockt + INTENT; - locks[2] = lockt; - break; - default: - elog(WARN,"MultiAcquire: bad lock level"); - return(FALSE); - } - - /* - * construct a new tag as we go. Always loop through all levels, - * but if we arent' seting a low level lock, locks[i] is set to - * NO_LOCK for the lower levels. Always start from the highest - * level and go to the lowest level. - */ - memset(tmpTag,0,sizeof(*tmpTag)); - tmpTag->relId = tag->relId; - tmpTag->dbId = tag->dbId; - - for (i=0;i<N_LEVELS;i++) { - if (locks[i] != NO_LOCK) { - switch (i) { - case RELN_LEVEL: - /* ------------- - * Set the block # and offset to invalid - * ------------- - */ - BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber); - tmpTag->tupleId.ip_posid = InvalidOffsetNumber; - break; - case PAGE_LEVEL: - /* ------------- - * Copy the block #, set the offset to invalid - * ------------- - */ - BlockIdCopy(&(tmpTag->tupleId.ip_blkid), - &(tag->tupleId.ip_blkid)); - tmpTag->tupleId.ip_posid = InvalidOffsetNumber; + LOCKT locks[N_LEVELS]; + int i, + status; + LOCKTAG xxTag, + *tmpTag = &xxTag; + int retStatus = TRUE; + + /* + * Three levels implemented. If we set a low level (e.g. Tuple) lock, + * we must set INTENT locks on the higher levels. The intent lock + * detects conflicts between the low level lock and an existing high + * level lock. For example, setting a write lock on a tuple in a + * relation is disallowed if there is an existing read lock on the + * entire relation. The write lock would set a WRITE + INTENT lock on + * the relation and that lock would conflict with the read. + */ + switch (level) + { + case RELN_LEVEL: + locks[0] = lockt; + locks[1] = NO_LOCK; + locks[2] = NO_LOCK; break; - case TUPLE_LEVEL: - /* -------------- - * Copy the entire tuple id. - * -------------- - */ - ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId); + case PAGE_LEVEL: + locks[0] = lockt + INTENT; + locks[1] = lockt; + locks[2] = NO_LOCK; break; - } - - status = LockAcquire(tableId, tmpTag, locks[i]); - if (! status) { - /* failed for some reason. Before returning we have - * to release all of the locks we just acquired. 
- * MultiRelease(xx,xx,xx, i) means release starting from
- * the last level lock we successfully acquired
- */
- retStatus = FALSE;
- MultiRelease(tableId, tag, lockt, i);
- /* now leave the loop. Don't try for any more locks */
+ case TUPLE_LEVEL:
+ locks[0] = lockt + INTENT;
+ locks[1] = lockt + INTENT;
+ locks[2] = lockt;
break;
- }
+ default:
+ elog(WARN, "MultiAcquire: bad lock level");
+ return (FALSE);
}
- }
- return(retStatus);
+
+ /*
+ * construct a new tag as we go. Always loop through all levels, but
+ * if we aren't setting a low level lock, locks[i] is set to NO_LOCK
+ * for the lower levels. Always start from the highest level and go
+ * to the lowest level.
+ */
+ memset(tmpTag, 0, sizeof(*tmpTag));
+ tmpTag->relId = tag->relId;
+ tmpTag->dbId = tag->dbId;
+
+ for (i = 0; i < N_LEVELS; i++)
+ {
+ if (locks[i] != NO_LOCK)
+ {
+ switch (i)
+ {
+ case RELN_LEVEL:
+ /* -------------
+ * Set the block # and offset to invalid
+ * -------------
+ */
+ BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
+ tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
+ break;
+ case PAGE_LEVEL:
+ /* -------------
+ * Copy the block #, set the offset to invalid
+ * -------------
+ */
+ BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
+ &(tag->tupleId.ip_blkid));
+ tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
+ break;
+ case TUPLE_LEVEL:
+ /* --------------
+ * Copy the entire tuple id.
+ * --------------
+ */
+ ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
+ break;
+ }
+
+ status = LockAcquire(tableId, tmpTag, locks[i]);
+ if (!status)
+ {
+
+ /*
+ * failed for some reason. Before returning we have to
+ * release all of the locks we just acquired.
+ * MultiRelease(xx,xx,xx, i) means release starting from
+ * the last level lock we successfully acquired
+ */
+ retStatus = FALSE;
+ MultiRelease(tableId, tag, lockt, i);
+ /* now leave the loop. Don't try for any more locks */
+ break;
+ }
+ }
+ }
+ return (retStatus);
}

/* ------------------
@@ -294,24 +309,25 @@ MultiAcquire(LockTableId tableId,
 */
#ifdef NOT_USED
bool
-MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
+MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
{
- LOCKTAG tag;
-
- /* ------------------
- * LOCKTAG has two bytes of padding, unfortunately. The
- * hash function will return miss if the padding bytes aren't
- * zero'd.
- * ------------------
- */
- memset(&tag, 0,sizeof(LOCKTAG));
-
- tag.relId = linfo->lRelId.relId;
- tag.dbId = linfo->lRelId.dbId;
- BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
-
- return (MultiRelease(MultiTableId, &tag, lockt, PAGE_LEVEL));
+ LOCKTAG tag;
+
+ /* ------------------
+ * LOCKTAG has two bytes of padding, unfortunately. The
+ * hash function will return miss if the padding bytes aren't
+ * zero'd.
+ * ------------------
+ */
+ memset(&tag, 0, sizeof(LOCKTAG));
+
+ tag.relId = linfo->lRelId.relId;
+ tag.dbId = linfo->lRelId.dbId;
+ BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
+
+ return (MultiRelease(MultiTableId, &tag, lockt, PAGE_LEVEL));
}
+
#endif

/* ------------------
@@ -319,21 +335,21 @@ MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKT lockt)
 * ------------------
 */
bool
-MultiReleaseReln(LockInfo linfo, LOCKT lockt)
+MultiReleaseReln(LockInfo linfo, LOCKT lockt)
{
- LOCKTAG tag;
-
- /* ------------------
- * LOCKTAG has two bytes of padding, unfortunately. The
- * hash function will return miss if the padding bytes aren't
- * zero'd. 
- * ------------------
- */
- memset(&tag, 0, sizeof(LOCKTAG));
- tag.relId = linfo->lRelId.relId;
- tag.dbId = linfo->lRelId.dbId;
-
- return (MultiRelease(MultiTableId, &tag, lockt, RELN_LEVEL));
+ LOCKTAG tag;
+
+ /* ------------------
+ * LOCKTAG has two bytes of padding, unfortunately. The
+ * hash function will return miss if the padding bytes aren't
+ * zero'd.
+ * ------------------
+ */
+ memset(&tag, 0, sizeof(LOCKTAG));
+ tag.relId = linfo->lRelId.relId;
+ tag.dbId = linfo->lRelId.dbId;
+
+ return (MultiRelease(MultiTableId, &tag, lockt, RELN_LEVEL));
}

/*
@@ -341,81 +357,88 @@ MultiReleaseReln(LockInfo linfo, LOCKT lockt)
 *
 * Returns: TRUE if successful, FALSE otherwise.
 */
-static bool
+static bool
MultiRelease(LockTableId tableId,
- LOCKTAG *tag,
- LOCKT lockt,
- LOCK_LEVEL level)
+ LOCKTAG * tag,
+ LOCKT lockt,
+ LOCK_LEVEL level)
{
- LOCKT locks[N_LEVELS];
- int i,status;
- LOCKTAG xxTag, *tmpTag = &xxTag;
-
- /*
- * same level scheme as MultiAcquire().
- */
- switch (level) {
- case RELN_LEVEL:
- locks[0] = lockt;
- locks[1] = NO_LOCK;
- locks[2] = NO_LOCK;
- break;
- case PAGE_LEVEL:
- locks[0] = lockt + INTENT;
- locks[1] = lockt;
- locks[2] = NO_LOCK;
- break;
- case TUPLE_LEVEL:
- locks[0] = lockt + INTENT;
- locks[1] = lockt + INTENT;
- locks[2] = lockt;
- break;
- default:
- elog(WARN,"MultiRelease: bad lockt");
- }
-
- /*
- * again, construct the tag on the fly. This time, however,
- * we release the locks in the REVERSE order -- from lowest
- * level to highest level.
- *
- * Must zero out the tag to set padding byes to zero and ensure
- * hashing consistency.
- */
- memset(tmpTag, 0, sizeof(*tmpTag));
- tmpTag->relId = tag->relId;
- tmpTag->dbId = tag->dbId;
-
- for (i=(N_LEVELS-1); i>=0; i--) {
- if (locks[i] != NO_LOCK) {
- switch (i) {
- case RELN_LEVEL:
- /* -------------
- * Set the block # and offset to invalid
- * -------------
- */
- BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
- tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
+ LOCKT locks[N_LEVELS];
+ int i,
+ status;
+ LOCKTAG xxTag,
+ *tmpTag = &xxTag;
+
+ /*
+ * same level scheme as MultiAcquire().
+ */
+ switch (level)
+ {
+ case RELN_LEVEL:
+ locks[0] = lockt;
+ locks[1] = NO_LOCK;
+ locks[2] = NO_LOCK;
break;
- case PAGE_LEVEL:
- /* -------------
- * Copy the block #, set the offset to invalid
- * -------------
- */
- BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
- &(tag->tupleId.ip_blkid));
- tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
+ case PAGE_LEVEL:
+ locks[0] = lockt + INTENT;
+ locks[1] = lockt;
+ locks[2] = NO_LOCK;
break;
- case TUPLE_LEVEL:
- ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
+ case TUPLE_LEVEL:
+ locks[0] = lockt + INTENT;
+ locks[1] = lockt + INTENT;
+ locks[2] = lockt;
break;
- }
- status = LockRelease(tableId, tmpTag, locks[i]);
- if (! status) {
- elog(WARN,"MultiRelease: couldn't release after error");
- }
+ default:
+ elog(WARN, "MultiRelease: bad lockt");
+ }
+
+ /*
+ * again, construct the tag on the fly. This time, however, we
+ * release the locks in the REVERSE order -- from lowest level to
+ * highest level.
+ *
+ * Must zero out the tag to set padding bytes to zero and ensure hashing
+ * consistency. 
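/*
 * The ordering invariant behind MultiAcquire and MultiRelease in one
 * illustrative sketch: expand a request into per-level modes (intent
 * locks above the target level), acquire from the coarsest level down,
 * and release in the reverse order so an intent lock never disappears
 * while a finer-grained lock under it is still held.  All names here
 * are stand-ins, not the lock-manager API.
 */
#define SK_LEVELS  3
#define SK_NO_LOCK 0
#define SK_INTENT  4			/* added to a base mode, like INTENT */

extern int sk_lock(int level, int mode);	/* hypothetical primitives */
extern void sk_unlock(int level, int mode);

static int
sk_lock_at_level(int target, int mode)
{
	int modes[SK_LEVELS];
	int lv;

	for (lv = 0; lv < SK_LEVELS; lv++)
		modes[lv] = lv < target ? mode + SK_INTENT :
			lv == target ? mode : SK_NO_LOCK;

	for (lv = 0; lv < SK_LEVELS; lv++)	/* acquire coarsest level first */
	{
		if (modes[lv] != SK_NO_LOCK && !sk_lock(lv, modes[lv]))
		{
			while (--lv >= 0)	/* back out in reverse order */
				sk_unlock(lv, modes[lv]);
			return 0;
		}
	}
	return 1;
}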
+ */ + memset(tmpTag, 0, sizeof(*tmpTag)); + tmpTag->relId = tag->relId; + tmpTag->dbId = tag->dbId; + + for (i = (N_LEVELS - 1); i >= 0; i--) + { + if (locks[i] != NO_LOCK) + { + switch (i) + { + case RELN_LEVEL: + /* ------------- + * Set the block # and offset to invalid + * ------------- + */ + BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber); + tmpTag->tupleId.ip_posid = InvalidOffsetNumber; + break; + case PAGE_LEVEL: + /* ------------- + * Copy the block #, set the offset to invalid + * ------------- + */ + BlockIdCopy(&(tmpTag->tupleId.ip_blkid), + &(tag->tupleId.ip_blkid)); + tmpTag->tupleId.ip_posid = InvalidOffsetNumber; + break; + case TUPLE_LEVEL: + ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId); + break; + } + status = LockRelease(tableId, tmpTag, locks[i]); + if (!status) + { + elog(WARN, "MultiRelease: couldn't release after error"); + } + } } - } - /* shouldn't reach here */ - return false; + /* shouldn't reach here */ + return false; } diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 528bfa1e35d..e3872091dfc 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -1,25 +1,25 @@ /*------------------------------------------------------------------------- * * proc.c-- - * routines to manage per-process shared memory data structure + * routines to manage per-process shared memory data structure * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.18 1997/08/19 21:33:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.19 1997/09/07 04:49:03 momjian Exp $ * *------------------------------------------------------------------------- */ /* - * Each postgres backend gets one of these. We'll use it to - * clean up after the process should the process suddenly die. + * Each postgres backend gets one of these. We'll use it to + * clean up after the process should the process suddenly die. * * * Interface (a): - * ProcSleep(), ProcWakeup(), ProcWakeupNext(), - * ProcQueueAlloc() -- create a shm queue for sleeping processes - * ProcQueueInit() -- create a queue without allocing memory + * ProcSleep(), ProcWakeup(), ProcWakeupNext(), + * ProcQueueAlloc() -- create a shm queue for sleeping processes + * ProcQueueInit() -- create a queue without allocing memory * * Locking and waiting for buffers can cause the backend to be * put to sleep. Whoever releases the lock, etc. wakes the @@ -30,23 +30,23 @@ * * ProcReleaseLocks -- frees the locks associated with this process, * ProcKill -- destroys the shared memory state (and locks) - * associated with the process. + * associated with the process. * * 5/15/91 -- removed the buffer pool based lock chain in favor - * of a shared memory lock chain. The write-protection is - * more expensive if the lock chain is in the buffer pool. - * The only reason I kept the lock chain in the buffer pool - * in the first place was to allow the lock table to grow larger - * than available shared memory and that isn't going to work - * without a lot of unimplemented support anyway. + * of a shared memory lock chain. The write-protection is + * more expensive if the lock chain is in the buffer pool. + * The only reason I kept the lock chain in the buffer pool + * in the first place was to allow the lock table to grow larger + * than available shared memory and that isn't going to work + * without a lot of unimplemented support anyway. 
* * 4/7/95 -- instead of allocating a set of 1 semaphore per process, we - * allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores - * shared among backends (we keep a few sets of semaphores around). - * This is so that we can support more backends. (system-wide semaphore - * sets run out pretty fast.) -ay 4/95 + * allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores + * shared among backends (we keep a few sets of semaphores around). + * This is so that we can support more backends. (system-wide semaphore + * sets run out pretty fast.) -ay 4/95 * - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.18 1997/08/19 21:33:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.19 1997/09/07 04:49:03 momjian Exp $ */ #include <sys/time.h> #include <unistd.h> @@ -69,21 +69,21 @@ #include "storage/ipc.h" /* In Ultrix, sem.h must be included after ipc.h */ #include <sys/sem.h> -#include "storage/buf.h" +#include "storage/buf.h" #include "storage/lock.h" #include "storage/lmgr.h" #include "storage/shmem.h" #include "storage/spin.h" #include "storage/proc.h" -static void HandleDeadLock(int sig); -static PROC *ProcWakeup(PROC *proc, int errType); +static void HandleDeadLock(int sig); +static PROC *ProcWakeup(PROC * proc, int errType); /* * timeout (in seconds) for resolving possible deadlock */ #ifndef DEADLOCK_TIMEOUT -#define DEADLOCK_TIMEOUT 60 +#define DEADLOCK_TIMEOUT 60 #endif /* -------------------- @@ -93,51 +93,52 @@ static PROC *ProcWakeup(PROC *proc, int errType); * memory. -mer 17 July 1991 * -------------------- */ -SPINLOCK ProcStructLock; +SPINLOCK ProcStructLock; /* * For cleanup routines. Don't cleanup if the initialization * has not happened. */ -static bool ProcInitialized = FALSE; +static bool ProcInitialized = FALSE; static PROC_HDR *ProcGlobal = NULL; -PROC *MyProc = NULL; +PROC *MyProc = NULL; + +static void ProcKill(int exitStatus, int pid); +static void ProcGetNewSemKeyAndNum(IPCKey * key, int *semNum); +static void ProcFreeSem(IpcSemaphoreKey semKey, int semNum); -static void ProcKill(int exitStatus, int pid); -static void ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum); -static void ProcFreeSem(IpcSemaphoreKey semKey, int semNum); /* * InitProcGlobal - - * initializes the global process table. We put it here so that - * the postmaster can do this initialization. (ProcFreeAllSem needs - * to read this table on exiting the postmaster. If we have the first - * backend do this, starting up and killing the postmaster without - * starting any backends will be a problem.) + * initializes the global process table. We put it here so that + * the postmaster can do this initialization. (ProcFreeAllSem needs + * to read this table on exiting the postmaster. If we have the first + * backend do this, starting up and killing the postmaster without + * starting any backends will be a problem.) */ void InitProcGlobal(IPCKey key) { - bool found = false; + bool found = false; - /* attach to the free list */ - ProcGlobal = (PROC_HDR *) - ShmemInitStruct("Proc Header",(unsigned)sizeof(PROC_HDR),&found); + /* attach to the free list */ + ProcGlobal = (PROC_HDR *) + ShmemInitStruct("Proc Header", (unsigned) sizeof(PROC_HDR), &found); - /* -------------------- - * We're the first - initialize. - * -------------------- - */ - if (! found) + /* -------------------- + * We're the first - initialize. 
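/*
 * The attach-or-initialize idiom used with ShmemInitStruct, reduced to
 * a sketch: the first backend sees found == false and fills in the
 * struct; everyone after that just attaches.  sk_shmem_init is a
 * hypothetical stand-in with the same shape as ShmemInitStruct.
 */
extern void *sk_shmem_init(const char *name, unsigned size, int *found);

struct sk_hdr
{
	int numProcs;
};

static struct sk_hdr *
sk_attach_hdr(void)
{
	int found = 0;
	struct sk_hdr *hdr = (struct sk_hdr *)
		sk_shmem_init("Sketch Header", (unsigned) sizeof(struct sk_hdr), &found);

	if (!found)
		hdr->numProcs = 0;		/* we created it: set the initial state */
	return hdr;
}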
+ * -------------------- + */ + if (!found) { - int i; + int i; - ProcGlobal->numProcs = 0; - ProcGlobal->freeProcs = INVALID_OFFSET; - ProcGlobal->currKey = IPCGetProcessSemaphoreInitKey(key); - for (i=0; i < MAX_PROC_SEMS/PROC_NSEMS_PER_SET; i++) - ProcGlobal->freeSemMap[i] = 0; + ProcGlobal->numProcs = 0; + ProcGlobal->freeProcs = INVALID_OFFSET; + ProcGlobal->currKey = IPCGetProcessSemaphoreInitKey(key); + for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++) + ProcGlobal->freeSemMap[i] = 0; } } @@ -149,141 +150,149 @@ InitProcGlobal(IPCKey key) void InitProcess(IPCKey key) { - bool found = false; - int pid; - int semstat; - unsigned long location, myOffset; - - /* ------------------ - * Routine called if deadlock timer goes off. See ProcSleep() - * ------------------ - */ - pqsignal(SIGALRM, HandleDeadLock); - - SpinAcquire(ProcStructLock); - - /* attach to the free list */ - ProcGlobal = (PROC_HDR *) - ShmemInitStruct("Proc Header",(unsigned)sizeof(PROC_HDR),&found); - if (!found) { - /* this should not happen. InitProcGlobal() is called before this. */ - elog(WARN, "InitProcess: Proc Header uninitialized"); - } - - if (MyProc != NULL) + bool found = false; + int pid; + int semstat; + unsigned long location, + myOffset; + + /* ------------------ + * Routine called if deadlock timer goes off. See ProcSleep() + * ------------------ + */ + pqsignal(SIGALRM, HandleDeadLock); + + SpinAcquire(ProcStructLock); + + /* attach to the free list */ + ProcGlobal = (PROC_HDR *) + ShmemInitStruct("Proc Header", (unsigned) sizeof(PROC_HDR), &found); + if (!found) { - SpinRelease(ProcStructLock); - elog(WARN,"ProcInit: you already exist"); - return; + /* this should not happen. InitProcGlobal() is called before this. */ + elog(WARN, "InitProcess: Proc Header uninitialized"); } - - /* try to get a proc from the free list first */ - - myOffset = ProcGlobal->freeProcs; - - if (myOffset != INVALID_OFFSET) + + if (MyProc != NULL) { - MyProc = (PROC *) MAKE_PTR(myOffset); - ProcGlobal->freeProcs = MyProc->links.next; + SpinRelease(ProcStructLock); + elog(WARN, "ProcInit: you already exist"); + return; } - else + + /* try to get a proc from the free list first */ + + myOffset = ProcGlobal->freeProcs; + + if (myOffset != INVALID_OFFSET) { - /* have to allocate one. We can't use the normal binding - * table mechanism because the proc structure is stored - * by PID instead of by a global name (need to look it - * up by PID when we cleanup dead processes). - */ - - MyProc = (PROC *) ShmemAlloc((unsigned)sizeof(PROC)); - if (! MyProc) + MyProc = (PROC *) MAKE_PTR(myOffset); + ProcGlobal->freeProcs = MyProc->links.next; + } + else + { + + /* + * have to allocate one. We can't use the normal binding table + * mechanism because the proc structure is stored by PID instead + * of by a global name (need to look it up by PID when we cleanup + * dead processes). 
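/*
 * The PROC free list in miniature.  Shared memory is mapped at a
 * different address in each backend, so links are stored as offsets
 * from the segment base and converted with MAKE_PTR/MAKE_OFFSET-style
 * macros.  Everything here, including the macros, is illustrative.
 */
#include <stddef.h>

typedef unsigned long sk_offset;
#define SK_INVALID_OFFSET ((sk_offset) 0)

extern char *sk_shmem_base;		/* base address of the mapped segment */
#define SK_PTR(off) ((void *) (sk_shmem_base + (off)))
#define SK_OFFSET(ptr) ((sk_offset) ((char *) (ptr) - sk_shmem_base))

struct sk_proc
{
	sk_offset next;				/* an offset, never a raw pointer */
};

static struct sk_proc *
sk_freelist_pop(sk_offset *head)
{
	struct sk_proc *p;

	if (*head == SK_INVALID_OFFSET)
		return NULL;			/* empty: caller falls back to allocating */
	p = (struct sk_proc *) SK_PTR(*head);
	*head = p->next;
	return p;
}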
+ */ + + MyProc = (PROC *) ShmemAlloc((unsigned) sizeof(PROC)); + if (!MyProc) { - SpinRelease(ProcStructLock); - elog (FATAL,"cannot create new proc: out of memory"); + SpinRelease(ProcStructLock); + elog(FATAL, "cannot create new proc: out of memory"); } - - /* this cannot be initialized until after the buffer pool */ - SHMQueueInit(&(MyProc->lockQueue)); - MyProc->procId = ProcGlobal->numProcs; - ProcGlobal->numProcs++; + + /* this cannot be initialized until after the buffer pool */ + SHMQueueInit(&(MyProc->lockQueue)); + MyProc->procId = ProcGlobal->numProcs; + ProcGlobal->numProcs++; } - - /* - * zero out the spin lock counts and set the sLocks field for - * ProcStructLock to 1 as we have acquired this spinlock above but - * didn't record it since we didn't have MyProc until now. - */ - memset(MyProc->sLocks, 0, sizeof(MyProc->sLocks)); - MyProc->sLocks[ProcStructLock] = 1; - - - if (IsUnderPostmaster) { - IPCKey semKey; - int semNum; - int semId; - union semun semun; - - ProcGetNewSemKeyAndNum(&semKey, &semNum); - - semId = IpcSemaphoreCreate(semKey, - PROC_NSEMS_PER_SET, - IPCProtection, - IpcSemaphoreDefaultStartValue, - 0, - &semstat); + /* - * we might be reusing a semaphore that belongs to a dead - * backend. So be careful and reinitialize its value here. + * zero out the spin lock counts and set the sLocks field for + * ProcStructLock to 1 as we have acquired this spinlock above but + * didn't record it since we didn't have MyProc until now. */ - semun.val = IpcSemaphoreDefaultStartValue; - semctl(semId, semNum, SETVAL, semun); - - IpcSemaphoreLock(semId, semNum, IpcExclusiveLock); - MyProc->sem.semId = semId; - MyProc->sem.semNum = semNum; - MyProc->sem.semKey = semKey; - } else { - MyProc->sem.semId = -1; - } - - /* ---------------------- - * Release the lock. - * ---------------------- - */ - SpinRelease(ProcStructLock); - - MyProc->pid = 0; - MyProc->xid = InvalidTransactionId; + memset(MyProc->sLocks, 0, sizeof(MyProc->sLocks)); + MyProc->sLocks[ProcStructLock] = 1; + + + if (IsUnderPostmaster) + { + IPCKey semKey; + int semNum; + int semId; + union semun semun; + + ProcGetNewSemKeyAndNum(&semKey, &semNum); + + semId = IpcSemaphoreCreate(semKey, + PROC_NSEMS_PER_SET, + IPCProtection, + IpcSemaphoreDefaultStartValue, + 0, + &semstat); + + /* + * we might be reusing a semaphore that belongs to a dead backend. + * So be careful and reinitialize its value here. + */ + semun.val = IpcSemaphoreDefaultStartValue; + semctl(semId, semNum, SETVAL, semun); + + IpcSemaphoreLock(semId, semNum, IpcExclusiveLock); + MyProc->sem.semId = semId; + MyProc->sem.semNum = semNum; + MyProc->sem.semKey = semKey; + } + else + { + MyProc->sem.semId = -1; + } + + /* ---------------------- + * Release the lock. + * ---------------------- + */ + SpinRelease(ProcStructLock); + + MyProc->pid = 0; + MyProc->xid = InvalidTransactionId; #if 0 - MyProc->pid = MyPid; + MyProc->pid = MyPid; #endif - - /* ---------------- - * Start keeping spin lock stats from here on. Any botch before - * this initialization is forever botched - * ---------------- - */ - memset(MyProc->sLocks, 0, MAX_SPINS*sizeof(*MyProc->sLocks)); - - /* ------------------------- - * Install ourselves in the binding table. The name to - * use is determined by the OS-assigned process id. That - * allows the cleanup process to find us after any untimely - * exit. - * ------------------------- - */ - pid = getpid(); - location = MAKE_OFFSET(MyProc); - if ((! 
ShmemPIDLookup(pid,&location)) || (location != MAKE_OFFSET(MyProc))) + + /* ---------------- + * Start keeping spin lock stats from here on. Any botch before + * this initialization is forever botched + * ---------------- + */ + memset(MyProc->sLocks, 0, MAX_SPINS * sizeof(*MyProc->sLocks)); + + /* ------------------------- + * Install ourselves in the binding table. The name to + * use is determined by the OS-assigned process id. That + * allows the cleanup process to find us after any untimely + * exit. + * ------------------------- + */ + pid = getpid(); + location = MAKE_OFFSET(MyProc); + if ((!ShmemPIDLookup(pid, &location)) || (location != MAKE_OFFSET(MyProc))) { - elog(FATAL,"InitProc: ShmemPID table broken"); + elog(FATAL, "InitProc: ShmemPID table broken"); } - - MyProc->errType = NO_ERROR; - SHMQueueElemInit(&(MyProc->links)); - - on_exitpg(ProcKill, (caddr_t)pid); - - ProcInitialized = TRUE; + + MyProc->errType = NO_ERROR; + SHMQueueElemInit(&(MyProc->links)); + + on_exitpg(ProcKill, (caddr_t) pid); + + ProcInitialized = TRUE; } /* @@ -293,109 +302,112 @@ InitProcess(IPCKey key) void ProcReleaseLocks() { - if (!MyProc) - return; - LockReleaseAll(1,&MyProc->lockQueue); + if (!MyProc) + return; + LockReleaseAll(1, &MyProc->lockQueue); } /* * ProcRemove - - * used by the postmaster to clean up the global tables. This also frees - * up the semaphore used for the lmgr of the process. (We have to do - * this is the postmaster instead of doing a IpcSemaphoreKill on exiting - * the process because the semaphore set is shared among backends and - * we don't want to remove other's semaphores on exit.) + * used by the postmaster to clean up the global tables. This also frees + * up the semaphore used for the lmgr of the process. (We have to do + * this is the postmaster instead of doing a IpcSemaphoreKill on exiting + * the process because the semaphore set is shared among backends and + * we don't want to remove other's semaphores on exit.) */ bool ProcRemove(int pid) { - SHMEM_OFFSET location; - PROC *proc; - - location = INVALID_OFFSET; - - location = ShmemPIDDestroy(pid); - if (location == INVALID_OFFSET) - return(FALSE); - proc = (PROC *) MAKE_PTR(location); - - SpinAcquire(ProcStructLock); - - ProcFreeSem(proc->sem.semKey, proc->sem.semNum); - - proc->links.next = ProcGlobal->freeProcs; - ProcGlobal->freeProcs = MAKE_OFFSET(proc); - - SpinRelease(ProcStructLock); - - return(TRUE); + SHMEM_OFFSET location; + PROC *proc; + + location = INVALID_OFFSET; + + location = ShmemPIDDestroy(pid); + if (location == INVALID_OFFSET) + return (FALSE); + proc = (PROC *) MAKE_PTR(location); + + SpinAcquire(ProcStructLock); + + ProcFreeSem(proc->sem.semKey, proc->sem.semNum); + + proc->links.next = ProcGlobal->freeProcs; + ProcGlobal->freeProcs = MAKE_OFFSET(proc); + + SpinRelease(ProcStructLock); + + return (TRUE); } /* * ProcKill() -- Destroy the per-proc data structure for - * this process. Release any of its held spin locks. + * this process. Release any of its held spin locks. */ static void ProcKill(int exitStatus, int pid) { - PROC *proc; - SHMEM_OFFSET location; - - /* -------------------- - * If this is a FATAL exit the postmaster will have to kill all the - * existing backends and reinitialize shared memory. So all we don't - * need to do anything here. - * -------------------- - */ - if (exitStatus != 0) - return; - - if (! 
pid)
+ PROC *proc;
+ SHMEM_OFFSET location;
+
+ /* --------------------
+ * If this is a FATAL exit the postmaster will have to kill all the
+ * existing backends and reinitialize shared memory. So we don't
+ * need to do anything here.
+ * --------------------
+ */
+ if (exitStatus != 0)
+ return;
+
+ if (!pid)
{
- pid = getpid();
+ pid = getpid();
}
-
- ShmemPIDLookup(pid,&location);
- if (location == INVALID_OFFSET)
- return;
-
- proc = (PROC *) MAKE_PTR(location);
-
- if (proc != MyProc) {
- Assert( pid != getpid() );
- } else
- MyProc = NULL;
-
- /* ---------------
- * Assume one lock table.
- * ---------------
- */
- ProcReleaseSpins(proc);
- LockReleaseAll(1,&proc->lockQueue);
-
+
+ ShmemPIDLookup(pid, &location);
+ if (location == INVALID_OFFSET)
+ return;
+
+ proc = (PROC *) MAKE_PTR(location);
+
+ if (proc != MyProc)
+ {
+ Assert(pid != getpid());
+ }
+ else
+ MyProc = NULL;
+
+ /* ---------------
+ * Assume one lock table.
+ * ---------------
+ */
+ ProcReleaseSpins(proc);
+ LockReleaseAll(1, &proc->lockQueue);
+
#ifdef USER_LOCKS
- LockReleaseAll(0,&proc->lockQueue);
+ LockReleaseAll(0, &proc->lockQueue);
#endif
- /* ----------------
- * get off the wait queue
- * ----------------
- */
- LockLockTable();
- if (proc->links.next != INVALID_OFFSET) {
- Assert(proc->waitLock->waitProcs.size > 0);
- SHMQueueDelete(&(proc->links));
- --proc->waitLock->waitProcs.size;
- }
- SHMQueueElemInit(&(proc->links));
- UnlockLockTable();
-
- return;
+ /* ----------------
+ * get off the wait queue
+ * ----------------
+ */
+ LockLockTable();
+ if (proc->links.next != INVALID_OFFSET)
+ {
+ Assert(proc->waitLock->waitProcs.size > 0);
+ SHMQueueDelete(&(proc->links));
+ --proc->waitLock->waitProcs.size;
+ }
+ SHMQueueElemInit(&(proc->links));
+ UnlockLockTable();
+
+ return;
}

/*
 * ProcQueue package: routines for putting processes to sleep
- * and waking them up
+ * and waking them up
 */

/*
@@ -405,33 +417,34 @@ ProcKill(int exitStatus, int pid)
 * Side Effects: Initializes the queue if we allocated one
 */
#ifdef NOT_USED
-PROC_QUEUE *
+PROC_QUEUE *
ProcQueueAlloc(char *name)
{
- bool found;
- PROC_QUEUE *queue = (PROC_QUEUE *)
- ShmemInitStruct(name,(unsigned)sizeof(PROC_QUEUE),&found);
-
- if (! queue)
+ bool found;
+ PROC_QUEUE *queue = (PROC_QUEUE *)
+ ShmemInitStruct(name, (unsigned) sizeof(PROC_QUEUE), &found);
+
+ if (!queue)
{
- return(NULL);
+ return (NULL);
}
- if (! found)
+ if (!found)
{
- ProcQueueInit(queue);
+ ProcQueueInit(queue);
}
- return(queue);
+ return (queue);
}
+
#endif

/*
 * ProcQueueInit -- initialize a shared memory process queue
 */
void
-ProcQueueInit(PROC_QUEUE *queue)
+ProcQueueInit(PROC_QUEUE * queue)
{
- SHMQueueInit(&(queue->links));
- queue->size = 0;
+ SHMQueueInit(&(queue->links));
+ queue->size = 0;
}


@@ -444,124 +457,126 @@ ProcQueueInit(PROC_QUEUE *queue)
 * to acquire it, we sleep.
 *
 * ASSUME: that no one will fiddle with the queue until after
- * we release the spin lock.
+ * we release the spin lock.
 *
 * NOTES: The process queue is now a priority queue for locking. 
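/*
 * The sorted insertion ProcSleep performs, sketched with an ordinary
 * circular doubly linked list (the real code walks SHM_QUEUE offsets):
 * scan from the tail past lower-priority waiters, then link in behind
 * the first waiter of equal or higher priority.
 */
struct sk_waiter
{
	struct sk_waiter *prev, *next;
	int prio;
};

static void
sk_enqueue_prio(struct sk_waiter *head, struct sk_waiter *me)
{
	struct sk_waiter *p = head->prev;	/* tail of the circular queue */

	while (p != head && p->prio < me->prio)
		p = p->prev;			/* skip waiters with lower priority */

	me->next = p->next;			/* insert me immediately after p */
	me->prev = p;
	p->next->prev = me;
	p->next = me;
}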
*/ int -ProcSleep(PROC_QUEUE *queue, - SPINLOCK spinlock, - int token, - int prio, - LOCK *lock) +ProcSleep(PROC_QUEUE * queue, + SPINLOCK spinlock, + int token, + int prio, + LOCK * lock) { - int i; - PROC *proc; - struct itimerval timeval, dummy; - - proc = (PROC *) MAKE_PTR(queue->links.prev); - for (i=0;i<queue->size;i++) + int i; + PROC *proc; + struct itimerval timeval, + dummy; + + proc = (PROC *) MAKE_PTR(queue->links.prev); + for (i = 0; i < queue->size; i++) { - if (proc->prio < prio) - proc = (PROC *) MAKE_PTR(proc->links.prev); - else - break; + if (proc->prio < prio) + proc = (PROC *) MAKE_PTR(proc->links.prev); + else + break; } - - MyProc->prio = prio; - MyProc->token = token; - MyProc->waitLock = lock; - - /* ------------------- - * currently, we only need this for the ProcWakeup routines - * ------------------- - */ - TransactionIdStore((TransactionId) GetCurrentTransactionId(), &MyProc->xid); - - /* ------------------- - * assume that these two operations are atomic (because - * of the spinlock). - * ------------------- - */ - SHMQueueInsertTL(&(proc->links),&(MyProc->links)); - queue->size++; - - SpinRelease(spinlock); - - /* -------------- - * Postgres does not have any deadlock detection code and for this - * reason we must set a timer to wake up the process in the event of - * a deadlock. For now the timer is set for 1 minute and we assume that - * any process which sleeps for this amount of time is deadlocked and will - * receive a SIGALRM signal. The handler should release the processes - * semaphore and abort the current transaction. - * - * Need to zero out struct to set the interval and the micro seconds fields - * to 0. - * -------------- - */ - memset(&timeval, 0, sizeof(struct itimerval)); - timeval.it_value.tv_sec = DEADLOCK_TIMEOUT; - - if (setitimer(ITIMER_REAL, &timeval, &dummy)) - elog(FATAL, "ProcSleep: Unable to set timer for process wakeup"); - - /* -------------- - * if someone wakes us between SpinRelease and IpcSemaphoreLock, - * IpcSemaphoreLock will not block. The wakeup is "saved" by - * the semaphore implementation. - * -------------- - */ - IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, IpcExclusiveLock); - - /* --------------- - * We were awoken before a timeout - now disable the timer - * --------------- - */ - timeval.it_value.tv_sec = 0; - - - if (setitimer(ITIMER_REAL, &timeval, &dummy)) - elog(FATAL, "ProcSleep: Unable to diable timer for process wakeup"); - - /* ---------------- - * We were assumed to be in a critical section when we went - * to sleep. - * ---------------- - */ - SpinAcquire(spinlock); - - return(MyProc->errType); + + MyProc->prio = prio; + MyProc->token = token; + MyProc->waitLock = lock; + + /* ------------------- + * currently, we only need this for the ProcWakeup routines + * ------------------- + */ + TransactionIdStore((TransactionId) GetCurrentTransactionId(), &MyProc->xid); + + /* ------------------- + * assume that these two operations are atomic (because + * of the spinlock). + * ------------------- + */ + SHMQueueInsertTL(&(proc->links), &(MyProc->links)); + queue->size++; + + SpinRelease(spinlock); + + /* -------------- + * Postgres does not have any deadlock detection code and for this + * reason we must set a timer to wake up the process in the event of + * a deadlock. For now the timer is set for 1 minute and we assume that + * any process which sleeps for this amount of time is deadlocked and will + * receive a SIGALRM signal. 
The handler should release the process's
+ * semaphore and abort the current transaction.
+ *
+ * Need to zero out struct to set the interval and the microseconds fields
+ * to 0.
+ * --------------
+ */
+ memset(&timeval, 0, sizeof(struct itimerval));
+ timeval.it_value.tv_sec = DEADLOCK_TIMEOUT;
+
+ if (setitimer(ITIMER_REAL, &timeval, &dummy))
+ elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
+
+ /* --------------
+ * if someone wakes us between SpinRelease and IpcSemaphoreLock,
+ * IpcSemaphoreLock will not block. The wakeup is "saved" by
+ * the semaphore implementation.
+ * --------------
+ */
+ IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum, IpcExclusiveLock);
+
+ /* ---------------
+ * We were awoken before a timeout - now disable the timer
+ * ---------------
+ */
+ timeval.it_value.tv_sec = 0;
+
+
+ if (setitimer(ITIMER_REAL, &timeval, &dummy))
+ elog(FATAL, "ProcSleep: Unable to disable timer for process wakeup");
+
+ /* ----------------
+ * We were assumed to be in a critical section when we went
+ * to sleep.
+ * ----------------
+ */
+ SpinAcquire(spinlock);
+
+ return (MyProc->errType);
}

/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
- * remove the process from the wait queue and set its links invalid.
- * RETURN: the next process in the wait queue.
+ * remove the process from the wait queue and set its links invalid.
+ * RETURN: the next process in the wait queue.
 */
-static PROC *
-ProcWakeup(PROC *proc, int errType)
+static PROC *
+ProcWakeup(PROC * proc, int errType)
{
- PROC *retProc;
- /* assume that spinlock has been acquired */
-
- if (proc->links.prev == INVALID_OFFSET ||
- proc->links.next == INVALID_OFFSET)
- return((PROC *) NULL);
-
- retProc = (PROC *) MAKE_PTR(proc->links.prev);
-
- /* you have to update waitLock->waitProcs.size yourself */
- SHMQueueDelete(&(proc->links));
- SHMQueueElemInit(&(proc->links));
-
- proc->errType = errType;
-
- IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum, IpcExclusiveLock);
-
- return retProc;
+ PROC *retProc;
+
+ /* assume that spinlock has been acquired */
+
+ if (proc->links.prev == INVALID_OFFSET ||
+ proc->links.next == INVALID_OFFSET)
+ return ((PROC *) NULL);
+
+ retProc = (PROC *) MAKE_PTR(proc->links.prev);
+
+ /* you have to update waitLock->waitProcs.size yourself */
+ SHMQueueDelete(&(proc->links));
+ SHMQueueElemInit(&(proc->links));
+
+ proc->errType = errType;
+
+ IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum, IpcExclusiveLock);
+
+ return retProc;
}


@@ -572,61 +587,64 @@ ProcWakeup(PROC *proc, int errType)
int
ProcGetId()
{
- return( MyProc->procId );
+ return (MyProc->procId);
}
+
#endif

/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
- * released.
+ * released.
 */
int
-ProcLockWakeup(PROC_QUEUE *queue, char *ltable, char *lock)
+ProcLockWakeup(PROC_QUEUE * queue, char *ltable, char *lock)
{
- PROC *proc;
- int count;
-
- if (! queue->size)
- return(STATUS_NOT_FOUND);
-
- proc = (PROC *) MAKE_PTR(queue->links.prev);
- count = 0;
- while ((LockResolveConflicts ((LOCKTAB *) ltable,
- (LOCK *) lock,
- proc->token,
- proc->xid) == STATUS_OK))
+ PROC *proc;
+ int count;
+
+ if (!queue->size)
+ return (STATUS_NOT_FOUND);
+
+ proc = (PROC *) MAKE_PTR(queue->links.prev);
+ count = 0;
+ while ((LockResolveConflicts((LOCKTAB *) ltable,
+ (LOCK *) lock,
+ proc->token,
+ proc->xid) == STATUS_OK))
{
- /* there was a waiting process, grant it the lock before waking it
- * up. 
This will prevent another process from seizing the lock - * between the time we release the lock master (spinlock) and - * the time that the awoken process begins executing again. - */ - GrantLock((LOCK *) lock, proc->token); - queue->size--; - - /* - * ProcWakeup removes proc from the lock waiting process queue and - * returns the next proc in chain. If a writer just dropped - * its lock and there are several waiting readers, wake them all up. - */ - proc = ProcWakeup(proc, NO_ERROR); - - count++; - if (!proc || queue->size == 0) - break; + + /* + * there was a waiting process, grant it the lock before waking it + * up. This will prevent another process from seizing the lock + * between the time we release the lock master (spinlock) and the + * time that the awoken process begins executing again. + */ + GrantLock((LOCK *) lock, proc->token); + queue->size--; + + /* + * ProcWakeup removes proc from the lock waiting process queue and + * returns the next proc in chain. If a writer just dropped its + * lock and there are several waiting readers, wake them all up. + */ + proc = ProcWakeup(proc, NO_ERROR); + + count++; + if (!proc || queue->size == 0) + break; } - - if (count) - return(STATUS_OK); - else - /* Something is still blocking us. May have deadlocked. */ - return(STATUS_NOT_FOUND); + + if (count) + return (STATUS_OK); + else + /* Something is still blocking us. May have deadlocked. */ + return (STATUS_NOT_FOUND); } void -ProcAddLock(SHM_QUEUE *elem) +ProcAddLock(SHM_QUEUE * elem) { - SHMQueueInsertTL(&MyProc->lockQueue,elem); + SHMQueueInsertTL(&MyProc->lockQueue, elem); } /* -------------------- @@ -634,194 +652,201 @@ ProcAddLock(SHM_QUEUE *elem) * while waiting for a lock to be released by some other process. After * the one minute deadline we assume we have a deadlock and must abort * this transaction. We must also indicate that I'm no longer waiting - * on a lock so that other processes don't try to wake me up and screw + * on a lock so that other processes don't try to wake me up and screw * up my semaphore. * -------------------- */ static void HandleDeadLock(int sig) { - LOCK *lock; - int size; - - LockLockTable(); - - /* --------------------- - * Check to see if we've been awoken by anyone in the interim. - * - * If we have we can return and resume our transaction -- happy day. - * Before we are awoken the process releasing the lock grants it to - * us so we know that we don't have to wait anymore. - * - * Damn these names are LONG! -mer - * --------------------- - */ - if (IpcSemaphoreGetCount(MyProc->sem.semId, MyProc->sem.semNum) == - IpcSemaphoreDefaultStartValue) { - UnlockLockTable(); - return; - } - - /* - * you would think this would be unnecessary, but... - * - * this also means we've been removed already. in some ports - * (e.g., sparc and aix) the semop(2) implementation is such that - * we can actually end up in this handler after someone has removed - * us from the queue and bopped the semaphore *but the test above - * fails to detect the semaphore update* (presumably something weird - * having to do with the order in which the semaphore wakeup signal - * and SIGALRM get handled). - */ - if (MyProc->links.prev == INVALID_OFFSET || - MyProc->links.next == INVALID_OFFSET) { - UnlockLockTable(); - return; - } - - lock = MyProc->waitLock; - size = lock->waitProcs.size; /* so we can look at this in the core */ - + LOCK *lock; + int size; + + LockLockTable(); + + /* --------------------- + * Check to see if we've been awoken by anyone in the interim. 
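/*
 * Two pieces of the timeout machinery in sketch form.  Arming: zeroing
 * the whole itimerval keeps it_interval at zero, so the timer is
 * one-shot; writing tv_sec = 0 later disarms it.  The handler's first
 * test: if the semaphore is already back at its start value, the
 * wakeup won the race with SIGALRM and there is nothing to do.  The
 * sk_* names are illustrative.
 */
#include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

static int
sk_set_deadlock_timer(int seconds)	/* pass 0 to disarm */
{
	struct itimerval t, old;

	memset(&t, 0, sizeof(t));	/* it_interval = 0: fire only once */
	t.it_value.tv_sec = seconds;
	return setitimer(ITIMER_REAL, &t, &old);	/* 0 on success */
}

static int
sk_already_woken(int semId, int semNum, int startValue)
{
	/* GETVAL reads the current value of one semaphore in the set */
	return semctl(semId, semNum, GETVAL) == startValue;
}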
+ * + * If we have we can return and resume our transaction -- happy day. + * Before we are awoken the process releasing the lock grants it to + * us so we know that we don't have to wait anymore. + * + * Damn these names are LONG! -mer + * --------------------- + */ + if (IpcSemaphoreGetCount(MyProc->sem.semId, MyProc->sem.semNum) == + IpcSemaphoreDefaultStartValue) + { + UnlockLockTable(); + return; + } + + /* + * you would think this would be unnecessary, but... + * + * this also means we've been removed already. in some ports (e.g., + * sparc and aix) the semop(2) implementation is such that we can + * actually end up in this handler after someone has removed us from + * the queue and bopped the semaphore *but the test above fails to + * detect the semaphore update* (presumably something weird having to + * do with the order in which the semaphore wakeup signal and SIGALRM + * get handled). + */ + if (MyProc->links.prev == INVALID_OFFSET || + MyProc->links.next == INVALID_OFFSET) + { + UnlockLockTable(); + return; + } + + lock = MyProc->waitLock; + size = lock->waitProcs.size;/* so we can look at this in the core */ + #ifdef DEADLOCK_DEBUG - DumpLocks(); + DumpLocks(); #endif - /* ------------------------ - * Get this process off the lock's wait queue - * ------------------------ - */ - Assert(lock->waitProcs.size > 0); - --lock->waitProcs.size; - SHMQueueDelete(&(MyProc->links)); - SHMQueueElemInit(&(MyProc->links)); - - /* ------------------ - * Unlock my semaphore so that the count is right for next time. - * I was awoken by a signal, not by someone unlocking my semaphore. - * ------------------ - */ - IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum, IpcExclusiveLock); - - /* ------------- - * Set MyProc->errType to STATUS_ERROR so that we abort after - * returning from this handler. - * ------------- - */ - MyProc->errType = STATUS_ERROR; - - /* - * if this doesn't follow the IpcSemaphoreUnlock then we get lock - * table corruption ("LockReplace: xid table corrupted") due to - * race conditions. i don't claim to understand this... - */ - UnlockLockTable(); - - elog(NOTICE, "Timeout -- possible deadlock"); - return; + /* ------------------------ + * Get this process off the lock's wait queue + * ------------------------ + */ + Assert(lock->waitProcs.size > 0); + --lock->waitProcs.size; + SHMQueueDelete(&(MyProc->links)); + SHMQueueElemInit(&(MyProc->links)); + + /* ------------------ + * Unlock my semaphore so that the count is right for next time. + * I was awoken by a signal, not by someone unlocking my semaphore. + * ------------------ + */ + IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum, IpcExclusiveLock); + + /* ------------- + * Set MyProc->errType to STATUS_ERROR so that we abort after + * returning from this handler. + * ------------- + */ + MyProc->errType = STATUS_ERROR; + + /* + * if this doesn't follow the IpcSemaphoreUnlock then we get lock + * table corruption ("LockReplace: xid table corrupted") due to race + * conditions. i don't claim to understand this... 
+ */ + UnlockLockTable(); + + elog(NOTICE, "Timeout -- possible deadlock"); + return; } void -ProcReleaseSpins(PROC *proc) +ProcReleaseSpins(PROC * proc) { - int i; - - if (!proc) - proc = MyProc; - - if (!proc) - return; - for (i=0; i < (int)MAX_SPINS; i++) + int i; + + if (!proc) + proc = MyProc; + + if (!proc) + return; + for (i = 0; i < (int) MAX_SPINS; i++) { - if (proc->sLocks[i]) + if (proc->sLocks[i]) { - Assert(proc->sLocks[i] == 1); - SpinRelease(i); + Assert(proc->sLocks[i] == 1); + SpinRelease(i); } } } /***************************************************************************** - * + * *****************************************************************************/ /* * ProcGetNewSemKeyAndNum - - * scan the free semaphore bitmap and allocate a single semaphore from - * a semaphore set. (If the semaphore set doesn't exist yet, - * IpcSemaphoreCreate will create it. Otherwise, we use the existing - * semaphore set.) + * scan the free semaphore bitmap and allocate a single semaphore from + * a semaphore set. (If the semaphore set doesn't exist yet, + * IpcSemaphoreCreate will create it. Otherwise, we use the existing + * semaphore set.) */ static void -ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum) +ProcGetNewSemKeyAndNum(IPCKey * key, int *semNum) { - int i; - int32 *freeSemMap = ProcGlobal->freeSemMap; - unsigned int fullmask; - - /* - * we hold ProcStructLock when entering this routine. We scan through - * the bitmap to look for a free semaphore. - */ - fullmask = ~0 >> (32 - PROC_NSEMS_PER_SET); - for(i=0; i < MAX_PROC_SEMS/PROC_NSEMS_PER_SET; i++) { - int mask = 1; - int j; - - if (freeSemMap[i] == fullmask) - continue; /* none free for this set */ - - for(j = 0; j < PROC_NSEMS_PER_SET; j++) { - if ((freeSemMap[i] & mask) == 0) { - /* - * a free semaphore found. Mark it as allocated. - */ - freeSemMap[i] |= mask; + int i; + int32 *freeSemMap = ProcGlobal->freeSemMap; + unsigned int fullmask; - *key = ProcGlobal->currKey + i; - *semNum = j; - return; - } - mask <<= 1; + /* + * we hold ProcStructLock when entering this routine. We scan through + * the bitmap to look for a free semaphore. + */ + fullmask = ~0 >> (32 - PROC_NSEMS_PER_SET); + for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++) + { + int mask = 1; + int j; + + if (freeSemMap[i] == fullmask) + continue; /* none free for this set */ + + for (j = 0; j < PROC_NSEMS_PER_SET; j++) + { + if ((freeSemMap[i] & mask) == 0) + { + + /* + * a free semaphore found. Mark it as allocated. + */ + freeSemMap[i] |= mask; + + *key = ProcGlobal->currKey + i; + *semNum = j; + return; + } + mask <<= 1; + } } - } - /* if we reach here, all the semaphores are in use. */ - elog(WARN, "InitProc: cannot allocate a free semaphore"); + /* if we reach here, all the semaphores are in use. */ + elog(WARN, "InitProc: cannot allocate a free semaphore"); } /* * ProcFreeSem - - * free up our semaphore in the semaphore set. If we're the last one - * in the set, also remove the semaphore set. + * free up our semaphore in the semaphore set. If we're the last one + * in the set, also remove the semaphore set. 
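/*
 * The freeSemMap bookkeeping in miniature: one word per semaphore set,
 * one bit per semaphore.  Allocation scans for a clear bit; freeing
 * clears the bit and tells the caller when the whole set is unused so
 * the set itself can be removed, as ProcFreeSem does.  The names and
 * the 16-per-set figure are illustrative.
 */
#define SK_SEMS_PER_SET 16

static int
sk_sem_alloc(int *map, int nsets, int *set, int *num)
{
	int i, j;

	for (i = 0; i < nsets; i++)
	{
		if (map[i] == (1 << SK_SEMS_PER_SET) - 1)
			continue;			/* every bit set: this set is full */
		for (j = 0; j < SK_SEMS_PER_SET; j++)
		{
			if ((map[i] & (1 << j)) == 0)
			{
				map[i] |= 1 << j;	/* mark it allocated */
				*set = i;
				*num = j;
				return 1;
			}
		}
	}
	return 0;					/* all semaphores in use */
}

static int
sk_sem_free(int *map, int set, int num)
{
	map[set] &= ~(1 << num);
	return map[set] == 0;		/* true: last one out, remove the set */
}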
*/ static void ProcFreeSem(IpcSemaphoreKey semKey, int semNum) { - int mask; - int i; - int32 *freeSemMap = ProcGlobal->freeSemMap; + int mask; + int i; + int32 *freeSemMap = ProcGlobal->freeSemMap; - i = semKey - ProcGlobal->currKey; - mask = ~(1 << semNum); - freeSemMap[i] &= mask; + i = semKey - ProcGlobal->currKey; + mask = ~(1 << semNum); + freeSemMap[i] &= mask; - if (freeSemMap[i]==0) - IpcSemaphoreKill(semKey); + if (freeSemMap[i] == 0) + IpcSemaphoreKill(semKey); } /* * ProcFreeAllSemaphores - - * on exiting the postmaster, we free up all the semaphores allocated - * to the lmgrs of the backends. + * on exiting the postmaster, we free up all the semaphores allocated + * to the lmgrs of the backends. */ void ProcFreeAllSemaphores() { - int i; - int32 *freeSemMap = ProcGlobal->freeSemMap; + int i; + int32 *freeSemMap = ProcGlobal->freeSemMap; - for(i=0; i < MAX_PROC_SEMS/PROC_NSEMS_PER_SET; i++) { - if (freeSemMap[i]!=0) - IpcSemaphoreKill(ProcGlobal->currKey + i); - } + for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++) + { + if (freeSemMap[i] != 0) + IpcSemaphoreKill(ProcGlobal->currKey + i); + } } diff --git a/src/backend/storage/lmgr/single.c b/src/backend/storage/lmgr/single.c index 20feddbed9f..80220368437 100644 --- a/src/backend/storage/lmgr/single.c +++ b/src/backend/storage/lmgr/single.c @@ -1,19 +1,19 @@ /*------------------------------------------------------------------------- * * single.c-- - * set single locks in the multi-level lock hierarchy + * set single locks in the multi-level lock hierarchy * - * Sometimes we don't want to set all levels of the multi-level - * lock hierarchy at once. This allows us to set and release - * one level at a time. It's useful in index scans when - * you can set an intent lock at the beginning and thereafter - * only set page locks. Tends to speed things up. + * Sometimes we don't want to set all levels of the multi-level + * lock hierarchy at once. This allows us to set and release + * one level at a time. It's useful in index scans when + * you can set an intent lock at the beginning and thereafter + * only set page locks. Tends to speed things up. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/single.c,v 1.2 1996/11/03 05:07:33 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/single.c,v 1.3 1997/09/07 04:49:04 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -21,7 +21,7 @@ #include "postgres.h" -#include "storage/lmgr.h" /* where the declarations go */ +#include "storage/lmgr.h" /* where the declarations go */ #include "storage/lock.h" #include "storage/multilev.h" #include "utils/rel.h" @@ -34,28 +34,27 @@ bool SingleLockReln(LockInfo linfo, LOCKT lockt, int action) { - LOCKTAG tag; - - /* - * LOCKTAG has two bytes of padding, unfortunately. The - * hash function will return miss if the padding bytes aren't - * zero'd. - */ - memset(&tag,0,sizeof(tag)); - tag.relId = linfo->lRelId.relId; - tag.dbId = linfo->lRelId.dbId; - BlockIdSet(&(tag.tupleId.ip_blkid), InvalidBlockNumber); - tag.tupleId.ip_posid = InvalidOffsetNumber; - - if (action == UNLOCK) - return(LockRelease(MultiTableId, &tag, lockt)); - else - return(LockAcquire(MultiTableId, &tag, lockt)); + LOCKTAG tag; + + /* + * LOCKTAG has two bytes of padding, unfortunately. The hash function + * will return miss if the padding bytes aren't zero'd. 
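/*
 * The usage pattern the file header describes, sketched with
 * hypothetical wrappers over the single-level entry points: take one
 * relation-level INTENT lock when the index scan starts, then take
 * plain page locks as the scan advances, instead of re-locking every
 * level for each page.  Mode values here are illustrative.
 */
#define SK_READ        1
#define SK_READ_INTENT 5

extern int sk_lock_reln_once(int relId, int mode);
extern int sk_lock_page_only(int relId, unsigned blkno, int mode);

static void
sk_index_scan(int relId, unsigned nblocks)
{
	unsigned blk;

	sk_lock_reln_once(relId, SK_READ_INTENT);	/* once, up front */
	for (blk = 0; blk < nblocks; blk++)
		sk_lock_page_only(relId, blk, SK_READ);	/* cheap per-page lock */
}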
+ */ + memset(&tag, 0, sizeof(tag)); + tag.relId = linfo->lRelId.relId; + tag.dbId = linfo->lRelId.dbId; + BlockIdSet(&(tag.tupleId.ip_blkid), InvalidBlockNumber); + tag.tupleId.ip_posid = InvalidOffsetNumber; + + if (action == UNLOCK) + return (LockRelease(MultiTableId, &tag, lockt)); + else + return (LockAcquire(MultiTableId, &tag, lockt)); } /* * SingleLockPage -- use multi-level lock table, but lock - * only at the page level. + * only at the page level. * * Assumes that an INTENT lock has already been set in the * multi-level lock table. @@ -63,27 +62,25 @@ SingleLockReln(LockInfo linfo, LOCKT lockt, int action) */ bool SingleLockPage(LockInfo linfo, - ItemPointer tidPtr, - LOCKT lockt, - int action) + ItemPointer tidPtr, + LOCKT lockt, + int action) { - LOCKTAG tag; - - /* - * LOCKTAG has two bytes of padding, unfortunately. The - * hash function will return miss if the padding bytes aren't - * zero'd. - */ - memset(&tag,0,sizeof(tag)); - tag.relId = linfo->lRelId.relId; - tag.dbId = linfo->lRelId.dbId; - BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid)); - tag.tupleId.ip_posid = InvalidOffsetNumber; - - - if (action == UNLOCK) - return(LockRelease(MultiTableId, &tag, lockt)); - else - return(LockAcquire(MultiTableId, &tag, lockt)); -} + LOCKTAG tag; + /* + * LOCKTAG has two bytes of padding, unfortunately. The hash function + * will return miss if the padding bytes aren't zero'd. + */ + memset(&tag, 0, sizeof(tag)); + tag.relId = linfo->lRelId.relId; + tag.dbId = linfo->lRelId.dbId; + BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid)); + tag.tupleId.ip_posid = InvalidOffsetNumber; + + + if (action == UNLOCK) + return (LockRelease(MultiTableId, &tag, lockt)); + else + return (LockAcquire(MultiTableId, &tag, lockt)); +} diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 98d1c59d5f0..75e1b5da9e7 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * bufpage.c-- - * POSTGRES standard buffer page code. + * POSTGRES standard buffer page code. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.8 1997/08/24 23:07:30 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.9 1997/09/07 04:49:06 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -26,341 +26,368 @@ #include "lib/qsort.h" -static void PageIndexTupleDeleteAdjustLinePointers(PageHeader phdr, - char *location, Size size); +static void +PageIndexTupleDeleteAdjustLinePointers(PageHeader phdr, + char *location, Size size); -static bool PageManagerShuffle = true; /* default is shuffle mode */ +static bool PageManagerShuffle = true; /* default is shuffle mode */ /* ---------------------------------------------------------------- - * Page support functions + * Page support functions * ---------------------------------------------------------------- */ /* * PageInit -- - * Initializes the contents of a page. + * Initializes the contents of a page. 
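[Editor's note] A freshly initialized page is nothing but three offsets: line pointers grow up from pd_lower, tuple data grows down to pd_upper, and the special space sits pinned at the end of the page. A standalone sketch of the same arithmetic, with hypothetical stand-ins for BLCKSZ, the header size, and DOUBLEALIGN:

#include <stdio.h>

#define PAGE_SIZE 8192
#define HDR_SIZE  24			/* stand-in: sizeof(PageHeaderData) -
								 * sizeof(ItemIdData) */
#define ALIGN8(x) (((x) + 7) & ~7)		/* stand-in for DOUBLEALIGN */

typedef struct DemoPageHeader
{
	unsigned	pd_lower;		/* end of the line-pointer array */
	unsigned	pd_upper;		/* start of tuple data */
	unsigned	pd_special;		/* start of access-method-specific space */
} DemoPageHeader;

static void
demo_page_init(DemoPageHeader * p, unsigned specialSize)
{
	specialSize = ALIGN8(specialSize);
	p->pd_lower = HDR_SIZE;
	p->pd_upper = PAGE_SIZE - specialSize;
	p->pd_special = PAGE_SIZE - specialSize;
}

int
main(void)
{
	DemoPageHeader p;

	demo_page_init(&p, 16);
	/* everything between pd_lower and pd_upper is allocatable */
	printf("lower=%u upper=%u free=%u\n",
		   p.pd_lower, p.pd_upper, p.pd_upper - p.pd_lower);
	return 0;
}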
*/ void PageInit(Page page, Size pageSize, Size specialSize) { - PageHeader p = (PageHeader) page; - - Assert(pageSize == BLCKSZ); - Assert(pageSize > - specialSize + sizeof(PageHeaderData) - sizeof(ItemIdData)); - - specialSize = DOUBLEALIGN(specialSize); - - p->pd_lower = sizeof(PageHeaderData) - sizeof(ItemIdData); - p->pd_upper = pageSize - specialSize; - p->pd_special = pageSize - specialSize; - PageSetPageSize(page, pageSize); + PageHeader p = (PageHeader) page; + + Assert(pageSize == BLCKSZ); + Assert(pageSize > + specialSize + sizeof(PageHeaderData) - sizeof(ItemIdData)); + + specialSize = DOUBLEALIGN(specialSize); + + p->pd_lower = sizeof(PageHeaderData) - sizeof(ItemIdData); + p->pd_upper = pageSize - specialSize; + p->pd_special = pageSize - specialSize; + PageSetPageSize(page, pageSize); } /* * PageAddItem -- - * Adds item to the given page. + * Adds item to the given page. * * Note: - * This does not assume that the item resides on a single page. - * It is the responsiblity of the caller to act appropriately - * depending on this fact. The "pskip" routines provide a - * friendlier interface, in this case. - * - * This does change the status of any of the resources passed. - * The semantics may change in the future. + * This does not assume that the item resides on a single page. + * It is the responsiblity of the caller to act appropriately + * depending on this fact. The "pskip" routines provide a + * friendlier interface, in this case. + * + * This does change the status of any of the resources passed. + * The semantics may change in the future. * - * This routine should probably be combined with others? + * This routine should probably be combined with others? */ /* ---------------- - * PageAddItem + * PageAddItem * - * add an item to a page. + * add an item to a page. * - * Notes on interface: - * If offsetNumber is valid, shuffle ItemId's down to make room - * to use it, if PageManagerShuffle is true. If PageManagerShuffle is - * false, then overwrite the specified ItemId. (PageManagerShuffle is - * true by default, and is modified by calling PageManagerModeSet.) - * If offsetNumber is not valid, then assign one by finding the first - * one that is both unused and deallocated. + * Notes on interface: + * If offsetNumber is valid, shuffle ItemId's down to make room + * to use it, if PageManagerShuffle is true. If PageManagerShuffle is + * false, then overwrite the specified ItemId. (PageManagerShuffle is + * true by default, and is modified by calling PageManagerModeSet.) + * If offsetNumber is not valid, then assign one by finding the first + * one that is both unused and deallocated. * - * NOTE: If offsetNumber is valid, and PageManagerShuffle is true, it - * is assumed that there is room on the page to shuffle the ItemId's - * down by one. + * NOTE: If offsetNumber is valid, and PageManagerShuffle is true, it + * is assumed that there is room on the page to shuffle the ItemId's + * down by one. * ---------------- */ OffsetNumber PageAddItem(Page page, - Item item, - Size size, - OffsetNumber offsetNumber, - ItemIdFlags flags) + Item item, + Size size, + OffsetNumber offsetNumber, + ItemIdFlags flags) { - register i; - Size alignedSize; - Offset lower; - Offset upper; - ItemId itemId; - ItemId fromitemId, toitemId; - OffsetNumber limit; - - bool shuffled = false; - - /* - * Find first unallocated offsetNumber - */ - limit = OffsetNumberNext(PageGetMaxOffsetNumber(page)); - - /* was offsetNumber passed in? 
*/ - if (OffsetNumberIsValid(offsetNumber)) { - if (PageManagerShuffle == true) { - /* shuffle ItemId's (Do the PageManager Shuffle...) */ - for (i = (limit - 1); i >= offsetNumber; i--) { - fromitemId = &((PageHeader)page)->pd_linp[i - 1]; - toitemId = &((PageHeader)page)->pd_linp[i]; - *toitemId = *fromitemId; - } - shuffled = true; /* need to increase "lower" */ - } else { /* overwrite mode */ - itemId = &((PageHeader)page)->pd_linp[offsetNumber - 1]; - if (((*itemId).lp_flags & LP_USED) || - ((*itemId).lp_len != 0)) { - elog(WARN, "PageAddItem: tried overwrite of used ItemId"); - return (InvalidOffsetNumber); - } + register i; + Size alignedSize; + Offset lower; + Offset upper; + ItemId itemId; + ItemId fromitemId, + toitemId; + OffsetNumber limit; + + bool shuffled = false; + + /* + * Find first unallocated offsetNumber + */ + limit = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + + /* was offsetNumber passed in? */ + if (OffsetNumberIsValid(offsetNumber)) + { + if (PageManagerShuffle == true) + { + /* shuffle ItemId's (Do the PageManager Shuffle...) */ + for (i = (limit - 1); i >= offsetNumber; i--) + { + fromitemId = &((PageHeader) page)->pd_linp[i - 1]; + toitemId = &((PageHeader) page)->pd_linp[i]; + *toitemId = *fromitemId; + } + shuffled = true; /* need to increase "lower" */ + } + else + { /* overwrite mode */ + itemId = &((PageHeader) page)->pd_linp[offsetNumber - 1]; + if (((*itemId).lp_flags & LP_USED) || + ((*itemId).lp_len != 0)) + { + elog(WARN, "PageAddItem: tried overwrite of used ItemId"); + return (InvalidOffsetNumber); + } + } } - } else { /* offsetNumber was not passed in, so find one */ - /* look for "recyclable" (unused & deallocated) ItemId */ - for (offsetNumber = 1; offsetNumber < limit; offsetNumber++) { - itemId = &((PageHeader)page)->pd_linp[offsetNumber - 1]; - if ((((*itemId).lp_flags & LP_USED) == 0) && - ((*itemId).lp_len == 0)) - break; + else + { /* offsetNumber was not passed in, so find + * one */ + /* look for "recyclable" (unused & deallocated) ItemId */ + for (offsetNumber = 1; offsetNumber < limit; offsetNumber++) + { + itemId = &((PageHeader) page)->pd_linp[offsetNumber - 1]; + if ((((*itemId).lp_flags & LP_USED) == 0) && + ((*itemId).lp_len == 0)) + break; + } } - } - if (offsetNumber > limit) - lower = (Offset) (((char *) (&((PageHeader)page)->pd_linp[offsetNumber])) - ((char *) page)); - else if (offsetNumber == limit || shuffled == true) - lower = ((PageHeader)page)->pd_lower + sizeof (ItemIdData); - else - lower = ((PageHeader)page)->pd_lower; - - alignedSize = DOUBLEALIGN(size); - - upper = ((PageHeader)page)->pd_upper - alignedSize; - - if (lower > upper) { - return (InvalidOffsetNumber); - } - - itemId = &((PageHeader)page)->pd_linp[offsetNumber - 1]; - (*itemId).lp_off = upper; - (*itemId).lp_len = size; - (*itemId).lp_flags = flags; - memmove((char *)page + upper, item, size); - ((PageHeader)page)->pd_lower = lower; - ((PageHeader)page)->pd_upper = upper; - - return (offsetNumber); + if (offsetNumber > limit) + lower = (Offset) (((char *) (&((PageHeader) page)->pd_linp[offsetNumber])) - ((char *) page)); + else if (offsetNumber == limit || shuffled == true) + lower = ((PageHeader) page)->pd_lower + sizeof(ItemIdData); + else + lower = ((PageHeader) page)->pd_lower; + + alignedSize = DOUBLEALIGN(size); + + upper = ((PageHeader) page)->pd_upper - alignedSize; + + if (lower > upper) + { + return (InvalidOffsetNumber); + } + + itemId = &((PageHeader) page)->pd_linp[offsetNumber - 1]; + (*itemId).lp_off = upper; + (*itemId).lp_len = 
size; + (*itemId).lp_flags = flags; + memmove((char *) page + upper, item, size); + ((PageHeader) page)->pd_lower = lower; + ((PageHeader) page)->pd_upper = upper; + + return (offsetNumber); } /* * PageGetTempPage -- - * Get a temporary page in local memory for special processing + * Get a temporary page in local memory for special processing */ Page PageGetTempPage(Page page, Size specialSize) { - Size pageSize; - Size size; - Page temp; - PageHeader thdr; - - pageSize = PageGetPageSize(page); - - if ((temp = (Page) palloc(pageSize)) == (Page) NULL) - elog(FATAL, "Cannot allocate %d bytes for temp page.", pageSize); - thdr = (PageHeader) temp; - - /* copy old page in */ - memmove(temp, page, pageSize); - - /* clear out the middle */ - size = (pageSize - sizeof(PageHeaderData)) + sizeof(ItemIdData); - size -= DOUBLEALIGN(specialSize); - memset((char *) &(thdr->pd_linp[0]), 0, size); - - /* set high, low water marks */ - thdr->pd_lower = sizeof (PageHeaderData) - sizeof (ItemIdData); - thdr->pd_upper = pageSize - DOUBLEALIGN(specialSize); - - return (temp); + Size pageSize; + Size size; + Page temp; + PageHeader thdr; + + pageSize = PageGetPageSize(page); + + if ((temp = (Page) palloc(pageSize)) == (Page) NULL) + elog(FATAL, "Cannot allocate %d bytes for temp page.", pageSize); + thdr = (PageHeader) temp; + + /* copy old page in */ + memmove(temp, page, pageSize); + + /* clear out the middle */ + size = (pageSize - sizeof(PageHeaderData)) + sizeof(ItemIdData); + size -= DOUBLEALIGN(specialSize); + memset((char *) &(thdr->pd_linp[0]), 0, size); + + /* set high, low water marks */ + thdr->pd_lower = sizeof(PageHeaderData) - sizeof(ItemIdData); + thdr->pd_upper = pageSize - DOUBLEALIGN(specialSize); + + return (temp); } /* * PageRestoreTempPage -- - * Copy temporary page back to permanent page after special processing - * and release the temporary page. + * Copy temporary page back to permanent page after special processing + * and release the temporary page. */ void PageRestoreTempPage(Page tempPage, Page oldPage) { - Size pageSize; - - pageSize = PageGetPageSize(tempPage); - memmove((char *) oldPage, (char *) tempPage, pageSize); - - pfree(tempPage); + Size pageSize; + + pageSize = PageGetPageSize(tempPage); + memmove((char *) oldPage, (char *) tempPage, pageSize); + + pfree(tempPage); } /* * PageGetMaxOffsetNumber -- - * Returns the maximum offset number used by the given page. + * Returns the maximum offset number used by the given page. * - * NOTE: The offset is invalid if the page is non-empty. - * Test whether PageIsEmpty before calling this routine - * and/or using its return value. + * NOTE: The offset is invalid if the page is non-empty. + * Test whether PageIsEmpty before calling this routine + * and/or using its return value. 
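[Editor's note] The routine that follows is pure arithmetic on pd_lower: the number of line pointers is the space between the fixed page header and pd_lower divided by sizeof(ItemIdData), and because offset numbers are 1-based that count doubles as the maximum offset number. A sketch with hypothetical sizes:

#include <stdio.h>

#define FIXED_HDR 20			/* stand-in: sizeof(PageHeaderData) -
								 * sizeof(ItemIdData) */
#define ITEMID_SZ 4				/* stand-in: sizeof(ItemIdData) */

static unsigned
demo_max_offset(unsigned pd_lower)
{
	return (pd_lower - FIXED_HDR) / ITEMID_SZ;
}

int
main(void)
{
	/* a pd_lower covering the header plus three line pointers */
	printf("%u\n", demo_max_offset(FIXED_HDR + 3 * ITEMID_SZ));	/* 3 */
	return 0;
}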
*/ OffsetNumber PageGetMaxOffsetNumber(Page page) { - LocationIndex low; - OffsetNumber i; - - low = ((PageHeader) page)->pd_lower; - i = (low - (sizeof(PageHeaderData) - sizeof(ItemIdData))) - / sizeof(ItemIdData); - - return(i); -} + LocationIndex low; + OffsetNumber i; + + low = ((PageHeader) page)->pd_lower; + i = (low - (sizeof(PageHeaderData) - sizeof(ItemIdData))) + / sizeof(ItemIdData); + + return (i); +} /* ---------------- - * itemid stuff for PageRepairFragmentation + * itemid stuff for PageRepairFragmentation * ---------------- */ -struct itemIdSortData { - int offsetindex; /* linp array index */ - ItemIdData itemiddata; +struct itemIdSortData +{ + int offsetindex;/* linp array index */ + ItemIdData itemiddata; }; static int itemidcompare(void *itemidp1, void *itemidp2) { - if (((struct itemIdSortData *)itemidp1)->itemiddata.lp_off == - ((struct itemIdSortData *)itemidp2)->itemiddata.lp_off) - return(0); - else if (((struct itemIdSortData *)itemidp1)->itemiddata.lp_off < - ((struct itemIdSortData *)itemidp2)->itemiddata.lp_off) - return(1); - else - return(-1); + if (((struct itemIdSortData *) itemidp1)->itemiddata.lp_off == + ((struct itemIdSortData *) itemidp2)->itemiddata.lp_off) + return (0); + else if (((struct itemIdSortData *) itemidp1)->itemiddata.lp_off < + ((struct itemIdSortData *) itemidp2)->itemiddata.lp_off) + return (1); + else + return (-1); } /* * PageRepairFragmentation -- - * Frees fragmented space on a page. + * Frees fragmented space on a page. */ void PageRepairFragmentation(Page page) { - int i; - struct itemIdSortData *itemidbase, *itemidptr; - ItemId lp; - int nline, nused; - Offset upper; - Size alignedSize; - - nline = (int16) PageGetMaxOffsetNumber(page); - nused = 0; - for (i=0; i<nline; i++) { - lp = ((PageHeader)page)->pd_linp + i; - if ((*lp).lp_flags & LP_USED) - nused++; - } - - if (nused == 0) { - for (i=0; i<nline; i++) { - lp = ((PageHeader)page)->pd_linp + i; - if ((*lp).lp_len > 0) /* unused, but allocated */ - (*lp).lp_len = 0; /* indicate unused & deallocated */ + int i; + struct itemIdSortData *itemidbase, + *itemidptr; + ItemId lp; + int nline, + nused; + Offset upper; + Size alignedSize; + + nline = (int16) PageGetMaxOffsetNumber(page); + nused = 0; + for (i = 0; i < nline; i++) + { + lp = ((PageHeader) page)->pd_linp + i; + if ((*lp).lp_flags & LP_USED) + nused++; } - - ((PageHeader)page)->pd_upper = ((PageHeader)page)->pd_special; - } else { /* nused != 0 */ - itemidbase = (struct itemIdSortData *) - palloc(sizeof(struct itemIdSortData) * nused); - memset((char *) itemidbase, 0, sizeof(struct itemIdSortData) * nused); - itemidptr = itemidbase; - for (i=0; i<nline; i++) { - lp = ((PageHeader)page)->pd_linp + i; - if ((*lp).lp_flags & LP_USED) { - itemidptr->offsetindex = i; - itemidptr->itemiddata = *lp; - itemidptr++; - } else { - if ((*lp).lp_len > 0) /* unused, but allocated */ - (*lp).lp_len = 0; /* indicate unused & deallocated */ - } + + if (nused == 0) + { + for (i = 0; i < nline; i++) + { + lp = ((PageHeader) page)->pd_linp + i; + if ((*lp).lp_len > 0) /* unused, but allocated */ + (*lp).lp_len = 0; /* indicate unused & deallocated */ + } + + ((PageHeader) page)->pd_upper = ((PageHeader) page)->pd_special; } - - /* sort itemIdSortData array...*/ - pg_qsort((char *) itemidbase, nused, sizeof(struct itemIdSortData), - itemidcompare); - - /* compactify page */ - ((PageHeader)page)->pd_upper = ((PageHeader)page)->pd_special; - - for (i=0, itemidptr = itemidbase; i<nused; i++, itemidptr++) { - lp = ((PageHeader)page)->pd_linp + 
itemidptr->offsetindex; - alignedSize = DOUBLEALIGN((*lp).lp_len); - upper = ((PageHeader)page)->pd_upper - alignedSize; - memmove((char *) page + upper, - (char *)page + (*lp).lp_off, - (*lp).lp_len); - (*lp).lp_off = upper; - ((PageHeader)page)->pd_upper = upper; + else + { /* nused != 0 */ + itemidbase = (struct itemIdSortData *) + palloc(sizeof(struct itemIdSortData) * nused); + memset((char *) itemidbase, 0, sizeof(struct itemIdSortData) * nused); + itemidptr = itemidbase; + for (i = 0; i < nline; i++) + { + lp = ((PageHeader) page)->pd_linp + i; + if ((*lp).lp_flags & LP_USED) + { + itemidptr->offsetindex = i; + itemidptr->itemiddata = *lp; + itemidptr++; + } + else + { + if ((*lp).lp_len > 0) /* unused, but allocated */ + (*lp).lp_len = 0; /* indicate unused & deallocated */ + } + } + + /* sort itemIdSortData array... */ + pg_qsort((char *) itemidbase, nused, sizeof(struct itemIdSortData), + itemidcompare); + + /* compactify page */ + ((PageHeader) page)->pd_upper = ((PageHeader) page)->pd_special; + + for (i = 0, itemidptr = itemidbase; i < nused; i++, itemidptr++) + { + lp = ((PageHeader) page)->pd_linp + itemidptr->offsetindex; + alignedSize = DOUBLEALIGN((*lp).lp_len); + upper = ((PageHeader) page)->pd_upper - alignedSize; + memmove((char *) page + upper, + (char *) page + (*lp).lp_off, + (*lp).lp_len); + (*lp).lp_off = upper; + ((PageHeader) page)->pd_upper = upper; + } + + pfree(itemidbase); } - - pfree(itemidbase); - } } /* * PageGetFreeSpace -- - * Returns the size of the free (allocatable) space on a page. + * Returns the size of the free (allocatable) space on a page. */ Size PageGetFreeSpace(Page page) { - Size space; - - - space = ((PageHeader)page)->pd_upper - ((PageHeader)page)->pd_lower; - - if (space < sizeof (ItemIdData)) { - return (0); - } - space -= sizeof (ItemIdData); /* XXX not always true */ - - return (space); + Size space; + + + space = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower; + + if (space < sizeof(ItemIdData)) + { + return (0); + } + space -= sizeof(ItemIdData);/* XXX not always true */ + + return (space); } /* * PageManagerModeSet -- * - * Sets mode to either: ShufflePageManagerMode (the default) or - * OverwritePageManagerMode. For use by access methods code - * for determining semantics of PageAddItem when the offsetNumber - * argument is passed in. + * Sets mode to either: ShufflePageManagerMode (the default) or + * OverwritePageManagerMode. For use by access methods code + * for determining semantics of PageAddItem when the offsetNumber + * argument is passed in. */ void PageManagerModeSet(PageManagerMode mode) { - if (mode == ShufflePageManagerMode) - PageManagerShuffle = true; - else if (mode == OverwritePageManagerMode) - PageManagerShuffle = false; + if (mode == ShufflePageManagerMode) + PageManagerShuffle = true; + else if (mode == OverwritePageManagerMode) + PageManagerShuffle = false; } /* @@ -368,65 +395,64 @@ PageManagerModeSet(PageManagerMode mode) * PageIndexTupleDelete *---------------------------------------------------------------- * - * This routine does the work of removing a tuple from an index page. + * This routine does the work of removing a tuple from an index page. 
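[Editor's note] The deletion that follows boils down to three moves: squeeze the victim's slot out of the line-pointer array, slide the tuple bytes between pd_upper and the victim forward by its aligned size, and bump the stored offset of every tuple that lived at or below the hole. A compact model of steps one and three over plain arrays (hypothetical layout; tuple space grows downward, so smaller offsets are later tuples):

#include <stdio.h>
#include <string.h>

static unsigned lp[4] = {700, 600, 500, 400};	/* line-pointer offsets */
static unsigned nitems = 4;

static void
demo_delete(unsigned idx, unsigned alignedSize)
{
	unsigned	victimOff = lp[idx];
	unsigned	i;

	/* 1. close the gap in the line-pointer array */
	memmove(&lp[idx], &lp[idx + 1], (nitems - idx - 1) * sizeof(lp[0]));
	nitems--;

	/*
	 * 2. (in the real code: memmove the bytes in [pd_upper, victimOff) up
	 * by alignedSize, then pd_upper += alignedSize)
	 */

	/* 3. repoint everything that sat at or below the hole */
	for (i = 0; i < nitems; i++)
		if (lp[i] <= victimOff)
			lp[i] += alignedSize;
}

int
main(void)
{
	unsigned	i;

	demo_delete(1, 100);		/* drop the tuple that was at offset 600 */
	for (i = 0; i < nitems; i++)
		printf("lp[%u] = %u\n", i, lp[i]);		/* 700, 600, 500 */
	return 0;
}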
*/ void PageIndexTupleDelete(Page page, OffsetNumber offnum) { - PageHeader phdr; - char *addr; - ItemId tup; - Size size; - char *locn; - int nbytes; - int offidx; - - phdr = (PageHeader) page; - - /* change offset number to offset index */ - offidx = offnum - 1; - - tup = PageGetItemId(page, offnum); - size = ItemIdGetLength(tup); - size = DOUBLEALIGN(size); - - /* location of deleted tuple data */ - locn = (char *) (page + ItemIdGetOffset(tup)); - - /* - * First, we want to get rid of the pd_linp entry for the index - * tuple. We copy all subsequent linp's back one slot in the - * array. - */ - - nbytes = phdr->pd_lower - - ((char *)&phdr->pd_linp[offidx + 1] - (char *) phdr); - memmove((char *) &(phdr->pd_linp[offidx]), - (char *) &(phdr->pd_linp[offidx + 1]), - nbytes); - - /* - * Now move everything between the old upper bound (beginning of tuple - * space) and the beginning of the deleted tuple forward, so that - * space in the middle of the page is left free. If we've just deleted - * the tuple at the beginning of tuple space, then there's no need - * to do the copy (and bcopy on some architectures SEGV's if asked - * to move zero bytes). - */ - - /* beginning of tuple space */ - addr = (char *) (page + phdr->pd_upper); - - if (locn != addr) - memmove(addr + size, addr, (int) (locn - addr)); - - /* adjust free space boundary pointers */ - phdr->pd_upper += size; - phdr->pd_lower -= sizeof (ItemIdData); - - /* finally, we need to adjust the linp entries that remain */ - if (!PageIsEmpty(page)) - PageIndexTupleDeleteAdjustLinePointers(phdr, locn, size); + PageHeader phdr; + char *addr; + ItemId tup; + Size size; + char *locn; + int nbytes; + int offidx; + + phdr = (PageHeader) page; + + /* change offset number to offset index */ + offidx = offnum - 1; + + tup = PageGetItemId(page, offnum); + size = ItemIdGetLength(tup); + size = DOUBLEALIGN(size); + + /* location of deleted tuple data */ + locn = (char *) (page + ItemIdGetOffset(tup)); + + /* + * First, we want to get rid of the pd_linp entry for the index tuple. + * We copy all subsequent linp's back one slot in the array. + */ + + nbytes = phdr->pd_lower - + ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr); + memmove((char *) &(phdr->pd_linp[offidx]), + (char *) &(phdr->pd_linp[offidx + 1]), + nbytes); + + /* + * Now move everything between the old upper bound (beginning of tuple + * space) and the beginning of the deleted tuple forward, so that + * space in the middle of the page is left free. If we've just + * deleted the tuple at the beginning of tuple space, then there's no + * need to do the copy (and bcopy on some architectures SEGV's if + * asked to move zero bytes). + */ + + /* beginning of tuple space */ + addr = (char *) (page + phdr->pd_upper); + + if (locn != addr) + memmove(addr + size, addr, (int) (locn - addr)); + + /* adjust free space boundary pointers */ + phdr->pd_upper += size; + phdr->pd_lower -= sizeof(ItemIdData); + + /* finally, we need to adjust the linp entries that remain */ + if (!PageIsEmpty(page)) + PageIndexTupleDeleteAdjustLinePointers(phdr, locn, size); } /* @@ -434,33 +460,35 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum) * PageIndexTupleDeleteAdjustLinePointers *---------------------------------------------------------------- * - * Once the line pointers and tuple data have been shifted around - * on the page, we need to go down the line pointer vector and - * adjust pointers to reflect new locations. 
Anything that used - * to be before the deleted tuple's data was moved forward by the - * size of the deleted tuple. + * Once the line pointers and tuple data have been shifted around + * on the page, we need to go down the line pointer vector and + * adjust pointers to reflect new locations. Anything that used + * to be before the deleted tuple's data was moved forward by the + * size of the deleted tuple. * - * This routine does the work of adjusting the line pointers. - * Location is where the tuple data used to lie; size is how - * much space it occupied. We assume that size has been aligned - * as required by the time we get here. + * This routine does the work of adjusting the line pointers. + * Location is where the tuple data used to lie; size is how + * much space it occupied. We assume that size has been aligned + * as required by the time we get here. * - * This routine should never be called on an empty page. + * This routine should never be called on an empty page. */ static void PageIndexTupleDeleteAdjustLinePointers(PageHeader phdr, - char *location, - Size size) + char *location, + Size size) { - int i; - unsigned offset; - - /* location is an index into the page... */ - offset = (unsigned)(location - (char *)phdr); - - for (i = PageGetMaxOffsetNumber((Page) phdr) - 1; i >= 0; i--) { - if (phdr->pd_linp[i].lp_off <= offset) { - phdr->pd_linp[i].lp_off += size; + int i; + unsigned offset; + + /* location is an index into the page... */ + offset = (unsigned) (location - (char *) phdr); + + for (i = PageGetMaxOffsetNumber((Page) phdr) - 1; i >= 0; i--) + { + if (phdr->pd_linp[i].lp_off <= offset) + { + phdr->pd_linp[i].lp_off += size; + } } - } } diff --git a/src/backend/storage/page/itemptr.c b/src/backend/storage/page/itemptr.c index 608fbf03379..25daebab23c 100644 --- a/src/backend/storage/page/itemptr.c +++ b/src/backend/storage/page/itemptr.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * itemptr.c-- - * POSTGRES disk item pointer code. + * POSTGRES disk item pointer code. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/page/itemptr.c,v 1.2 1996/11/03 05:07:46 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/page/itemptr.c,v 1.3 1997/09/07 04:49:07 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -20,21 +20,20 @@ /* * ItemPointerEquals -- - * Returns true if both item pointers point to the same item, - * otherwise returns false. + * Returns true if both item pointers point to the same item, + * otherwise returns false. * * Note: - * Assumes that the disk item pointers are not NULL. + * Assumes that the disk item pointers are not NULL. 
*/ bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2) { - if (ItemPointerGetBlockNumber(pointer1) == - ItemPointerGetBlockNumber(pointer2) && - ItemPointerGetOffsetNumber(pointer1) == - ItemPointerGetOffsetNumber(pointer2)) - return(true); - else - return(false); + if (ItemPointerGetBlockNumber(pointer1) == + ItemPointerGetBlockNumber(pointer2) && + ItemPointerGetOffsetNumber(pointer1) == + ItemPointerGetOffsetNumber(pointer2)) + return (true); + else + return (false); } - diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 2688ad3aed1..7a2903fff5c 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -1,28 +1,28 @@ /*------------------------------------------------------------------------- * * md.c-- - * This code manages relations that reside on magnetic disk. + * This code manages relations that reside on magnetic disk. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.18 1997/08/18 20:53:14 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.19 1997/09/07 04:49:17 momjian Exp $ * *------------------------------------------------------------------------- */ #include <unistd.h> -#include <stdio.h> /* for sprintf() */ +#include <stdio.h> /* for sprintf() */ #include <string.h> -#include <fcntl.h> /* for open() flags */ +#include <fcntl.h> /* for open() flags */ #include <sys/file.h> #include "postgres.h" -#include "miscadmin.h" /* for DataDir */ +#include "miscadmin.h" /* for DataDir */ #include "storage/block.h" #include "storage/fd.h" -#include "storage/smgr.h" /* where the declarations go */ +#include "storage/smgr.h" /* where the declarations go */ #include "storage/fd.h" #include "utils/mcxt.h" #include "utils/rel.h" @@ -32,764 +32,802 @@ #undef DIAGNOSTIC /* - * The magnetic disk storage manager keeps track of open file descriptors - * in its own descriptor pool. This happens for two reasons. First, at - * transaction boundaries, we walk the list of descriptors and flush - * anything that we've dirtied in the current transaction. Second, we - * have to support relations of > 4GBytes. In order to do this, we break - * relations up into chunks of < 2GBytes and store one chunk in each of - * several files that represent the relation. + * The magnetic disk storage manager keeps track of open file descriptors + * in its own descriptor pool. This happens for two reasons. First, at + * transaction boundaries, we walk the list of descriptors and flush + * anything that we've dirtied in the current transaction. Second, we + * have to support relations of > 4GBytes. In order to do this, we break + * relations up into chunks of < 2GBytes and store one chunk in each of + * several files that represent the relation. 
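[Editor's note] Every routine below leans on a single mapping: block B lives in segment B / RELSEG_SIZE, at byte offset BLCKSZ * (B % RELSEG_SIZE) within that segment's file, and segments past the first get a ".segno" suffix on the relation's file name. A sketch of the mapping (hypothetical relation name):

#include <stdio.h>

#define BLCKSZ      8192
#define RELSEG_SIZE 262144		/* blocks per segment: (2 ** 31) / 8192 */

int
main(void)
{
	unsigned	blkno = 300000;	/* a block past the first segment */
	unsigned	segno = blkno / RELSEG_SIZE;
	long		seekpos = (long) BLCKSZ * (blkno % RELSEG_SIZE);
	char		path[64];

	if (segno == 0)
		sprintf(path, "myrel");
	else
		sprintf(path, "myrel.%u", segno);

	printf("block %u -> file \"%s\", byte offset %ld\n",
		   blkno, path, seekpos);
	return 0;
}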
*/ -typedef struct _MdfdVec { - int mdfd_vfd; /* fd number in vfd pool */ - uint16 mdfd_flags; /* clean, dirty, free */ - int mdfd_lstbcnt; /* most recent block count */ - int mdfd_nextFree; /* next free vector */ - struct _MdfdVec *mdfd_chain; /* for large relations */ -} MdfdVec; +typedef struct _MdfdVec +{ + int mdfd_vfd; /* fd number in vfd pool */ + uint16 mdfd_flags; /* clean, dirty, free */ + int mdfd_lstbcnt; /* most recent block count */ + int mdfd_nextFree; /* next free vector */ + struct _MdfdVec *mdfd_chain;/* for large relations */ +} MdfdVec; -static int Nfds = 100; -static MdfdVec *Md_fdvec = (MdfdVec *) NULL; -static int Md_Free = -1; -static int CurFd = 0; -static MemoryContext MdCxt; +static int Nfds = 100; +static MdfdVec *Md_fdvec = (MdfdVec *) NULL; +static int Md_Free = -1; +static int CurFd = 0; +static MemoryContext MdCxt; -#define MDFD_DIRTY (uint16) 0x01 -#define MDFD_FREE (uint16) 0x02 +#define MDFD_DIRTY (uint16) 0x01 +#define MDFD_FREE (uint16) 0x02 -#define RELSEG_SIZE 262144 /* (2 ** 31) / 8192 -- 2GB file */ +#define RELSEG_SIZE 262144 /* (2 ** 31) / 8192 -- 2GB file */ /* routines declared here */ -static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags); -static MdfdVec *_mdfd_getseg(Relation reln, int blkno, int oflag); -static int _fdvec_alloc (void); -static void _fdvec_free (int); +static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags); +static MdfdVec *_mdfd_getseg(Relation reln, int blkno, int oflag); +static int _fdvec_alloc(void); +static void _fdvec_free(int); static BlockNumber _mdnblocks(File file, Size blcksz); /* - * mdinit() -- Initialize private state for magnetic disk storage manager. + * mdinit() -- Initialize private state for magnetic disk storage manager. * - * We keep a private table of all file descriptors. Whenever we do - * a write to one, we mark it dirty in our table. Whenever we force - * changes to disk, we mark the file descriptor clean. At transaction - * commit, we force changes to disk for all dirty file descriptors. - * This routine allocates and initializes the table. + * We keep a private table of all file descriptors. Whenever we do + * a write to one, we mark it dirty in our table. Whenever we force + * changes to disk, we mark the file descriptor clean. At transaction + * commit, we force changes to disk for all dirty file descriptors. + * This routine allocates and initializes the table. * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. + * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. 
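[Editor's note] The free-list setup below is an intrusive singly linked list threaded through the descriptor array itself: each slot's mdfd_nextFree indexes the next free slot, -1 terminates the chain, and Md_Free heads it, so _fdvec_alloc and _fdvec_free reduce to O(1) pops and pushes. The same shape in miniature (hypothetical names, and no table growth):

#include <stdio.h>

#define NSLOTS 8

typedef struct Slot
{
	int			nextFree;		/* index of next free slot; -1 ends the list */
} Slot;

static Slot slots[NSLOTS];
static int	freeHead = -1;

static void
slots_init(void)
{
	int			i;

	for (i = 0; i < NSLOTS; i++)
		slots[i].nextFree = i + 1;
	slots[NSLOTS - 1].nextFree = -1;
	freeHead = 0;
}

static int
slot_alloc(void)
{
	int			s = freeHead;

	if (s >= 0)
		freeHead = slots[s].nextFree;	/* pop; the real code grows the
										 * table when the list runs dry */
	return s;
}

static void
slot_free(int s)
{
	slots[s].nextFree = freeHead;		/* push back onto the list */
	freeHead = s;
}

int
main(void)
{
	int			a,
				b;

	slots_init();
	a = slot_alloc();
	b = slot_alloc();
	slot_free(a);
	printf("got %d, %d; after freeing %d the next alloc is %d\n",
		   a, b, a, slot_alloc());		/* reuses slot a */
	return 0;
}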
*/ int mdinit() { - MemoryContext oldcxt; - int i; + MemoryContext oldcxt; + int i; - MdCxt = (MemoryContext) CreateGlobalMemory("MdSmgr"); - if (MdCxt == (MemoryContext) NULL) - return (SM_FAIL); + MdCxt = (MemoryContext) CreateGlobalMemory("MdSmgr"); + if (MdCxt == (MemoryContext) NULL) + return (SM_FAIL); - oldcxt = MemoryContextSwitchTo(MdCxt); - Md_fdvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec)); - MemoryContextSwitchTo(oldcxt); + oldcxt = MemoryContextSwitchTo(MdCxt); + Md_fdvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec)); + MemoryContextSwitchTo(oldcxt); - if (Md_fdvec == (MdfdVec *) NULL) - return (SM_FAIL); + if (Md_fdvec == (MdfdVec *) NULL) + return (SM_FAIL); - memset(Md_fdvec, 0, Nfds * sizeof(MdfdVec)); + memset(Md_fdvec, 0, Nfds * sizeof(MdfdVec)); - /* Set free list */ - for (i = 0; i < Nfds; i++ ) - { - Md_fdvec[i].mdfd_nextFree = i + 1; - Md_fdvec[i].mdfd_flags = MDFD_FREE; - } - Md_Free = 0; - Md_fdvec[Nfds - 1].mdfd_nextFree = -1; + /* Set free list */ + for (i = 0; i < Nfds; i++) + { + Md_fdvec[i].mdfd_nextFree = i + 1; + Md_fdvec[i].mdfd_flags = MDFD_FREE; + } + Md_Free = 0; + Md_fdvec[Nfds - 1].mdfd_nextFree = -1; - return (SM_SUCCESS); + return (SM_SUCCESS); } int mdcreate(Relation reln) { - int fd, vfd; - char *path; - - path = relpath(&(reln->rd_rel->relname.data[0])); - fd = FileNameOpenFile(path, O_RDWR|O_CREAT|O_EXCL, 0600); - - /* - * If the file already exists and is empty, we pretend that the - * create succeeded. During bootstrap processing, we skip that check, - * because pg_time, pg_variable, and pg_log get created before their - * .bki file entries are processed. - * - * As the result of this pretence it was possible to have in - * pg_class > 1 records with the same relname. Actually, it - * should be fixed in upper levels, too, but... - vadim 05/06/97 - */ - - if (fd < 0) - { - if ( !IsBootstrapProcessingMode() ) - return (-1); - fd = FileNameOpenFile(path, O_RDWR, 0600); /* Bootstrap */ - if ( fd < 0 ) - return (-1); - } - - vfd = _fdvec_alloc (); - if ( vfd < 0 ) - return (-1); - - Md_fdvec[vfd].mdfd_vfd = fd; - Md_fdvec[vfd].mdfd_flags = (uint16) 0; - Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL; - Md_fdvec[vfd].mdfd_lstbcnt = 0; - - return (vfd); + int fd, + vfd; + char *path; + + path = relpath(&(reln->rd_rel->relname.data[0])); + fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL, 0600); + + /* + * If the file already exists and is empty, we pretend that the create + * succeeded. During bootstrap processing, we skip that check, + * because pg_time, pg_variable, and pg_log get created before their + * .bki file entries are processed. + * + * As the result of this pretence it was possible to have in pg_class > 1 + * records with the same relname. Actually, it should be fixed in + * upper levels, too, but... - vadim 05/06/97 + */ + + if (fd < 0) + { + if (!IsBootstrapProcessingMode()) + return (-1); + fd = FileNameOpenFile(path, O_RDWR, 0600); /* Bootstrap */ + if (fd < 0) + return (-1); + } + + vfd = _fdvec_alloc(); + if (vfd < 0) + return (-1); + + Md_fdvec[vfd].mdfd_vfd = fd; + Md_fdvec[vfd].mdfd_flags = (uint16) 0; + Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL; + Md_fdvec[vfd].mdfd_lstbcnt = 0; + + return (vfd); } /* - * mdunlink() -- Unlink a relation. + * mdunlink() -- Unlink a relation. 
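[Editor's note] Because a large relation is a base file plus numbered overflow files, unlinking walks "name", then "name.1", "name.2", and so on until an unlink fails. A sketch of the same loop using plain unlink(2) in place of the fd-cache-aware FileNameUnlink (hypothetical relation name):

#include <stdio.h>
#include <unistd.h>

/* Remove a relation's base file and every ".1", ".2", ... segment. */
static int
unlink_relation(const char *name)
{
	char		tname[256];
	int			i;

	if (unlink(name) < 0)
		return -1;				/* the base file must exist */

	for (i = 1;; i++)
	{
		sprintf(tname, "%s.%d", name, i);
		if (unlink(tname) < 0)
			break;				/* first missing segment ends the chain */
	}
	return 0;
}

int
main(void)
{
	if (unlink_relation("myrel") < 0)
		perror("unlink_relation");
	return 0;
}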
*/ int mdunlink(Relation reln) { - int fd; - int i; - MdfdVec *v, *ov; - MemoryContext oldcxt; - char fname[NAMEDATALEN]; - char tname[NAMEDATALEN+10]; /* leave room for overflow suffixes*/ - - /* On Windows NT you can't unlink a file if it is open so we have - ** to do this. - */ + int fd; + int i; + MdfdVec *v, + *ov; + MemoryContext oldcxt; + char fname[NAMEDATALEN]; + char tname[NAMEDATALEN + 10]; /* leave room for overflow + * suffixes */ + + /* + * On Windows NT you can't unlink a file if it is open so we have * to + * do this. + */ + + strNcpy(fname, RelationGetRelationName(reln)->data, NAMEDATALEN - 1); + + if (FileNameUnlink(fname) < 0) + return (SM_FAIL); + + /* unlink all the overflow files for large relations */ + for (i = 1;; i++) + { + sprintf(tname, "%s.%d", fname, i); + if (FileNameUnlink(tname) < 0) + break; + } + + /* finally, clean out the mdfd vector */ + fd = RelationGetFile(reln); + Md_fdvec[fd].mdfd_flags = (uint16) 0; + + oldcxt = MemoryContextSwitchTo(MdCxt); + for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;) + { + FileUnlink(v->mdfd_vfd); + ov = v; + v = v->mdfd_chain; + if (ov != &Md_fdvec[fd]) + pfree(ov); + } + Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL; + MemoryContextSwitchTo(oldcxt); - strNcpy(fname, RelationGetRelationName(reln)->data, NAMEDATALEN-1); - - if (FileNameUnlink(fname) < 0) - return (SM_FAIL); - - /* unlink all the overflow files for large relations */ - for (i = 1; ; i++) { - sprintf(tname, "%s.%d", fname, i); - if (FileNameUnlink(tname) < 0) - break; - } - - /* finally, clean out the mdfd vector */ - fd = RelationGetFile(reln); - Md_fdvec[fd].mdfd_flags = (uint16) 0; - - oldcxt = MemoryContextSwitchTo(MdCxt); - for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL; ) - { - FileUnlink(v->mdfd_vfd); - ov = v; - v = v->mdfd_chain; - if (ov != &Md_fdvec[fd]) - pfree(ov); - } - Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL; - MemoryContextSwitchTo(oldcxt); - - _fdvec_free (fd); - - return (SM_SUCCESS); + _fdvec_free(fd); + + return (SM_SUCCESS); } /* - * mdextend() -- Add a block to the specified relation. + * mdextend() -- Add a block to the specified relation. * - * This routine returns SM_FAIL or SM_SUCCESS, with errno set as - * appropriate. + * This routine returns SM_FAIL or SM_SUCCESS, with errno set as + * appropriate. 
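[Editor's note] Extension is append-plus-bookkeeping: seek to the end of the last segment, write exactly one block, and set a dirty flag so commit knows this descriptor needs an fsync. A sketch against a raw file descriptor (the real code routes through the virtual-fd layer and hops segments with _mdfd_getseg):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#define BLCKSZ 8192

/* Append one block; sets *dirty so the caller can fsync at commit. */
static int
extend_file(int fd, const char *buffer, int *dirty)
{
	if (lseek(fd, 0L, SEEK_END) < 0)
		return -1;
	if (write(fd, buffer, BLCKSZ) != BLCKSZ)
		return -1;
	*dirty = 1;					/* the moral equivalent of MDFD_DIRTY */
	return 0;
}

int
main(void)
{
	char		block[BLCKSZ];
	int			dirty = 0;
	int			fd = open("myrel", O_RDWR | O_CREAT, 0600);

	memset(block, 0, BLCKSZ);
	if (fd >= 0 && extend_file(fd, block, &dirty) == 0)
		printf("appended a block; dirty=%d\n", dirty);
	if (fd >= 0)
		close(fd);
	return 0;
}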
*/ int mdextend(Relation reln, char *buffer) { - long pos; - int nblocks; - MdfdVec *v; + long pos; + int nblocks; + MdfdVec *v; - nblocks = mdnblocks(reln); - v = _mdfd_getseg(reln, nblocks, O_CREAT); + nblocks = mdnblocks(reln); + v = _mdfd_getseg(reln, nblocks, O_CREAT); - if ((pos = FileSeek(v->mdfd_vfd, 0L, SEEK_END)) < 0) - return (SM_FAIL); + if ((pos = FileSeek(v->mdfd_vfd, 0L, SEEK_END)) < 0) + return (SM_FAIL); - if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ) - return (SM_FAIL); + if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ) + return (SM_FAIL); - /* remember that we did a write, so we can sync at xact commit */ - v->mdfd_flags |= MDFD_DIRTY; + /* remember that we did a write, so we can sync at xact commit */ + v->mdfd_flags |= MDFD_DIRTY; - /* try to keep the last block count current, though it's just a hint */ - if ((v->mdfd_lstbcnt = (++nblocks % RELSEG_SIZE)) == 0) - v->mdfd_lstbcnt = RELSEG_SIZE; + /* try to keep the last block count current, though it's just a hint */ + if ((v->mdfd_lstbcnt = (++nblocks % RELSEG_SIZE)) == 0) + v->mdfd_lstbcnt = RELSEG_SIZE; #ifdef DIAGNOSTIC - if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE - || v->mdfd_lstbcnt > RELSEG_SIZE) - elog(FATAL, "segment too big!"); + if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE + || v->mdfd_lstbcnt > RELSEG_SIZE) + elog(FATAL, "segment too big!"); #endif - return (SM_SUCCESS); + return (SM_SUCCESS); } /* - * mdopen() -- Open the specified relation. + * mdopen() -- Open the specified relation. */ int mdopen(Relation reln) { - char *path; - int fd; - int vfd; + char *path; + int fd; + int vfd; - path = relpath(&(reln->rd_rel->relname.data[0])); + path = relpath(&(reln->rd_rel->relname.data[0])); - fd = FileNameOpenFile(path, O_RDWR, 0600); + fd = FileNameOpenFile(path, O_RDWR, 0600); - /* this should only happen during bootstrap processing */ - if (fd < 0) - fd = FileNameOpenFile(path, O_RDWR|O_CREAT|O_EXCL, 0600); + /* this should only happen during bootstrap processing */ + if (fd < 0) + fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL, 0600); - vfd = _fdvec_alloc (); - if ( vfd < 0 ) - return (-1); + vfd = _fdvec_alloc(); + if (vfd < 0) + return (-1); - Md_fdvec[vfd].mdfd_vfd = fd; - Md_fdvec[vfd].mdfd_flags = (uint16) 0; - Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL; - Md_fdvec[vfd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ); + Md_fdvec[vfd].mdfd_vfd = fd; + Md_fdvec[vfd].mdfd_flags = (uint16) 0; + Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL; + Md_fdvec[vfd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ); #ifdef DIAGNOSTIC - if (Md_fdvec[vfd].mdfd_lstbcnt > RELSEG_SIZE) - elog(FATAL, "segment too big on relopen!"); + if (Md_fdvec[vfd].mdfd_lstbcnt > RELSEG_SIZE) + elog(FATAL, "segment too big on relopen!"); #endif - return (vfd); + return (vfd); } /* - * mdclose() -- Close the specified relation + * mdclose() -- Close the specified relation * - * AND FREE fd vector! It may be re-used for other relation! - * reln should be flushed from cache after closing !.. + * AND FREE fd vector! It may be re-used for other relation! + * reln should be flushed from cache after closing !.. * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. + * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. 
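[Editor's note] Closing walks the whole segment chain: sync and close each still-open segment (so commit never has to reopen it), clear its dirty bit, and free every chained entry except the head, which lives in the descriptor table itself. A simplified walk (hypothetical struct; compare mdclose below):

#include <stdlib.h>
#include <unistd.h>

typedef struct Seg
{
	int			fd;				/* -1 if already closed */
	int			dirty;
	struct Seg *chain;			/* next segment, or NULL */
} Seg;

static void
close_chain(Seg * head)
{
	Seg		   *v = head;
	Seg		   *ov;

	while (v != NULL)
	{
		if (v->fd >= 0)
		{
			fsync(v->fd);		/* flush now rather than at commit */
			close(v->fd);
			v->dirty = 0;
		}
		ov = v;
		v = v->chain;
		if (ov != head)
			free(ov);			/* only chained entries are heap-allocated */
	}
	head->chain = NULL;
}

int
main(void)
{
	Seg			head = {-1, 0, NULL};	/* nothing open: the walk is a no-op */

	close_chain(&head);
	return 0;
}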
*/ int mdclose(Relation reln) { - int fd; - MdfdVec *v, *ov; - MemoryContext oldcxt; + int fd; + MdfdVec *v, + *ov; + MemoryContext oldcxt; - fd = RelationGetFile(reln); + fd = RelationGetFile(reln); - oldcxt = MemoryContextSwitchTo(MdCxt); - for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL; ) - { - /* if not closed already */ - if ( v->mdfd_vfd >= 0 ) + oldcxt = MemoryContextSwitchTo(MdCxt); + for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;) { - /* - * We sync the file descriptor so that we don't need to reopen it at - * transaction commit to force changes to disk. - */ + /* if not closed already */ + if (v->mdfd_vfd >= 0) + { + + /* + * We sync the file descriptor so that we don't need to reopen + * it at transaction commit to force changes to disk. + */ + + FileSync(v->mdfd_vfd); + FileClose(v->mdfd_vfd); + + /* mark this file descriptor as clean in our private table */ + v->mdfd_flags &= ~MDFD_DIRTY; + } + /* Now free vector */ + ov = v; + v = v->mdfd_chain; + if (ov != &Md_fdvec[fd]) + pfree(ov); + } - FileSync(v->mdfd_vfd); - FileClose(v->mdfd_vfd); + MemoryContextSwitchTo(oldcxt); + Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL; - /* mark this file descriptor as clean in our private table */ - v->mdfd_flags &= ~MDFD_DIRTY; - } - /* Now free vector */ - ov = v; - v = v->mdfd_chain; - if (ov != &Md_fdvec[fd]) - pfree(ov); - } - - MemoryContextSwitchTo(oldcxt); - Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL; - - _fdvec_free (fd); - - return (SM_SUCCESS); + _fdvec_free(fd); + + return (SM_SUCCESS); } /* - * mdread() -- Read the specified block from a relation. + * mdread() -- Read the specified block from a relation. * - * Returns SM_SUCCESS or SM_FAIL. + * Returns SM_SUCCESS or SM_FAIL. */ int mdread(Relation reln, BlockNumber blocknum, char *buffer) { - int status; - long seekpos; - int nbytes; - MdfdVec *v; + int status; + long seekpos; + int nbytes; + MdfdVec *v; - v = _mdfd_getseg(reln, blocknum, 0); + v = _mdfd_getseg(reln, blocknum, 0); - seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE)); + seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE)); #ifdef DIAGNOSTIC - if (seekpos >= BLCKSZ * RELSEG_SIZE) - elog(FATAL, "seekpos too big!"); + if (seekpos >= BLCKSZ * RELSEG_SIZE) + elog(FATAL, "seekpos too big!"); #endif - if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) { - return (SM_FAIL); - } + if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) + { + return (SM_FAIL); + } - status = SM_SUCCESS; - if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { - if (nbytes == 0) { - memset(buffer, 0, BLCKSZ); - } else { - status = SM_FAIL; + status = SM_SUCCESS; + if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) + { + if (nbytes == 0) + { + memset(buffer, 0, BLCKSZ); + } + else + { + status = SM_FAIL; + } } - } - return (status); + return (status); } /* - * mdwrite() -- Write the supplied block at the appropriate location. + * mdwrite() -- Write the supplied block at the appropriate location. * - * Returns SM_SUCCESS or SM_FAIL. + * Returns SM_SUCCESS or SM_FAIL. 
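[Editor's note] Reads and writes share one translation step: turn the block number into a byte position with BLCKSZ * (blocknum % RELSEG_SIZE) and transfer exactly one block there; mdread additionally treats a zero-byte read past EOF as a hole and hands back a zeroed page. A sketch of the write side, using pwrite(2) in place of seek-then-write through the vfd layer:

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>

#define BLCKSZ      8192
#define RELSEG_SIZE 262144

/* Write block 'blkno' within its segment's file; mark it for commit. */
static int
write_block(int fd, unsigned blkno, const char *buffer, int *dirty)
{
	off_t		seekpos = (off_t) BLCKSZ * (blkno % RELSEG_SIZE);

	if (pwrite(fd, buffer, BLCKSZ, seekpos) != BLCKSZ)
		return -1;
	*dirty = 1;
	return 0;
}

int
main(void)
{
	char		block[BLCKSZ] = {0};
	int			dirty = 0;
	int			fd = open("myrel", O_RDWR | O_CREAT, 0600);

	if (fd >= 0 && write_block(fd, 3, block, &dirty) == 0)
		printf("wrote block 3; dirty=%d\n", dirty);
	if (fd >= 0)
		close(fd);
	return 0;
}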
*/ int mdwrite(Relation reln, BlockNumber blocknum, char *buffer) { - int status; - long seekpos; - MdfdVec *v; + int status; + long seekpos; + MdfdVec *v; - v = _mdfd_getseg(reln, blocknum, 0); + v = _mdfd_getseg(reln, blocknum, 0); - seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE)); + seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE)); #ifdef DIAGNOSTIC - if (seekpos >= BLCKSZ * RELSEG_SIZE) - elog(FATAL, "seekpos too big!"); + if (seekpos >= BLCKSZ * RELSEG_SIZE) + elog(FATAL, "seekpos too big!"); #endif - if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) { - return (SM_FAIL); - } + if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) + { + return (SM_FAIL); + } - status = SM_SUCCESS; - if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ) - status = SM_FAIL; + status = SM_SUCCESS; + if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ) + status = SM_FAIL; - v->mdfd_flags |= MDFD_DIRTY; + v->mdfd_flags |= MDFD_DIRTY; - return (status); + return (status); } /* - * mdflush() -- Synchronously write a block to disk. + * mdflush() -- Synchronously write a block to disk. * - * This is exactly like mdwrite(), but doesn't return until the file - * system buffer cache has been flushed. + * This is exactly like mdwrite(), but doesn't return until the file + * system buffer cache has been flushed. */ int mdflush(Relation reln, BlockNumber blocknum, char *buffer) { - int status; - long seekpos; - MdfdVec *v; + int status; + long seekpos; + MdfdVec *v; - v = _mdfd_getseg(reln, blocknum, 0); + v = _mdfd_getseg(reln, blocknum, 0); - seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE)); + seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE)); #ifdef DIAGNOSTIC - if (seekpos >= BLCKSZ * RELSEG_SIZE) - elog(FATAL, "seekpos too big!"); + if (seekpos >= BLCKSZ * RELSEG_SIZE) + elog(FATAL, "seekpos too big!"); #endif - if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) { - return (SM_FAIL); - } + if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) + { + return (SM_FAIL); + } - /* write and sync the block */ - status = SM_SUCCESS; - if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ - || FileSync(v->mdfd_vfd) < 0) - status = SM_FAIL; + /* write and sync the block */ + status = SM_SUCCESS; + if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ + || FileSync(v->mdfd_vfd) < 0) + status = SM_FAIL; - /* - * By here, the block is written and changes have been forced to stable - * storage. Mark the descriptor as clean until the next write, so we - * don't sync it again unnecessarily at transaction commit. - */ + /* + * By here, the block is written and changes have been forced to + * stable storage. Mark the descriptor as clean until the next write, + * so we don't sync it again unnecessarily at transaction commit. + */ - v->mdfd_flags &= ~MDFD_DIRTY; + v->mdfd_flags &= ~MDFD_DIRTY; - return (status); + return (status); } /* - * mdblindwrt() -- Write a block to disk blind. + * mdblindwrt() -- Write a block to disk blind. * - * We have to be able to do this using only the name and OID of - * the database and relation in which the block belongs. This - * is a synchronous write. + * We have to be able to do this using only the name and OID of + * the database and relation in which the block belongs. This + * is a synchronous write. 
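[Editor's note] "Blind" means the caller has only names and OIDs, no open Relation, so the routine rebuilds the path from scratch: DataDir-relative for shared (dbid == 0) relations, DataDir/base/<db>/ otherwise, with a ".segno" suffix past the first segment, then open-write-fsync-close. A sketch of just the path assembly (hypothetical directory layout):

#include <stdio.h>

#define RELSEG_SIZE 262144

/* db == NULL stands in for dbid == 0, i.e. a shared relation. */
static void
blind_path(char *path, const char *datadir, const char *db,
		   const char *rel, unsigned blkno)
{
	unsigned	segno = blkno / RELSEG_SIZE;

	if (db == NULL && segno == 0)
		sprintf(path, "%s/%s", datadir, rel);
	else if (db == NULL)
		sprintf(path, "%s/%s.%u", datadir, rel, segno);
	else if (segno == 0)
		sprintf(path, "%s/base/%s/%s", datadir, db, rel);
	else
		sprintf(path, "%s/base/%s/%s.%u", datadir, db, rel, segno);
}

int
main(void)
{
	char		path[256];

	blind_path(path, "/usr/local/pgsql/data", "mydb", "myrel", 300000);
	printf("%s\n", path);		/* .../base/mydb/myrel.1 */
	return 0;
}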
*/ int mdblindwrt(char *dbstr, - char *relstr, - Oid dbid, - Oid relid, - BlockNumber blkno, - char *buffer) + char *relstr, + Oid dbid, + Oid relid, + BlockNumber blkno, + char *buffer) { - int fd; - int segno; - long seekpos; - int status; - char *path; - int nchars; - - /* be sure we have enough space for the '.segno', if any */ - segno = blkno / RELSEG_SIZE; - if (segno > 0) - nchars = 10; - else - nchars = 0; - - /* construct the path to the file and open it */ - if (dbid == (Oid) 0) { - path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2 + nchars); - if (segno == 0) - sprintf(path, "%s/%s", DataDir, relstr); + int fd; + int segno; + long seekpos; + int status; + char *path; + int nchars; + + /* be sure we have enough space for the '.segno', if any */ + segno = blkno / RELSEG_SIZE; + if (segno > 0) + nchars = 10; else - sprintf(path, "%s/%s.%d", DataDir, relstr, segno); - } else { - path = (char *) palloc(strlen(DataDir) + strlen("/base/") + 2 * sizeof(NameData) + 2 + nchars); - if (segno == 0) - sprintf(path, "%s/base/%s/%s", DataDir, - dbstr, relstr); + nchars = 0; + + /* construct the path to the file and open it */ + if (dbid == (Oid) 0) + { + path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2 + nchars); + if (segno == 0) + sprintf(path, "%s/%s", DataDir, relstr); + else + sprintf(path, "%s/%s.%d", DataDir, relstr, segno); + } else - sprintf(path, "%s/base/%s/%s.%d", DataDir, dbstr, - relstr, segno); - } + { + path = (char *) palloc(strlen(DataDir) + strlen("/base/") + 2 * sizeof(NameData) + 2 + nchars); + if (segno == 0) + sprintf(path, "%s/base/%s/%s", DataDir, + dbstr, relstr); + else + sprintf(path, "%s/base/%s/%s.%d", DataDir, dbstr, + relstr, segno); + } - if ((fd = open(path, O_RDWR, 0600)) < 0) - return (SM_FAIL); + if ((fd = open(path, O_RDWR, 0600)) < 0) + return (SM_FAIL); - /* seek to the right spot */ - seekpos = (long) (BLCKSZ * (blkno % RELSEG_SIZE)); - if (lseek(fd, seekpos, SEEK_SET) != seekpos) { - close(fd); - return (SM_FAIL); - } + /* seek to the right spot */ + seekpos = (long) (BLCKSZ * (blkno % RELSEG_SIZE)); + if (lseek(fd, seekpos, SEEK_SET) != seekpos) + { + close(fd); + return (SM_FAIL); + } - status = SM_SUCCESS; + status = SM_SUCCESS; - /* write and sync the block */ - if (write(fd, buffer, BLCKSZ) != BLCKSZ || (pg_fsync(fd) < 0)) - status = SM_FAIL; + /* write and sync the block */ + if (write(fd, buffer, BLCKSZ) != BLCKSZ || (pg_fsync(fd) < 0)) + status = SM_FAIL; - if (close(fd) < 0) - status = SM_FAIL; + if (close(fd) < 0) + status = SM_FAIL; - pfree(path); + pfree(path); - return (status); + return (status); } /* - * mdnblocks() -- Get the number of blocks stored in a relation. + * mdnblocks() -- Get the number of blocks stored in a relation. * - * Returns # of blocks or -1 on error. + * Returns # of blocks or -1 on error. 
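[Editor's note] The block count walks the chain: a segment holding exactly RELSEG_SIZE blocks forces a hop to the next segment (created on demand), and the first short segment ends the walk. The size-to-blocks step at the bottom is _mdnblocks' trick of seeking to the last byte: zero blocks for an empty file, otherwise the last byte's block index plus one. The same computation via stat(2):

#include <stdio.h>
#include <sys/stat.h>

#define BLCKSZ 8192

/* Blocks in one segment file, or -1 if it can't be examined. */
static long
file_blocks(const char *path)
{
	struct stat st;

	if (stat(path, &st) < 0)
		return -1;
	return st.st_size == 0 ? 0 : 1 + (long) ((st.st_size - 1) / BLCKSZ);
}

int
main(void)
{
	long		n = file_blocks("myrel");

	if (n >= 0)
		printf("%ld block(s)\n", n);
	return 0;
}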
*/ int mdnblocks(Relation reln) { - int fd; - MdfdVec *v; - int nblocks; - int segno; + int fd; + MdfdVec *v; + int nblocks; + int segno; - fd = RelationGetFile(reln); - v = &Md_fdvec[fd]; + fd = RelationGetFile(reln); + v = &Md_fdvec[fd]; #ifdef DIAGNOSTIC - if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE) - elog(FATAL, "segment too big in getseg!"); + if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE) + elog(FATAL, "segment too big in getseg!"); #endif - segno = 0; - for (;;) { - if (v->mdfd_lstbcnt == RELSEG_SIZE - || (nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ)) == RELSEG_SIZE) { - - v->mdfd_lstbcnt = RELSEG_SIZE; - segno++; - - if (v->mdfd_chain == (MdfdVec *) NULL) { - v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT); - if (v->mdfd_chain == (MdfdVec *) NULL) - elog(WARN, "cannot count blocks for %.16s -- open failed", - RelationGetRelationName(reln)); - } - - v = v->mdfd_chain; - } else { - return ((segno * RELSEG_SIZE) + nblocks); + segno = 0; + for (;;) + { + if (v->mdfd_lstbcnt == RELSEG_SIZE + || (nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ)) == RELSEG_SIZE) + { + + v->mdfd_lstbcnt = RELSEG_SIZE; + segno++; + + if (v->mdfd_chain == (MdfdVec *) NULL) + { + v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT); + if (v->mdfd_chain == (MdfdVec *) NULL) + elog(WARN, "cannot count blocks for %.16s -- open failed", + RelationGetRelationName(reln)); + } + + v = v->mdfd_chain; + } + else + { + return ((segno * RELSEG_SIZE) + nblocks); + } } - } } /* - * mdtruncate() -- Truncate relation to specified number of blocks. + * mdtruncate() -- Truncate relation to specified number of blocks. * - * Returns # of blocks or -1 on error. + * Returns # of blocks or -1 on error. */ int -mdtruncate (Relation reln, int nblocks) +mdtruncate(Relation reln, int nblocks) { - int fd; - MdfdVec *v; - int curnblk; + int fd; + MdfdVec *v; + int curnblk; - curnblk = mdnblocks (reln); - if ( curnblk / RELSEG_SIZE > 0 ) - { - elog (NOTICE, "Can't truncate multi-segments relation %s", - &(reln->rd_rel->relname.data[0])); - return (curnblk); - } + curnblk = mdnblocks(reln); + if (curnblk / RELSEG_SIZE > 0) + { + elog(NOTICE, "Can't truncate multi-segments relation %s", + &(reln->rd_rel->relname.data[0])); + return (curnblk); + } + + fd = RelationGetFile(reln); + v = &Md_fdvec[fd]; - fd = RelationGetFile(reln); - v = &Md_fdvec[fd]; + if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0) + return (-1); - if ( FileTruncate (v->mdfd_vfd, nblocks * BLCKSZ) < 0 ) - return (-1); - - return (nblocks); + return (nblocks); -} /* mdtruncate */ +} /* mdtruncate */ /* - * mdcommit() -- Commit a transaction. + * mdcommit() -- Commit a transaction. * - * All changes to magnetic disk relations must be forced to stable - * storage. This routine makes a pass over the private table of - * file descriptors. Any descriptors to which we have done writes, - * but not synced, are synced here. + * All changes to magnetic disk relations must be forced to stable + * storage. This routine makes a pass over the private table of + * file descriptors. Any descriptors to which we have done writes, + * but not synced, are synced here. * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. + * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. 
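[Editor's note] Commit and abort are the same sweep over the descriptor table and every chained segment; commit fsyncs whatever is dirty and clears the flag, while abort just clears the flag, since aborted changes never need to reach disk. In miniature (hypothetical struct shared with the sketches above):

#include <stddef.h>
#include <unistd.h>

typedef struct SegVec
{
	int			fd;
	int			dirty;
	struct SegVec *chain;
} SegVec;

/* Force all dirty segments to disk; -1 on the first fsync failure. */
static int
commit_all(SegVec * table, int nopen)
{
	SegVec	   *v;
	int			i;

	for (i = 0; i < nopen; i++)
		for (v = &table[i]; v != NULL; v = v->chain)
			if (v->dirty)
			{
				if (fsync(v->fd) < 0)
					return -1;
				v->dirty = 0;
			}
	return 0;
}

/* Abort: nothing must reach disk; just forget the dirty bits. */
static void
abort_all(SegVec * table, int nopen)
{
	SegVec	   *v;
	int			i;

	for (i = 0; i < nopen; i++)
		for (v = &table[i]; v != NULL; v = v->chain)
			v->dirty = 0;
}

int
main(void)
{
	SegVec		t[1] = {{-1, 0, NULL}};	/* empty table: both are no-ops */

	commit_all(t, 1);
	abort_all(t, 1);
	return 0;
}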
*/ int mdcommit() { - int i; - MdfdVec *v; + int i; + MdfdVec *v; - for (i = 0; i < CurFd; i++) { - for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) { - if (v->mdfd_flags & MDFD_DIRTY) { - if (FileSync(v->mdfd_vfd) < 0) - return (SM_FAIL); - - v->mdfd_flags &= ~MDFD_DIRTY; - } + for (i = 0; i < CurFd; i++) + { + for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) + { + if (v->mdfd_flags & MDFD_DIRTY) + { + if (FileSync(v->mdfd_vfd) < 0) + return (SM_FAIL); + + v->mdfd_flags &= ~MDFD_DIRTY; + } + } } - } - return (SM_SUCCESS); + return (SM_SUCCESS); } /* - * mdabort() -- Abort a transaction. + * mdabort() -- Abort a transaction. * - * Changes need not be forced to disk at transaction abort. We mark - * all file descriptors as clean here. Always returns SM_SUCCESS. + * Changes need not be forced to disk at transaction abort. We mark + * all file descriptors as clean here. Always returns SM_SUCCESS. */ int mdabort() { - int i; - MdfdVec *v; + int i; + MdfdVec *v; - for (i = 0; i < CurFd; i++) { - for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) { - v->mdfd_flags &= ~MDFD_DIRTY; + for (i = 0; i < CurFd; i++) + { + for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) + { + v->mdfd_flags &= ~MDFD_DIRTY; + } } - } - return (SM_SUCCESS); + return (SM_SUCCESS); } /* - * _fdvec_alloc () -- grab a free (or new) md file descriptor vector. + * _fdvec_alloc () -- grab a free (or new) md file descriptor vector. * */ static -int _fdvec_alloc () +int +_fdvec_alloc() { - MdfdVec *nvec; - int fdvec, i; - MemoryContext oldcxt; - - if ( Md_Free >= 0 ) /* get from free list */ - { - fdvec = Md_Free; - Md_Free = Md_fdvec[fdvec].mdfd_nextFree; - Assert ( Md_fdvec[fdvec].mdfd_flags == MDFD_FREE ); - Md_fdvec[fdvec].mdfd_flags = 0; - if ( fdvec >= CurFd ) + MdfdVec *nvec; + int fdvec, + i; + MemoryContext oldcxt; + + if (Md_Free >= 0) /* get from free list */ { - Assert ( fdvec == CurFd ); - CurFd++; + fdvec = Md_Free; + Md_Free = Md_fdvec[fdvec].mdfd_nextFree; + Assert(Md_fdvec[fdvec].mdfd_flags == MDFD_FREE); + Md_fdvec[fdvec].mdfd_flags = 0; + if (fdvec >= CurFd) + { + Assert(fdvec == CurFd); + CurFd++; + } + return (fdvec); } - return (fdvec); - } - /* Must allocate more room */ - - if ( Nfds != CurFd ) - elog (FATAL, "_fdvec_alloc error"); - - Nfds *= 2; + /* Must allocate more room */ + + if (Nfds != CurFd) + elog(FATAL, "_fdvec_alloc error"); - oldcxt = MemoryContextSwitchTo(MdCxt); + Nfds *= 2; - nvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec)); - memset(nvec, 0, Nfds * sizeof(MdfdVec)); - memmove(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec)); - pfree(Md_fdvec); + oldcxt = MemoryContextSwitchTo(MdCxt); - MemoryContextSwitchTo(oldcxt); + nvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec)); + memset(nvec, 0, Nfds * sizeof(MdfdVec)); + memmove(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec)); + pfree(Md_fdvec); - Md_fdvec = nvec; + MemoryContextSwitchTo(oldcxt); - /* Set new free list */ - for (i = CurFd; i < Nfds; i++ ) - { - Md_fdvec[i].mdfd_nextFree = i + 1; - Md_fdvec[i].mdfd_flags = MDFD_FREE; - } - Md_fdvec[Nfds - 1].mdfd_nextFree = -1; - Md_Free = CurFd + 1; + Md_fdvec = nvec; - fdvec = CurFd; - CurFd++; - Md_fdvec[fdvec].mdfd_flags = 0; + /* Set new free list */ + for (i = CurFd; i < Nfds; i++) + { + Md_fdvec[i].mdfd_nextFree = i + 1; + Md_fdvec[i].mdfd_flags = MDFD_FREE; + } + Md_fdvec[Nfds - 1].mdfd_nextFree = -1; + Md_Free = CurFd + 1; - return (fdvec); + fdvec = CurFd; + CurFd++; + Md_fdvec[fdvec].mdfd_flags = 0; + + return (fdvec); } /* 
- * _fdvec_free () -- free md file descriptor vector. + * _fdvec_free () -- free md file descriptor vector. * */ static -void _fdvec_free (int fdvec) +void +_fdvec_free(int fdvec) { - - Assert ( Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE ); - Md_fdvec[fdvec].mdfd_nextFree = Md_Free; - Md_fdvec[fdvec].mdfd_flags = MDFD_FREE; - Md_Free = fdvec; + + Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE); + Md_fdvec[fdvec].mdfd_nextFree = Md_Free; + Md_fdvec[fdvec].mdfd_flags = MDFD_FREE; + Md_Free = fdvec; } static MdfdVec * _mdfd_openseg(Relation reln, int segno, int oflags) { - MemoryContext oldcxt; - MdfdVec *v; - int fd; - bool dofree; - char *path, *fullpath; - - /* be sure we have enough space for the '.segno', if any */ - path = relpath(RelationGetRelationName(reln)->data); - - dofree = false; - if (segno > 0) { - dofree = true; - fullpath = (char *) palloc(strlen(path) + 12); - sprintf(fullpath, "%s.%d", path, segno); - } else - fullpath = path; - - /* open the file */ - fd = PathNameOpenFile(fullpath, O_RDWR|oflags, 0600); - - if (dofree) - pfree(fullpath); - - if (fd < 0) - return ((MdfdVec *) NULL); - - /* allocate an mdfdvec entry for it */ - oldcxt = MemoryContextSwitchTo(MdCxt); - v = (MdfdVec *) palloc(sizeof(MdfdVec)); - MemoryContextSwitchTo(oldcxt); - - /* fill the entry */ - v->mdfd_vfd = fd; - v->mdfd_flags = (uint16) 0; - v->mdfd_chain = (MdfdVec *) NULL; - v->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ); + MemoryContext oldcxt; + MdfdVec *v; + int fd; + bool dofree; + char *path, + *fullpath; + + /* be sure we have enough space for the '.segno', if any */ + path = relpath(RelationGetRelationName(reln)->data); + + dofree = false; + if (segno > 0) + { + dofree = true; + fullpath = (char *) palloc(strlen(path) + 12); + sprintf(fullpath, "%s.%d", path, segno); + } + else + fullpath = path; + + /* open the file */ + fd = PathNameOpenFile(fullpath, O_RDWR | oflags, 0600); + + if (dofree) + pfree(fullpath); + + if (fd < 0) + return ((MdfdVec *) NULL); + + /* allocate an mdfdvec entry for it */ + oldcxt = MemoryContextSwitchTo(MdCxt); + v = (MdfdVec *) palloc(sizeof(MdfdVec)); + MemoryContextSwitchTo(oldcxt); + + /* fill the entry */ + v->mdfd_vfd = fd; + v->mdfd_flags = (uint16) 0; + v->mdfd_chain = (MdfdVec *) NULL; + v->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ); #ifdef DIAGNOSTIC - if (v->mdfd_lstbcnt > RELSEG_SIZE) - elog(FATAL, "segment too big on open!"); + if (v->mdfd_lstbcnt > RELSEG_SIZE) + elog(FATAL, "segment too big on open!"); #endif - /* all done */ - return (v); + /* all done */ + return (v); } static MdfdVec * _mdfd_getseg(Relation reln, int blkno, int oflag) { - MdfdVec *v; - int segno; - int fd; - int i; - - fd = RelationGetFile(reln); - if (fd < 0) { - if ((fd = mdopen(reln)) < 0) - elog(WARN, "cannot open relation %.16s", - RelationGetRelationName(reln)); - reln->rd_fd = fd; - } - - for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1; - segno > 0; - i++, segno--) { - - if (v->mdfd_chain == (MdfdVec *) NULL) { - v->mdfd_chain = _mdfd_openseg(reln, i, oflag); - - if (v->mdfd_chain == (MdfdVec *) NULL) - elog(WARN, "cannot open segment %d of relation %.16s", - i, RelationGetRelationName(reln)); + MdfdVec *v; + int segno; + int fd; + int i; + + fd = RelationGetFile(reln); + if (fd < 0) + { + if ((fd = mdopen(reln)) < 0) + elog(WARN, "cannot open relation %.16s", + RelationGetRelationName(reln)); + reln->rd_fd = fd; + } + + for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1; + segno > 0; + i++, segno--) + { + + if (v->mdfd_chain == 
(MdfdVec *) NULL) + { + v->mdfd_chain = _mdfd_openseg(reln, i, oflag); + + if (v->mdfd_chain == (MdfdVec *) NULL) + elog(WARN, "cannot open segment %d of relation %.16s", + i, RelationGetRelationName(reln)); + } + v = v->mdfd_chain; } - v = v->mdfd_chain; - } - return (v); + return (v); } -static BlockNumber +static BlockNumber _mdnblocks(File file, Size blcksz) { - long len; - - len = FileSeek(file, 0L, SEEK_END) - 1; - return((BlockNumber)((len < 0) ? 0 : 1 + len / blcksz)); + long len; + + len = FileSeek(file, 0L, SEEK_END) - 1; + return ((BlockNumber) ((len < 0) ? 0 : 1 + len / blcksz)); } diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c index fd6c32da3a3..d0015e4f138 100644 --- a/src/backend/storage/smgr/mm.c +++ b/src/backend/storage/smgr/mm.c @@ -1,16 +1,16 @@ /*------------------------------------------------------------------------- * * mm.c-- - * main memory storage manager + * main memory storage manager * - * This code manages relations that reside in (presumably stable) - * main memory. + * This code manages relations that reside in (presumably stable) + * main memory. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.4 1996/11/08 05:59:11 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.5 1997/09/07 04:49:22 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -20,7 +20,7 @@ #include <math.h> #include "storage/ipc.h" -#include "storage/smgr.h" /* where the declarations go */ +#include "storage/smgr.h" /* where the declarations go */ #include "storage/block.h" #include "storage/shmem.h" #include "storage/spin.h" @@ -31,555 +31,582 @@ #include "utils/memutils.h" /* - * MMCacheTag -- Unique triplet for blocks stored by the main memory - * storage manager. + * MMCacheTag -- Unique triplet for blocks stored by the main memory + * storage manager. */ -typedef struct MMCacheTag { - Oid mmct_dbid; - Oid mmct_relid; - BlockNumber mmct_blkno; -} MMCacheTag; +typedef struct MMCacheTag +{ + Oid mmct_dbid; + Oid mmct_relid; + BlockNumber mmct_blkno; +} MMCacheTag; /* - * Shared-memory hash table for main memory relations contains - * entries of this form. + * Shared-memory hash table for main memory relations contains + * entries of this form. */ -typedef struct MMHashEntry { - MMCacheTag mmhe_tag; - int mmhe_bufno; -} MMHashEntry; +typedef struct MMHashEntry +{ + MMCacheTag mmhe_tag; + int mmhe_bufno; +} MMHashEntry; /* * MMRelTag -- Unique identifier for each relation that is stored in the - * main-memory storage manager. + * main-memory storage manager. */ -typedef struct MMRelTag { - Oid mmrt_dbid; - Oid mmrt_relid; -} MMRelTag; +typedef struct MMRelTag +{ + Oid mmrt_dbid; + Oid mmrt_relid; +} MMRelTag; /* - * Shared-memory hash table for # blocks in main memory relations contains - * entries of this form. + * Shared-memory hash table for # blocks in main memory relations contains + * entries of this form. 
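[Editor's note: the main-memory manager keys its shared hash tables on fixed-size tag structs like the ones above and below. A minimal standalone sketch of how such a (dbid, relid, blkno) triple can be hashed byte-wise, in the spirit of tag_hash; the DemoCacheTag type and the hash function here are simplified stand-ins, not the backend's:]

#include <stdio.h>
#include <string.h>

typedef unsigned int Oid;
typedef unsigned int BlockNumber;

typedef struct DemoCacheTag
{
	Oid			dbid;		/* (Oid) 0 for shared relations */
	Oid			relid;
	BlockNumber	blkno;
} DemoCacheTag;

/* byte-wise hash over the whole key, in the spirit of tag_hash */
static unsigned int
demo_tag_hash(const DemoCacheTag *tag, unsigned int nbuckets)
{
	const unsigned char *k = (const unsigned char *) tag;
	unsigned int h = 0;
	size_t		i;

	for (i = 0; i < sizeof(DemoCacheTag); i++)
		h = h * 31 + k[i];
	return h % nbuckets;
}

int
main(void)
{
	DemoCacheTag tag;

	memset(&tag, 0, sizeof(tag));	/* zero any padding before hashing */
	tag.dbid = 1;
	tag.relid = 1259;
	tag.blkno = 0;
	printf("bucket = %u\n", demo_tag_hash(&tag, 16));
	return 0;
}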
*/ -typedef struct MMRelHashEntry { - MMRelTag mmrhe_tag; - int mmrhe_nblocks; -} MMRelHashEntry; +typedef struct MMRelHashEntry +{ + MMRelTag mmrhe_tag; + int mmrhe_nblocks; +} MMRelHashEntry; -#define MMNBUFFERS 10 +#define MMNBUFFERS 10 #define MMNRELATIONS 2 -SPINLOCK MMCacheLock; -extern bool IsPostmaster; -extern Oid MyDatabaseId; +SPINLOCK MMCacheLock; +extern bool IsPostmaster; +extern Oid MyDatabaseId; -static int *MMCurTop; -static int *MMCurRelno; -static MMCacheTag *MMBlockTags; -static char *MMBlockCache; -static HTAB *MMCacheHT; -static HTAB *MMRelCacheHT; +static int *MMCurTop; +static int *MMCurRelno; +static MMCacheTag *MMBlockTags; +static char *MMBlockCache; +static HTAB *MMCacheHT; +static HTAB *MMRelCacheHT; int mminit() { - char *mmcacheblk; - int mmsize = 0; - bool found; - HASHCTL info; + char *mmcacheblk; + int mmsize = 0; + bool found; + HASHCTL info; - SpinAcquire(MMCacheLock); + SpinAcquire(MMCacheLock); - mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS); - mmsize += MAXALIGN(sizeof(*MMCurTop)); - mmsize += MAXALIGN(sizeof(*MMCurRelno)); - mmsize += MAXALIGN((MMNBUFFERS * sizeof(MMCacheTag))); - mmcacheblk = (char *) ShmemInitStruct("Main memory smgr", mmsize, &found); + mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS); + mmsize += MAXALIGN(sizeof(*MMCurTop)); + mmsize += MAXALIGN(sizeof(*MMCurRelno)); + mmsize += MAXALIGN((MMNBUFFERS * sizeof(MMCacheTag))); + mmcacheblk = (char *) ShmemInitStruct("Main memory smgr", mmsize, &found); - if (mmcacheblk == (char *) NULL) { - SpinRelease(MMCacheLock); - return (SM_FAIL); - } + if (mmcacheblk == (char *) NULL) + { + SpinRelease(MMCacheLock); + return (SM_FAIL); + } - info.keysize = sizeof(MMCacheTag); - info.datasize = sizeof(int); - info.hash = tag_hash; + info.keysize = sizeof(MMCacheTag); + info.datasize = sizeof(int); + info.hash = tag_hash; - MMCacheHT = (HTAB *) ShmemInitHash("Main memory store HT", - MMNBUFFERS, MMNBUFFERS, - &info, (HASH_ELEM|HASH_FUNCTION)); + MMCacheHT = (HTAB *) ShmemInitHash("Main memory store HT", + MMNBUFFERS, MMNBUFFERS, + &info, (HASH_ELEM | HASH_FUNCTION)); - if (MMCacheHT == (HTAB *) NULL) { - SpinRelease(MMCacheLock); - return (SM_FAIL); - } + if (MMCacheHT == (HTAB *) NULL) + { + SpinRelease(MMCacheLock); + return (SM_FAIL); + } - info.keysize = sizeof(MMRelTag); - info.datasize = sizeof(int); - info.hash = tag_hash; + info.keysize = sizeof(MMRelTag); + info.datasize = sizeof(int); + info.hash = tag_hash; - MMRelCacheHT = (HTAB *) ShmemInitHash("Main memory rel HT", - MMNRELATIONS, MMNRELATIONS, - &info, (HASH_ELEM|HASH_FUNCTION)); + MMRelCacheHT = (HTAB *) ShmemInitHash("Main memory rel HT", + MMNRELATIONS, MMNRELATIONS, + &info, (HASH_ELEM | HASH_FUNCTION)); - if (MMRelCacheHT == (HTAB *) NULL) { - SpinRelease(MMCacheLock); - return (SM_FAIL); - } + if (MMRelCacheHT == (HTAB *) NULL) + { + SpinRelease(MMCacheLock); + return (SM_FAIL); + } - if (IsPostmaster) { - memset(mmcacheblk, 0, mmsize); - SpinRelease(MMCacheLock); - return (SM_SUCCESS); - } + if (IsPostmaster) + { + memset(mmcacheblk, 0, mmsize); + SpinRelease(MMCacheLock); + return (SM_SUCCESS); + } - SpinRelease(MMCacheLock); + SpinRelease(MMCacheLock); - MMCurTop = (int *) mmcacheblk; - mmcacheblk += sizeof(int); - MMCurRelno = (int *) mmcacheblk; - mmcacheblk += sizeof(int); - MMBlockTags = (MMCacheTag *) mmcacheblk; - mmcacheblk += (MMNBUFFERS * sizeof(MMCacheTag)); - MMBlockCache = mmcacheblk; + MMCurTop = (int *) mmcacheblk; + mmcacheblk += sizeof(int); + MMCurRelno = (int *) mmcacheblk; + mmcacheblk += sizeof(int); + MMBlockTags = 
(MMCacheTag *) mmcacheblk; + mmcacheblk += (MMNBUFFERS * sizeof(MMCacheTag)); + MMBlockCache = mmcacheblk; - return (SM_SUCCESS); + return (SM_SUCCESS); } int mmshutdown() { - return (SM_SUCCESS); + return (SM_SUCCESS); } int mmcreate(Relation reln) { - MMRelHashEntry *entry; - bool found; - MMRelTag tag; + MMRelHashEntry *entry; + bool found; + MMRelTag tag; - SpinAcquire(MMCacheLock); + SpinAcquire(MMCacheLock); - if (*MMCurRelno == MMNRELATIONS) { - SpinRelease(MMCacheLock); - return (SM_FAIL); - } + if (*MMCurRelno == MMNRELATIONS) + { + SpinRelease(MMCacheLock); + return (SM_FAIL); + } - (*MMCurRelno)++; + (*MMCurRelno)++; - tag.mmrt_relid = reln->rd_id; - if (reln->rd_rel->relisshared) - tag.mmrt_dbid = (Oid) 0; - else - tag.mmrt_dbid = MyDatabaseId; + tag.mmrt_relid = reln->rd_id; + if (reln->rd_rel->relisshared) + tag.mmrt_dbid = (Oid) 0; + else + tag.mmrt_dbid = MyDatabaseId; - entry = (MMRelHashEntry *) hash_search(MMRelCacheHT, - (char *) &tag, HASH_ENTER, &found); + entry = (MMRelHashEntry *) hash_search(MMRelCacheHT, + (char *) &tag, HASH_ENTER, &found); - if (entry == (MMRelHashEntry *) NULL) { - SpinRelease(MMCacheLock); - elog(FATAL, "main memory storage mgr rel cache hash table corrupt"); - } + if (entry == (MMRelHashEntry *) NULL) + { + SpinRelease(MMCacheLock); + elog(FATAL, "main memory storage mgr rel cache hash table corrupt"); + } - if (found) { - /* already exists */ - SpinRelease(MMCacheLock); - return (SM_FAIL); - } + if (found) + { + /* already exists */ + SpinRelease(MMCacheLock); + return (SM_FAIL); + } - entry->mmrhe_nblocks = 0; + entry->mmrhe_nblocks = 0; - SpinRelease(MMCacheLock); + SpinRelease(MMCacheLock); - return (SM_SUCCESS); + return (SM_SUCCESS); } /* - * mmunlink() -- Unlink a relation. + * mmunlink() -- Unlink a relation. 
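[Editor's note: mmunlink(), defined just below, walks the fixed array of buffer tags and clears every slot owned by the relation. A standalone sketch of that scan with simplified stand-in types (all demo_* names are invented); the real routine also removes the matching hash-table entries while holding MMCacheLock, which this sketch omits:]

#include <stdio.h>

typedef unsigned int Oid;

#define DEMO_NBUFFERS 10

static struct
{
	Oid			dbid;
	Oid			relid;
	unsigned int blkno;
}			demo_tags[DEMO_NBUFFERS];

/* clear every buffer slot that belongs to one relation */
static int
demo_unlink_rel(Oid dbid, Oid relid)
{
	int			i,
				freed = 0;

	for (i = 0; i < DEMO_NBUFFERS; i++)
	{
		if (demo_tags[i].dbid == dbid && demo_tags[i].relid == relid)
		{
			demo_tags[i].dbid = 0;
			demo_tags[i].relid = 0;
			demo_tags[i].blkno = 0;
			freed++;
		}
	}
	return freed;
}

int
main(void)
{
	demo_tags[3].dbid = 1;
	demo_tags[3].relid = 42;
	printf("freed %d slot(s)\n", demo_unlink_rel(1, 42));
	return 0;
}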
*/ int mmunlink(Relation reln) { - int i; - Oid reldbid; - MMHashEntry *entry; - MMRelHashEntry *rentry; - bool found; - MMRelTag rtag; - - if (reln->rd_rel->relisshared) - reldbid = (Oid) 0; - else - reldbid = MyDatabaseId; - - SpinAcquire(MMCacheLock); - - for (i = 0; i < MMNBUFFERS; i++) { - if (MMBlockTags[i].mmct_dbid == reldbid - && MMBlockTags[i].mmct_relid == reln->rd_id) { - entry = (MMHashEntry *) hash_search(MMCacheHT, - (char *) &MMBlockTags[i], - HASH_REMOVE, &found); - if (entry == (MMHashEntry *) NULL || !found) { - SpinRelease(MMCacheLock); - elog(FATAL, "mmunlink: cache hash table corrupted"); - } - MMBlockTags[i].mmct_dbid = (Oid) 0; - MMBlockTags[i].mmct_relid = (Oid) 0; - MMBlockTags[i].mmct_blkno = (BlockNumber) 0; + int i; + Oid reldbid; + MMHashEntry *entry; + MMRelHashEntry *rentry; + bool found; + MMRelTag rtag; + + if (reln->rd_rel->relisshared) + reldbid = (Oid) 0; + else + reldbid = MyDatabaseId; + + SpinAcquire(MMCacheLock); + + for (i = 0; i < MMNBUFFERS; i++) + { + if (MMBlockTags[i].mmct_dbid == reldbid + && MMBlockTags[i].mmct_relid == reln->rd_id) + { + entry = (MMHashEntry *) hash_search(MMCacheHT, + (char *) &MMBlockTags[i], + HASH_REMOVE, &found); + if (entry == (MMHashEntry *) NULL || !found) + { + SpinRelease(MMCacheLock); + elog(FATAL, "mmunlink: cache hash table corrupted"); + } + MMBlockTags[i].mmct_dbid = (Oid) 0; + MMBlockTags[i].mmct_relid = (Oid) 0; + MMBlockTags[i].mmct_blkno = (BlockNumber) 0; + } } - } - rtag.mmrt_dbid = reldbid; - rtag.mmrt_relid = reln->rd_id; + rtag.mmrt_dbid = reldbid; + rtag.mmrt_relid = reln->rd_id; - rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, - HASH_REMOVE, &found); + rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, + HASH_REMOVE, &found); - if (rentry == (MMRelHashEntry *) NULL || !found) { - SpinRelease(MMCacheLock); - elog(FATAL, "mmunlink: rel cache hash table corrupted"); - } + if (rentry == (MMRelHashEntry *) NULL || !found) + { + SpinRelease(MMCacheLock); + elog(FATAL, "mmunlink: rel cache hash table corrupted"); + } - (*MMCurRelno)--; + (*MMCurRelno)--; - SpinRelease(MMCacheLock); - return 1; + SpinRelease(MMCacheLock); + return 1; } /* - * mmextend() -- Add a block to the specified relation. + * mmextend() -- Add a block to the specified relation. * - * This routine returns SM_FAIL or SM_SUCCESS, with errno set as - * appropriate. + * This routine returns SM_FAIL or SM_SUCCESS, with errno set as + * appropriate. 
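[Editor's note: mmextend(), defined just below, must first find a buffer slot. A standalone sketch of that allocation policy -- hand out fresh slots until the cache is full, then scan for a slot freed by mmunlink(); the demo_* names are invented, and the in_use flag stands in for the dbid/relid == 0 test:]

#include <stdio.h>

#define DEMO_NBUFFERS 10

static int	demo_cur_top = 0;			/* next never-used slot */
static int	demo_in_use[DEMO_NBUFFERS];	/* stand-in for dbid/relid == 0 */

/* returns a slot number, or -1 when the cache is full (SM_FAIL) */
static int
demo_alloc_slot(void)
{
	int			i;

	if (demo_cur_top == DEMO_NBUFFERS)
	{
		for (i = 0; i < DEMO_NBUFFERS; i++)
		{
			if (!demo_in_use[i])
				break;
		}
		if (i == DEMO_NBUFFERS)
			return -1;			/* no free slot: SM_FAIL */
	}
	else
		i = demo_cur_top++;

	demo_in_use[i] = 1;
	return i;
}

int
main(void)
{
	int			n = 0;

	while (demo_alloc_slot() >= 0)
		n++;
	printf("allocated %d slots before SM_FAIL\n", n);	/* prints 10 */
	return 0;
}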
*/ int mmextend(Relation reln, char *buffer) { - MMRelHashEntry *rentry; - MMHashEntry *entry; - int i; - Oid reldbid; - int offset; - bool found; - MMRelTag rtag; - MMCacheTag tag; - - if (reln->rd_rel->relisshared) - reldbid = (Oid) 0; - else - reldbid = MyDatabaseId; - - tag.mmct_dbid = rtag.mmrt_dbid = reldbid; - tag.mmct_relid = rtag.mmrt_relid = reln->rd_id; - - SpinAcquire(MMCacheLock); - - if (*MMCurTop == MMNBUFFERS) { - for (i = 0; i < MMNBUFFERS; i++) { - if (MMBlockTags[i].mmct_dbid == 0 && - MMBlockTags[i].mmct_relid == 0) - break; + MMRelHashEntry *rentry; + MMHashEntry *entry; + int i; + Oid reldbid; + int offset; + bool found; + MMRelTag rtag; + MMCacheTag tag; + + if (reln->rd_rel->relisshared) + reldbid = (Oid) 0; + else + reldbid = MyDatabaseId; + + tag.mmct_dbid = rtag.mmrt_dbid = reldbid; + tag.mmct_relid = rtag.mmrt_relid = reln->rd_id; + + SpinAcquire(MMCacheLock); + + if (*MMCurTop == MMNBUFFERS) + { + for (i = 0; i < MMNBUFFERS; i++) + { + if (MMBlockTags[i].mmct_dbid == 0 && + MMBlockTags[i].mmct_relid == 0) + break; + } + if (i == MMNBUFFERS) + { + SpinRelease(MMCacheLock); + return (SM_FAIL); + } } - if (i == MMNBUFFERS) { - SpinRelease(MMCacheLock); - return (SM_FAIL); + else + { + i = *MMCurTop; + (*MMCurTop)++; } - } else { - i = *MMCurTop; - (*MMCurTop)++; - } - - rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, - HASH_FIND, &found); - if (rentry == (MMRelHashEntry *) NULL || !found) { - SpinRelease(MMCacheLock); - elog(FATAL, "mmextend: rel cache hash table corrupt"); - } - tag.mmct_blkno = rentry->mmrhe_nblocks; + rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, + HASH_FIND, &found); + if (rentry == (MMRelHashEntry *) NULL || !found) + { + SpinRelease(MMCacheLock); + elog(FATAL, "mmextend: rel cache hash table corrupt"); + } - entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, - HASH_ENTER, &found); - if (entry == (MMHashEntry *) NULL || found) { - SpinRelease(MMCacheLock); - elog(FATAL, "mmextend: cache hash table corrupt"); - } + tag.mmct_blkno = rentry->mmrhe_nblocks; - entry->mmhe_bufno = i; - MMBlockTags[i].mmct_dbid = reldbid; - MMBlockTags[i].mmct_relid = reln->rd_id; - MMBlockTags[i].mmct_blkno = rentry->mmrhe_nblocks; + entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, + HASH_ENTER, &found); + if (entry == (MMHashEntry *) NULL || found) + { + SpinRelease(MMCacheLock); + elog(FATAL, "mmextend: cache hash table corrupt"); + } - /* page numbers are zero-based, so we increment this at the end */ - (rentry->mmrhe_nblocks)++; + entry->mmhe_bufno = i; + MMBlockTags[i].mmct_dbid = reldbid; + MMBlockTags[i].mmct_relid = reln->rd_id; + MMBlockTags[i].mmct_blkno = rentry->mmrhe_nblocks; - /* write the extended page */ - offset = (i * BLCKSZ); - memmove(&(MMBlockCache[offset]), buffer, BLCKSZ); + /* page numbers are zero-based, so we increment this at the end */ + (rentry->mmrhe_nblocks)++; - SpinRelease(MMCacheLock); + /* write the extended page */ + offset = (i * BLCKSZ); + memmove(&(MMBlockCache[offset]), buffer, BLCKSZ); - return (SM_SUCCESS); + SpinRelease(MMCacheLock); + + return (SM_SUCCESS); } /* - * mmopen() -- Open the specified relation. + * mmopen() -- Open the specified relation. */ int mmopen(Relation reln) { - /* automatically successful */ - return (0); + /* automatically successful */ + return (0); } /* - * mmclose() -- Close the specified relation. + * mmclose() -- Close the specified relation. * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. 
+ * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */ int mmclose(Relation reln) { - /* automatically successful */ - return (SM_SUCCESS); + /* automatically successful */ + return (SM_SUCCESS); } /* - * mmread() -- Read the specified block from a relation. + * mmread() -- Read the specified block from a relation. * - * Returns SM_SUCCESS or SM_FAIL. + * Returns SM_SUCCESS or SM_FAIL. */ int mmread(Relation reln, BlockNumber blocknum, char *buffer) { - MMHashEntry *entry; - bool found; - int offset; - MMCacheTag tag; + MMHashEntry *entry; + bool found; + int offset; + MMCacheTag tag; - if (reln->rd_rel->relisshared) - tag.mmct_dbid = (Oid) 0; - else - tag.mmct_dbid = MyDatabaseId; + if (reln->rd_rel->relisshared) + tag.mmct_dbid = (Oid) 0; + else + tag.mmct_dbid = MyDatabaseId; - tag.mmct_relid = reln->rd_id; - tag.mmct_blkno = blocknum; + tag.mmct_relid = reln->rd_id; + tag.mmct_blkno = blocknum; - SpinAcquire(MMCacheLock); - entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, - HASH_FIND, &found); + SpinAcquire(MMCacheLock); + entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag, + HASH_FIND, &found); - if (entry == (MMHashEntry *) NULL) { - SpinRelease(MMCacheLock); - elog(FATAL, "mmread: hash table corrupt"); - } + if (entry == (MMHashEntry *) NULL) + { + SpinRelease(MMCacheLock); + elog(FATAL, "mmread: hash table corrupt"); + } - if (!found) { - /* reading nonexistent pages is defined to fill them with zeroes */ - SpinRelease(MMCacheLock); - memset(buffer, 0, BLCKSZ); - return (SM_SUCCESS); - } + if (!found) + { + /* reading nonexistent pages is defined to fill them with zeroes */ + SpinRelease(MMCacheLock); + memset(buffer, 0, BLCKSZ); + return (SM_SUCCESS); + } - offset = (entry->mmhe_bufno * BLCKSZ); - memmove(buffer, &MMBlockCache[offset], BLCKSZ); + offset = (entry->mmhe_bufno * BLCKSZ); + memmove(buffer, &MMBlockCache[offset], BLCKSZ); - SpinRelease(MMCacheLock); + SpinRelease(MMCacheLock); - return (SM_SUCCESS); + return (SM_SUCCESS); } /* - * mmwrite() -- Write the supplied block at the appropriate location. + * mmwrite() -- Write the supplied block at the appropriate location. * - * Returns SM_SUCCESS or SM_FAIL. + * Returns SM_SUCCESS or SM_FAIL. 
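[Editor's note: mmread() above and mmwrite() below address the cache as slot number times BLCKSZ, and a page that was never written is defined to read back as zeroes. A standalone sketch of that addressing, with stand-in demo_* names and a negative bufno playing the role of "tag not found":]

#include <stdio.h>
#include <string.h>

#define DEMO_BLCKSZ   8192
#define DEMO_NBUFFERS 10

static char demo_cache[DEMO_NBUFFERS * DEMO_BLCKSZ];

/* bufno < 0 stands in for "tag not found": such pages read as zeroes */
static void
demo_read_block(int bufno, char *out)
{
	if (bufno < 0)
		memset(out, 0, DEMO_BLCKSZ);
	else
		memcpy(out, demo_cache + (size_t) bufno * DEMO_BLCKSZ, DEMO_BLCKSZ);
}

static void
demo_write_block(int bufno, const char *in)
{
	memcpy(demo_cache + (size_t) bufno * DEMO_BLCKSZ, in, DEMO_BLCKSZ);
}

int
main(void)
{
	char		page[DEMO_BLCKSZ];

	memset(page, 'x', sizeof(page));
	demo_write_block(2, page);
	demo_read_block(2, page);		/* comes back as written */
	printf("%c\n", page[0]);		/* x */
	demo_read_block(-1, page);		/* never-written page */
	printf("%d\n", page[0]);		/* 0 */
	return 0;
}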
*/
int
mmwrite(Relation reln, BlockNumber blocknum, char *buffer)
{
- MMHashEntry *entry;
- bool found;
- int offset;
- MMCacheTag tag;
+ MMHashEntry *entry;
+ bool found;
+ int offset;
+ MMCacheTag tag;
- if (reln->rd_rel->relisshared)
- tag.mmct_dbid = (Oid) 0;
- else
- tag.mmct_dbid = MyDatabaseId;
+ if (reln->rd_rel->relisshared)
+ tag.mmct_dbid = (Oid) 0;
+ else
+ tag.mmct_dbid = MyDatabaseId;
- tag.mmct_relid = reln->rd_id;
- tag.mmct_blkno = blocknum;
+ tag.mmct_relid = reln->rd_id;
+ tag.mmct_blkno = blocknum;
- SpinAcquire(MMCacheLock);
- entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
- HASH_FIND, &found);
+ SpinAcquire(MMCacheLock);
+ entry = (MMHashEntry *) hash_search(MMCacheHT, (char *) &tag,
+ HASH_FIND, &found);
- if (entry == (MMHashEntry *) NULL) {
- SpinRelease(MMCacheLock);
- elog(FATAL, "mmread: hash table corrupt");
- }
+ if (entry == (MMHashEntry *) NULL)
+ {
+ SpinRelease(MMCacheLock);
+ elog(FATAL, "mmwrite: hash table corrupt");
+ }
- if (!found) {
- SpinRelease(MMCacheLock);
- elog(FATAL, "mmwrite: hash table missing requested page");
- }
+ if (!found)
+ {
+ SpinRelease(MMCacheLock);
+ elog(FATAL, "mmwrite: hash table missing requested page");
+ }
- offset = (entry->mmhe_bufno * BLCKSZ);
- memmove(&MMBlockCache[offset], buffer, BLCKSZ);
+ offset = (entry->mmhe_bufno * BLCKSZ);
+ memmove(&MMBlockCache[offset], buffer, BLCKSZ);
- SpinRelease(MMCacheLock);
+ SpinRelease(MMCacheLock);
- return (SM_SUCCESS);
+ return (SM_SUCCESS);
}
/*
- * mmflush() -- Synchronously write a block to stable storage.
 *
- * For main-memory relations, this is exactly equivalent to mmwrite().
+ * For main-memory relations, this is exactly equivalent to mmwrite().
 */
int
mmflush(Relation reln, BlockNumber blocknum, char *buffer)
{
- return (mmwrite(reln, blocknum, buffer));
+ return (mmwrite(reln, blocknum, buffer));
}
/*
- * mmblindwrt() -- Write a block to stable storage blind.
 *
- * We have to be able to do this using only the name and OID of
- * the database and relation in which the block belongs.
+ * We have to be able to do this using only the name and OID of
+ * the database and relation in which the block belongs.
 */
int
mmblindwrt(char *dbstr,
- char *relstr,
- Oid dbid,
- Oid relid,
- BlockNumber blkno,
- char *buffer)
+ char *relstr,
+ Oid dbid,
+ Oid relid,
+ BlockNumber blkno,
+ char *buffer)
{
- return (SM_FAIL);
+ return (SM_FAIL);
}
/*
- * mmnblocks() -- Get the number of blocks stored in a relation.
 *
- * Returns # of blocks or -1 on error.
+ * Returns # of blocks or -1 on error. 
*/ int mmnblocks(Relation reln) { - MMRelTag rtag; - MMRelHashEntry *rentry; - bool found; - int nblocks; + MMRelTag rtag; + MMRelHashEntry *rentry; + bool found; + int nblocks; - if (reln->rd_rel->relisshared) - rtag.mmrt_dbid = (Oid) 0; - else - rtag.mmrt_dbid = MyDatabaseId; + if (reln->rd_rel->relisshared) + rtag.mmrt_dbid = (Oid) 0; + else + rtag.mmrt_dbid = MyDatabaseId; - rtag.mmrt_relid = reln->rd_id; + rtag.mmrt_relid = reln->rd_id; - SpinAcquire(MMCacheLock); + SpinAcquire(MMCacheLock); - rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, - HASH_FIND, &found); + rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, (char *) &rtag, + HASH_FIND, &found); - if (rentry == (MMRelHashEntry *) NULL) { - SpinRelease(MMCacheLock); - elog(FATAL, "mmnblocks: rel cache hash table corrupt"); - } + if (rentry == (MMRelHashEntry *) NULL) + { + SpinRelease(MMCacheLock); + elog(FATAL, "mmnblocks: rel cache hash table corrupt"); + } - if (found) - nblocks = rentry->mmrhe_nblocks; - else - nblocks = -1; + if (found) + nblocks = rentry->mmrhe_nblocks; + else + nblocks = -1; - SpinRelease(MMCacheLock); + SpinRelease(MMCacheLock); - return (nblocks); + return (nblocks); } /* - * mmcommit() -- Commit a transaction. + * mmcommit() -- Commit a transaction. * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. + * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */ int mmcommit() { - return (SM_SUCCESS); + return (SM_SUCCESS); } /* - * mmabort() -- Abort a transaction. + * mmabort() -- Abort a transaction. */ int mmabort() { - return (SM_SUCCESS); + return (SM_SUCCESS); } /* - * MMShmemSize() -- Declare amount of shared memory we require. + * MMShmemSize() -- Declare amount of shared memory we require. * - * The shared memory initialization code creates a block of shared - * memory exactly big enough to hold all the structures it needs to. - * This routine declares how much space the main memory storage - * manager will use. + * The shared memory initialization code creates a block of shared + * memory exactly big enough to hold all the structures it needs to. + * This routine declares how much space the main memory storage + * manager will use. 
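[Editor's note: MMShmemSize(), defined just below, estimates shared-memory use by rounding the bucket and segment counts up to powers of two and summing MAXALIGNed pieces. A partial standalone sketch of the same arithmetic; DEMO_FFACTOR and DEMO_SEGSIZE are assumed stand-ins for DEF_FFACTOR and DEF_SEGSIZE, and only two of the terms are shown:]

#include <stdio.h>

#define DEMO_NBUFFERS 10
#define DEMO_FFACTOR  1			/* stand-in for DEF_FFACTOR */
#define DEMO_SEGSIZE  256		/* stand-in for DEF_SEGSIZE */
#define DEMO_MAXALIGN(x) (((x) + 7) & ~((unsigned long) 7))

static int
demo_ceil_log2(int n)			/* smallest k with (1 << k) >= n */
{
	int			k = 0;

	while ((1 << k) < n)
		k++;
	return k;
}

int
main(void)
{
	int			nbuckets = 1 << demo_ceil_log2((DEMO_NBUFFERS - 1) / DEMO_FFACTOR + 1);
	int			nsegs = 1 << demo_ceil_log2((nbuckets - 1) / DEMO_SEGSIZE + 1);
	unsigned long size = 0;

	/* hash directory and segments, each rounded up to alignment */
	size += DEMO_MAXALIGN(demo_ceil_log2(DEMO_NBUFFERS) * sizeof(void *));
	size += (unsigned long) nsegs * DEMO_MAXALIGN(DEMO_SEGSIZE * sizeof(void *));

	printf("nbuckets=%d nsegs=%d partial size=%lu\n", nbuckets, nsegs, size);
	return 0;
}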
*/ int MMShmemSize() { - int size = 0; - int nbuckets; - int nsegs; - int tmp; - - /* - * first compute space occupied by the (dbid,relid,blkno) hash table - */ - - nbuckets = 1 << (int)my_log2((MMNBUFFERS - 1) / DEF_FFACTOR + 1); - nsegs = 1 << (int)my_log2((nbuckets - 1) / DEF_SEGSIZE + 1); - - size += MAXALIGN(my_log2(MMNBUFFERS) * sizeof(void *)); - size += MAXALIGN(sizeof(HHDR)); - size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); - tmp = (int)ceil((double)MMNBUFFERS/BUCKET_ALLOC_INCR); - size += tmp * BUCKET_ALLOC_INCR * - (MAXALIGN(sizeof(BUCKET_INDEX)) + - MAXALIGN(sizeof(MMHashEntry))); /* contains hash key */ - - /* - * now do the same for the rel hash table - */ - - size += MAXALIGN(my_log2(MMNRELATIONS) * sizeof(void *)); - size += MAXALIGN(sizeof(HHDR)); - size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); - tmp = (int)ceil((double)MMNRELATIONS/BUCKET_ALLOC_INCR); - size += tmp * BUCKET_ALLOC_INCR * - (MAXALIGN(sizeof(BUCKET_INDEX)) + - MAXALIGN(sizeof(MMRelHashEntry))); /* contains hash key */ - - /* - * finally, add in the memory block we use directly - */ - - size += MAXALIGN(BLCKSZ * MMNBUFFERS); - size += MAXALIGN(sizeof(*MMCurTop)); - size += MAXALIGN(sizeof(*MMCurRelno)); - size += MAXALIGN(MMNBUFFERS * sizeof(MMCacheTag)); - - return (size); + int size = 0; + int nbuckets; + int nsegs; + int tmp; + + /* + * first compute space occupied by the (dbid,relid,blkno) hash table + */ + + nbuckets = 1 << (int) my_log2((MMNBUFFERS - 1) / DEF_FFACTOR + 1); + nsegs = 1 << (int) my_log2((nbuckets - 1) / DEF_SEGSIZE + 1); + + size += MAXALIGN(my_log2(MMNBUFFERS) * sizeof(void *)); + size += MAXALIGN(sizeof(HHDR)); + size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); + tmp = (int) ceil((double) MMNBUFFERS / BUCKET_ALLOC_INCR); + size += tmp * BUCKET_ALLOC_INCR * + (MAXALIGN(sizeof(BUCKET_INDEX)) + + MAXALIGN(sizeof(MMHashEntry))); /* contains hash key */ + + /* + * now do the same for the rel hash table + */ + + size += MAXALIGN(my_log2(MMNRELATIONS) * sizeof(void *)); + size += MAXALIGN(sizeof(HHDR)); + size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT)); + tmp = (int) ceil((double) MMNRELATIONS / BUCKET_ALLOC_INCR); + size += tmp * BUCKET_ALLOC_INCR * + (MAXALIGN(sizeof(BUCKET_INDEX)) + + MAXALIGN(sizeof(MMRelHashEntry))); /* contains hash key */ + + /* + * finally, add in the memory block we use directly + */ + + size += MAXALIGN(BLCKSZ * MMNBUFFERS); + size += MAXALIGN(sizeof(*MMCurTop)); + size += MAXALIGN(sizeof(*MMCurRelno)); + size += MAXALIGN(MMNBUFFERS * sizeof(MMCacheTag)); + + return (size); } -#endif /* MAIN_MEMORY */ +#endif /* MAIN_MEMORY */ diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 89ac5e92cb7..9fc395da0d9 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -1,16 +1,16 @@ /*------------------------------------------------------------------------- * * smgr.c-- - * public interface routines to storage manager switch. + * public interface routines to storage manager switch. * - * All file system operations in POSTGRES dispatch through these - * routines. + * All file system operations in POSTGRES dispatch through these + * routines. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.8 1997/08/19 21:33:38 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.9 1997/09/07 04:49:25 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -23,380 +23,390 @@ #include "utils/rel.h" #include "utils/palloc.h" -static void smgrshutdown(int dummy); - -typedef struct f_smgr { - int (*smgr_init)(); /* may be NULL */ - int (*smgr_shutdown)(); /* may be NULL */ - int (*smgr_create)(); - int (*smgr_unlink)(); - int (*smgr_extend)(); - int (*smgr_open)(); - int (*smgr_close)(); - int (*smgr_read)(); - int (*smgr_write)(); - int (*smgr_flush)(); - int (*smgr_blindwrt)(); - int (*smgr_nblocks)(); - int (*smgr_truncate)(); - int (*smgr_commit)(); /* may be NULL */ - int (*smgr_abort)(); /* may be NULL */ -} f_smgr; +static void smgrshutdown(int dummy); + +typedef struct f_smgr +{ + int (*smgr_init) (); /* may be NULL */ + int (*smgr_shutdown) (); /* may be NULL */ + int (*smgr_create) (); + int (*smgr_unlink) (); + int (*smgr_extend) (); + int (*smgr_open) (); + int (*smgr_close) (); + int (*smgr_read) (); + int (*smgr_write) (); + int (*smgr_flush) (); + int (*smgr_blindwrt) (); + int (*smgr_nblocks) (); + int (*smgr_truncate) (); + int (*smgr_commit) (); /* may be NULL */ + int (*smgr_abort) (); /* may be NULL */ +} f_smgr; /* - * The weird placement of commas in this init block is to keep the compiler - * happy, regardless of what storage managers we have (or don't have). + * The weird placement of commas in this init block is to keep the compiler + * happy, regardless of what storage managers we have (or don't have). */ -static f_smgr smgrsw[] = { +static f_smgr smgrsw[] = { - /* magnetic disk */ - { mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose, - mdread, mdwrite, mdflush, mdblindwrt, mdnblocks, mdtruncate, - mdcommit, mdabort }, + /* magnetic disk */ + {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose, + mdread, mdwrite, mdflush, mdblindwrt, mdnblocks, mdtruncate, + mdcommit, mdabort}, #ifdef MAIN_MEMORY - /* main memory */ - { mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose, - mmread, mmwrite, mmflush, mmblindwrt, mmnblocks, NULL, - mmcommit, mmabort }, + /* main memory */ + {mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose, + mmread, mmwrite, mmflush, mmblindwrt, mmnblocks, NULL, + mmcommit, mmabort}, -#endif /* MAIN_MEMORY */ +#endif /* MAIN_MEMORY */ }; /* - * This array records which storage managers are write-once, and which - * support overwrite. A 'true' entry means that the storage manager is - * write-once. In the best of all possible worlds, there would be no - * write-once storage managers. + * This array records which storage managers are write-once, and which + * support overwrite. A 'true' entry means that the storage manager is + * write-once. In the best of all possible worlds, there would be no + * write-once storage managers. */ -static bool smgrwo[] = { - false, /* magnetic disk */ +static bool smgrwo[] = { + false, /* magnetic disk */ #ifdef MAIN_MEMORY - false, /* main memory*/ -#endif /* MAIN_MEMORY */ + false, /* main memory */ +#endif /* MAIN_MEMORY */ }; -static int NSmgr = lengthof(smgrsw); +static int NSmgr = lengthof(smgrsw); /* - * smgrinit(), smgrshutdown() -- Initialize or shut down all storage - * managers. 
+ * smgrinit(), smgrshutdown() -- Initialize or shut down all storage + * managers. * */ int smgrinit() { - int i; - - for (i = 0; i < NSmgr; i++) { - if (smgrsw[i].smgr_init) { - if ((*(smgrsw[i].smgr_init))() == SM_FAIL) - elog(FATAL, "initialization failed on %s", smgrout(i)); + int i; + + for (i = 0; i < NSmgr; i++) + { + if (smgrsw[i].smgr_init) + { + if ((*(smgrsw[i].smgr_init)) () == SM_FAIL) + elog(FATAL, "initialization failed on %s", smgrout(i)); + } } - } - /* register the shutdown proc */ - on_exitpg(smgrshutdown, 0); + /* register the shutdown proc */ + on_exitpg(smgrshutdown, 0); - return (SM_SUCCESS); + return (SM_SUCCESS); } static void smgrshutdown(int dummy) { - int i; - - for (i = 0; i < NSmgr; i++) { - if (smgrsw[i].smgr_shutdown) { - if ((*(smgrsw[i].smgr_shutdown))() == SM_FAIL) - elog(FATAL, "shutdown failed on %s", smgrout(i)); + int i; + + for (i = 0; i < NSmgr; i++) + { + if (smgrsw[i].smgr_shutdown) + { + if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL) + elog(FATAL, "shutdown failed on %s", smgrout(i)); + } } - } } /* - * smgrcreate() -- Create a new relation. + * smgrcreate() -- Create a new relation. * - * This routine takes a reldesc, creates the relation on the appropriate - * device, and returns a file descriptor for it. + * This routine takes a reldesc, creates the relation on the appropriate + * device, and returns a file descriptor for it. */ int smgrcreate(int16 which, Relation reln) { - int fd; + int fd; - if ((fd = (*(smgrsw[which].smgr_create))(reln)) < 0) - elog(WARN, "cannot open %s", - &(reln->rd_rel->relname.data[0])); + if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0) + elog(WARN, "cannot open %s", + &(reln->rd_rel->relname.data[0])); - return (fd); + return (fd); } /* - * smgrunlink() -- Unlink a relation. + * smgrunlink() -- Unlink a relation. * - * The relation is removed from the store. + * The relation is removed from the store. */ int smgrunlink(int16 which, Relation reln) { - int status; + int status; - if ((status = (*(smgrsw[which].smgr_unlink))(reln)) == SM_FAIL) - elog(WARN, "cannot unlink %s", - &(reln->rd_rel->relname.data[0])); + if ((status = (*(smgrsw[which].smgr_unlink)) (reln)) == SM_FAIL) + elog(WARN, "cannot unlink %s", + &(reln->rd_rel->relname.data[0])); - return (status); + return (status); } /* - * smgrextend() -- Add a new block to a file. + * smgrextend() -- Add a new block to a file. * - * Returns SM_SUCCESS on success; aborts the current transaction on - * failure. + * Returns SM_SUCCESS on success; aborts the current transaction on + * failure. */ int smgrextend(int16 which, Relation reln, char *buffer) { - int status; + int status; - status = (*(smgrsw[which].smgr_extend))(reln, buffer); + status = (*(smgrsw[which].smgr_extend)) (reln, buffer); - if (status == SM_FAIL) - elog(WARN, "%s: cannot extend", - &(reln->rd_rel->relname.data[0])); + if (status == SM_FAIL) + elog(WARN, "%s: cannot extend", + &(reln->rd_rel->relname.data[0])); - return (status); + return (status); } /* - * smgropen() -- Open a relation using a particular storage manager. + * smgropen() -- Open a relation using a particular storage manager. * - * Returns the fd for the open relation on success, aborts the - * transaction on failure. + * Returns the fd for the open relation on success, aborts the + * transaction on failure. 
+ * Returns the fd for the open relation on success, aborts the
+ * transaction on failure.
 */
int
smgropen(int16 which, Relation reln)
{
- int fd;
+ int fd;
- if ((fd = (*(smgrsw[which].smgr_open))(reln)) < 0)
- elog(WARN, "cannot open %s",
- &(reln->rd_rel->relname.data[0]));
+ if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0)
+ elog(WARN, "cannot open %s",
+ &(reln->rd_rel->relname.data[0]));
- return (fd);
+ return (fd);
}
/*
- * smgrclose() -- Close a relation.
+ * smgrclose() -- Close a relation.
 *
- * NOTE: mdclose frees fd vector! It may be re-used for other relation!
- * reln should be flushed from cache after closing !..
- * Currently, smgrclose is calling by
- * relcache.c:RelationPurgeLocalRelation() only.
- * It would be nice to have smgrfree(), but because of
- * smgrclose is called from single place... - vadim 05/22/97
+ * NOTE: mdclose frees the fd vector! It may be re-used for another relation!
+ * reln should be flushed from the cache after closing.
+ * Currently, smgrclose is called only by
+ * relcache.c:RelationPurgeLocalRelation().
+ * It would be nice to have smgrfree(), but since
+ * smgrclose is called from a single place... - vadim 05/22/97
 *
- * Returns SM_SUCCESS on success, aborts on failure.
+ * Returns SM_SUCCESS on success, aborts on failure.
 */
int
smgrclose(int16 which, Relation reln)
{
- if ((*(smgrsw[which].smgr_close))(reln) == SM_FAIL)
- elog(WARN, "cannot close %s",
- &(reln->rd_rel->relname.data[0]));
+ if ((*(smgrsw[which].smgr_close)) (reln) == SM_FAIL)
+ elog(WARN, "cannot close %s",
+ &(reln->rd_rel->relname.data[0]));
- return (SM_SUCCESS);
+ return (SM_SUCCESS);
}
/*
- * smgrread() -- read a particular block from a relation into the supplied
- * buffer.
+ * smgrread() -- read a particular block from a relation into the supplied
+ * buffer.
 *
- * This routine is called from the buffer manager in order to
- * instantiate pages in the shared buffer cache. All storage managers
- * return pages in the format that POSTGRES expects. This routine
- * dispatches the read. On success, it returns SM_SUCCESS. On failure,
- * the current transaction is aborted.
+ * This routine is called from the buffer manager in order to
+ * instantiate pages in the shared buffer cache. All storage managers
+ * return pages in the format that POSTGRES expects. This routine
+ * dispatches the read. On success, it returns SM_SUCCESS. On failure,
+ * the current transaction is aborted.
 */
int
smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
- int status;
+ int status;
- status = (*(smgrsw[which].smgr_read))(reln, blocknum, buffer);
+ status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer);
- if (status == SM_FAIL)
- elog(WARN, "cannot read block %d of %s",
- blocknum, &(reln->rd_rel->relname.data[0]));
+ if (status == SM_FAIL)
+ elog(WARN, "cannot read block %d of %s",
+ blocknum, &(reln->rd_rel->relname.data[0]));
- return (status);
+ return (status);
}
/*
- * smgrwrite() -- Write the supplied buffer out.
 *
- * This is not a synchronous write -- the interface for that is
- * smgrflush(). The buffer is written out via the appropriate
- * storage manager. This routine returns SM_SUCCESS or aborts
- * the current transaction.
+ * This is not a synchronous write -- the interface for that is
+ * smgrflush(). The buffer is written out via the appropriate
+ * storage manager. This routine returns SM_SUCCESS or aborts
+ * the current transaction. 
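[Editor's note: every smgr entry point, including smgrwrite() below, dispatches through the f_smgr function-pointer table shown earlier. A minimal standalone sketch of that switch pattern; all demo_* names are invented:]

#include <stdio.h>

#define SM_SUCCESS	0
#define SM_FAIL		(-1)

/* one entry per storage manager, as in the f_smgr switch above */
typedef struct demo_smgr
{
	int			(*smgr_write) (int blkno, const char *buffer);
} demo_smgr;

static int
demo_md_write(int blkno, const char *buffer)
{
	printf("magnetic disk: writing block %d\n", blkno);
	return SM_SUCCESS;
}

static demo_smgr demo_smgrsw[] = {
	{demo_md_write},			/* 0: magnetic disk */
};

/* dispatch through the switch, as the real smgrwrite() does */
static int
demo_smgrwrite(short which, int blkno, const char *buffer)
{
	return (*(demo_smgrsw[which].smgr_write)) (blkno, buffer);
}

int
main(void)
{
	char		page[8] = {0};

	return (demo_smgrwrite(0, 7, page) == SM_SUCCESS) ? 0 : 1;
}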
*/ int smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer) { - int status; + int status; - status = (*(smgrsw[which].smgr_write))(reln, blocknum, buffer); + status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer); - if (status == SM_FAIL) - elog(WARN, "cannot write block %d of %s", - blocknum, &(reln->rd_rel->relname.data[0])); + if (status == SM_FAIL) + elog(WARN, "cannot write block %d of %s", + blocknum, &(reln->rd_rel->relname.data[0])); - return (status); + return (status); } /* - * smgrflush() -- A synchronous smgrwrite(). + * smgrflush() -- A synchronous smgrwrite(). */ int smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer) { - int status; + int status; - status = (*(smgrsw[which].smgr_flush))(reln, blocknum, buffer); + status = (*(smgrsw[which].smgr_flush)) (reln, blocknum, buffer); - if (status == SM_FAIL) - elog(WARN, "cannot flush block %d of %s to stable store", - blocknum, &(reln->rd_rel->relname.data[0])); + if (status == SM_FAIL) + elog(WARN, "cannot flush block %d of %s to stable store", + blocknum, &(reln->rd_rel->relname.data[0])); - return (status); + return (status); } /* - * smgrblindwrt() -- Write a page out blind. + * smgrblindwrt() -- Write a page out blind. * - * In some cases, we may find a page in the buffer cache that we - * can't make a reldesc for. This happens, for example, when we - * want to reuse a dirty page that was written by a transaction - * that has not yet committed, which created a new relation. In - * this case, the buffer manager will call smgrblindwrt() with - * the name and OID of the database and the relation to which the - * buffer belongs. Every storage manager must be able to force - * this page down to stable storage in this circumstance. + * In some cases, we may find a page in the buffer cache that we + * can't make a reldesc for. This happens, for example, when we + * want to reuse a dirty page that was written by a transaction + * that has not yet committed, which created a new relation. In + * this case, the buffer manager will call smgrblindwrt() with + * the name and OID of the database and the relation to which the + * buffer belongs. Every storage manager must be able to force + * this page down to stable storage in this circumstance. */ int smgrblindwrt(int16 which, - char *dbname, - char *relname, - Oid dbid, - Oid relid, - BlockNumber blkno, - char *buffer) + char *dbname, + char *relname, + Oid dbid, + Oid relid, + BlockNumber blkno, + char *buffer) { - char *dbstr; - char *relstr; - int status; + char *dbstr; + char *relstr; + int status; - dbstr = pstrdup(dbname); - relstr = pstrdup(relname); + dbstr = pstrdup(dbname); + relstr = pstrdup(relname); - status = (*(smgrsw[which].smgr_blindwrt))(dbstr, relstr, dbid, relid, - blkno, buffer); + status = (*(smgrsw[which].smgr_blindwrt)) (dbstr, relstr, dbid, relid, + blkno, buffer); - if (status == SM_FAIL) - elog(WARN, "cannot write block %d of %s [%s] blind", - blkno, relstr, dbstr); + if (status == SM_FAIL) + elog(WARN, "cannot write block %d of %s [%s] blind", + blkno, relstr, dbstr); - pfree(dbstr); - pfree(relstr); + pfree(dbstr); + pfree(relstr); - return (status); + return (status); } /* - * smgrnblocks() -- Calculate the number of POSTGRES blocks in the - * supplied relation. + * smgrnblocks() -- Calculate the number of POSTGRES blocks in the + * supplied relation. * - * Returns the number of blocks on success, aborts the current - * transaction on failure. 
+ * Returns the number of blocks on success, aborts the current + * transaction on failure. */ int smgrnblocks(int16 which, Relation reln) { - int nblocks; + int nblocks; - if ((nblocks = (*(smgrsw[which].smgr_nblocks))(reln)) < 0) - elog(WARN, "cannot count blocks for %s", - &(reln->rd_rel->relname.data[0])); + if ((nblocks = (*(smgrsw[which].smgr_nblocks)) (reln)) < 0) + elog(WARN, "cannot count blocks for %s", + &(reln->rd_rel->relname.data[0])); - return (nblocks); + return (nblocks); } /* - * smgrtruncate() -- Truncate supplied relation to a specified number - * of blocks + * smgrtruncate() -- Truncate supplied relation to a specified number + * of blocks * - * Returns the number of blocks on success, aborts the current - * transaction on failure. + * Returns the number of blocks on success, aborts the current + * transaction on failure. */ int smgrtruncate(int16 which, Relation reln, int nblocks) { - int newblks; - - newblks = nblocks; - if (smgrsw[which].smgr_truncate) - { - if ((newblks = (*(smgrsw[which].smgr_truncate))(reln, nblocks)) < 0) - elog(WARN, "cannot truncate %s to %d blocks", - &(reln->rd_rel->relname.data[0]), nblocks); - } - - return (newblks); + int newblks; + + newblks = nblocks; + if (smgrsw[which].smgr_truncate) + { + if ((newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks)) < 0) + elog(WARN, "cannot truncate %s to %d blocks", + &(reln->rd_rel->relname.data[0]), nblocks); + } + + return (newblks); } /* - * smgrcommit(), smgrabort() -- Commit or abort changes made during the - * current transaction. + * smgrcommit(), smgrabort() -- Commit or abort changes made during the + * current transaction. */ int smgrcommit() { - int i; - - for (i = 0; i < NSmgr; i++) { - if (smgrsw[i].smgr_commit) { - if ((*(smgrsw[i].smgr_commit))() == SM_FAIL) - elog(FATAL, "transaction commit failed on %s", smgrout(i)); + int i; + + for (i = 0; i < NSmgr; i++) + { + if (smgrsw[i].smgr_commit) + { + if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL) + elog(FATAL, "transaction commit failed on %s", smgrout(i)); + } } - } - return (SM_SUCCESS); + return (SM_SUCCESS); } #ifdef NOT_USED int smgrabort() { - int i; - - for (i = 0; i < NSmgr; i++) { - if (smgrsw[i].smgr_abort) { - if ((*(smgrsw[i].smgr_abort))() == SM_FAIL) - elog(FATAL, "transaction abort failed on %s", smgrout(i)); + int i; + + for (i = 0; i < NSmgr; i++) + { + if (smgrsw[i].smgr_abort) + { + if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL) + elog(FATAL, "transaction abort failed on %s", smgrout(i)); + } } - } - return (SM_SUCCESS); + return (SM_SUCCESS); } + #endif bool smgriswo(int16 smgrno) { - if (smgrno < 0 || smgrno >= NSmgr) - elog(WARN, "illegal storage manager number %d", smgrno); + if (smgrno < 0 || smgrno >= NSmgr) + elog(WARN, "illegal storage manager number %d", smgrno); - return (smgrwo[smgrno]); + return (smgrwo[smgrno]); } diff --git a/src/backend/storage/smgr/smgrtype.c b/src/backend/storage/smgr/smgrtype.c index cb32d458b82..64fb53c9c2e 100644 --- a/src/backend/storage/smgr/smgrtype.c +++ b/src/backend/storage/smgr/smgrtype.c @@ -1,81 +1,83 @@ /*------------------------------------------------------------------------- * * smgrtype.c-- - * storage manager type + * storage manager type * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgrtype.c,v 1.2 1996/11/03 05:08:01 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgrtype.c,v 1.3 1997/09/07 04:49:26 momjian Exp $ * 
*-------------------------------------------------------------------------
 */
#include <string.h>
#include "postgres.h"
-#include "utils/builtins.h" /* where the declarations go */
+#include "utils/builtins.h" /* where the declarations go */
#include "utils/palloc.h"
#include "storage/smgr.h"
-typedef struct smgrid {
- char *smgr_name;
-} smgrid;
+typedef struct smgrid
+{
+ char *smgr_name;
+} smgrid;
/*
- * StorageManager[] -- List of defined storage managers.
 *
- * The weird comma placement is to keep compilers happy no matter
- * which of these is (or is not) defined.
+ * The weird comma placement is to keep compilers happy no matter
+ * which of these is (or is not) defined.
 */
-static smgrid StorageManager[] = {
+static smgrid StorageManager[] = {
{"magnetic disk"},
#ifdef MAIN_MEMORY
{"main memory"}
-#endif /* MAIN_MEMORY */
+#endif /* MAIN_MEMORY */
};
-static int NStorageManagers = lengthof(StorageManager);
+static int NStorageManagers = lengthof(StorageManager);
int2
smgrin(char *s)
{
- int i;
+ int i;
- for (i = 0; i < NStorageManagers; i++) {
- if (strcmp(s, StorageManager[i].smgr_name) == 0)
- return((int2) i);
- }
- elog(WARN, "smgrin: illegal storage manager name %s", s);
- return 0;
+ for (i = 0; i < NStorageManagers; i++)
+ {
+ if (strcmp(s, StorageManager[i].smgr_name) == 0)
+ return ((int2) i);
+ }
+ elog(WARN, "smgrin: illegal storage manager name %s", s);
+ return 0;
}
-char *
+char *
smgrout(int2 i)
{
- char *s;
+ char *s;
- if (i >= NStorageManagers || i < 0)
- elog(WARN, "Illegal storage manager id %d", i);
+ if (i >= NStorageManagers || i < 0)
+ elog(WARN, "Illegal storage manager id %d", i);
- s = (char *) palloc(strlen(StorageManager[i].smgr_name) + 1);
- strcpy(s, StorageManager[i].smgr_name);
- return (s);
+ s = (char *) palloc(strlen(StorageManager[i].smgr_name) + 1);
+ strcpy(s, StorageManager[i].smgr_name);
+ return (s);
}
bool
smgreq(int2 a, int2 b)
{
- if (a == b)
- return (true);
- return (false);
+ if (a == b)
+ return (true);
+ return (false);
}
bool
smgrne(int2 a, int2 b)
{
- if (a == b)
- return (false);
- return (true);
+ if (a == b)
+ return (false);
+ return (true);
}
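[Editor's note: a standalone sketch of the name/id mapping that smgrin() and smgrout() implement above; demo_* names are stand-ins, malloc() stands in for palloc(), and a -1/NULL return stands in for elog(WARN):]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char *demo_managers[] = {
	"magnetic disk",
	"main memory",
};

#define DEMO_NMGRS ((int) (sizeof(demo_managers) / sizeof(demo_managers[0])))

/* name -> id, as smgrin() does */
static int
demo_smgrin(const char *s)
{
	int			i;

	for (i = 0; i < DEMO_NMGRS; i++)
	{
		if (strcmp(s, demo_managers[i]) == 0)
			return i;
	}
	return -1;					/* the backend elog(WARN)s here */
}

/* id -> freshly allocated name, as smgrout() does with palloc() */
static char *
demo_smgrout(int i)
{
	char	   *s;

	if (i < 0 || i >= DEMO_NMGRS)
		return NULL;
	s = malloc(strlen(demo_managers[i]) + 1);
	if (s != NULL)
		strcpy(s, demo_managers[i]);
	return s;
}

int
main(void)
{
	char	   *name = demo_smgrout(demo_smgrin("magnetic disk"));

	if (name != NULL)
	{
		puts(name);
		free(name);
	}
	return 0;
}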