diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-05-05 00:03:29 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-05-05 00:03:29 +0000 |
commit | 72a3902a664c7fbceb2034e28e444b28f96fa717 (patch) | |
tree | ff42e4494af6ea1c1cdf524f3feb5fc670217f0c /src/backend/port | |
parent | 91fc10fdacfcbadc123fd9d8ee16a4568f8c636b (diff) | |
download | postgresql-72a3902a664c7fbceb2034e28e444b28f96fa717.tar.gz postgresql-72a3902a664c7fbceb2034e28e444b28f96fa717.zip |
Create an internal semaphore API that is not tied to SysV semaphores.
As proof of concept, provide an alternate implementation based on POSIX
semaphores. Also push the SysV shared-memory implementation into a
separate file so that it can be replaced conveniently.
Diffstat (limited to 'src/backend/port')
-rw-r--r-- | src/backend/port/Makefile | 4 | ||||
-rw-r--r-- | src/backend/port/posix_sema.c | 357 | ||||
-rw-r--r-- | src/backend/port/sysv_sema.c | 522 | ||||
-rw-r--r-- | src/backend/port/sysv_shmem.c | 400 |
4 files changed, 1281 insertions, 2 deletions
diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile index 93823b44cd7..1370cdbb78b 100644 --- a/src/backend/port/Makefile +++ b/src/backend/port/Makefile @@ -13,7 +13,7 @@ # be converted to Method 2. # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/port/Makefile,v 1.11 2002/03/13 00:05:06 petere Exp $ +# $Header: /cvsroot/pgsql/src/backend/port/Makefile,v 1.12 2002/05/05 00:03:28 tgl Exp $ # #------------------------------------------------------------------------- @@ -21,7 +21,7 @@ subdir = src/backend/port top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -OBJS = dynloader.o +OBJS = dynloader.o pg_sema.o pg_shmem.o OBJS += $(GETHOSTNAME) $(GETRUSAGE) $(INET_ATON) $(ISINF) $(MEMCMP) \ $(MISSING_RANDOM) $(SNPRINTF) $(SRANDOM) $(STRCASECMP) $(STRERROR) \ diff --git a/src/backend/port/posix_sema.c b/src/backend/port/posix_sema.c new file mode 100644 index 00000000000..1dd02f8def6 --- /dev/null +++ b/src/backend/port/posix_sema.c @@ -0,0 +1,357 @@ +/*------------------------------------------------------------------------- + * + * posix_sema.c + * Implement PGSemaphores using POSIX semaphore facilities + * + * We prefer the unnamed style of POSIX semaphore (the kind made with + * sem_init). We can cope with the kind made with sem_open, however. 
+ * + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/port/posix_sema.c,v 1.1 2002/05/05 00:03:28 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <errno.h> +#include <signal.h> +#include <unistd.h> + +#include "storage/pg_sema.h" + + +#ifdef USE_NAMED_POSIX_SEMAPHORES +/* PGSemaphore is pointer to pointer to sem_t */ +#define PG_SEM_REF(x) (*(x)) +#else +/* PGSemaphore is pointer to sem_t */ +#define PG_SEM_REF(x) (x) +#endif + + +#define IPCProtection (0600) /* access/modify by user only */ + +static sem_t **mySemPointers; /* keep track of created semaphores */ +static int numSems; /* number of semas acquired so far */ +static int maxSems; /* allocated size of mySemPointers array */ +static int nextSemKey; /* next name to try */ + + +static void ReleaseSemaphores(int status, Datum arg); + + +#ifdef USE_NAMED_POSIX_SEMAPHORES + +/* + * PosixSemaphoreCreate + * + * Attempt to create a new named semaphore. + * + * If we fail with a failure code other than collision-with-existing-sema, + * print out an error and abort. Other types of errors suggest nonrecoverable + * problems. 
+ */ +static sem_t * +PosixSemaphoreCreate(void) +{ + int semKey; + char semname[64]; + sem_t *mySem; + + for (;;) + { + semKey = nextSemKey++; + + snprintf(semname, sizeof(semname), "/pgsql-%d", semKey); + + mySem = sem_open(semname, O_CREAT | O_EXCL, + (mode_t) IPCProtection, (unsigned) 1); + if (mySem != SEM_FAILED) + break; + + /* Loop if error indicates a collision */ + if (errno == EEXIST || errno == EACCES || errno == EINTR) + continue; + + /* + * Else complain and abort + */ + fprintf(stderr, "PosixSemaphoreCreate: sem_open(%s) failed: %s\n", + semname, strerror(errno)); + proc_exit(1); + } + + /* + * Unlink the semaphore immediately, so it can't be accessed externally. + * This also ensures that it will go away if we crash. + */ + sem_unlink(semname); + + return mySem; +} + +#else /* !USE_NAMED_POSIX_SEMAPHORES */ + +/* + * PosixSemaphoreCreate + * + * Attempt to create a new unnamed semaphore. + */ +static void +PosixSemaphoreCreate(sem_t *sem) +{ + if (sem_init(sem, 1, 1) < 0) + { + fprintf(stderr, "PosixSemaphoreCreate: sem_init failed: %s\n", + strerror(errno)); + proc_exit(1); + } +} + +#endif /* USE_NAMED_POSIX_SEMAPHORES */ + + +/* + * PosixSemaphoreKill - removes a semaphore + */ +static void +PosixSemaphoreKill(sem_t *sem) +{ +#ifdef USE_NAMED_POSIX_SEMAPHORES + /* Got to use sem_close for named semaphores */ + if (sem_close(sem) < 0) + fprintf(stderr, "PosixSemaphoreKill: sem_close failed: %s\n", + strerror(errno)); +#else + /* Got to use sem_destroy for unnamed semaphores */ + if (sem_destroy(sem) < 0) + fprintf(stderr, "PosixSemaphoreKill: sem_destroy failed: %s\n", + strerror(errno)); +#endif +} + + +/* + * PGReserveSemaphores --- initialize semaphore support + * + * This is called during postmaster start or shared memory reinitialization. + * It should do whatever is needed to be able to support up to maxSemas + * subsequent PGSemaphoreCreate calls. 
Also, if any system resources + * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit + * callback to release them. + * + * The port number is passed for possible use as a key (for Posix, we use + * it to generate the starting semaphore name). In a standalone backend, + * zero will be passed. + * + * In the Posix implementation, we acquire semaphores on-demand; the + * maxSemas parameter is just used to size the array that keeps track of + * acquired semas for subsequent releasing. + */ +void +PGReserveSemaphores(int maxSemas, int port) +{ + mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *)); + if (mySemPointers == NULL) + elog(PANIC, "Out of memory in PGReserveSemaphores"); + numSems = 0; + maxSems = maxSemas; + nextSemKey = port * 1000; + + on_shmem_exit(ReleaseSemaphores, 0); +} + +/* + * Release semaphores at shutdown or shmem reinitialization + * + * (called as an on_shmem_exit callback, hence funny argument list) + */ +static void +ReleaseSemaphores(int status, Datum arg) +{ + int i; + + for (i = 0; i < numSems; i++) + PosixSemaphoreKill(mySemPointers[i]); + free(mySemPointers); +} + +/* + * PGSemaphoreCreate + * + * Initialize a PGSemaphore structure to represent a sema with count 1 + */ +void +PGSemaphoreCreate(PGSemaphore sema) +{ + sem_t *newsem; + + /* Can't do this in a backend, because static state is postmaster's */ + Assert(!IsUnderPostmaster); + + if (numSems >= maxSems) + elog(PANIC, "PGSemaphoreCreate: too many semaphores created"); + +#ifdef USE_NAMED_POSIX_SEMAPHORES + *sema = newsem = PosixSemaphoreCreate(); +#else + PosixSemaphoreCreate(sema); + newsem = sema; +#endif + + /* Remember new sema for ReleaseSemaphores */ + mySemPointers[numSems++] = newsem; +} + +/* + * PGSemaphoreReset + * + * Reset a previously-initialized PGSemaphore to have count 0 + */ +void +PGSemaphoreReset(PGSemaphore sema) +{ + /* + * There's no direct API for this in POSIX, so we have to ratchet the + * semaphore down to 0 with repeated 
trywait's. + */ + for (;;) + { + if (sem_trywait(PG_SEM_REF(sema)) < 0) + { + if (errno == EAGAIN || errno == EDEADLK) + break; /* got it down to 0 */ + if (errno == EINTR) + continue; /* can this happen? */ + fprintf(stderr, "PGSemaphoreReset: sem_trywait failed: %s\n", + strerror(errno)); + proc_exit(1); + } + } +} + +/* + * PGSemaphoreLock + * + * Lock a semaphore (decrement count), blocking if count would be < 0 + */ +void +PGSemaphoreLock(PGSemaphore sema, bool interruptOK) +{ + int errStatus; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. + * + * Each time around the loop, we check for a cancel/die interrupt. We + * assume that if such an interrupt comes in while we are waiting, it + * will cause the sem_wait() call to exit with errno == EINTR, so that we + * will be able to service the interrupt (if not in a critical section + * already). + * + * Once we acquire the lock, we do NOT check for an interrupt before + * returning. The caller needs to be able to record ownership of the + * lock before any interrupt can be accepted. + * + * There is a window of a few instructions between CHECK_FOR_INTERRUPTS + * and entering the sem_wait() call. If a cancel/die interrupt occurs in + * that window, we would fail to notice it until after we acquire the + * lock (or get another interrupt to escape the sem_wait()). We can + * avoid this problem by temporarily setting ImmediateInterruptOK to + * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in + * this interval will execute directly. However, there is a huge + * pitfall: there is another window of a few instructions after the + * sem_wait() before we are able to reset ImmediateInterruptOK. If an + * interrupt occurs then, we'll lose control, which means that the + * lock has been acquired but our caller did not get a chance to + * record the fact. 
Therefore, we only set ImmediateInterruptOK if the + * caller tells us it's OK to do so, ie, the caller does not need to + * record acquiring the lock. (This is currently true for lockmanager + * locks, since the process that granted us the lock did all the + * necessary state updates. It's not true for Posix semaphores used to + * implement LW locks or emulate spinlocks --- but the wait time for + * such locks should not be very long, anyway.) + */ + do + { + ImmediateInterruptOK = interruptOK; + CHECK_FOR_INTERRUPTS(); + errStatus = sem_wait(PG_SEM_REF(sema)); + ImmediateInterruptOK = false; + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreLock: sem_wait failed: %s\n", + strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreUnlock + * + * Unlock a semaphore (increment count) + */ +void +PGSemaphoreUnlock(PGSemaphore sema) +{ + int errStatus; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and unlock the semaphore again. Not clear this + * can really happen, but might as well cope. + */ + do + { + errStatus = sem_post(PG_SEM_REF(sema)); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreUnlock: sem_post failed: %s\n", + strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreTryLock + * + * Lock a semaphore only if able to do so without blocking + */ +bool +PGSemaphoreTryLock(PGSemaphore sema) +{ + int errStatus; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. 
+ */ + do + { + errStatus = sem_trywait(PG_SEM_REF(sema)); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + if (errno == EAGAIN || errno == EDEADLK) + return false; /* failed to lock it */ + /* Otherwise we got trouble */ + fprintf(stderr, "PGSemaphoreTryLock: sem_trywait failed: %s\n", + strerror(errno)); + proc_exit(255); + } + + return true; +} diff --git a/src/backend/port/sysv_sema.c b/src/backend/port/sysv_sema.c new file mode 100644 index 00000000000..d868602de2e --- /dev/null +++ b/src/backend/port/sysv_sema.c @@ -0,0 +1,522 @@ +/*------------------------------------------------------------------------- + * + * sysv_sema.c + * Implement PGSemaphores using SysV semaphore facilities + * + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/port/sysv_sema.c,v 1.1 2002/05/05 00:03:28 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <errno.h> +#include <signal.h> +#include <unistd.h> +#include <sys/file.h> +#include <sys/types.h> +#ifdef HAVE_SYS_IPC_H +#include <sys/ipc.h> +#endif +#ifdef HAVE_SYS_SEM_H +#include <sys/sem.h> +#endif +#ifdef HAVE_KERNEL_OS_H +#include <kernel/OS.h> +#endif + +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/pg_sema.h" + + +#ifndef HAVE_UNION_SEMUN +union semun +{ + int val; + struct semid_ds *buf; + unsigned short *array; +}; +#endif + +typedef uint32 IpcSemaphoreKey; /* semaphore key passed to semget(2) */ +typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */ + +/* + * SEMAS_PER_SET is the number of useful semaphores in each semaphore set + * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores + * per set) parameter, which is often around 25. 
(Less than, because we + * allocate one extra sema in each set for identification purposes.) + */ +#define SEMAS_PER_SET 16 + +#define IPCProtection (0600) /* access/modify by user only */ + +#define PGSemaMagic 537 /* must be less than SEMVMX */ + + +static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */ +static int numSemaSets; /* number of sema sets acquired so far */ +static int maxSemaSets; /* allocated size of mySemaSets array */ +static IpcSemaphoreKey nextSemaKey; /* next key to try using */ +static int nextSemaNumber; /* next free sem num in last sema set */ + + +static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, + int numSems); +static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, + int value); +static void IpcSemaphoreKill(IpcSemaphoreId semId); +static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum); +static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum); +static IpcSemaphoreId IpcSemaphoreCreate(int numSems); +static void ReleaseSemaphores(int status, Datum arg); + + +/* + * InternalIpcSemaphoreCreate + * + * Attempt to create a new semaphore set with the specified key. + * Will fail (return -1) if such a set already exists. + * + * If we fail with a failure code other than collision-with-existing-set, + * print out an error and abort. Other types of errors suggest nonrecoverable + * problems. + */ +static IpcSemaphoreId +InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems) +{ + int semId; + + semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection); + + if (semId < 0) + { + /* + * Fail quietly if error indicates a collision with existing set. + * One would expect EEXIST, given that we said IPC_EXCL, but + * perhaps we could get a permission violation instead? Also, + * EIDRM might occur if an old set is slated for destruction but + * not gone yet. 
+ */ + if (errno == EEXIST || errno == EACCES +#ifdef EIDRM + || errno == EIDRM +#endif + ) + return -1; + + /* + * Else complain and abort + */ + fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n", + (int) semKey, numSems, (IPC_CREAT | IPC_EXCL | IPCProtection), + strerror(errno)); + + if (errno == ENOSPC) + fprintf(stderr, + "\nThis error does *not* mean that you have run out of disk space.\n" + "\n" + "It occurs when either the system limit for the maximum number of\n" + "semaphore sets (SEMMNI), or the system wide maximum number of\n" + "semaphores (SEMMNS), would be exceeded. You need to raise the\n" + "respective kernel parameter. Alternatively, reduce PostgreSQL's\n" + "consumption of semaphores by reducing its max_connections parameter\n" + "(currently %d).\n" + "\n" + "The PostgreSQL Administrator's Guide contains more information about\n" + "configuring your system for PostgreSQL.\n\n", + MaxBackends); + + proc_exit(1); + } + + return semId; +} + +/* + * Initialize a semaphore to the specified value. + */ +static void +IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value) +{ + union semun semun; + + semun.val = value; + if (semctl(semId, semNum, SETVAL, semun) < 0) + { + fprintf(stderr, "IpcSemaphoreInitialize: semctl(id=%d, %d, SETVAL, %d) failed: %s\n", + semId, semNum, value, strerror(errno)); + + if (errno == ERANGE) + fprintf(stderr, + "You possibly need to raise your kernel's SEMVMX value to be at least\n" + "%d. Look into the PostgreSQL documentation for details.\n", + value); + + proc_exit(1); + } +} + +/* + * IpcSemaphoreKill(semId) - removes a semaphore set + */ +static void +IpcSemaphoreKill(IpcSemaphoreId semId) +{ + union semun semun; + + semun.val = 0; /* unused, but keep compiler quiet */ + + if (semctl(semId, 0, IPC_RMID, semun) < 0) + fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) 
failed: %s\n", + semId, strerror(errno)); + + /* + * We used to report a failure via elog(WARNING), but that's pretty + * pointless considering any client has long since disconnected ... + */ +} + +/* Get the current value (semval) of the semaphore */ +static int +IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum) +{ + union semun dummy; /* for Solaris */ + + dummy.val = 0; /* unused */ + + return semctl(semId, semNum, GETVAL, dummy); +} + +/* Get the PID of the last process to do semop() on the semaphore */ +static pid_t +IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum) +{ + union semun dummy; /* for Solaris */ + + dummy.val = 0; /* unused */ + + return semctl(semId, semNum, GETPID, dummy); +} + + +/* + * Create a semaphore set with the given number of useful semaphores + * (an additional sema is actually allocated to serve as identifier). + * Dead Postgres sema sets are recycled if found, but we do not fail + * upon collision with non-Postgres sema sets. + * + * The idea here is to detect and re-use keys that may have been assigned + * by a crashed postmaster or backend. + */ +static IpcSemaphoreId +IpcSemaphoreCreate(int numSems) +{ + IpcSemaphoreId semId; + union semun semun; + PGSemaphoreData mysema; + + /* Loop till we find a free IPC key */ + for (nextSemaKey++; ; nextSemaKey++) + { + pid_t creatorPID; + + /* Try to create new semaphore set */ + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + if (semId >= 0) + break; /* successful create */ + + /* See if it looks to be leftover from a dead Postgres process */ + semId = semget(nextSemaKey, numSems + 1, 0); + if (semId < 0) + continue; /* failed: must be some other app's */ + if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic) + continue; /* sema belongs to a non-Postgres app */ + + /* + * If the creator PID is my own PID or does not belong to any + * extant process, it's safe to zap it. 
+ */ + creatorPID = IpcSemaphoreGetLastPID(semId, numSems); + if (creatorPID <= 0) + continue; /* oops, GETPID failed */ + if (creatorPID != getpid()) + { + if (kill(creatorPID, 0) == 0 || + errno != ESRCH) + continue; /* sema belongs to a live process */ + } + + /* + * The sema set appears to be from a dead Postgres process, or + * from a previous cycle of life in this same process. Zap it, if + * possible. This probably shouldn't fail, but if it does, assume + * the sema set belongs to someone else after all, and continue + * quietly. + */ + semun.val = 0; /* unused, but keep compiler quiet */ + if (semctl(semId, 0, IPC_RMID, semun) < 0) + continue; + + /* + * Now try again to create the sema set. + */ + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + if (semId >= 0) + break; /* successful create */ + + /* + * Can only get here if some other process managed to create the + * same sema key before we did. Let him have that one, loop + * around to try next key. + */ + } + + /* + * OK, we created a new sema set. Mark it as created by this process. + * We do this by setting the spare semaphore to PGSemaMagic-1 and then + * incrementing it with semop(). That leaves it with value + * PGSemaMagic and sempid referencing this process. + */ + IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1); + mysema.semId = semId; + mysema.semNum = numSems; + PGSemaphoreUnlock(&mysema); + + return semId; +} + + +/* + * PGReserveSemaphores --- initialize semaphore support + * + * This is called during postmaster start or shared memory reinitialization. + * It should do whatever is needed to be able to support up to maxSemas + * subsequent PGSemaphoreCreate calls. Also, if any system resources + * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit + * callback to release them. + * + * The port number is passed for possible use as a key (for SysV, we use + * it to generate the starting semaphore key). In a standalone backend, + * zero will be passed. 
+ * + * In the SysV implementation, we acquire semaphore sets on-demand; the + * maxSemas parameter is just used to size the array that keeps track of + * acquired sets for subsequent releasing. + */ +void +PGReserveSemaphores(int maxSemas, int port) +{ + maxSemaSets = (maxSemas + SEMAS_PER_SET-1) / SEMAS_PER_SET; + mySemaSets = (IpcSemaphoreId *) + malloc(maxSemaSets * sizeof(IpcSemaphoreId)); + if (mySemaSets == NULL) + elog(PANIC, "Out of memory in PGReserveSemaphores"); + numSemaSets = 0; + nextSemaKey = port * 1000; + nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */ + + on_shmem_exit(ReleaseSemaphores, 0); +} + +/* + * Release semaphores at shutdown or shmem reinitialization + * + * (called as an on_shmem_exit callback, hence funny argument list) + */ +static void +ReleaseSemaphores(int status, Datum arg) +{ + int i; + + for (i = 0; i < numSemaSets; i++) + IpcSemaphoreKill(mySemaSets[i]); + free(mySemaSets); +} + +/* + * PGSemaphoreCreate + * + * Initialize a PGSemaphore structure to represent a sema with count 1 + */ +void +PGSemaphoreCreate(PGSemaphore sema) +{ + /* Can't do this in a backend, because static state is postmaster's */ + Assert(!IsUnderPostmaster); + + if (nextSemaNumber >= SEMAS_PER_SET) + { + /* Time to allocate another semaphore set */ + if (numSemaSets >= maxSemaSets) + elog(PANIC, "PGSemaphoreCreate: too many semaphores created"); + mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET); + numSemaSets++; + nextSemaNumber = 0; + } + /* Assign the next free semaphore in the current set */ + sema->semId = mySemaSets[numSemaSets-1]; + sema->semNum = nextSemaNumber++; + /* Initialize it to count 1 */ + IpcSemaphoreInitialize(sema->semId, sema->semNum, 1); +} + +/* + * PGSemaphoreReset + * + * Reset a previously-initialized PGSemaphore to have count 0 + */ +void +PGSemaphoreReset(PGSemaphore sema) +{ + IpcSemaphoreInitialize(sema->semId, sema->semNum, 0); +} + +/* + * PGSemaphoreLock + * + * Lock a semaphore (decrement 
count), blocking if count would be < 0 + */ +void +PGSemaphoreLock(PGSemaphore sema, bool interruptOK) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = -1; /* decrement */ + sops.sem_flg = 0; + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. + * + * Each time around the loop, we check for a cancel/die interrupt. We + * assume that if such an interrupt comes in while we are waiting, it + * will cause the semop() call to exit with errno == EINTR, so that we + * will be able to service the interrupt (if not in a critical section + * already). + * + * Once we acquire the lock, we do NOT check for an interrupt before + * returning. The caller needs to be able to record ownership of the + * lock before any interrupt can be accepted. + * + * There is a window of a few instructions between CHECK_FOR_INTERRUPTS + * and entering the semop() call. If a cancel/die interrupt occurs in + * that window, we would fail to notice it until after we acquire the + * lock (or get another interrupt to escape the semop()). We can + * avoid this problem by temporarily setting ImmediateInterruptOK to + * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in + * this interval will execute directly. However, there is a huge + * pitfall: there is another window of a few instructions after the + * semop() before we are able to reset ImmediateInterruptOK. If an + * interrupt occurs then, we'll lose control, which means that the + * lock has been acquired but our caller did not get a chance to + * record the fact. Therefore, we only set ImmediateInterruptOK if the + * caller tells us it's OK to do so, ie, the caller does not need to + * record acquiring the lock. (This is currently true for lockmanager + * locks, since the process that granted us the lock did all the + * necessary state updates. 
It's not true for SysV semaphores used to + * implement LW locks or emulate spinlocks --- but the wait time for + * such locks should not be very long, anyway.) + */ + do + { + ImmediateInterruptOK = interruptOK; + CHECK_FOR_INTERRUPTS(); + errStatus = semop(sema->semId, &sops, 1); + ImmediateInterruptOK = false; + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreLock: semop(id=%d) failed: %s\n", + sema->semId, strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreUnlock + * + * Unlock a semaphore (increment count) + */ +void +PGSemaphoreUnlock(PGSemaphore sema) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = 1; /* increment */ + sops.sem_flg = 0; + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and unlock the semaphore again. Not clear this + * can really happen, but might as well cope. + */ + do + { + errStatus = semop(sema->semId, &sops, 1); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreUnlock: semop(id=%d) failed: %s\n", + sema->semId, strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreTryLock + * + * Lock a semaphore only if able to do so without blocking + */ +bool +PGSemaphoreTryLock(PGSemaphore sema) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = -1; /* decrement */ + sops.sem_flg = IPC_NOWAIT; /* but don't block */ + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. 
+ */ + do + { + errStatus = semop(sema->semId, &sops, 1); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */ +#ifdef EAGAIN + if (errno == EAGAIN) + return false; /* failed to lock it */ +#endif +#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN)) + if (errno == EWOULDBLOCK) + return false; /* failed to lock it */ +#endif + /* Otherwise we got trouble */ + fprintf(stderr, "PGSemaphoreTryLock: semop(id=%d) failed: %s\n", + sema->semId, strerror(errno)); + proc_exit(255); + } + + return true; +} diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c new file mode 100644 index 00000000000..41d5bdd374c --- /dev/null +++ b/src/backend/port/sysv_shmem.c @@ -0,0 +1,400 @@ +/*------------------------------------------------------------------------- + * + * sysv_shmem.c + * Implement shared memory using SysV facilities + * + * These routines represent a fairly thin layer on top of SysV shared + * memory functionality. 
+ * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/port/sysv_shmem.c,v 1.1 2002/05/05 00:03:28 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <errno.h> +#include <signal.h> +#include <unistd.h> +#include <sys/file.h> +#include <sys/types.h> +#ifdef HAVE_SYS_IPC_H +#include <sys/ipc.h> +#endif +#ifdef HAVE_SYS_SHM_H +#include <sys/shm.h> +#endif +#ifdef HAVE_KERNEL_OS_H +#include <kernel/OS.h> +#endif + +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/pg_shmem.h" + + +typedef uint32 IpcMemoryKey; /* shared memory key passed to shmget(2) */ +typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ + +#define IPCProtection (0600) /* access/modify by user only */ + + +static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size); +static void IpcMemoryDetach(int status, Datum shmaddr); +static void IpcMemoryDelete(int status, Datum shmId); +static void *PrivateMemoryCreate(uint32 size); +static void PrivateMemoryDelete(int status, Datum memaddr); + + +/* + * InternalIpcMemoryCreate(memKey, size) + * + * Attempt to create a new shared memory segment with the specified key. + * Will fail (return NULL) if such a segment already exists. If successful, + * attach the segment to the current process and return its attached address. + * On success, callbacks are registered with on_shmem_exit to detach and + * delete the segment when on_shmem_exit is called. + * + * If we fail with a failure code other than collision-with-existing-segment, + * print out an error and abort. Other types of errors are not recoverable. 
+ */
+static void *
+InternalIpcMemoryCreate(IpcMemoryKey memKey, uint32 size)
+{
+	IpcMemoryId shmid;
+	void	   *memAddress;
+
+	shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);
+
+	if (shmid < 0)
+	{
+		/*
+		 * Fail quietly if error indicates a collision with existing
+		 * segment.  One would expect EEXIST, given that we said IPC_EXCL,
+		 * but perhaps we could get a permission violation instead?  Also,
+		 * EIDRM might occur if an old seg is slated for destruction but
+		 * not gone yet.
+		 *
+		 * Returning NULL (rather than aborting) lets the caller in this
+		 * file, PGSharedMemoryCreate, go on to inspect the colliding
+		 * segment or move to the next key.
+		 */
+		if (errno == EEXIST || errno == EACCES
+#ifdef EIDRM
+			|| errno == EIDRM
+#endif
+			)
+			return NULL;
+
+		/*
+		 * Else complain and abort
+		 *
+		 * A generic message is printed first; then one of three longer
+		 * hints is added when errno is EINVAL, ENOMEM or ENOSPC.  Any
+		 * other errno gets the generic message only.  In every case
+		 * proc_exit(1) is called afterwards.
+		 *
+		 * NOTE(review): the errno tests that choose the hint run after
+		 * fprintf() has already been called, and a library call is
+		 * permitted to modify errno.  Copying errno into a local variable
+		 * right after shmget() would make these tests reliable.
+		 */
+		fprintf(stderr, "IpcMemoryCreate: shmget(key=%d, size=%u, 0%o) failed: %s\n",
+				(int) memKey, size, (IPC_CREAT | IPC_EXCL | IPCProtection),
+				strerror(errno));
+
+		if (errno == EINVAL)
+			fprintf(stderr,
+					"\nThis error usually means that PostgreSQL's request for a shared memory\n"
+					"segment exceeded your kernel's SHMMAX parameter. You can either\n"
+					"reduce the request size or reconfigure the kernel with larger SHMMAX.\n"
+					"To reduce the request size (currently %u bytes), reduce\n"
+					"PostgreSQL's shared_buffers parameter (currently %d) and/or\n"
+					"its max_connections parameter (currently %d).\n"
+					"\n"
+					"If the request size is already small, it's possible that it is less than\n"
+					"your kernel's SHMMIN parameter, in which case raising the request size or\n"
+					"reconfiguring SHMMIN is called for.\n"
+					"\n"
+					"The PostgreSQL Administrator's Guide contains more information about\n"
+					"shared memory configuration.\n\n",
+					size, NBuffers, MaxBackends);
+
+		else if (errno == ENOMEM)
+			fprintf(stderr,
+					"\nThis error usually means that PostgreSQL's request for a shared\n"
+					"memory segment exceeded available memory or swap space.\n"
+					"To reduce the request size (currently %u bytes), reduce\n"
+					"PostgreSQL's shared_buffers parameter (currently %d) and/or\n"
+					"its max_connections parameter (currently %d).\n"
+					"\n"
+					"The PostgreSQL Administrator's Guide contains more information about\n"
+					"shared memory configuration.\n\n",
+					size, NBuffers, MaxBackends);
+
+		else if (errno == ENOSPC)
+			fprintf(stderr,
+					"\nThis error does *not* mean that you have run out of disk space.\n"
+					"\n"
+					"It occurs either if all available shared memory IDs have been taken,\n"
+					"in which case you need to raise the SHMMNI parameter in your kernel,\n"
+					"or because the system's overall limit for shared memory has been\n"
+					"reached. If you cannot increase the shared memory limit,\n"
+					"reduce PostgreSQL's shared memory request (currently %u bytes),\n"
+					"by reducing its shared_buffers parameter (currently %d) and/or\n"
+					"its max_connections parameter (currently %d).\n"
+					"\n"
+					"The PostgreSQL Administrator's Guide contains more information about\n"
+					"shared memory configuration.\n\n",
+					size, NBuffers, MaxBackends);
+
+		proc_exit(1);
+	}
+
+	/* Register on-exit routine to delete the new segment */
+	on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));
+
+	/* OK, should be able to attach to the segment */
+#if defined(solaris) && defined(__sparc__)
+	/* use intimate shared memory on SPARC Solaris */
+	memAddress = shmat(shmid, 0, SHM_SHARE_MMU);
+#else
+	memAddress = shmat(shmid, 0, 0);
+#endif
+
+	if (memAddress == (void *) -1)
+	{
+		fprintf(stderr, "IpcMemoryCreate: shmat(id=%d) failed: %s\n",
+				shmid, strerror(errno));
+		proc_exit(1);
+	}
+
+	/*
+	 * Register on-exit routine to detach new segment before deleting
+	 *
+	 * NOTE(review): this is registered after the delete callback, so the
+	 * "before deleting" wording presumably relies on on_shmem_exit running
+	 * callbacks in reverse order of registration -- confirm.
+	 */
+	on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));
+
+	/* Record key and ID in lockfile for data directory. */
+	RecordSharedMemoryInLockFile((unsigned long) memKey,
+								 (unsigned long) shmid);
+
+	return memAddress;
+}
+
+/****************************************************************************/
+/*	IpcMemoryDetach(status, shmaddr)	removes a shared memory segment		*/
+/*										from process' address space			*/
+/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
+/****************************************************************************/
+static void
+IpcMemoryDetach(int status, Datum shmaddr)
+{
+	if (shmdt(DatumGetPointer(shmaddr)) < 0)
+		fprintf(stderr, "IpcMemoryDetach: shmdt(%p) failed: %s\n",
+				DatumGetPointer(shmaddr), strerror(errno));
+
+	/*
+	 * A failed shmdt() is only reported on stderr; nothing else is done
+	 * about it.  The status argument is not examined.
+	 *
+	 * We used to report a failure via elog(WARNING), but that's pretty
+	 * pointless considering any client has long since disconnected ...
+	 */
+}
+
+/****************************************************************************/
+/*	IpcMemoryDelete(status, shmId)		deletes a shared memory segment		*/
+/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
+/****************************************************************************/
+static void
+IpcMemoryDelete(int status, Datum shmId)
+{
+	if (shmctl(DatumGetInt32(shmId), IPC_RMID, (struct shmid_ds *) NULL) < 0)
+		fprintf(stderr, "IpcMemoryDelete: shmctl(%d, %d, 0) failed: %s\n",
+				DatumGetInt32(shmId), IPC_RMID, strerror(errno));
+
+	/*
+	 * A failed IPC_RMID is only reported on stderr; nothing else is done
+	 * about it.  The status argument is not examined.
+	 *
+	 * We used to report a failure via elog(WARNING), but that's pretty
+	 * pointless considering any client has long since disconnected ...
+	 */
+}
+
+/*
+ * PGSharedMemoryIsInUse
+ *
+ * Is a previously-existing shmem segment still existing and in use?
+ *
+ * Only id2 is examined: it is cast to IpcMemoryId and handed to shmctl().
+ * id1 is not referenced by this implementation.  (Presumably the two
+ * values are the key and ID written by RecordSharedMemoryInLockFile --
+ * confirm against the caller.)
+ *
+ * Returns true if the segment has attached processes, or if its status
+ * cannot be read for any reason other than EINVAL.  Returns false if
+ * shmctl() fails with EINVAL or if no process is attached.
+ */
+bool
+PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
+{
+	IpcMemoryId shmId = (IpcMemoryId) id2;
+	struct shmid_ds shmStat;
+
+	/*
+	 * We detect whether a shared memory segment is in use by seeing
+	 * whether it (a) exists and (b) has any processes attached to it.
+	 *
+	 * If we are unable to perform the stat operation for a reason other than
+	 * nonexistence of the segment (most likely, because it doesn't belong
+	 * to our userid), assume it is in use.
+	 */
+	if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
+	{
+		/*
+		 * EINVAL actually has multiple possible causes documented in the
+		 * shmctl man page, but we assume it must mean the segment no
+		 * longer exists.
+		 */
+		if (errno == EINVAL)
+			return false;
+		/* Else assume segment is in use */
+		return true;
+	}
+	/* If it has attached processes, it's in use */
+	if (shmStat.shm_nattch != 0)
+		return true;
+	return false;
+}
+
+
+/* ----------------------------------------------------------------
+ *						private memory support
+ *
+ * Rather than allocating shmem segments with IPC_PRIVATE key, we
+ * just malloc() the requested amount of space.  This code emulates
+ * the needed shmem functions.
+ *
+ * PrivateMemoryCreate returns malloc-ed storage of the requested size,
+ * filled with zeroes, and registers PrivateMemoryDelete as an
+ * on_shmem_exit callback for it.  If malloc() fails it prints a message
+ * to stderr and calls proc_exit(1).
+ *
+ * PrivateMemoryDelete free()s the storage passed as its Datum argument;
+ * its status argument is not examined.
+ * ----------------------------------------------------------------
+ */
+
+static void *
+PrivateMemoryCreate(uint32 size)
+{
+	void	   *memAddress;
+
+	memAddress = malloc(size);
+	if (!memAddress)
+	{
+		fprintf(stderr, "PrivateMemoryCreate: malloc(%u) failed\n", size);
+		proc_exit(1);
+	}
+	MemSet(memAddress, 0, size);	/* keep Purify quiet */
+
+	/* Register on-exit routine to release storage */
+	on_shmem_exit(PrivateMemoryDelete, PointerGetDatum(memAddress));
+
+	return memAddress;
+}
+
+static void
+PrivateMemoryDelete(int status, Datum memaddr)
+{
+	free(DatumGetPointer(memaddr));
+}
+
+
+/*
+ * PGSharedMemoryCreate
+ *
+ * Create a shared memory segment of the given size and initialize its
+ * standard header.  Also, register an on_shmem_exit callback to release
+ * the storage.
+ *
+ * Dead Postgres segments are recycled if found, but we do not fail upon
+ * collision with non-Postgres shmem segments.	The idea here is to detect and
+ * re-use keys that may have been assigned by a crashed postmaster or backend.
+ *
+ * The port number is passed for possible use as a key (for SysV, we use
+ * it to generate the starting shmem key).	In a standalone backend,
+ * zero will be passed.
+ *
+ * Candidate keys start at port * 1000 + 1 and go up by one each time a key
+ * turns out to be unusable.  When makePrivate is true no key is used at
+ * all: the storage comes from PrivateMemoryCreate.
+ *
+ * The search loop is left only once storage has been obtained, so the
+ * function always returns a pointer to the header at the start of that
+ * storage, with creatorPID, magic, totalsize and freeoffset filled in.
+ */
+PGShmemHeader *
+PGSharedMemoryCreate(uint32 size, bool makePrivate, int port)
+{
+	IpcMemoryKey NextShmemSegID;
+	void	   *memAddress;
+	PGShmemHeader *hdr;
+
+	/* Room for a header? */
+	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
+
+	/*
+	 * Loop till we find a free IPC key
+	 *
+	 * The loop initializer increments before the first pass, so the first
+	 * key actually tried is port * 1000 + 1.
+	 */
+	NextShmemSegID = port * 1000;
+
+	for (NextShmemSegID++;; NextShmemSegID++)
+	{
+		IpcMemoryId shmid;
+
+		/*
+		 * Special case if creating a private segment --- just malloc() it
+		 *
+		 * This test sits inside the loop, but it breaks out at once, so in
+		 * the private case the loop body runs a single time.
+		 */
+		if (makePrivate)
+		{
+			memAddress = PrivateMemoryCreate(size);
+			break;
+		}
+
+		/* Try to create new segment */
+		memAddress = InternalIpcMemoryCreate(NextShmemSegID, size);
+		if (memAddress)
+			break;				/* successful create and attach */
+
+		/*
+		 * See if it looks to be leftover from a dead Postgres process
+		 *
+		 * The flags argument is 0 (no IPC_CREAT), and only enough of the
+		 * segment to cover the header is requested.
+		 */
+		shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
+		if (shmid < 0)
+			continue;			/* failed: must be some other app's */
+
+#if defined(solaris) && defined(__sparc__)
+		/* use intimate shared memory on SPARC Solaris */
+		memAddress = shmat(shmid, 0, SHM_SHARE_MMU);
+#else
+		memAddress = shmat(shmid, 0, 0);
+#endif
+
+		if (memAddress == (void *) -1)
+			continue;			/* failed: must be some other app's */
+		hdr = (PGShmemHeader *) memAddress;
+		if (hdr->magic != PGShmemMagic)
+		{
+			shmdt(memAddress);
+			continue;			/* segment belongs to a non-Postgres app */
+		}
+
+		/*
+		 * If the creator PID is my own PID or does not belong to any
+		 * extant process, it's safe to zap it.
+		 *
+		 * kill() with signal 0 is used purely as an existence probe.  Only
+		 * a failure with errno ESRCH is accepted as proof that the creator
+		 * is gone; success, or failure with any other errno, is treated as
+		 * a live owner.
+		 */
+		if (hdr->creatorPID != getpid())
+		{
+			if (kill(hdr->creatorPID, 0) == 0 ||
+				errno != ESRCH)
+			{
+				shmdt(memAddress);
+				continue;		/* segment belongs to a live process */
+			}
+		}
+
+		/*
+		 * The segment appears to be from a dead Postgres process, or from
+		 * a previous cycle of life in this same process.  Zap it, if
+		 * possible.  This probably shouldn't fail, but if it does, assume
+		 * the segment belongs to someone else after all, and continue
+		 * quietly.
+		 */
+		shmdt(memAddress);
+		if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0)
+			continue;
+
+		/*
+		 * Now try again to create the segment.
+		 */
+		memAddress = InternalIpcMemoryCreate(NextShmemSegID, size);
+		if (memAddress)
+			break;				/* successful create and attach */
+
+		/*
+		 * Can only get here if some other process managed to create the
+		 * same shmem key before we did.  Let him have that one, loop
+		 * around to try next key.
+		 */
+	}
+
+	/*
+	 * OK, we created a new segment.  Mark it as created by this process.
+	 * The order of assignments here is critical so that another Postgres
+	 * process can't see the header as valid but belonging to an invalid
+	 * PID!
+	 *
+	 * That is: creatorPID is stored first and magic last, because the
+	 * recycling code above looks at creatorPID only after magic matches.
+	 */
+	hdr = (PGShmemHeader *) memAddress;
+	hdr->creatorPID = getpid();
+	hdr->magic = PGShmemMagic;
+
+	/*
+	 * Initialize space allocation status for segment.
+	 *
+	 * totalsize is the full requested size; freeoffset is set to the
+	 * MAXALIGN-adjusted size of the header itself.
+	 */
+	hdr->totalsize = size;
+	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
+
+	return hdr;
+} |