diff options
Diffstat (limited to 'src/backend/port/sysv_sema.c')
-rw-r--r-- | src/backend/port/sysv_sema.c | 522 |
1 files changed, 522 insertions, 0 deletions
diff --git a/src/backend/port/sysv_sema.c b/src/backend/port/sysv_sema.c new file mode 100644 index 00000000000..d868602de2e --- /dev/null +++ b/src/backend/port/sysv_sema.c @@ -0,0 +1,522 @@ +/*------------------------------------------------------------------------- + * + * sysv_sema.c + * Implement PGSemaphores using SysV semaphore facilities + * + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/port/sysv_sema.c,v 1.1 2002/05/05 00:03:28 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <errno.h> +#include <signal.h> +#include <unistd.h> +#include <sys/file.h> +#include <sys/types.h> +#ifdef HAVE_SYS_IPC_H +#include <sys/ipc.h> +#endif +#ifdef HAVE_SYS_SEM_H +#include <sys/sem.h> +#endif +#ifdef HAVE_KERNEL_OS_H +#include <kernel/OS.h> +#endif + +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/pg_sema.h" + + +#ifndef HAVE_UNION_SEMUN +union semun +{ + int val; + struct semid_ds *buf; + unsigned short *array; +}; +#endif + +typedef uint32 IpcSemaphoreKey; /* semaphore key passed to semget(2) */ +typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */ + +/* + * SEMAS_PER_SET is the number of useful semaphores in each semaphore set + * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores + * per set) parameter, which is often around 25. (Less than, because we + * allocate one extra sema in each set for identification purposes.) + */ +#define SEMAS_PER_SET 16 + +#define IPCProtection (0600) /* access/modify by user only */ + +#define PGSemaMagic 537 /* must be less than SEMVMX */ + + +static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */ +static int numSemaSets; /* number of sema sets acquired so far */ +static int maxSemaSets; /* allocated size of mySemaSets array */ +static IpcSemaphoreKey nextSemaKey; /* next key to try using */ +static int nextSemaNumber; /* next free sem num in last sema set */ + + +static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, + int numSems); +static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, + int value); +static void IpcSemaphoreKill(IpcSemaphoreId semId); +static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum); +static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum); +static IpcSemaphoreId IpcSemaphoreCreate(int numSems); +static void ReleaseSemaphores(int status, Datum arg); + + +/* + * InternalIpcSemaphoreCreate + * + * Attempt to create a new semaphore set with the specified key. + * Will fail (return -1) if such a set already exists. + * + * If we fail with a failure code other than collision-with-existing-set, + * print out an error and abort. Other types of errors suggest nonrecoverable + * problems. + */ +static IpcSemaphoreId +InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems) +{ + int semId; + + semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection); + + if (semId < 0) + { + /* + * Fail quietly if error indicates a collision with existing set. + * One would expect EEXIST, given that we said IPC_EXCL, but + * perhaps we could get a permission violation instead? Also, + * EIDRM might occur if an old set is slated for destruction but + * not gone yet. + */ + if (errno == EEXIST || errno == EACCES +#ifdef EIDRM + || errno == EIDRM +#endif + ) + return -1; + + /* + * Else complain and abort + */ + fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n", + (int) semKey, numSems, (IPC_CREAT | IPC_EXCL | IPCProtection), + strerror(errno)); + + if (errno == ENOSPC) + fprintf(stderr, + "\nThis error does *not* mean that you have run out of disk space.\n" + "\n" + "It occurs when either the system limit for the maximum number of\n" + "semaphore sets (SEMMNI), or the system wide maximum number of\n" + "semaphores (SEMMNS), would be exceeded. You need to raise the\n" + "respective kernel parameter. Alternatively, reduce PostgreSQL's\n" + "consumption of semaphores by reducing its max_connections parameter\n" + "(currently %d).\n" + "\n" + "The PostgreSQL Administrator's Guide contains more information about\n" + "configuring your system for PostgreSQL.\n\n", + MaxBackends); + + proc_exit(1); + } + + return semId; +} + +/* + * Initialize a semaphore to the specified value. + */ +static void +IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value) +{ + union semun semun; + + semun.val = value; + if (semctl(semId, semNum, SETVAL, semun) < 0) + { + fprintf(stderr, "IpcSemaphoreInitialize: semctl(id=%d, %d, SETVAL, %d) failed: %s\n", + semId, semNum, value, strerror(errno)); + + if (errno == ERANGE) + fprintf(stderr, + "You possibly need to raise your kernel's SEMVMX value to be at least\n" + "%d. Look into the PostgreSQL documentation for details.\n", + value); + + proc_exit(1); + } +} + +/* + * IpcSemaphoreKill(semId) - removes a semaphore set + */ +static void +IpcSemaphoreKill(IpcSemaphoreId semId) +{ + union semun semun; + + semun.val = 0; /* unused, but keep compiler quiet */ + + if (semctl(semId, 0, IPC_RMID, semun) < 0) + fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n", + semId, strerror(errno)); + + /* + * We used to report a failure via elog(WARNING), but that's pretty + * pointless considering any client has long since disconnected ... + */ +} + +/* Get the current value (semval) of the semaphore */ +static int +IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum) +{ + union semun dummy; /* for Solaris */ + + dummy.val = 0; /* unused */ + + return semctl(semId, semNum, GETVAL, dummy); +} + +/* Get the PID of the last process to do semop() on the semaphore */ +static pid_t +IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum) +{ + union semun dummy; /* for Solaris */ + + dummy.val = 0; /* unused */ + + return semctl(semId, semNum, GETPID, dummy); +} + + +/* + * Create a semaphore set with the given number of useful semaphores + * (an additional sema is actually allocated to serve as identifier). + * Dead Postgres sema sets are recycled if found, but we do not fail + * upon collision with non-Postgres sema sets. + * + * The idea here is to detect and re-use keys that may have been assigned + * by a crashed postmaster or backend. + */ +static IpcSemaphoreId +IpcSemaphoreCreate(int numSems) +{ + IpcSemaphoreId semId; + union semun semun; + PGSemaphoreData mysema; + + /* Loop till we find a free IPC key */ + for (nextSemaKey++; ; nextSemaKey++) + { + pid_t creatorPID; + + /* Try to create new semaphore set */ + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + if (semId >= 0) + break; /* successful create */ + + /* See if it looks to be leftover from a dead Postgres process */ + semId = semget(nextSemaKey, numSems + 1, 0); + if (semId < 0) + continue; /* failed: must be some other app's */ + if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic) + continue; /* sema belongs to a non-Postgres app */ + + /* + * If the creator PID is my own PID or does not belong to any + * extant process, it's safe to zap it. + */ + creatorPID = IpcSemaphoreGetLastPID(semId, numSems); + if (creatorPID <= 0) + continue; /* oops, GETPID failed */ + if (creatorPID != getpid()) + { + if (kill(creatorPID, 0) == 0 || + errno != ESRCH) + continue; /* sema belongs to a live process */ + } + + /* + * The sema set appears to be from a dead Postgres process, or + * from a previous cycle of life in this same process. Zap it, if + * possible. This probably shouldn't fail, but if it does, assume + * the sema set belongs to someone else after all, and continue + * quietly. + */ + semun.val = 0; /* unused, but keep compiler quiet */ + if (semctl(semId, 0, IPC_RMID, semun) < 0) + continue; + + /* + * Now try again to create the sema set. + */ + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + if (semId >= 0) + break; /* successful create */ + + /* + * Can only get here if some other process managed to create the + * same sema key before we did. Let him have that one, loop + * around to try next key. + */ + } + + /* + * OK, we created a new sema set. Mark it as created by this process. + * We do this by setting the spare semaphore to PGSemaMagic-1 and then + * incrementing it with semop(). That leaves it with value + * PGSemaMagic and sempid referencing this process. + */ + IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1); + mysema.semId = semId; + mysema.semNum = numSems; + PGSemaphoreUnlock(&mysema); + + return semId; +} + + +/* + * PGReserveSemaphores --- initialize semaphore support + * + * This is called during postmaster start or shared memory reinitialization. + * It should do whatever is needed to be able to support up to maxSemas + * subsequent PGSemaphoreCreate calls. Also, if any system resources + * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit + * callback to release them. + * + * The port number is passed for possible use as a key (for SysV, we use + * it to generate the starting semaphore key). In a standalone backend, + * zero will be passed. + * + * In the SysV implementation, we acquire semaphore sets on-demand; the + * maxSemas parameter is just used to size the array that keeps track of + * acquired sets for subsequent releasing. + */ +void +PGReserveSemaphores(int maxSemas, int port) +{ + maxSemaSets = (maxSemas + SEMAS_PER_SET-1) / SEMAS_PER_SET; + mySemaSets = (IpcSemaphoreId *) + malloc(maxSemaSets * sizeof(IpcSemaphoreId)); + if (mySemaSets == NULL) + elog(PANIC, "Out of memory in PGReserveSemaphores"); + numSemaSets = 0; + nextSemaKey = port * 1000; + nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */ + + on_shmem_exit(ReleaseSemaphores, 0); +} + +/* + * Release semaphores at shutdown or shmem reinitialization + * + * (called as an on_shmem_exit callback, hence funny argument list) + */ +static void +ReleaseSemaphores(int status, Datum arg) +{ + int i; + + for (i = 0; i < numSemaSets; i++) + IpcSemaphoreKill(mySemaSets[i]); + free(mySemaSets); +} + +/* + * PGSemaphoreCreate + * + * Initialize a PGSemaphore structure to represent a sema with count 1 + */ +void +PGSemaphoreCreate(PGSemaphore sema) +{ + /* Can't do this in a backend, because static state is postmaster's */ + Assert(!IsUnderPostmaster); + + if (nextSemaNumber >= SEMAS_PER_SET) + { + /* Time to allocate another semaphore set */ + if (numSemaSets >= maxSemaSets) + elog(PANIC, "PGSemaphoreCreate: too many semaphores created"); + mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET); + numSemaSets++; + nextSemaNumber = 0; + } + /* Assign the next free semaphore in the current set */ + sema->semId = mySemaSets[numSemaSets-1]; + sema->semNum = nextSemaNumber++; + /* Initialize it to count 1 */ + IpcSemaphoreInitialize(sema->semId, sema->semNum, 1); +} + +/* + * PGSemaphoreReset + * + * Reset a previously-initialized PGSemaphore to have count 0 + */ +void +PGSemaphoreReset(PGSemaphore sema) +{ + IpcSemaphoreInitialize(sema->semId, sema->semNum, 0); +} + +/* + * PGSemaphoreLock + * + * Lock a semaphore (decrement count), blocking if count would be < 0 + */ +void +PGSemaphoreLock(PGSemaphore sema, bool interruptOK) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = -1; /* decrement */ + sops.sem_flg = 0; + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. + * + * Each time around the loop, we check for a cancel/die interrupt. We + * assume that if such an interrupt comes in while we are waiting, it + * will cause the semop() call to exit with errno == EINTR, so that we + * will be able to service the interrupt (if not in a critical section + * already). + * + * Once we acquire the lock, we do NOT check for an interrupt before + * returning. The caller needs to be able to record ownership of the + * lock before any interrupt can be accepted. + * + * There is a window of a few instructions between CHECK_FOR_INTERRUPTS + * and entering the semop() call. If a cancel/die interrupt occurs in + * that window, we would fail to notice it until after we acquire the + * lock (or get another interrupt to escape the semop()). We can + * avoid this problem by temporarily setting ImmediateInterruptOK to + * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in + * this interval will execute directly. However, there is a huge + * pitfall: there is another window of a few instructions after the + * semop() before we are able to reset ImmediateInterruptOK. If an + * interrupt occurs then, we'll lose control, which means that the + * lock has been acquired but our caller did not get a chance to + * record the fact. Therefore, we only set ImmediateInterruptOK if the + * caller tells us it's OK to do so, ie, the caller does not need to + * record acquiring the lock. (This is currently true for lockmanager + * locks, since the process that granted us the lock did all the + * necessary state updates. It's not true for SysV semaphores used to + * implement LW locks or emulate spinlocks --- but the wait time for + * such locks should not be very long, anyway.) + */ + do + { + ImmediateInterruptOK = interruptOK; + CHECK_FOR_INTERRUPTS(); + errStatus = semop(sema->semId, &sops, 1); + ImmediateInterruptOK = false; + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreLock: semop(id=%d) failed: %s\n", + sema->semId, strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreUnlock + * + * Unlock a semaphore (increment count) + */ +void +PGSemaphoreUnlock(PGSemaphore sema) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = 1; /* increment */ + sops.sem_flg = 0; + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and unlock the semaphore again. Not clear this + * can really happen, but might as well cope. + */ + do + { + errStatus = semop(sema->semId, &sops, 1); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + fprintf(stderr, "PGSemaphoreUnlock: semop(id=%d) failed: %s\n", + sema->semId, strerror(errno)); + proc_exit(255); + } +} + +/* + * PGSemaphoreTryLock + * + * Lock a semaphore only if able to do so without blocking + */ +bool +PGSemaphoreTryLock(PGSemaphore sema) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = -1; /* decrement */ + sops.sem_flg = IPC_NOWAIT; /* but don't block */ + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were sent a + * signal. So we try and lock the semaphore again. + */ + do + { + errStatus = semop(sema->semId, &sops, 1); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */ +#ifdef EAGAIN + if (errno == EAGAIN) + return false; /* failed to lock it */ +#endif +#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN)) + if (errno == EWOULDBLOCK) + return false; /* failed to lock it */ +#endif + /* Otherwise we got trouble */ + fprintf(stderr, "PGSemaphoreTryLock: semop(id=%d) failed: %s\n", + sema->semId, strerror(errno)); + proc_exit(255); + } + + return true; +} |