aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Munro <tmunro@postgresql.org>2021-03-01 12:06:09 +1300
committerThomas Munro <tmunro@postgresql.org>2021-03-01 14:12:02 +1300
commit6a2a70a02018d6362f9841cc2f499cc45405e86b (patch)
tree28a7760914dd066dbd2310cc29e893ac7c6b13a0
parent83709a0d5a46559db016c50ded1a95fd3b0d3be6 (diff)
downloadpostgresql-6a2a70a02018d6362f9841cc2f499cc45405e86b.tar.gz
postgresql-6a2a70a02018d6362f9841cc2f499cc45405e86b.zip
Use signalfd(2) for epoll latches.
Cut down on system calls and other overheads by reading from a signalfd instead of using a signal handler and self-pipe. Affects Linux systems, and possibly others, including illumos, that implement the Linux epoll and signalfd interfaces. Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/CA+hUKGJjxPDpzBE0a3hyUywBvaZuC89yx3jK9RFZgfv_KHU7gg@mail.gmail.com
-rw-r--r--src/backend/libpq/pqsignal.c4
-rw-r--r--src/backend/storage/ipc/latch.c159
-rw-r--r--src/backend/utils/init/miscinit.c10
3 files changed, 118 insertions, 55 deletions
diff --git a/src/backend/libpq/pqsignal.c b/src/backend/libpq/pqsignal.c
index b43af220303..dedf3a456d8 100644
--- a/src/backend/libpq/pqsignal.c
+++ b/src/backend/libpq/pqsignal.c
@@ -35,13 +35,15 @@ sigset_t UnBlockSig,
* collection; it's essentially BlockSig minus SIGTERM, SIGQUIT, SIGALRM.
*
* UnBlockSig is the set of signals to block when we don't want to block
- * signals (is this ever nonzero??)
+ * signals.
*/
void
pqinitmask(void)
{
sigemptyset(&UnBlockSig);
+ /* Note: InitializeLatchSupport() modifies UnBlockSig. */
+
/* First set all signals, then clear some. */
sigfillset(&BlockSig);
sigfillset(&StartupBlockSig);
diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 0f274280e65..ea6d9948abe 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -3,21 +3,20 @@
* latch.c
* Routines for inter-process latches
*
- * The Unix implementation uses the so-called self-pipe trick to overcome the
- * race condition involved with poll() (or epoll_wait() on linux) and setting
- * a global flag in the signal handler. When a latch is set and the current
- * process is waiting for it, the signal handler wakes up the poll() in
- * WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
- * poll() on all platforms, and even on platforms where it does, a signal that
- * arrives just before the poll() call does not prevent poll() from entering
- * sleep. An incoming byte on a pipe however reliably interrupts the sleep,
- * and causes poll() to return immediately even if the signal arrives before
- * poll() begins.
+ * The poll() implementation uses the so-called self-pipe trick to overcome the
+ * race condition involved with poll() and setting a global flag in the signal
+ * handler. When a latch is set and the current process is waiting for it, the
+ * signal handler wakes up the poll() in WaitLatch by writing a byte to a pipe.
+ * A signal by itself doesn't interrupt poll() on all platforms, and even on
+ * platforms where it does, a signal that arrives just before the poll() call
+ * does not prevent poll() from entering sleep. An incoming byte on a pipe
+ * however reliably interrupts the sleep, and causes poll() to return
+ * immediately even if the signal arrives before poll() begins.
*
- * When SetLatch is called from the same process that owns the latch,
- * SetLatch writes the byte directly to the pipe. If it's owned by another
- * process, SIGURG is sent and the signal handler in the waiting process
- * writes the byte to the pipe on behalf of the signaling process.
+ * The epoll() implementation overcomes the race with a different technique: it
+ * keeps SIGURG blocked and consumes from a signalfd() descriptor instead. We
+ * don't need to register a signal handler or create our own self-pipe. We
+ * assume that any system that has Linux epoll() also has Linux signalfd().
*
* The Windows implementation uses Windows events that are inherited by all
* postmaster child processes. There's no need for the self-pipe trick there.
@@ -46,6 +45,7 @@
#include <poll.h>
#endif
+#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/atomics.h"
@@ -79,6 +79,10 @@
#error "no wait set implementation available"
#endif
+#ifdef WAIT_USE_EPOLL
+#include <sys/signalfd.h>
+#endif
+
/* typedef in latch.h */
struct WaitEventSet
{
@@ -139,7 +143,14 @@ static WaitEventSet *LatchWaitSet;
#ifndef WIN32
/* Are we currently in WaitLatch? The signal handler would like to know. */
static volatile sig_atomic_t waiting = false;
+#endif
+#ifdef WAIT_USE_EPOLL
+/* On Linux, we'll receive SIGURG via a signalfd file descriptor. */
+static int signal_fd = -1;
+#endif
+
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
/* Read and write ends of the self-pipe */
static int selfpipe_readfd = -1;
static int selfpipe_writefd = -1;
@@ -150,8 +161,11 @@ static int selfpipe_owner_pid = 0;
/* Private function prototypes */
static void latch_sigurg_handler(SIGNAL_ARGS);
static void sendSelfPipeByte(void);
-static void drainSelfPipe(void);
-#endif /* WIN32 */
+#endif
+
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
+static void drain(void);
+#endif
#if defined(WAIT_USE_EPOLL)
static void WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action);
@@ -175,7 +189,7 @@ static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
void
InitializeLatchSupport(void)
{
-#ifndef WIN32
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
int pipefd[2];
if (IsUnderPostmaster)
@@ -247,8 +261,21 @@ InitializeLatchSupport(void)
ReserveExternalFD();
pqsignal(SIGURG, latch_sigurg_handler);
-#else
- /* currently, nothing to do here for Windows */
+#endif
+
+#ifdef WAIT_USE_EPOLL
+ sigset_t signalfd_mask;
+
+ /* Block SIGURG, because we'll receive it through a signalfd. */
+ sigaddset(&UnBlockSig, SIGURG);
+
+ /* Set up the signalfd to receive SIGURG notifications. */
+ sigemptyset(&signalfd_mask);
+ sigaddset(&signalfd_mask, SIGURG);
+ signal_fd = signalfd(-1, &signalfd_mask, SFD_NONBLOCK | SFD_CLOEXEC);
+ if (signal_fd < 0)
+ elog(FATAL, "signalfd() failed");
+ ReserveExternalFD();
#endif
}
@@ -273,7 +300,9 @@ InitializeLatchWaitSet(void)
void
ShutdownLatchSupport(void)
{
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
pqsignal(SIGURG, SIG_IGN);
+#endif
if (LatchWaitSet)
{
@@ -281,11 +310,18 @@ ShutdownLatchSupport(void)
LatchWaitSet = NULL;
}
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
close(selfpipe_readfd);
close(selfpipe_writefd);
selfpipe_readfd = -1;
selfpipe_writefd = -1;
selfpipe_owner_pid = InvalidPid;
+#endif
+
+#if defined(WAIT_USE_EPOLL)
+ close(signal_fd);
+ signal_fd = -1;
+#endif
}
/*
@@ -299,10 +335,10 @@ InitLatch(Latch *latch)
latch->owner_pid = MyProcPid;
latch->is_shared = false;
-#ifndef WIN32
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
/* Assert InitializeLatchSupport has been called in this process */
Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
-#else
+#elif defined(WAIT_USE_WIN32)
latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
if (latch->event == NULL)
elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
@@ -363,7 +399,7 @@ OwnLatch(Latch *latch)
/* Sanity checks */
Assert(latch->is_shared);
-#ifndef WIN32
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
/* Assert InitializeLatchSupport has been called in this process */
Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
#endif
@@ -550,9 +586,9 @@ SetLatch(Latch *latch)
/*
* See if anyone's waiting for the latch. It can be the current process if
- * we're in a signal handler. We use the self-pipe to wake up the
- * poll()/epoll_wait() in that case. If it's another process, send a
- * signal.
+ * we're in a signal handler. We use the self-pipe or SIGURG to ourselves
+ * to wake up WaitEventSetWaitBlock() without races in that case. If it's
+ * another process, send a signal.
*
* Fetch owner_pid only once, in case the latch is concurrently getting
* owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
@@ -575,11 +611,17 @@ SetLatch(Latch *latch)
return;
else if (owner_pid == MyProcPid)
{
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
if (waiting)
sendSelfPipeByte();
+#else
+ if (waiting)
+ kill(MyProcPid, SIGURG);
+#endif
}
else
kill(owner_pid, SIGURG);
+
#else
/*
@@ -856,8 +898,13 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
{
set->latch = latch;
set->latch_pos = event->pos;
-#ifndef WIN32
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
event->fd = selfpipe_readfd;
+#elif defined(WAIT_USE_EPOLL)
+ event->fd = signal_fd;
+#else
+ event->fd = PGINVALID_SOCKET;
+ return event->pos;
#endif
}
else if (events == WL_POSTMASTER_DEATH)
@@ -932,12 +979,13 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
if (latch && latch->owner_pid != MyProcPid)
elog(ERROR, "cannot wait on a latch owned by another process");
set->latch = latch;
+
/*
* On Unix, we don't need to modify the kernel object because the
- * underlying pipe is the same for all latches so we can return
- * immediately. On Windows, we need to update our array of handles,
- * but we leave the old one in place and tolerate spurious wakeups if
- * the latch is disabled.
+ * underlying pipe (if there is one) is the same for all latches so we
+ * can return immediately. On Windows, we need to update our array of
+ * handles, but we leave the old one in place and tolerate spurious
+ * wakeups if the latch is disabled.
*/
#if defined(WAIT_USE_WIN32)
if (!latch)
@@ -1421,8 +1469,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
if (cur_event->events == WL_LATCH_SET &&
cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
{
- /* There's data in the self-pipe, clear it. */
- drainSelfPipe();
+ /* Drain the signalfd. */
+ drain();
if (set->latch && set->latch->is_set)
{
@@ -1575,7 +1623,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
cur_kqueue_event->filter == EVFILT_READ)
{
/* There's data in the self-pipe, clear it. */
- drainSelfPipe();
+ drain();
if (set->latch && set->latch->is_set)
{
@@ -1691,7 +1739,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
(cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
{
/* There's data in the self-pipe, clear it. */
- drainSelfPipe();
+ drain();
if (set->latch && set->latch->is_set)
{
@@ -1951,7 +1999,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
}
#endif
-#ifndef WIN32
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
+
/*
* SetLatch uses SIGURG to wake up the process waiting on the latch.
*
@@ -1967,10 +2016,8 @@ latch_sigurg_handler(SIGNAL_ARGS)
errno = save_errno;
}
-#endif /* !WIN32 */
/* Send one byte to the self-pipe, to wake up WaitLatch */
-#ifndef WIN32
static void
sendSelfPipeByte(void)
{
@@ -2000,45 +2047,58 @@ retry:
return;
}
}
-#endif /* !WIN32 */
+
+#endif
+
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
/*
- * Read all available data from the self-pipe
+ * Read all available data from self-pipe or signalfd.
*
* Note: this is only called when waiting = true. If it fails and doesn't
* return, it must reset that flag first (though ideally, this will never
* happen).
*/
-#ifndef WIN32
static void
-drainSelfPipe(void)
+drain(void)
{
- /*
- * There shouldn't normally be more than one byte in the pipe, or maybe a
- * few bytes if multiple processes run SetLatch at the same instant.
- */
- char buf[16];
+ char buf[1024];
int rc;
+ int fd;
+
+#ifdef WAIT_USE_POLL
+ fd = selfpipe_readfd;
+#else
+ fd = signal_fd;
+#endif
for (;;)
{
- rc = read(selfpipe_readfd, buf, sizeof(buf));
+ rc = read(fd, buf, sizeof(buf));
if (rc < 0)
{
if (errno == EAGAIN || errno == EWOULDBLOCK)
- break; /* the pipe is empty */
+ break; /* the descriptor is empty */
else if (errno == EINTR)
continue; /* retry */
else
{
waiting = false;
+#ifdef WAIT_USE_POLL
elog(ERROR, "read() on self-pipe failed: %m");
+#else
+ elog(ERROR, "read() on signalfd failed: %m");
+#endif
}
}
else if (rc == 0)
{
waiting = false;
+#ifdef WAIT_USE_POLL
elog(ERROR, "unexpected EOF on self-pipe");
+#else
+ elog(ERROR, "unexpected EOF on signalfd");
+#endif
}
else if (rc < sizeof(buf))
{
@@ -2048,4 +2108,5 @@ drainSelfPipe(void)
/* else buffer wasn't big enough, so read again */
}
}
-#endif /* !WIN32 */
+
+#endif
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index 734c66d4e84..e6550f99eec 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -118,6 +118,11 @@ InitPostmasterChild(void)
/* We don't want the postmaster's proc_exit() handlers */
on_exit_reset();
+ /* In EXEC_BACKEND case we will not have inherited BlockSig etc values */
+#ifdef EXEC_BACKEND
+ pqinitmask();
+#endif
+
/* Initialize process-local latch support */
InitializeLatchSupport();
MyLatch = &LocalLatchData;
@@ -135,11 +140,6 @@ InitPostmasterChild(void)
elog(FATAL, "setsid() failed: %m");
#endif
- /* In EXEC_BACKEND case we will not have inherited BlockSig etc values */
-#ifdef EXEC_BACKEND
- pqinitmask();
-#endif
-
/*
* Every postmaster child process is expected to respond promptly to
* SIGQUIT at all times. Therefore we centrally remove SIGQUIT from