author     Andres Freund <andres@anarazel.de>    2016-03-21 09:56:39 +0100
committer  Andres Freund <andres@anarazel.de>    2016-03-21 11:03:26 +0100
commit     72e2d21c1249b674496f97cd6009c0bda62f6b4d (patch)
tree       d1bd384ebc65e08446191e4396e9055698093fbf /src/backend/storage/ipc/latch.c
parent     326d73c86fda407a810675c3b5a48e0a0cc992f5 (diff)
download   postgresql-72e2d21c1249b674496f97cd6009c0bda62f6b4d.tar.gz
           postgresql-72e2d21c1249b674496f97cd6009c0bda62f6b4d.zip
Combine win32 and unix latch implementations.
Previously, latches for Windows and Unix had been implemented in
different files. A later patch will introduce an expanded wait
infrastructure; keeping the implementations separate would introduce
too much duplication.
This basically just moves the functions, without much change. The
reason to keep this step separate is that it keeps git blame working
a little less badly, and makes review a tiny bit easier.
Discussion: 20160114143931.GG10941@awork2.anarazel.de
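
For context, the API that latch.c provides is meant to be used with the
standard coding convention its comments refer to: reset the latch at the top
of the loop, check for work, and wait at the bottom, so that a SetLatch()
arriving between the check and the wait is never lost. The sketch below is
not part of this commit; it is a minimal hypothetical caller (work_pending
and example_main_loop are invented names for illustration) built only on the
WaitLatch/ResetLatch interface implemented here.

/*
 * Hypothetical caller of the latch API -- illustration only, not part of
 * this commit.  The wait sits at the bottom of the loop, per the convention
 * described in latch.c's comments.
 */
#include "postgres.h"

#include <signal.h>

#include "storage/ipc.h"
#include "storage/latch.h"

static volatile sig_atomic_t work_pending = false; /* set from a signal handler */

static void
example_main_loop(volatile Latch *latch)
{
	for (;;)
	{
		int		rc;

		/* Clear the latch before checking for work, not after. */
		ResetLatch(latch);

		if (work_pending)
		{
			work_pending = false;
			/* ... do the actual work here ... */
		}

		/* Sleep until the latch is set, 10s elapse, or the postmaster dies. */
		rc = WaitLatch(latch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   10 * 1000L);

		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);
	}
}
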
Diffstat (limited to 'src/backend/storage/ipc/latch.c')
-rw-r--r--   src/backend/storage/ipc/latch.c   993
1 file changed, 993 insertions(+), 0 deletions(-)
diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c new file mode 100644 index 00000000000..d42c9c6fdf1 --- /dev/null +++ b/src/backend/storage/ipc/latch.c @@ -0,0 +1,993 @@ +/*------------------------------------------------------------------------- + * + * latch.c + * Routines for inter-process latches + * + * The Unix implementation uses the so-called self-pipe trick to overcome + * the race condition involved with select() and setting a global flag + * in the signal handler. When a latch is set and the current process + * is waiting for it, the signal handler wakes up the select() in + * WaitLatch by writing a byte to a pipe. A signal by itself doesn't + * interrupt select() on all platforms, and even on platforms where it + * does, a signal that arrives just before the select() call does not + * prevent the select() from entering sleep. An incoming byte on a pipe + * however reliably interrupts the sleep, and causes select() to return + * immediately even if the signal arrives before select() begins. + * + * (Actually, we prefer poll() over select() where available, but the + * same comments apply to it.) + * + * When SetLatch is called from the same process that owns the latch, + * SetLatch writes the byte directly to the pipe. If it's owned by another + * process, SIGUSR1 is sent and the signal handler in the waiting process + * writes the byte to the pipe on behalf of the signaling process. + * + * The Windows implementation uses Windows events that are inherited by + * all postmaster child processes. + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/ipc/latch.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <unistd.h> +#include <sys/time.h> +#include <sys/types.h> +#ifdef HAVE_POLL_H +#include <poll.h> +#endif +#ifdef HAVE_SYS_POLL_H +#include <sys/poll.h> +#endif +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif + +#include "miscadmin.h" +#include "portability/instr_time.h" +#include "postmaster/postmaster.h" +#include "storage/barrier.h" +#include "storage/latch.h" +#include "storage/pmsignal.h" +#include "storage/shmem.h" + +/* + * Select the fd readiness primitive to use. Normally the "most modern" + * primitive supported by the OS will be used, but for testing it can be + * useful to manually specify the used primitive. If desired, just add a + * define somewhere before this block. + */ +#if defined(LATCH_USE_POLL) || defined(LATCH_USE_SELECT) \ + || defined(LATCH_USE_WIN32) +/* don't overwrite manual choice */ +#elif defined(HAVE_POLL) +#define LATCH_USE_POLL +#elif HAVE_SYS_SELECT_H +#define LATCH_USE_SELECT +#elif WIN32 +#define LATCH_USE_WIN32 +#else +#error "no latch implementation available" +#endif + +#ifndef WIN32 +/* Are we currently in WaitLatch? The signal handler would like to know. */ +static volatile sig_atomic_t waiting = false; + +/* Read and write ends of the self-pipe */ +static int selfpipe_readfd = -1; +static int selfpipe_writefd = -1; + +/* Private function prototypes */ +static void sendSelfPipeByte(void); +static void drainSelfPipe(void); +#endif /* WIN32 */ + + +/* + * Initialize the process-local latch infrastructure. 
+ * + * This must be called once during startup of any process that can wait on + * latches, before it issues any InitLatch() or OwnLatch() calls. + */ +void +InitializeLatchSupport(void) +{ +#ifndef WIN32 + int pipefd[2]; + + Assert(selfpipe_readfd == -1); + + /* + * Set up the self-pipe that allows a signal handler to wake up the + * select() in WaitLatch. Make the write-end non-blocking, so that + * SetLatch won't block if the event has already been set many times + * filling the kernel buffer. Make the read-end non-blocking too, so that + * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK. + */ + if (pipe(pipefd) < 0) + elog(FATAL, "pipe() failed: %m"); + if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) < 0) + elog(FATAL, "fcntl() failed on read-end of self-pipe: %m"); + if (fcntl(pipefd[1], F_SETFL, O_NONBLOCK) < 0) + elog(FATAL, "fcntl() failed on write-end of self-pipe: %m"); + + selfpipe_readfd = pipefd[0]; + selfpipe_writefd = pipefd[1]; +#else + /* currently, nothing to do here for Windows */ +#endif +} + +/* + * Initialize a backend-local latch. + */ +void +InitLatch(volatile Latch *latch) +{ + latch->is_set = false; + latch->owner_pid = MyProcPid; + latch->is_shared = false; + +#ifndef WIN32 + /* Assert InitializeLatchSupport has been called in this process */ + Assert(selfpipe_readfd >= 0); +#else + latch->event = CreateEvent(NULL, TRUE, FALSE, NULL); + if (latch->event == NULL) + elog(ERROR, "CreateEvent failed: error code %lu", GetLastError()); +#endif /* WIN32 */ +} + +/* + * Initialize a shared latch that can be set from other processes. The latch + * is initially owned by no-one; use OwnLatch to associate it with the + * current process. + * + * InitSharedLatch needs to be called in postmaster before forking child + * processes, usually right after allocating the shared memory block + * containing the latch with ShmemInitStruct. (The Unix implementation + * doesn't actually require that, but the Windows one does.) Because of + * this restriction, we have no concurrency issues to worry about here. + */ +void +InitSharedLatch(volatile Latch *latch) +{ +#ifdef WIN32 + SECURITY_ATTRIBUTES sa; + + /* + * Set up security attributes to specify that the events are inherited. + */ + ZeroMemory(&sa, sizeof(sa)); + sa.nLength = sizeof(sa); + sa.bInheritHandle = TRUE; + + latch->event = CreateEvent(&sa, TRUE, FALSE, NULL); + if (latch->event == NULL) + elog(ERROR, "CreateEvent failed: error code %lu", GetLastError()); +#endif + + latch->is_set = false; + latch->owner_pid = 0; + latch->is_shared = true; +} + +/* + * Associate a shared latch with the current process, allowing it to + * wait on the latch. + * + * Although there is a sanity check for latch-already-owned, we don't do + * any sort of locking here, meaning that we could fail to detect the error + * if two processes try to own the same latch at about the same time. If + * there is any risk of that, caller must provide an interlock to prevent it. + * + * In any process that calls OwnLatch(), make sure that + * latch_sigusr1_handler() is called from the SIGUSR1 signal handler, + * as shared latches use SIGUSR1 for inter-process communication. 
+ */ +void +OwnLatch(volatile Latch *latch) +{ + /* Sanity checks */ + Assert(latch->is_shared); + +#ifndef WIN32 + /* Assert InitializeLatchSupport has been called in this process */ + Assert(selfpipe_readfd >= 0); +#endif + + if (latch->owner_pid != 0) + elog(ERROR, "latch already owned"); + + latch->owner_pid = MyProcPid; +} + +/* + * Disown a shared latch currently owned by the current process. + */ +void +DisownLatch(volatile Latch *latch) +{ + Assert(latch->is_shared); + Assert(latch->owner_pid == MyProcPid); + + latch->owner_pid = 0; +} + +/* + * Wait for a given latch to be set, or for postmaster death, or until timeout + * is exceeded. 'wakeEvents' is a bitmask that specifies which of those events + * to wait for. If the latch is already set (and WL_LATCH_SET is given), the + * function returns immediately. + * + * The "timeout" is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag + * is given. Although it is declared as "long", we don't actually support + * timeouts longer than INT_MAX milliseconds. Note that some extra overhead + * is incurred when WL_TIMEOUT is given, so avoid using a timeout if possible. + * + * The latch must be owned by the current process, ie. it must be a + * backend-local latch initialized with InitLatch, or a shared latch + * associated with the current process by calling OwnLatch. + * + * Returns bit mask indicating which condition(s) caused the wake-up. Note + * that if multiple wake-up conditions are true, there is no guarantee that + * we return all of them in one call, but we will return at least one. + */ +int +WaitLatch(volatile Latch *latch, int wakeEvents, long timeout) +{ + return WaitLatchOrSocket(latch, wakeEvents, PGINVALID_SOCKET, timeout); +} + +/* + * Like WaitLatch, but with an extra socket argument for WL_SOCKET_* + * conditions. + * + * When waiting on a socket, EOF and error conditions are reported by + * returning the socket as readable/writable or both, depending on + * WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE being specified. + */ +#ifndef LATCH_USE_WIN32 +int +WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, + long timeout) +{ + int result = 0; + int rc; + instr_time start_time, + cur_time; + long cur_timeout; + +#if defined(LATCH_USE_POLL) + struct pollfd pfds[3]; + int nfds; +#elif defined(LATCH_USE_SELECT) + struct timeval tv, + *tvp; + fd_set input_mask; + fd_set output_mask; + int hifd; +#endif + + Assert(wakeEvents != 0); /* must have at least one wake event */ + + /* waiting for socket readiness without a socket indicates a bug */ + if (sock == PGINVALID_SOCKET && + (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != 0) + elog(ERROR, "cannot wait on socket event without a socket"); + + if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid) + elog(ERROR, "cannot wait on a latch owned by another process"); + + /* + * Initialize timeout if requested. We must record the current time so + * that we can determine the remaining timeout if the poll() or select() + * is interrupted. (On some platforms, select() will update the contents + * of "tv" for us, but unfortunately we can't rely on that.) 
+ */ + if (wakeEvents & WL_TIMEOUT) + { + INSTR_TIME_SET_CURRENT(start_time); + Assert(timeout >= 0 && timeout <= INT_MAX); + cur_timeout = timeout; + +#ifdef LATCH_USE_SELECT + tv.tv_sec = cur_timeout / 1000L; + tv.tv_usec = (cur_timeout % 1000L) * 1000L; + tvp = &tv; +#endif + } + else + { + cur_timeout = -1; + +#ifdef LATCH_USE_SELECT + tvp = NULL; +#endif + } + + waiting = true; + do + { + /* + * Check if the latch is set already. If so, leave loop immediately, + * avoid blocking again. We don't attempt to report any other events + * that might also be satisfied. + * + * If someone sets the latch between this and the poll()/select() + * below, the setter will write a byte to the pipe (or signal us and + * the signal handler will do that), and the poll()/select() will + * return immediately. + * + * If there's a pending byte in the self pipe, we'll notice whenever + * blocking. Only clearing the pipe in that case avoids having to + * drain it every time WaitLatchOrSocket() is used. Should the + * pipe-buffer fill up we're still ok, because the pipe is in + * nonblocking mode. It's unlikely for that to happen, because the + * self pipe isn't filled unless we're blocking (waiting = true), or + * from inside a signal handler in latch_sigusr1_handler(). + * + * Note: we assume that the kernel calls involved in drainSelfPipe() + * and SetLatch() will provide adequate synchronization on machines + * with weak memory ordering, so that we cannot miss seeing is_set if + * the signal byte is already in the pipe when we drain it. + */ + if ((wakeEvents & WL_LATCH_SET) && latch->is_set) + { + result |= WL_LATCH_SET; + break; + } + + /* + * Must wait ... we use the polling interface determined at the top of + * this file to do so. + */ +#if defined(LATCH_USE_POLL) + nfds = 0; + + /* selfpipe is always in pfds[0] */ + pfds[0].fd = selfpipe_readfd; + pfds[0].events = POLLIN; + pfds[0].revents = 0; + nfds++; + + if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) + { + /* socket, if used, is always in pfds[1] */ + pfds[1].fd = sock; + pfds[1].events = 0; + if (wakeEvents & WL_SOCKET_READABLE) + pfds[1].events |= POLLIN; + if (wakeEvents & WL_SOCKET_WRITEABLE) + pfds[1].events |= POLLOUT; + pfds[1].revents = 0; + nfds++; + } + + if (wakeEvents & WL_POSTMASTER_DEATH) + { + /* postmaster fd, if used, is always in pfds[nfds - 1] */ + pfds[nfds].fd = postmaster_alive_fds[POSTMASTER_FD_WATCH]; + pfds[nfds].events = POLLIN; + pfds[nfds].revents = 0; + nfds++; + } + + /* Sleep */ + rc = poll(pfds, nfds, (int) cur_timeout); + + /* Check return code */ + if (rc < 0) + { + /* EINTR is okay, otherwise complain */ + if (errno != EINTR) + { + waiting = false; + ereport(ERROR, + (errcode_for_socket_access(), + errmsg("poll() failed: %m"))); + } + } + else if (rc == 0) + { + /* timeout exceeded */ + if (wakeEvents & WL_TIMEOUT) + result |= WL_TIMEOUT; + } + else + { + /* at least one event occurred, so check revents values */ + + if (pfds[0].revents & POLLIN) + { + /* There's data in the self-pipe, clear it. 
*/ + drainSelfPipe(); + } + + if ((wakeEvents & WL_SOCKET_READABLE) && + (pfds[1].revents & POLLIN)) + { + /* data available in socket, or EOF/error condition */ + result |= WL_SOCKET_READABLE; + } + if ((wakeEvents & WL_SOCKET_WRITEABLE) && + (pfds[1].revents & POLLOUT)) + { + /* socket is writable */ + result |= WL_SOCKET_WRITEABLE; + } + if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) && + (pfds[1].revents & (POLLHUP | POLLERR | POLLNVAL))) + { + /* EOF/error condition */ + if (wakeEvents & WL_SOCKET_READABLE) + result |= WL_SOCKET_READABLE; + if (wakeEvents & WL_SOCKET_WRITEABLE) + result |= WL_SOCKET_WRITEABLE; + } + + /* + * We expect a POLLHUP when the remote end is closed, but because + * we don't expect the pipe to become readable or to have any + * errors either, treat those cases as postmaster death, too. + */ + if ((wakeEvents & WL_POSTMASTER_DEATH) && + (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL))) + { + /* + * According to the select(2) man page on Linux, select(2) may + * spuriously return and report a file descriptor as readable, + * when it's not; and presumably so can poll(2). It's not + * clear that the relevant cases would ever apply to the + * postmaster pipe, but since the consequences of falsely + * returning WL_POSTMASTER_DEATH could be pretty unpleasant, + * we take the trouble to positively verify EOF with + * PostmasterIsAlive(). + */ + if (!PostmasterIsAlive()) + result |= WL_POSTMASTER_DEATH; + } + } +#elif defined(LATCH_USE_SELECT) + + /* + * On at least older linux kernels select(), in violation of POSIX, + * doesn't reliably return a socket as writable if closed - but we + * rely on that. So far all the known cases of this problem are on + * platforms that also provide a poll() implementation without that + * bug. If we find one where that's not the case, we'll need to add a + * workaround. + */ + FD_ZERO(&input_mask); + FD_ZERO(&output_mask); + + FD_SET(selfpipe_readfd, &input_mask); + hifd = selfpipe_readfd; + + if (wakeEvents & WL_POSTMASTER_DEATH) + { + FD_SET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask); + if (postmaster_alive_fds[POSTMASTER_FD_WATCH] > hifd) + hifd = postmaster_alive_fds[POSTMASTER_FD_WATCH]; + } + + if (wakeEvents & WL_SOCKET_READABLE) + { + FD_SET(sock, &input_mask); + if (sock > hifd) + hifd = sock; + } + + if (wakeEvents & WL_SOCKET_WRITEABLE) + { + FD_SET(sock, &output_mask); + if (sock > hifd) + hifd = sock; + } + + /* Sleep */ + rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp); + + /* Check return code */ + if (rc < 0) + { + /* EINTR is okay, otherwise complain */ + if (errno != EINTR) + { + waiting = false; + ereport(ERROR, + (errcode_for_socket_access(), + errmsg("select() failed: %m"))); + } + } + else if (rc == 0) + { + /* timeout exceeded */ + if (wakeEvents & WL_TIMEOUT) + result |= WL_TIMEOUT; + } + else + { + /* at least one event occurred, so check masks */ + if (FD_ISSET(selfpipe_readfd, &input_mask)) + { + /* There's data in the self-pipe, clear it. 
*/ + drainSelfPipe(); + } + if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask)) + { + /* data available in socket, or EOF */ + result |= WL_SOCKET_READABLE; + } + if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask)) + { + /* socket is writable, or EOF */ + result |= WL_SOCKET_WRITEABLE; + } + if ((wakeEvents & WL_POSTMASTER_DEATH) && + FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], + &input_mask)) + { + /* + * According to the select(2) man page on Linux, select(2) may + * spuriously return and report a file descriptor as readable, + * when it's not; and presumably so can poll(2). It's not + * clear that the relevant cases would ever apply to the + * postmaster pipe, but since the consequences of falsely + * returning WL_POSTMASTER_DEATH could be pretty unpleasant, + * we take the trouble to positively verify EOF with + * PostmasterIsAlive(). + */ + if (!PostmasterIsAlive()) + result |= WL_POSTMASTER_DEATH; + } + } +#endif /* LATCH_USE_SELECT */ + + /* + * Check again whether latch is set, the arrival of a signal/self-byte + * might be what stopped our sleep. It's not required for correctness + * to signal the latch as being set (we'd just loop if there's no + * other event), but it seems good to report an arrived latch asap. + * This way we also don't have to compute the current timestamp again. + */ + if ((wakeEvents & WL_LATCH_SET) && latch->is_set) + result |= WL_LATCH_SET; + + /* If we're not done, update cur_timeout for next iteration */ + if (result == 0 && (wakeEvents & WL_TIMEOUT)) + { + INSTR_TIME_SET_CURRENT(cur_time); + INSTR_TIME_SUBTRACT(cur_time, start_time); + cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time); + if (cur_timeout <= 0) + { + /* Timeout has expired, no need to continue looping */ + result |= WL_TIMEOUT; + } +#ifdef LATCH_USE_SELECT + else + { + tv.tv_sec = cur_timeout / 1000L; + tv.tv_usec = (cur_timeout % 1000L) * 1000L; + } +#endif + } + } while (result == 0); + waiting = false; + + return result; +} +#else /* LATCH_USE_WIN32 */ +int +WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, + long timeout) +{ + DWORD rc; + instr_time start_time, + cur_time; + long cur_timeout; + HANDLE events[4]; + HANDLE latchevent; + HANDLE sockevent = WSA_INVALID_EVENT; + int numevents; + int result = 0; + int pmdeath_eventno = 0; + + Assert(wakeEvents != 0); /* must have at least one wake event */ + + /* waiting for socket readiness without a socket indicates a bug */ + if (sock == PGINVALID_SOCKET && + (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != 0) + elog(ERROR, "cannot wait on socket event without a socket"); + + if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid) + elog(ERROR, "cannot wait on a latch owned by another process"); + + /* + * Initialize timeout if requested. We must record the current time so + * that we can determine the remaining timeout if WaitForMultipleObjects + * is interrupted. + */ + if (wakeEvents & WL_TIMEOUT) + { + INSTR_TIME_SET_CURRENT(start_time); + Assert(timeout >= 0 && timeout <= INT_MAX); + cur_timeout = timeout; + } + else + cur_timeout = INFINITE; + + /* + * Construct an array of event handles for WaitforMultipleObjects(). + * + * Note: pgwin32_signal_event should be first to ensure that it will be + * reported when multiple events are set. We want to guarantee that + * pending signals are serviced. 
+ */ + latchevent = latch->event; + + events[0] = pgwin32_signal_event; + events[1] = latchevent; + numevents = 2; + if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) + { + /* Need an event object to represent events on the socket */ + int flags = FD_CLOSE; /* always check for errors/EOF */ + + if (wakeEvents & WL_SOCKET_READABLE) + flags |= FD_READ; + if (wakeEvents & WL_SOCKET_WRITEABLE) + flags |= FD_WRITE; + + sockevent = WSACreateEvent(); + if (sockevent == WSA_INVALID_EVENT) + elog(ERROR, "failed to create event for socket: error code %u", + WSAGetLastError()); + if (WSAEventSelect(sock, sockevent, flags) != 0) + elog(ERROR, "failed to set up event for socket: error code %u", + WSAGetLastError()); + + events[numevents++] = sockevent; + } + if (wakeEvents & WL_POSTMASTER_DEATH) + { + pmdeath_eventno = numevents; + events[numevents++] = PostmasterHandle; + } + + /* Ensure that signals are serviced even if latch is already set */ + pgwin32_dispatch_queued_signals(); + + do + { + /* + * The comment in the unix version above applies here as well. At + * least after mentally replacing self-pipe with windows event. + * There's no danger of overflowing, as "Setting an event that is + * already set has no effect.". + */ + if ((wakeEvents & WL_LATCH_SET) && latch->is_set) + { + result |= WL_LATCH_SET; + + /* + * Leave loop immediately, avoid blocking again. We don't attempt + * to report any other events that might also be satisfied. + */ + break; + } + + rc = WaitForMultipleObjects(numevents, events, FALSE, cur_timeout); + + if (rc == WAIT_FAILED) + elog(ERROR, "WaitForMultipleObjects() failed: error code %lu", + GetLastError()); + else if (rc == WAIT_TIMEOUT) + { + result |= WL_TIMEOUT; + } + else if (rc == WAIT_OBJECT_0) + { + /* Service newly-arrived signals */ + pgwin32_dispatch_queued_signals(); + } + else if (rc == WAIT_OBJECT_0 + 1) + { + /* + * Reset the event. We'll re-check the, potentially, set latch on + * next iteration of loop, but let's not waste the cycles to + * update cur_timeout below. + */ + if (!ResetEvent(latchevent)) + elog(ERROR, "ResetEvent failed: error code %lu", GetLastError()); + + continue; + } + else if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) && + rc == WAIT_OBJECT_0 + 2) /* socket is at event slot 2 */ + { + WSANETWORKEVENTS resEvents; + + ZeroMemory(&resEvents, sizeof(resEvents)); + if (WSAEnumNetworkEvents(sock, sockevent, &resEvents) != 0) + elog(ERROR, "failed to enumerate network events: error code %u", + WSAGetLastError()); + if ((wakeEvents & WL_SOCKET_READABLE) && + (resEvents.lNetworkEvents & FD_READ)) + { + result |= WL_SOCKET_READABLE; + } + if ((wakeEvents & WL_SOCKET_WRITEABLE) && + (resEvents.lNetworkEvents & FD_WRITE)) + { + result |= WL_SOCKET_WRITEABLE; + } + if (resEvents.lNetworkEvents & FD_CLOSE) + { + if (wakeEvents & WL_SOCKET_READABLE) + result |= WL_SOCKET_READABLE; + if (wakeEvents & WL_SOCKET_WRITEABLE) + result |= WL_SOCKET_WRITEABLE; + } + } + else if ((wakeEvents & WL_POSTMASTER_DEATH) && + rc == WAIT_OBJECT_0 + pmdeath_eventno) + { + /* + * Postmaster apparently died. Since the consequences of falsely + * returning WL_POSTMASTER_DEATH could be pretty unpleasant, we + * take the trouble to positively verify this with + * PostmasterIsAlive(), even though there is no known reason to + * think that the event could be falsely set on Windows. 
+ */ + if (!PostmasterIsAlive()) + result |= WL_POSTMASTER_DEATH; + } + else + elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc); + + /* If we're not done, update cur_timeout for next iteration */ + if (result == 0 && (wakeEvents & WL_TIMEOUT)) + { + INSTR_TIME_SET_CURRENT(cur_time); + INSTR_TIME_SUBTRACT(cur_time, start_time); + cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time); + if (cur_timeout <= 0) + { + /* Timeout has expired, no need to continue looping */ + result |= WL_TIMEOUT; + } + } + } while (result == 0); + + /* Clean up the event object we created for the socket */ + if (sockevent != WSA_INVALID_EVENT) + { + WSAEventSelect(sock, NULL, 0); + WSACloseEvent(sockevent); + } + + return result; +} +#endif /* LATCH_USE_WIN32 */ + +/* + * Sets a latch and wakes up anyone waiting on it. + * + * This is cheap if the latch is already set, otherwise not so much. + * + * NB: when calling this in a signal handler, be sure to save and restore + * errno around it. (That's standard practice in most signal handlers, of + * course, but we used to omit it in handlers that only set a flag.) + * + * NB: this function is called from critical sections and signal handlers so + * throwing an error is not a good idea. + */ +void +SetLatch(volatile Latch *latch) +{ +#ifndef WIN32 + pid_t owner_pid; +#else + HANDLE handle; +#endif + + /* + * The memory barrier has be to be placed here to ensure that any flag + * variables possibly changed by this process have been flushed to main + * memory, before we check/set is_set. + */ + pg_memory_barrier(); + + /* Quick exit if already set */ + if (latch->is_set) + return; + + latch->is_set = true; + +#ifndef WIN32 + + /* + * See if anyone's waiting for the latch. It can be the current process if + * we're in a signal handler. We use the self-pipe to wake up the select() + * in that case. If it's another process, send a signal. + * + * Fetch owner_pid only once, in case the latch is concurrently getting + * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't + * guaranteed to be true! In practice, the effective range of pid_t fits + * in a 32 bit integer, and so should be atomic. In the worst case, we + * might end up signaling the wrong process. Even then, you're very + * unlucky if a process with that bogus pid exists and belongs to + * Postgres; and PG database processes should handle excess SIGUSR1 + * interrupts without a problem anyhow. + * + * Another sort of race condition that's possible here is for a new + * process to own the latch immediately after we look, so we don't signal + * it. This is okay so long as all callers of ResetLatch/WaitLatch follow + * the standard coding convention of waiting at the bottom of their loops, + * not the top, so that they'll correctly process latch-setting events + * that happen before they enter the loop. + */ + owner_pid = latch->owner_pid; + if (owner_pid == 0) + return; + else if (owner_pid == MyProcPid) + { + if (waiting) + sendSelfPipeByte(); + } + else + kill(owner_pid, SIGUSR1); +#else + + /* + * See if anyone's waiting for the latch. It can be the current process if + * we're in a signal handler. + * + * Use a local variable here just in case somebody changes the event field + * concurrently (which really should not happen). + */ + handle = latch->event; + if (handle) + { + SetEvent(handle); + + /* + * Note that we silently ignore any errors. We might be in a signal + * handler or other critical path where it's not safe to call elog(). 
+ */ + } +#endif + +} + +/* + * Clear the latch. Calling WaitLatch after this will sleep, unless + * the latch is set again before the WaitLatch call. + */ +void +ResetLatch(volatile Latch *latch) +{ + /* Only the owner should reset the latch */ + Assert(latch->owner_pid == MyProcPid); + + latch->is_set = false; + + /* + * Ensure that the write to is_set gets flushed to main memory before we + * examine any flag variables. Otherwise a concurrent SetLatch might + * falsely conclude that it needn't signal us, even though we have missed + * seeing some flag updates that SetLatch was supposed to inform us of. + */ + pg_memory_barrier(); +} + +/* + * SetLatch uses SIGUSR1 to wake up the process waiting on the latch. + * + * Wake up WaitLatch, if we're waiting. (We might not be, since SIGUSR1 is + * overloaded for multiple purposes; or we might not have reached WaitLatch + * yet, in which case we don't need to fill the pipe either.) + * + * NB: when calling this in a signal handler, be sure to save and restore + * errno around it. + */ +#ifndef WIN32 +void +latch_sigusr1_handler(void) +{ + if (waiting) + sendSelfPipeByte(); +} +#endif /* !WIN32 */ + +/* Send one byte to the self-pipe, to wake up WaitLatch */ +#ifndef WIN32 +static void +sendSelfPipeByte(void) +{ + int rc; + char dummy = 0; + +retry: + rc = write(selfpipe_writefd, &dummy, 1); + if (rc < 0) + { + /* If interrupted by signal, just retry */ + if (errno == EINTR) + goto retry; + + /* + * If the pipe is full, we don't need to retry, the data that's there + * already is enough to wake up WaitLatch. + */ + if (errno == EAGAIN || errno == EWOULDBLOCK) + return; + + /* + * Oops, the write() failed for some other reason. We might be in a + * signal handler, so it's not safe to elog(). We have no choice but + * silently ignore the error. + */ + return; + } +} +#endif /* !WIN32 */ + +/* + * Read all available data from the self-pipe + * + * Note: this is only called when waiting = true. If it fails and doesn't + * return, it must reset that flag first (though ideally, this will never + * happen). + */ +#ifndef WIN32 +static void +drainSelfPipe(void) +{ + /* + * There shouldn't normally be more than one byte in the pipe, or maybe a + * few bytes if multiple processes run SetLatch at the same instant. + */ + char buf[16]; + int rc; + + for (;;) + { + rc = read(selfpipe_readfd, buf, sizeof(buf)); + if (rc < 0) + { + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; /* the pipe is empty */ + else if (errno == EINTR) + continue; /* retry */ + else + { + waiting = false; + elog(ERROR, "read() on self-pipe failed: %m"); + } + } + else if (rc == 0) + { + waiting = false; + elog(ERROR, "unexpected EOF on self-pipe"); + } + else if (rc < sizeof(buf)) + { + /* we successfully drained the pipe; no need to read() again */ + break; + } + /* else buffer wasn't big enough, so read again */ + } +} +#endif /* !WIN32 */ |
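
The self-pipe trick described in the file's header comment can also be seen
in isolation. The standalone program below is a rough sketch, not PostgreSQL
code (no Latch is involved, and names such as wakeup_handler are invented):
a SIGUSR1 handler writes one byte into a non-blocking pipe, and that byte
reliably wakes a poll() watching the read end even if the signal arrives just
before, or during, the poll() call -- which is also why the code above
retries on EINTR and drains the pipe with non-blocking reads.

/*
 * Standalone illustration of the self-pipe trick -- not PostgreSQL code.
 * write() is async-signal-safe, so the handler may poke the pipe; the
 * pending byte makes poll() return immediately even when the signal was
 * delivered before poll() went to sleep.
 */
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static int	selfpipe[2];

static void
wakeup_handler(int signo)
{
	int		save_errno = errno;	/* preserve errno, as SetLatch's comments advise */
	char	dummy = 0;

	(void) signo;
	/* A full pipe (EAGAIN) is fine: the bytes already there wake the waiter. */
	(void) write(selfpipe[1], &dummy, 1);
	errno = save_errno;
}

int
main(void)
{
	struct pollfd pfd;
	char	buf[16];

	if (pipe(selfpipe) < 0)
		return 1;
	fcntl(selfpipe[0], F_SETFL, O_NONBLOCK);
	fcntl(selfpipe[1], F_SETFL, O_NONBLOCK);

	signal(SIGUSR1, wakeup_handler);

	printf("pid %d waiting; send SIGUSR1 to wake it up\n", (int) getpid());

	pfd.fd = selfpipe[0];
	pfd.events = POLLIN;

	for (;;)
	{
		int		rc = poll(&pfd, 1, -1);

		if (rc < 0 && errno == EINTR)
			continue;			/* interrupted: the byte is queued, so retry */
		if (rc > 0 && (pfd.revents & POLLIN))
		{
			/* drain the pipe so a later wait would block again */
			while (read(selfpipe[0], buf, sizeof(buf)) > 0)
				;
			printf("woken up by a self-pipe byte\n");
		}
		break;
	}
	return 0;
}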