aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config/c-compiler.m422
-rwxr-xr-xconfigure66
-rw-r--r--configure.in6
-rw-r--r--src/Makefile.global.in3
-rw-r--r--src/include/port/pg_bitutils.h176
-rw-r--r--src/port/Makefile16
-rw-r--r--src/port/pg_bitutils.c378
-rw-r--r--src/port/pg_bitutils_hwpopcnt.c36
8 files changed, 327 insertions, 376 deletions
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 05fa82518f8..7c0d52b515f 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -381,22 +381,16 @@ fi])# PGAC_C_BUILTIN_OP_OVERFLOW
# PGAC_C_BUILTIN_POPCOUNT
# -------------------------
AC_DEFUN([PGAC_C_BUILTIN_POPCOUNT],
-[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcount])])dnl
-AC_CACHE_CHECK([for __builtin_popcount], [Ac_cachevar],
-[pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS -mpopcnt"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE(
-[static int x = __builtin_popcount(255);])],
-[Ac_cachevar=yes],
-[Ac_cachevar=no])
-CFLAGS="$pgac_save_CFLAGS"])
-if test x"$Ac_cachevar" = x"yes"; then
- CFLAGS_POPCNT="-mpopcnt"
+[AC_CACHE_CHECK([for __builtin_popcount], pgac_cv__builtin_popcount,
+[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static int x = __builtin_popcount(255);]
+)],
+[pgac_cv__builtin_popcount=yes],
+[pgac_cv__builtin_popcount=no])])
+if test x"$pgac_cv__builtin_popcount" = x"yes"; then
AC_DEFINE(HAVE__BUILTIN_POPCOUNT, 1,
[Define to 1 if your compiler understands __builtin_popcount.])
-fi
-undefine([Ac_cachevar])dnl
-])# PGAC_C_BUILTIN_POPCOUNT
+fi])# PGAC_C_BUILTIN_POPCOUNT
diff --git a/configure b/configure
index 73e9c235b69..2e3cc372a6e 100755
--- a/configure
+++ b/configure
@@ -651,7 +651,7 @@ CFLAGS_ARMV8_CRC32C
CFLAGS_SSE42
have_win32_dbghelp
LIBOBJS
-CFLAGS_POPCNT
+have__builtin_popcount
UUID_LIBS
LDAP_LIBS_BE
LDAP_LIBS_FE
@@ -733,6 +733,7 @@ CPP
BITCODE_CXXFLAGS
BITCODE_CFLAGS
CFLAGS_VECTOR
+CFLAGS_POPCNT
PERMIT_DECLARATION_AFTER_STATEMENT
LLVM_BINPATH
LLVM_CXXFLAGS
@@ -6581,6 +6582,48 @@ fi
fi
+# Optimization flags and options for bit-twiddling
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -mpopcnt, for CFLAGS_POPCNT" >&5
+$as_echo_n "checking whether ${CC} supports -mpopcnt, for CFLAGS_POPCNT... " >&6; }
+if ${pgac_cv_prog_CC_cflags__mpopcnt+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ pgac_save_CFLAGS=$CFLAGS
+pgac_save_CC=$CC
+CC=${CC}
+CFLAGS="${CFLAGS_POPCNT} -mpopcnt"
+ac_save_c_werror_flag=$ac_c_werror_flag
+ac_c_werror_flag=yes
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv_prog_CC_cflags__mpopcnt=yes
+else
+ pgac_cv_prog_CC_cflags__mpopcnt=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_c_werror_flag=$ac_save_c_werror_flag
+CFLAGS="$pgac_save_CFLAGS"
+CC="$pgac_save_CC"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__mpopcnt" >&5
+$as_echo "$pgac_cv_prog_CC_cflags__mpopcnt" >&6; }
+if test x"$pgac_cv_prog_CC_cflags__mpopcnt" = x"yes"; then
+ CFLAGS_POPCNT="${CFLAGS_POPCNT} -mpopcnt"
+fi
+
+
+
+
CFLAGS_VECTOR=$CFLAGS_VECTOR
@@ -14111,32 +14154,28 @@ $as_echo "#define HAVE__BUILTIN_CTZ 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
$as_echo_n "checking for __builtin_popcount... " >&6; }
-if ${pgac_cv_popcount+:} false; then :
+if ${pgac_cv__builtin_popcount+:} false; then :
$as_echo_n "(cached) " >&6
else
- pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS -mpopcnt"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
static int x = __builtin_popcount(255);
+
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- pgac_cv_popcount=yes
+ pgac_cv__builtin_popcount=yes
else
- pgac_cv_popcount=no
+ pgac_cv__builtin_popcount=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-CFLAGS="$pgac_save_CFLAGS"
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcount" >&5
-$as_echo "$pgac_cv_popcount" >&6; }
-if test x"$pgac_cv_popcount" = x"yes"; then
- CFLAGS_POPCNT="-mpopcnt"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5
+$as_echo "$pgac_cv__builtin_popcount" >&6; }
+if test x"$pgac_cv__builtin_popcount" = x"yes"; then
$as_echo "#define HAVE__BUILTIN_POPCOUNT 1" >>confdefs.h
fi
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5
$as_echo_n "checking for __builtin_unreachable... " >&6; }
if ${pgac_cv__builtin_unreachable+:} false; then :
@@ -14654,6 +14693,7 @@ $as_echo "#define LOCALE_T_IN_XLOCALE 1" >>confdefs.h
fi
+have__builtin_popcount=$pgac_cv__builtin_popcount
# MSVC doesn't cope well with defining restrict to __restrict, the
diff --git a/configure.in b/configure.in
index 9c4d5f0691e..e12d5b14f56 100644
--- a/configure.in
+++ b/configure.in
@@ -547,6 +547,10 @@ elif test "$PORTNAME" = "hpux"; then
PGAC_PROG_CXX_CFLAGS_OPT([+Olibmerrno])
fi
+# Optimization flags and options for bit-twiddling
+PGAC_PROG_CC_VAR_OPT(CFLAGS_POPCNT, [-mpopcnt])
+AC_SUBST(CFLAGS_POPCNT)
+
AC_SUBST(CFLAGS_VECTOR, $CFLAGS_VECTOR)
# Determine flags used to emit bitcode for JIT inlining. Need to test
@@ -1506,7 +1510,7 @@ AC_TYPE_LONG_LONG_INT
PGAC_TYPE_LOCALE_T
-AC_SUBST(CFLAGS_POPCNT)
+AC_SUBST(have__builtin_popcount, $pgac_cv__builtin_popcount)
# MSVC doesn't cope well with defining restrict to __restrict, the
# spelling it understands, because it conflicts with
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index aa16da3e0f2..0f4dd195845 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -517,6 +517,9 @@ WIN32_STACK_RLIMIT=4194304
# Set if we have a working win32 crashdump header
have_win32_dbghelp = @have_win32_dbghelp@
+# Set if __builtin_popcount() is supported by $(CC)
+have__builtin_popcount = @have__builtin_popcount@
+
# Pull in platform-specific magic
include $(top_builddir)/src/Makefile.port
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 148c5550573..70aae5128fa 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -10,17 +10,177 @@
*
*------------------------------------------------------------------------ -
*/
-
#ifndef PG_BITUTILS_H
#define PG_BITUTILS_H
-extern int (*pg_popcount32) (uint32 word);
-extern int (*pg_popcount64) (uint64 word);
-extern int (*pg_rightmost_one32) (uint32 word);
-extern int (*pg_rightmost_one64) (uint64 word);
-extern int (*pg_leftmost_one32) (uint32 word);
-extern int (*pg_leftmost_one64) (uint64 word);
-
+extern int (*pg_popcount32) (uint32 word);
+extern int (*pg_popcount64) (uint64 word);
extern uint64 pg_popcount(const char *buf, int bytes);
+/* in pg_bitutils_hwpopcnt.c */
+extern int pg_popcount32_hw(uint32 word);
+extern int pg_popcount64_hw(uint64 word);
+
+
+#ifndef HAVE__BUILTIN_CTZ
+/*
+ * Array marking the position of the right-most set bit for each value of
+ * 1-255. We count the right-most position as the 0th bit, and the
+ * left-most the 7th bit. The 0th index of the array must not be used.
+ */
+static const uint8 rightmost_one_pos[256] = {
+ 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+#endif /* !HAVE__BUILTIN_CTZ */
+
+/*
+ * pg_rightmost_one32
+ * Returns the number of trailing 0-bits in word, starting at the least
+ * significant bit position. word must not be 0.
+ */
+static inline int
+pg_rightmost_one32(uint32 word)
+{
+ int result = 0;
+
+ Assert(word != 0);
+
+#ifdef HAVE__BUILTIN_CTZ
+ result = __builtin_ctz(word);
+#else
+ while ((word & 255) == 0)
+ {
+ word >>= 8;
+ result += 8;
+ }
+ result += rightmost_one_pos[word & 255];
+#endif /* HAVE__BUILTIN_CTZ */
+
+ return result;
+}
+
+/*
+ * pg_rightmost_one64
+ * Returns the number of trailing 0-bits in word, starting at the least
+ * significant bit position. word must not be 0.
+ */
+static inline int
+pg_rightmost_one64(uint64 word)
+{
+ int result = 0;
+
+ Assert(word != 0);
+#ifdef HAVE__BUILTIN_CTZ
+#if defined(HAVE_LONG_INT_64)
+ return __builtin_ctzl(word);
+#elif defined(HAVE_LONG_LONG_INT_64)
+ return __builtin_ctzll(word);
+#else
+#error must have a working 64-bit integer datatype
+#endif
+#else /* HAVE__BUILTIN_CTZ */
+ while ((word & 255) == 0)
+ {
+ word >>= 8;
+ result += 8;
+ }
+ result += rightmost_one_pos[word & 255];
+#endif
+
+ return result;
+}
+
+#ifndef HAVE__BUILTIN_CLZ
+/*
+ * Array marking the position of the left-most set bit for each value of
+ * 1-255. We count the right-most position as the 0th bit, and the
+ * left-most the 7th bit. The 0th index of the array must not be used.
+ */
+static const uint8 leftmost_one_pos[256] = {
+ 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+};
+#endif /* !HAVE_BUILTIN_CLZ */
+
+/*
+ * pg_leftmost_one32
+ * Returns the 0-based position of the most significant set bit in word
+ * measured from the least significant bit. word must not be 0.
+ */
+static inline int
+pg_leftmost_one32(uint32 word)
+{
+#ifdef HAVE__BUILTIN_CLZ
+ Assert(word != 0);
+
+ return 31 - __builtin_clz(word);
+#else
+ int shift = 32 - 8;
+
+ Assert(word != 0);
+
+ while ((word >> shift) == 0)
+ shift -= 8;
+
+ return shift + leftmost_one_pos[(word >> shift) & 255];
+#endif /* HAVE__BUILTIN_CLZ */
+}
+
+/*
+ * pg_leftmost_one64
+ * Returns the 0-based position of the most significant set bit in word
+ * measured from the least significant bit. word must not be 0.
+ */
+static inline int
+pg_leftmost_one64(uint64 word)
+{
+#ifdef HAVE__BUILTIN_CLZ
+ Assert(word != 0);
+#if defined(HAVE_LONG_INT_64)
+ return 63 - __builtin_clzl(word);
+#elif defined(HAVE_LONG_LONG_INT_64)
+ return 63 - __builtin_clzll(word);
+#else
+#error must have a working 64-bit integer datatype
+#endif
+#else /* HAVE__BUILTIN_CLZ */
+ int shift = 64 - 8;
+
+ Assert(word != 0);
+ while ((word >> shift) == 0)
+ shift -= 8;
+
+ return shift + leftmost_one_pos[(word >> shift) & 255];
+#endif /* !HAVE__BUIILTIN_CLZ */
+}
+
#endif /* PG_BITUTILS_H */
diff --git a/src/port/Makefile b/src/port/Makefile
index 2da73260a13..a7f8fd2e668 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -41,6 +41,14 @@ OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \
tar.o thread.o
+# If the compiler supports a special flag for the POPCOUNT instruction and it
+# has __builtin_popcount, add pg_bitutils_hwpopcnt.o.
+ifneq ($(CFLAGS_POPCNT),)
+ifeq ($(have__builtin_popcount),yes)
+OBJS += pg_bitutils_hwpopcnt.o
+endif
+endif
+
# libpgport.a, libpgport_shlib.a, and libpgport_srv.a contain the same files
# foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c
OBJS_SHLIB = $(OBJS:%.o=%_shlib.o)
@@ -78,10 +86,10 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
-# pg_bitutils.c needs CFLAGS_POPCNT
-pg_bitutils.o: CFLAGS+=$(CFLAGS_POPCNT)
-pg_bitutils_shlib.o: CFLAGS+=$(CFLAGS_POPCNT)
-pg_bitutils_srv.o: CFLAGS+=$(CFLAGS_POPCNT)
+# all versions of pg_bitutils_hwpopcnt.c need CFLAGS_POPCNT
+pg_bitutils_hwpopcnt.o: CFLAGS+=$(CFLAGS_POPCNT)
+pg_bitutils_hwpopcnt_shlib.o: CFLAGS+=$(CFLAGS_POPCNT)
+pg_bitutils_hwpopcnt_srv.o: CFLAGS+=$(CFLAGS_POPCNT)
#
# Shared library versions of object files
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index aac394fe927..97bfcebe4e1 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -10,7 +10,6 @@
*
*-------------------------------------------------------------------------
*/
-
#include "postgres.h"
#ifdef HAVE__GET_CPUID
@@ -23,61 +22,21 @@
#include "port/pg_bitutils.h"
-#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID)
+#ifdef HAVE__BUILTIN_POPCOUNT
static bool pg_popcount_available(void);
-static int pg_popcount32_choose(uint32 word);
-static int pg_popcount32_sse42(uint32 word);
-static int pg_popcount64_choose(uint64 word);
-static int pg_popcount64_sse42(uint64 word);
-#endif
-static int pg_popcount32_slow(uint32 word);
-static int pg_popcount64_slow(uint64 word);
-
-#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ))
-static bool pg_lzcnt_available(void);
-#endif
-
-#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID)
-static int pg_rightmost_one32_choose(uint32 word);
-static int pg_rightmost_one32_abm(uint32 word);
-static int pg_rightmost_one64_choose(uint64 word);
-static int pg_rightmost_one64_abm(uint64 word);
-#endif
-static int pg_rightmost_one32_slow(uint32 word);
-static int pg_rightmost_one64_slow(uint64 word);
-
-#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID)
-static int pg_leftmost_one32_choose(uint32 word);
-static int pg_leftmost_one32_abm(uint32 word);
-static int pg_leftmost_one64_choose(uint64 word);
-static int pg_leftmost_one64_abm(uint64 word);
-#endif
-static int pg_leftmost_one32_slow(uint32 word);
-static int pg_leftmost_one64_slow(uint64 word);
-
-#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID)
-int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
-int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
-#else
-int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
-int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
-#endif
-
-#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID)
-int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_choose;
-int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_choose;
+static int pg_popcount32_choose(uint32 word);
+static int pg_popcount32_builtin(uint32 word);
+static int pg_popcount64_choose(uint64 word);
+static int pg_popcount64_builtin(uint64 word);
+int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
+int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
#else
-int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_slow;
-int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_slow;
-#endif
+static int pg_popcount32_slow(uint32 word);
+static int pg_popcount64_slow(uint64 word);
+int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
+int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
+#endif /* !HAVE_BUILTIN_POPCOUNT */
-#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID)
-int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_choose;
-int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_choose;
-#else
-int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_slow;
-int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_slow;
-#endif
/* Array marking the number of 1-bits for each value of 0-255. */
static const uint8 number_of_ones[256] = {
@@ -100,96 +59,51 @@ static const uint8 number_of_ones[256] = {
};
/*
- * Array marking the position of the right-most set bit for each value of
- * 1-255. We count the right-most position as the 0th bit, and the
- * left-most the 7th bit. The 0th index of the array must not be used.
+ * Return true iff we have CPUID support and it indicates that the POPCNT
+ * instruction is available.
*/
-static const uint8 rightmost_one_pos[256] = {
- 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-};
-
-/*
- * Array marking the position of the left-most set bit for each value of
- * 1-255. We count the right-most position as the 0th bit, and the
- * left-most the 7th bit. The 0th index of the array must not be used.
- */
-static const uint8 leftmost_one_pos[256] = {
- 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
-};
-
-#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT)
-
static bool
pg_popcount_available(void)
{
- unsigned int exx[4] = { 0, 0, 0, 0 };
+#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
+ unsigned int exx[4] = {0, 0, 0, 0};
#if defined(HAVE__GET_CPUID)
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUID)
__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
#endif
return (exx[2] & (1 << 23)) != 0; /* POPCNT */
-}
-#endif
+#else /* HAVE__GET_CPUID || HAVE__CPUID */
-#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT)
+ return false;
+#endif
+}
+#ifdef HAVE__BUILTIN_POPCOUNT
/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
+ * This gets called on the first call to pg_popcount32. It replaces the
+ * function pointer so that subsequent calls are routed directly to the chosen
+ * implementation.
*/
static int
pg_popcount32_choose(uint32 word)
{
if (pg_popcount_available())
- pg_popcount32 = pg_popcount32_sse42;
+ pg_popcount32 = pg_popcount32_hw;
else
- pg_popcount32 = pg_popcount32_slow;
+ pg_popcount32 = pg_popcount32_builtin;
return pg_popcount32(word);
}
static int
-pg_popcount32_sse42(uint32 word)
+pg_popcount32_builtin(uint32 word)
{
return __builtin_popcount(word);
}
-#endif
-
+#else /* HAVE__BUILTIN_POPCOUNT */
/*
* pg_popcount32_slow
* Return the number of 1 bits set in word
@@ -197,7 +111,7 @@ pg_popcount32_sse42(uint32 word)
static int
pg_popcount32_slow(uint32 word)
{
- int result = 0;
+ int result = 0;
while (word != 0)
{
@@ -207,6 +121,7 @@ pg_popcount32_slow(uint32 word)
return result;
}
+#endif
/*
* pg_popcount
@@ -215,13 +130,13 @@ pg_popcount32_slow(uint32 word)
uint64
pg_popcount(const char *buf, int bytes)
{
- uint64 popcnt = 0;
+ uint64 popcnt = 0;
#if SIZEOF_VOID_P >= 8
/* Process in 64-bit chunks if the buffer is aligned. */
if (buf == (char *) TYPEALIGN(8, buf))
{
- uint64 *words = (uint64 *) buf;
+ uint64 *words = (uint64 *) buf;
while (bytes >= 8)
{
@@ -235,7 +150,7 @@ pg_popcount(const char *buf, int bytes)
/* Process in 32-bit chunks if the buffer is aligned. */
if (buf == (char *) TYPEALIGN(4, buf))
{
- uint32 *words = (uint32 *) buf;
+ uint32 *words = (uint32 *) buf;
while (bytes >= 4)
{
@@ -254,38 +169,36 @@ pg_popcount(const char *buf, int bytes)
return popcnt;
}
-#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT)
-
+#ifdef HAVE__BUILTIN_POPCOUNT
/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
+ * This gets called on the first call to pg_popcount64. It replaces the
+ * function pointer so that subsequent calls are routed directly to the chosen
+ * implementation.
*/
static int
pg_popcount64_choose(uint64 word)
{
if (pg_popcount_available())
- pg_popcount64 = pg_popcount64_sse42;
+ pg_popcount64 = pg_popcount64_hw;
else
- pg_popcount64 = pg_popcount64_slow;
+ pg_popcount64 = pg_popcount64_builtin;
return pg_popcount64(word);
}
static int
-pg_popcount64_sse42(uint64 word)
+pg_popcount64_builtin(uint64 word)
{
#if defined(HAVE_LONG_INT_64)
return __builtin_popcountl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
return __builtin_popcountll(word);
#else
- /* shouldn't happen */
#error must have a working 64-bit integer datatype
#endif
}
-#endif
-
+#else /* HAVE__BUILTIN_POPCOUNT */
/*
* pg_popcount64_slow
* Return the number of 1 bits set in word
@@ -293,7 +206,7 @@ pg_popcount64_sse42(uint64 word)
static int
pg_popcount64_slow(uint64 word)
{
- int result = 0;
+ int result = 0;
while (word != 0)
{
@@ -303,211 +216,4 @@ pg_popcount64_slow(uint64 word)
return result;
}
-
-#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ))
-
-static bool
-pg_lzcnt_available(void)
-{
-
- unsigned int exx[4] = { 0, 0, 0, 0 };
-
-#if defined(HAVE__GET_CPUID)
- __get_cpuid(0x80000001, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
- __cpuid(exx, 0x80000001);
-#else
-#error cpuid instruction not available
-#endif
-
- return (exx[2] & (1 << 5)) != 0; /* LZCNT */
-}
-#endif
-
-#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ)
-/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
- */
-static int
-pg_rightmost_one32_choose(uint32 word)
-{
- if (pg_lzcnt_available())
- pg_rightmost_one32 = pg_rightmost_one32_abm;
- else
- pg_rightmost_one32 = pg_rightmost_one32_slow;
-
- return pg_rightmost_one32(word);
-}
-
-static int
-pg_rightmost_one32_abm(uint32 word)
-{
- return __builtin_ctz(word);
-}
-
#endif
-
-/*
- * pg_rightmost_one32_slow
- * Returns the number of trailing 0-bits in word, starting at the least
- * significant bit position. word must not be 0.
- */
-static int
-pg_rightmost_one32_slow(uint32 word)
-{
- int result = 0;
-
- Assert(word != 0);
-
- while ((word & 255) == 0)
- {
- word >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[word & 255];
-
- return result;
-}
-
-#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ)
-/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
- */
-static int
-pg_rightmost_one64_choose(uint64 word)
-{
- if (pg_lzcnt_available())
- pg_rightmost_one64 = pg_rightmost_one64_abm;
- else
- pg_rightmost_one64 = pg_rightmost_one64_slow;
-
- return pg_rightmost_one64(word);
-}
-
-static int
-pg_rightmost_one64_abm(uint64 word)
-{
-#if defined(HAVE_LONG_INT_64)
- return __builtin_ctzl(word);
-#elif defined(HAVE_LONG_LONG_INT_64)
- return __builtin_ctzll(word);
-#else
- /* shouldn't happen */
-#error must have a working 64-bit integer datatype
-#endif
-}
-#endif
-
-/*
- * pg_rightmost_one64_slow
- * Returns the number of trailing 0-bits in word, starting at the least
- * significant bit position. word must not be 0.
- */
-static int
-pg_rightmost_one64_slow(uint64 word)
-{
- int result = 0;
-
- Assert(word != 0);
-
- while ((word & 255) == 0)
- {
- word >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[word & 255];
-
- return result;
-}
-
-#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ)
-/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
- */
-static int
-pg_leftmost_one32_choose(uint32 word)
-{
- if (pg_lzcnt_available())
- pg_leftmost_one32 = pg_leftmost_one32_abm;
- else
- pg_leftmost_one32 = pg_leftmost_one32_slow;
-
- return pg_leftmost_one32(word);
-}
-
-static int
-pg_leftmost_one32_abm(uint32 word)
-{
- return 31 - __builtin_clz(word);
-}
-#endif
-
-/*
- * pg_leftmost_one32_slow
- * Returns the 0-based position of the most significant set bit in word
- * measured from the least significant bit. word must not be 0.
- */
-static int
-pg_leftmost_one32_slow(uint32 word)
-{
- int shift = 32 - 8;
-
- Assert(word != 0);
-
- while ((word >> shift) == 0)
- shift -= 8;
-
- return shift + leftmost_one_pos[(word >> shift) & 255];
-}
-
-#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ)
-/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
- */
-static int
-pg_leftmost_one64_choose(uint64 word)
-{
- if (pg_lzcnt_available())
- pg_leftmost_one64 = pg_leftmost_one64_abm;
- else
- pg_leftmost_one64 = pg_leftmost_one64_slow;
-
- return pg_leftmost_one64(word);
-}
-
-static int
-pg_leftmost_one64_abm(uint64 word)
-{
-#if defined(HAVE_LONG_INT_64)
- return 63 - __builtin_clzl(word);
-#elif defined(HAVE_LONG_LONG_INT_64)
- return 63 - __builtin_clzll(word);
-#else
- /* shouldn't happen */
-#error must have a working 64-bit integer datatype
-#endif
-
-}
-#endif
-
-/*
- * pg_leftmost_one64_slow
- * Returns the 0-based position of the most significant set bit in word
- * measured from the least significant bit. word must not be 0.
- */
-static int
-pg_leftmost_one64_slow(uint64 word)
-{
- int shift = 64 - 8;
-
- Assert(word != 0);
-
- while ((word >> shift) == 0)
- shift -= 8;
-
- return shift + leftmost_one_pos[(word >> shift) & 255];
-}
diff --git a/src/port/pg_bitutils_hwpopcnt.c b/src/port/pg_bitutils_hwpopcnt.c
new file mode 100644
index 00000000000..516efd586dd
--- /dev/null
+++ b/src/port/pg_bitutils_hwpopcnt.c
@@ -0,0 +1,36 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_bitutils_hwpopcnt.c
+ * CPU-optimized implementation of pg_popcount variants
+ *
+ * This file must be compiled with a compiler-specific flag to enable the
+ * POPCNT instruction.
+ *
+ * Portions Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/port/pg_bitutils_hwpopcnt.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "port/pg_bitutils.h"
+
+int
+pg_popcount32_hw(uint32 word)
+{
+ return __builtin_popcount(word);
+}
+
+int
+pg_popcount64_hw(uint64 word)
+{
+#if defined(HAVE_LONG_INT_64)
+ return __builtin_popcountl(word);
+#elif defined(HAVE_LONG_LONG_INT_64)
+ return __builtin_popcountll(word);
+#else
+#error must have a working 64-bit integer datatype
+#endif
+}