diff options
-rw-r--r-- | config/c-compiler.m4 | 22 | ||||
-rwxr-xr-x | configure | 66 | ||||
-rw-r--r-- | configure.in | 6 | ||||
-rw-r--r-- | src/Makefile.global.in | 3 | ||||
-rw-r--r-- | src/include/port/pg_bitutils.h | 176 | ||||
-rw-r--r-- | src/port/Makefile | 16 | ||||
-rw-r--r-- | src/port/pg_bitutils.c | 378 | ||||
-rw-r--r-- | src/port/pg_bitutils_hwpopcnt.c | 36 |
8 files changed, 327 insertions, 376 deletions
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 05fa82518f8..7c0d52b515f 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -381,22 +381,16 @@ fi])# PGAC_C_BUILTIN_OP_OVERFLOW # PGAC_C_BUILTIN_POPCOUNT # ------------------------- AC_DEFUN([PGAC_C_BUILTIN_POPCOUNT], -[define([Ac_cachevar], [AS_TR_SH([pgac_cv_popcount])])dnl -AC_CACHE_CHECK([for __builtin_popcount], [Ac_cachevar], -[pgac_save_CFLAGS=$CFLAGS -CFLAGS="$pgac_save_CFLAGS -mpopcnt" -AC_COMPILE_IFELSE([AC_LANG_SOURCE( -[static int x = __builtin_popcount(255);])], -[Ac_cachevar=yes], -[Ac_cachevar=no]) -CFLAGS="$pgac_save_CFLAGS"]) -if test x"$Ac_cachevar" = x"yes"; then - CFLAGS_POPCNT="-mpopcnt" +[AC_CACHE_CHECK([for __builtin_popcount], pgac_cv__builtin_popcount, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_popcount(255);] +)], +[pgac_cv__builtin_popcount=yes], +[pgac_cv__builtin_popcount=no])]) +if test x"$pgac_cv__builtin_popcount" = x"yes"; then AC_DEFINE(HAVE__BUILTIN_POPCOUNT, 1, [Define to 1 if your compiler understands __builtin_popcount.]) -fi -undefine([Ac_cachevar])dnl -])# PGAC_C_BUILTIN_POPCOUNT +fi])# PGAC_C_BUILTIN_POPCOUNT diff --git a/configure b/configure index 73e9c235b69..2e3cc372a6e 100755 --- a/configure +++ b/configure @@ -651,7 +651,7 @@ CFLAGS_ARMV8_CRC32C CFLAGS_SSE42 have_win32_dbghelp LIBOBJS -CFLAGS_POPCNT +have__builtin_popcount UUID_LIBS LDAP_LIBS_BE LDAP_LIBS_FE @@ -733,6 +733,7 @@ CPP BITCODE_CXXFLAGS BITCODE_CFLAGS CFLAGS_VECTOR +CFLAGS_POPCNT PERMIT_DECLARATION_AFTER_STATEMENT LLVM_BINPATH LLVM_CXXFLAGS @@ -6581,6 +6582,48 @@ fi fi +# Optimization flags and options for bit-twiddling +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -mpopcnt, for CFLAGS_POPCNT" >&5 +$as_echo_n "checking whether ${CC} supports -mpopcnt, for CFLAGS_POPCNT... " >&6; } +if ${pgac_cv_prog_CC_cflags__mpopcnt+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +pgac_save_CC=$CC +CC=${CC} +CFLAGS="${CFLAGS_POPCNT} -mpopcnt" +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv_prog_CC_cflags__mpopcnt=yes +else + pgac_cv_prog_CC_cflags__mpopcnt=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS" +CC="$pgac_save_CC" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__mpopcnt" >&5 +$as_echo "$pgac_cv_prog_CC_cflags__mpopcnt" >&6; } +if test x"$pgac_cv_prog_CC_cflags__mpopcnt" = x"yes"; then + CFLAGS_POPCNT="${CFLAGS_POPCNT} -mpopcnt" +fi + + + + CFLAGS_VECTOR=$CFLAGS_VECTOR @@ -14111,32 +14154,28 @@ $as_echo "#define HAVE__BUILTIN_CTZ 1" >>confdefs.h fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5 $as_echo_n "checking for __builtin_popcount... " >&6; } -if ${pgac_cv_popcount+:} false; then : +if ${pgac_cv__builtin_popcount+:} false; then : $as_echo_n "(cached) " >&6 else - pgac_save_CFLAGS=$CFLAGS -CFLAGS="$pgac_save_CFLAGS -mpopcnt" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ static int x = __builtin_popcount(255); + _ACEOF if ac_fn_c_try_compile "$LINENO"; then : - pgac_cv_popcount=yes + pgac_cv__builtin_popcount=yes else - pgac_cv_popcount=no + pgac_cv__builtin_popcount=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -CFLAGS="$pgac_save_CFLAGS" fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_popcount" >&5 -$as_echo "$pgac_cv_popcount" >&6; } -if test x"$pgac_cv_popcount" = x"yes"; then - CFLAGS_POPCNT="-mpopcnt" +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5 +$as_echo "$pgac_cv__builtin_popcount" >&6; } +if test x"$pgac_cv__builtin_popcount" = x"yes"; then $as_echo "#define HAVE__BUILTIN_POPCOUNT 1" >>confdefs.h fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5 $as_echo_n "checking for __builtin_unreachable... " >&6; } if ${pgac_cv__builtin_unreachable+:} false; then : @@ -14654,6 +14693,7 @@ $as_echo "#define LOCALE_T_IN_XLOCALE 1" >>confdefs.h fi +have__builtin_popcount=$pgac_cv__builtin_popcount # MSVC doesn't cope well with defining restrict to __restrict, the diff --git a/configure.in b/configure.in index 9c4d5f0691e..e12d5b14f56 100644 --- a/configure.in +++ b/configure.in @@ -547,6 +547,10 @@ elif test "$PORTNAME" = "hpux"; then PGAC_PROG_CXX_CFLAGS_OPT([+Olibmerrno]) fi +# Optimization flags and options for bit-twiddling +PGAC_PROG_CC_VAR_OPT(CFLAGS_POPCNT, [-mpopcnt]) +AC_SUBST(CFLAGS_POPCNT) + AC_SUBST(CFLAGS_VECTOR, $CFLAGS_VECTOR) # Determine flags used to emit bitcode for JIT inlining. Need to test @@ -1506,7 +1510,7 @@ AC_TYPE_LONG_LONG_INT PGAC_TYPE_LOCALE_T -AC_SUBST(CFLAGS_POPCNT) +AC_SUBST(have__builtin_popcount, $pgac_cv__builtin_popcount) # MSVC doesn't cope well with defining restrict to __restrict, the # spelling it understands, because it conflicts with diff --git a/src/Makefile.global.in b/src/Makefile.global.in index aa16da3e0f2..0f4dd195845 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -517,6 +517,9 @@ WIN32_STACK_RLIMIT=4194304 # Set if we have a working win32 crashdump header have_win32_dbghelp = @have_win32_dbghelp@ +# Set if __builtin_popcount() is supported by $(CC) +have__builtin_popcount = @have__builtin_popcount@ + # Pull in platform-specific magic include $(top_builddir)/src/Makefile.port diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 148c5550573..70aae5128fa 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -10,17 +10,177 @@ * *------------------------------------------------------------------------ - */ - #ifndef PG_BITUTILS_H #define PG_BITUTILS_H -extern int (*pg_popcount32) (uint32 word); -extern int (*pg_popcount64) (uint64 word); -extern int (*pg_rightmost_one32) (uint32 word); -extern int (*pg_rightmost_one64) (uint64 word); -extern int (*pg_leftmost_one32) (uint32 word); -extern int (*pg_leftmost_one64) (uint64 word); - +extern int (*pg_popcount32) (uint32 word); +extern int (*pg_popcount64) (uint64 word); extern uint64 pg_popcount(const char *buf, int bytes); +/* in pg_bitutils_hwpopcnt.c */ +extern int pg_popcount32_hw(uint32 word); +extern int pg_popcount64_hw(uint64 word); + + +#ifndef HAVE__BUILTIN_CTZ +/* + * Array marking the position of the right-most set bit for each value of + * 1-255. We count the right-most position as the 0th bit, and the + * left-most the 7th bit. The 0th index of the array must not be used. + */ +static const uint8 rightmost_one_pos[256] = { + 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; +#endif /* !HAVE__BUILTIN_CTZ */ + +/* + * pg_rightmost_one32 + * Returns the number of trailing 0-bits in word, starting at the least + * significant bit position. word must not be 0. + */ +static inline int +pg_rightmost_one32(uint32 word) +{ + int result = 0; + + Assert(word != 0); + +#ifdef HAVE__BUILTIN_CTZ + result = __builtin_ctz(word); +#else + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += rightmost_one_pos[word & 255]; +#endif /* HAVE__BUILTIN_CTZ */ + + return result; +} + +/* + * pg_rightmost_one64 + * Returns the number of trailing 0-bits in word, starting at the least + * significant bit position. word must not be 0. + */ +static inline int +pg_rightmost_one64(uint64 word) +{ + int result = 0; + + Assert(word != 0); +#ifdef HAVE__BUILTIN_CTZ +#if defined(HAVE_LONG_INT_64) + return __builtin_ctzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return __builtin_ctzll(word); +#else +#error must have a working 64-bit integer datatype +#endif +#else /* HAVE__BUILTIN_CTZ */ + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += rightmost_one_pos[word & 255]; +#endif + + return result; +} + +#ifndef HAVE__BUILTIN_CLZ +/* + * Array marking the position of the left-most set bit for each value of + * 1-255. We count the right-most position as the 0th bit, and the + * left-most the 7th bit. The 0th index of the array must not be used. + */ +static const uint8 leftmost_one_pos[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; +#endif /* !HAVE_BUILTIN_CLZ */ + +/* + * pg_leftmost_one32 + * Returns the 0-based position of the most significant set bit in word + * measured from the least significant bit. word must not be 0. + */ +static inline int +pg_leftmost_one32(uint32 word) +{ +#ifdef HAVE__BUILTIN_CLZ + Assert(word != 0); + + return 31 - __builtin_clz(word); +#else + int shift = 32 - 8; + + Assert(word != 0); + + while ((word >> shift) == 0) + shift -= 8; + + return shift + leftmost_one_pos[(word >> shift) & 255]; +#endif /* HAVE__BUILTIN_CLZ */ +} + +/* + * pg_leftmost_one64 + * Returns the 0-based position of the most significant set bit in word + * measured from the least significant bit. word must not be 0. + */ +static inline int +pg_leftmost_one64(uint64 word) +{ +#ifdef HAVE__BUILTIN_CLZ + Assert(word != 0); +#if defined(HAVE_LONG_INT_64) + return 63 - __builtin_clzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return 63 - __builtin_clzll(word); +#else +#error must have a working 64-bit integer datatype +#endif +#else /* HAVE__BUILTIN_CLZ */ + int shift = 64 - 8; + + Assert(word != 0); + while ((word >> shift) == 0) + shift -= 8; + + return shift + leftmost_one_pos[(word >> shift) & 255]; +#endif /* !HAVE__BUIILTIN_CLZ */ +} + #endif /* PG_BITUTILS_H */ diff --git a/src/port/Makefile b/src/port/Makefile index 2da73260a13..a7f8fd2e668 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -41,6 +41,14 @@ OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \ qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \ tar.o thread.o +# If the compiler supports a special flag for the POPCOUNT instruction and it +# has __builtin_popcount, add pg_bitutils_hwpopcnt.o. +ifneq ($(CFLAGS_POPCNT),) +ifeq ($(have__builtin_popcount),yes) +OBJS += pg_bitutils_hwpopcnt.o +endif +endif + # libpgport.a, libpgport_shlib.a, and libpgport_srv.a contain the same files # foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c OBJS_SHLIB = $(OBJS:%.o=%_shlib.o) @@ -78,10 +86,10 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) -# pg_bitutils.c needs CFLAGS_POPCNT -pg_bitutils.o: CFLAGS+=$(CFLAGS_POPCNT) -pg_bitutils_shlib.o: CFLAGS+=$(CFLAGS_POPCNT) -pg_bitutils_srv.o: CFLAGS+=$(CFLAGS_POPCNT) +# all versions of pg_bitutils_hwpopcnt.c need CFLAGS_POPCNT +pg_bitutils_hwpopcnt.o: CFLAGS+=$(CFLAGS_POPCNT) +pg_bitutils_hwpopcnt_shlib.o: CFLAGS+=$(CFLAGS_POPCNT) +pg_bitutils_hwpopcnt_srv.o: CFLAGS+=$(CFLAGS_POPCNT) # # Shared library versions of object files diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index aac394fe927..97bfcebe4e1 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -10,7 +10,6 @@ * *------------------------------------------------------------------------- */ - #include "postgres.h" #ifdef HAVE__GET_CPUID @@ -23,61 +22,21 @@ #include "port/pg_bitutils.h" -#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) +#ifdef HAVE__BUILTIN_POPCOUNT static bool pg_popcount_available(void); -static int pg_popcount32_choose(uint32 word); -static int pg_popcount32_sse42(uint32 word); -static int pg_popcount64_choose(uint64 word); -static int pg_popcount64_sse42(uint64 word); -#endif -static int pg_popcount32_slow(uint32 word); -static int pg_popcount64_slow(uint64 word); - -#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ)) -static bool pg_lzcnt_available(void); -#endif - -#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) -static int pg_rightmost_one32_choose(uint32 word); -static int pg_rightmost_one32_abm(uint32 word); -static int pg_rightmost_one64_choose(uint64 word); -static int pg_rightmost_one64_abm(uint64 word); -#endif -static int pg_rightmost_one32_slow(uint32 word); -static int pg_rightmost_one64_slow(uint64 word); - -#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) -static int pg_leftmost_one32_choose(uint32 word); -static int pg_leftmost_one32_abm(uint32 word); -static int pg_leftmost_one64_choose(uint64 word); -static int pg_leftmost_one64_abm(uint64 word); -#endif -static int pg_leftmost_one32_slow(uint32 word); -static int pg_leftmost_one64_slow(uint64 word); - -#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) -int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; -int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; -#else -int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; -int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; -#endif - -#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) -int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_choose; -int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_choose; +static int pg_popcount32_choose(uint32 word); +static int pg_popcount32_builtin(uint32 word); +static int pg_popcount64_choose(uint64 word); +static int pg_popcount64_builtin(uint64 word); +int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; +int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; #else -int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_slow; -int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_slow; -#endif +static int pg_popcount32_slow(uint32 word); +static int pg_popcount64_slow(uint64 word); +int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; +int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; +#endif /* !HAVE_BUILTIN_POPCOUNT */ -#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) -int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_choose; -int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_choose; -#else -int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_slow; -int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_slow; -#endif /* Array marking the number of 1-bits for each value of 0-255. */ static const uint8 number_of_ones[256] = { @@ -100,96 +59,51 @@ static const uint8 number_of_ones[256] = { }; /* - * Array marking the position of the right-most set bit for each value of - * 1-255. We count the right-most position as the 0th bit, and the - * left-most the 7th bit. The 0th index of the array must not be used. + * Return true iff we have CPUID support and it indicates that the POPCNT + * instruction is available. */ -static const uint8 rightmost_one_pos[256] = { - 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -}; - -/* - * Array marking the position of the left-most set bit for each value of - * 1-255. We count the right-most position as the 0th bit, and the - * left-most the 7th bit. The 0th index of the array must not be used. - */ -static const uint8 leftmost_one_pos[256] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -}; - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) - static bool pg_popcount_available(void) { - unsigned int exx[4] = { 0, 0, 0, 0 }; +#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) + unsigned int exx[4] = {0, 0, 0, 0}; #if defined(HAVE__GET_CPUID) __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUID) __cpuid(exx, 1); -#else -#error cpuid instruction not available #endif return (exx[2] & (1 << 23)) != 0; /* POPCNT */ -} -#endif +#else /* HAVE__GET_CPUID || HAVE__CPUID */ -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) + return false; +#endif +} +#ifdef HAVE__BUILTIN_POPCOUNT /* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. + * This gets called on the first call to pg_popcount32. It replaces the + * function pointer so that subsequent calls are routed directly to the chosen + * implementation. */ static int pg_popcount32_choose(uint32 word) { if (pg_popcount_available()) - pg_popcount32 = pg_popcount32_sse42; + pg_popcount32 = pg_popcount32_hw; else - pg_popcount32 = pg_popcount32_slow; + pg_popcount32 = pg_popcount32_builtin; return pg_popcount32(word); } static int -pg_popcount32_sse42(uint32 word) +pg_popcount32_builtin(uint32 word) { return __builtin_popcount(word); } -#endif - +#else /* HAVE__BUILTIN_POPCOUNT */ /* * pg_popcount32_slow * Return the number of 1 bits set in word @@ -197,7 +111,7 @@ pg_popcount32_sse42(uint32 word) static int pg_popcount32_slow(uint32 word) { - int result = 0; + int result = 0; while (word != 0) { @@ -207,6 +121,7 @@ pg_popcount32_slow(uint32 word) return result; } +#endif /* * pg_popcount @@ -215,13 +130,13 @@ pg_popcount32_slow(uint32 word) uint64 pg_popcount(const char *buf, int bytes) { - uint64 popcnt = 0; + uint64 popcnt = 0; #if SIZEOF_VOID_P >= 8 /* Process in 64-bit chunks if the buffer is aligned. */ if (buf == (char *) TYPEALIGN(8, buf)) { - uint64 *words = (uint64 *) buf; + uint64 *words = (uint64 *) buf; while (bytes >= 8) { @@ -235,7 +150,7 @@ pg_popcount(const char *buf, int bytes) /* Process in 32-bit chunks if the buffer is aligned. */ if (buf == (char *) TYPEALIGN(4, buf)) { - uint32 *words = (uint32 *) buf; + uint32 *words = (uint32 *) buf; while (bytes >= 4) { @@ -254,38 +169,36 @@ pg_popcount(const char *buf, int bytes) return popcnt; } -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) - +#ifdef HAVE__BUILTIN_POPCOUNT /* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. + * This gets called on the first call to pg_popcount64. It replaces the + * function pointer so that subsequent calls are routed directly to the chosen + * implementation. */ static int pg_popcount64_choose(uint64 word) { if (pg_popcount_available()) - pg_popcount64 = pg_popcount64_sse42; + pg_popcount64 = pg_popcount64_hw; else - pg_popcount64 = pg_popcount64_slow; + pg_popcount64 = pg_popcount64_builtin; return pg_popcount64(word); } static int -pg_popcount64_sse42(uint64 word) +pg_popcount64_builtin(uint64 word) { #if defined(HAVE_LONG_INT_64) return __builtin_popcountl(word); #elif defined(HAVE_LONG_LONG_INT_64) return __builtin_popcountll(word); #else - /* shouldn't happen */ #error must have a working 64-bit integer datatype #endif } -#endif - +#else /* HAVE__BUILTIN_POPCOUNT */ /* * pg_popcount64_slow * Return the number of 1 bits set in word @@ -293,7 +206,7 @@ pg_popcount64_sse42(uint64 word) static int pg_popcount64_slow(uint64 word) { - int result = 0; + int result = 0; while (word != 0) { @@ -303,211 +216,4 @@ pg_popcount64_slow(uint64 word) return result; } - -#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ)) - -static bool -pg_lzcnt_available(void) -{ - - unsigned int exx[4] = { 0, 0, 0, 0 }; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(0x80000001, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 0x80000001); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 5)) != 0; /* LZCNT */ -} -#endif - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_rightmost_one32_choose(uint32 word) -{ - if (pg_lzcnt_available()) - pg_rightmost_one32 = pg_rightmost_one32_abm; - else - pg_rightmost_one32 = pg_rightmost_one32_slow; - - return pg_rightmost_one32(word); -} - -static int -pg_rightmost_one32_abm(uint32 word) -{ - return __builtin_ctz(word); -} - #endif - -/* - * pg_rightmost_one32_slow - * Returns the number of trailing 0-bits in word, starting at the least - * significant bit position. word must not be 0. - */ -static int -pg_rightmost_one32_slow(uint32 word) -{ - int result = 0; - - Assert(word != 0); - - while ((word & 255) == 0) - { - word >>= 8; - result += 8; - } - result += rightmost_one_pos[word & 255]; - - return result; -} - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_rightmost_one64_choose(uint64 word) -{ - if (pg_lzcnt_available()) - pg_rightmost_one64 = pg_rightmost_one64_abm; - else - pg_rightmost_one64 = pg_rightmost_one64_slow; - - return pg_rightmost_one64(word); -} - -static int -pg_rightmost_one64_abm(uint64 word) -{ -#if defined(HAVE_LONG_INT_64) - return __builtin_ctzl(word); -#elif defined(HAVE_LONG_LONG_INT_64) - return __builtin_ctzll(word); -#else - /* shouldn't happen */ -#error must have a working 64-bit integer datatype -#endif -} -#endif - -/* - * pg_rightmost_one64_slow - * Returns the number of trailing 0-bits in word, starting at the least - * significant bit position. word must not be 0. - */ -static int -pg_rightmost_one64_slow(uint64 word) -{ - int result = 0; - - Assert(word != 0); - - while ((word & 255) == 0) - { - word >>= 8; - result += 8; - } - result += rightmost_one_pos[word & 255]; - - return result; -} - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_leftmost_one32_choose(uint32 word) -{ - if (pg_lzcnt_available()) - pg_leftmost_one32 = pg_leftmost_one32_abm; - else - pg_leftmost_one32 = pg_leftmost_one32_slow; - - return pg_leftmost_one32(word); -} - -static int -pg_leftmost_one32_abm(uint32 word) -{ - return 31 - __builtin_clz(word); -} -#endif - -/* - * pg_leftmost_one32_slow - * Returns the 0-based position of the most significant set bit in word - * measured from the least significant bit. word must not be 0. - */ -static int -pg_leftmost_one32_slow(uint32 word) -{ - int shift = 32 - 8; - - Assert(word != 0); - - while ((word >> shift) == 0) - shift -= 8; - - return shift + leftmost_one_pos[(word >> shift) & 255]; -} - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_leftmost_one64_choose(uint64 word) -{ - if (pg_lzcnt_available()) - pg_leftmost_one64 = pg_leftmost_one64_abm; - else - pg_leftmost_one64 = pg_leftmost_one64_slow; - - return pg_leftmost_one64(word); -} - -static int -pg_leftmost_one64_abm(uint64 word) -{ -#if defined(HAVE_LONG_INT_64) - return 63 - __builtin_clzl(word); -#elif defined(HAVE_LONG_LONG_INT_64) - return 63 - __builtin_clzll(word); -#else - /* shouldn't happen */ -#error must have a working 64-bit integer datatype -#endif - -} -#endif - -/* - * pg_leftmost_one64_slow - * Returns the 0-based position of the most significant set bit in word - * measured from the least significant bit. word must not be 0. - */ -static int -pg_leftmost_one64_slow(uint64 word) -{ - int shift = 64 - 8; - - Assert(word != 0); - - while ((word >> shift) == 0) - shift -= 8; - - return shift + leftmost_one_pos[(word >> shift) & 255]; -} diff --git a/src/port/pg_bitutils_hwpopcnt.c b/src/port/pg_bitutils_hwpopcnt.c new file mode 100644 index 00000000000..516efd586dd --- /dev/null +++ b/src/port/pg_bitutils_hwpopcnt.c @@ -0,0 +1,36 @@ +/*------------------------------------------------------------------------- + * + * pg_bitutils_hwpopcnt.c + * CPU-optimized implementation of pg_popcount variants + * + * This file must be compiled with a compiler-specific flag to enable the + * POPCNT instruction. + * + * Portions Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/port/pg_bitutils_hwpopcnt.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "port/pg_bitutils.h" + +int +pg_popcount32_hw(uint32 word) +{ + return __builtin_popcount(word); +} + +int +pg_popcount64_hw(uint64 word) +{ +#if defined(HAVE_LONG_INT_64) + return __builtin_popcountl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return __builtin_popcountll(word); +#else +#error must have a working 64-bit integer datatype +#endif +} |