diff options
author | Nathan Bossart <nathan@postgresql.org> | 2025-03-28 16:20:20 -0500 |
---|---|---|
committer | Nathan Bossart <nathan@postgresql.org> | 2025-03-28 16:20:20 -0500 |
commit | 519338ace410d9b1ffb13176b8802b0307ff0531 (patch) | |
tree | cef689c0b92e9678b1b5cf0110b0ba3a37c8ebe0 /configure | |
parent | 3c8e463b0d885e0d976f6a13a1fb78187b25c86f (diff) | |
download | postgresql-519338ace410d9b1ffb13176b8802b0307ff0531.tar.gz postgresql-519338ace410d9b1ffb13176b8802b0307ff0531.zip |
Optimize popcount functions with ARM SVE intrinsics.
This commit introduces SVE implementations of pg_popcount{,_masked}.
Unlike the Neon versions, we need an additional configure-time
check to determine if the compiler supports SVE intrinsics, and we
need a runtime check to determine if the current CPU supports SVE
instructions. Our testing showed that the SVE implementations are
much faster for larger inputs and are comparable to the status
quo for smaller inputs.
Author: "Devanga.Susmitha@fujitsu.com" <Devanga.Susmitha@fujitsu.com>
Co-authored-by: "Chiranmoy.Bhattacharya@fujitsu.com" <Chiranmoy.Bhattacharya@fujitsu.com>
Co-authored-by: "Malladi, Rama" <ramamalladi@hotmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/010101936e4aaa70-b474ab9e-b9ce-474d-a3ba-a3dc223d295c-000000%40us-west-2.amazonses.com
Discussion: https://postgr.es/m/OSZPR01MB84990A9A02A3515C6E85A65B8B2A2%40OSZPR01MB8499.jpnprd01.prod.outlook.com
Diffstat (limited to 'configure')
-rwxr-xr-x | configure | 71 |
1 file changed, 71 insertions, 0 deletions
```diff
diff --git a/configure b/configure
index 4dd67a5cc6e..30d949c3c46 100755
--- a/configure
+++ b/configure
@@ -17517,6 +17517,77 @@ $as_echo "#define USE_AVX512_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
   fi
 fi
 
+# Check for SVE popcount intrinsics
+#
+if test x"$host_cpu" = x"aarch64"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for svcnt_x" >&5
+$as_echo_n "checking for svcnt_x... " >&6; }
+if ${pgac_cv_sve_popcnt_intrinsics+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <arm_sve.h>
+
+	char buf[128];
+
+	#if defined(__has_attribute) && __has_attribute (target)
+	__attribute__((target("arch=armv8-a+sve")))
+	#endif
+	static int popcount_test(void)
+	{
+		svbool_t pred = svptrue_b64();
+		svuint8_t vec8;
+		svuint64_t accum1 = svdup_u64(0),
+					accum2 = svdup_u64(0),
+					vec64;
+		char *p = buf;
+		uint64_t popcnt,
+					mask = 0x5555555555555555;
+
+		vec64 = svand_n_u64_x(pred, svld1_u64(pred, (const uint64_t *) p), mask);
+		accum1 = svadd_u64_x(pred, accum1, svcnt_u64_x(pred, vec64));
+		p += svcntb();
+
+		vec64 = svand_n_u64_x(pred, svld1_u64(pred, (const uint64_t *) p), mask);
+		accum2 = svadd_u64_x(pred, accum2, svcnt_u64_x(pred, vec64));
+		p += svcntb();
+
+		popcnt = svaddv_u64(pred, svadd_u64_x(pred, accum1, accum2));
+
+		pred = svwhilelt_b8_s32(0, sizeof(buf));
+		vec8 = svand_n_u8_x(pred, svld1_u8(pred, (const uint8_t *) p), 0x55);
+		return (int) (popcnt + svaddv_u8(pred, svcnt_u8_x(pred, vec8)));
+	}
+int
+main ()
+{
+return popcount_test();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_sve_popcnt_intrinsics=yes
+else
+  pgac_cv_sve_popcnt_intrinsics=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sve_popcnt_intrinsics" >&5
+$as_echo "$pgac_cv_sve_popcnt_intrinsics" >&6; }
+if test x"$pgac_cv_sve_popcnt_intrinsics" = x"yes"; then
+  pgac_sve_popcnt_intrinsics=yes
+fi
+
+  if test x"$pgac_sve_popcnt_intrinsics" = x"yes"; then
+
+$as_echo "#define USE_SVE_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+  fi
+fi
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u32" >&5
```