aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config/c-compiler.m432
-rwxr-xr-xconfigure93
-rw-r--r--configure.ac19
-rw-r--r--meson.build22
-rw-r--r--src/port/Makefile5
-rw-r--r--src/port/meson.build2
-rw-r--r--src/port/pg_crc32c_sse42.c1
7 files changed, 72 insertions, 102 deletions
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index a129edb88e1..309d5b04b46 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -605,24 +605,26 @@ fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
# test the 8-byte variant, _mm_crc32_u64, but it is assumed to be present if
# the other ones are, on x86-64 platforms)
#
-# An optional compiler flag can be passed as argument (e.g. -msse4.2). If the
-# intrinsics are supported, sets pgac_sse42_crc32_intrinsics, and CFLAGS_CRC.
+# If the intrinsics are supported, sets pgac_sse42_crc32_intrinsics.
AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS],
-[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics_$1])])dnl
-AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u32 with CFLAGS=$1], [Ac_cachevar],
-[pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS $1"
-AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <nmmintrin.h>],
- [unsigned int crc = 0;
- crc = _mm_crc32_u8(crc, 0);
- crc = _mm_crc32_u32(crc, 0);
- /* return computed value, to prevent the above being optimized away */
- return crc == 0;])],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics])])dnl
+AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u32], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <nmmintrin.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("sse4.2")))
+ #endif
+ static int crc32_sse42_test(void)
+ {
+ unsigned int crc = 0;
+ crc = _mm_crc32_u8(crc, 0);
+ crc = _mm_crc32_u32(crc, 0);
+ /* return computed value, to prevent the above being optimized away */
+ return crc == 0;
+ }],
+ [return crc32_sse42_test();])],
[Ac_cachevar=yes],
- [Ac_cachevar=no])
-CFLAGS="$pgac_save_CFLAGS"])
+ [Ac_cachevar=no])])
if test x"$Ac_cachevar" = x"yes"; then
- CFLAGS_CRC="$1"
pgac_sse42_crc32_intrinsics=yes
fi
undefine([Ac_cachevar])dnl
diff --git a/configure b/configure
index 163ef9a7732..02b2c2e1061 100755
--- a/configure
+++ b/configure
@@ -17364,87 +17364,47 @@ fi
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
#
-# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
-# with the default compiler flags. If not, check if adding the -msse4.2
-# flag helps. CFLAGS_CRC is set to -msse4.2 if that's required.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u32 with CFLAGS=" >&5
-$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u32 with CFLAGS=... " >&6; }
-if ${pgac_cv_sse42_crc32_intrinsics_+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u32" >&5
+$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u32... " >&6; }
+if ${pgac_cv_sse42_crc32_intrinsics+:} false; then :
$as_echo_n "(cached) " >&6
else
- pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS "
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <nmmintrin.h>
-int
-main ()
-{
-unsigned int crc = 0;
- crc = _mm_crc32_u8(crc, 0);
- crc = _mm_crc32_u32(crc, 0);
- /* return computed value, to prevent the above being optimized away */
- return crc == 0;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- pgac_cv_sse42_crc32_intrinsics_=yes
-else
- pgac_cv_sse42_crc32_intrinsics_=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-CFLAGS="$pgac_save_CFLAGS"
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics_" >&5
-$as_echo "$pgac_cv_sse42_crc32_intrinsics_" >&6; }
-if test x"$pgac_cv_sse42_crc32_intrinsics_" = x"yes"; then
- CFLAGS_CRC=""
- pgac_sse42_crc32_intrinsics=yes
-fi
-
-if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u32 with CFLAGS=-msse4.2" >&5
-$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u32 with CFLAGS=-msse4.2... " >&6; }
-if ${pgac_cv_sse42_crc32_intrinsics__msse4_2+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS -msse4.2"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <nmmintrin.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("sse4.2")))
+ #endif
+ static int crc32_sse42_test(void)
+ {
+ unsigned int crc = 0;
+ crc = _mm_crc32_u8(crc, 0);
+ crc = _mm_crc32_u32(crc, 0);
+ /* return computed value, to prevent the above being optimized away */
+ return crc == 0;
+ }
int
main ()
{
-unsigned int crc = 0;
- crc = _mm_crc32_u8(crc, 0);
- crc = _mm_crc32_u32(crc, 0);
- /* return computed value, to prevent the above being optimized away */
- return crc == 0;
+return crc32_sse42_test();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- pgac_cv_sse42_crc32_intrinsics__msse4_2=yes
+ pgac_cv_sse42_crc32_intrinsics=yes
else
- pgac_cv_sse42_crc32_intrinsics__msse4_2=no
+ pgac_cv_sse42_crc32_intrinsics=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
-CFLAGS="$pgac_save_CFLAGS"
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics__msse4_2" >&5
-$as_echo "$pgac_cv_sse42_crc32_intrinsics__msse4_2" >&6; }
-if test x"$pgac_cv_sse42_crc32_intrinsics__msse4_2" = x"yes"; then
- CFLAGS_CRC="-msse4.2"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics" >&5
+$as_echo "$pgac_cv_sse42_crc32_intrinsics" >&6; }
+if test x"$pgac_cv_sse42_crc32_intrinsics" = x"yes"; then
pgac_sse42_crc32_intrinsics=yes
fi
-fi
# Are we targeting a processor that supports SSE 4.2? gcc, clang and icc all
# define __SSE4_2__ in that case.
@@ -17647,15 +17607,20 @@ fi
# If we are targeting a processor that has Intel SSE 4.2 instructions, we can
# use the special CRC instructions for calculating CRC-32C. If we're not
# targeting such a processor, but we can nevertheless produce code that uses
-# the SSE intrinsics, perhaps with some extra CFLAGS, compile both
-# implementations and select which one to use at runtime, depending on whether
-# SSE 4.2 is supported by the processor we're running on.
+# the SSE intrinsics, compile both implementations and select which one to use
+# at runtime, depending on whether SSE 4.2 is supported by the processor we're
+# running on.
#
# Similarly, if we are targeting an ARM processor that has the CRC
# instructions that are part of the ARMv8 CRC Extension, use them. And if
# we're not targeting such a processor, but can nevertheless produce code that
# uses the CRC instructions, compile both, and select at runtime.
#
+# Note that we do not use __attribute__((target("..."))) for the ARM CRC
+# instructions because until clang 16, using the ARM intrinsics still requires
+# special -march flags. Perhaps we can re-evaluate this decision after some
+# time has passed.
+#
# You can skip the runtime check by setting the appropriate USE_*_CRC32 flag to 1
# in the template or configure command line.
#
diff --git a/configure.ac b/configure.ac
index 8c3367bd545..99a5b1aac64 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2068,13 +2068,7 @@ fi
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
#
-# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
-# with the default compiler flags. If not, check if adding the -msse4.2
-# flag helps. CFLAGS_CRC is set to -msse4.2 if that's required.
-PGAC_SSE42_CRC32_INTRINSICS([])
-if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then
- PGAC_SSE42_CRC32_INTRINSICS([-msse4.2])
-fi
+PGAC_SSE42_CRC32_INTRINSICS()
# Are we targeting a processor that supports SSE 4.2? gcc, clang and icc all
# define __SSE4_2__ in that case.
@@ -2111,15 +2105,20 @@ AC_SUBST(CFLAGS_CRC)
# If we are targeting a processor that has Intel SSE 4.2 instructions, we can
# use the special CRC instructions for calculating CRC-32C. If we're not
# targeting such a processor, but we can nevertheless produce code that uses
-# the SSE intrinsics, perhaps with some extra CFLAGS, compile both
-# implementations and select which one to use at runtime, depending on whether
-# SSE 4.2 is supported by the processor we're running on.
+# the SSE intrinsics, compile both implementations and select which one to use
+# at runtime, depending on whether SSE 4.2 is supported by the processor we're
+# running on.
#
# Similarly, if we are targeting an ARM processor that has the CRC
# instructions that are part of the ARMv8 CRC Extension, use them. And if
# we're not targeting such a processor, but can nevertheless produce code that
# uses the CRC instructions, compile both, and select at runtime.
#
+# Note that we do not use __attribute__((target("..."))) for the ARM CRC
+# instructions because until clang 16, using the ARM intrinsics still requires
+# special -march flags. Perhaps we can re-evaluate this decision after some
+# time has passed.
+#
# You can skip the runtime check by setting the appropriate USE_*_CRC32 flag to 1
# in the template or configure command line.
#
diff --git a/meson.build b/meson.build
index ff3848b1d85..6bc3bb51dc0 100644
--- a/meson.build
+++ b/meson.build
@@ -2211,14 +2211,19 @@ endif
# If we are targeting a processor that has Intel SSE 4.2 instructions, we can
# use the special CRC instructions for calculating CRC-32C. If we're not
# targeting such a processor, but we can nevertheless produce code that uses
-# the SSE intrinsics, perhaps with some extra CFLAGS, compile both
-# implementations and select which one to use at runtime, depending on whether
-# SSE 4.2 is supported by the processor we're running on.
+# the SSE intrinsics, compile both implementations and select which one to use
+# at runtime, depending on whether SSE 4.2 is supported by the processor we're
+# running on.
#
# Similarly, if we are targeting an ARM processor that has the CRC
# instructions that are part of the ARMv8 CRC Extension, use them. And if
# we're not targeting such a processor, but can nevertheless produce code that
# uses the CRC instructions, compile both, and select at runtime.
+#
+# Note that we do not use __attribute__((target("..."))) for the ARM CRC
+# instructions because until clang 16, using the ARM intrinsics still requires
+# special -march flags. Perhaps we can re-evaluate this decision after some
+# time has passed.
###############################################################
have_optimized_crc = false
@@ -2234,6 +2239,9 @@ if host_cpu == 'x86' or host_cpu == 'x86_64'
prog = '''
#include <nmmintrin.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("sse4.2")))
+#endif
int main(void)
{
unsigned int crc = 0;
@@ -2244,16 +2252,16 @@ int main(void)
}
'''
- if cc.links(prog, name: '_mm_crc32_u8 and _mm_crc32_u32 without -msse4.2',
+ if not cc.links(prog, name: '_mm_crc32_u8 and _mm_crc32_u32',
args: test_c_args)
+ # Do not use Intel SSE 4.2
+ elif (cc.get_define('__SSE4_2__') != '')
# Use Intel SSE 4.2 unconditionally.
cdata.set('USE_SSE42_CRC32C', 1)
have_optimized_crc = true
- elif cc.links(prog, name: '_mm_crc32_u8 and _mm_crc32_u32 with -msse4.2',
- args: test_c_args + ['-msse4.2'])
+ else
# Use Intel SSE 4.2, with runtime check. The CPUID instruction is needed for
# the runtime check.
- cflags_crc += '-msse4.2'
cdata.set('USE_SSE42_CRC32C', false)
cdata.set('USE_SSE42_CRC32C_WITH_RUNTIME_CHECK', 1)
have_optimized_crc = true
diff --git a/src/port/Makefile b/src/port/Makefile
index 366c814bd92..4c224319512 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -82,11 +82,6 @@ libpgport.a: $(OBJS)
rm -f $@
$(AR) $(AROPT) $@ $^
-# all versions of pg_crc32c_sse42.o need CFLAGS_CRC
-pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_CRC)
-pg_crc32c_sse42_shlib.o: CFLAGS+=$(CFLAGS_CRC)
-pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_CRC)
-
# all versions of pg_crc32c_armv8.o need CFLAGS_CRC
pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC)
pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC)
diff --git a/src/port/meson.build b/src/port/meson.build
index 83a06325209..c5bceed9cda 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -82,7 +82,7 @@ endif
replace_funcs_pos = [
# x86/x64
['pg_crc32c_sse42', 'USE_SSE42_CRC32C'],
- ['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK', 'crc'],
+ ['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c
index 7f88c114800..dcc4904a82b 100644
--- a/src/port/pg_crc32c_sse42.c
+++ b/src/port/pg_crc32c_sse42.c
@@ -19,6 +19,7 @@
#include "port/pg_crc32c.h"
pg_attribute_no_sanitize_alignment()
+pg_attribute_target("sse4.2")
pg_crc32c
pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
{