aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNathan Bossart <nathan@postgresql.org>2024-04-23 10:54:04 -0500
committerNathan Bossart <nathan@postgresql.org>2024-04-23 10:54:04 -0500
commit598e0114a3b16bb7edcaba6784b3794f816889ae (patch)
tree3c3e335bc66410633c814ab93d132a008c4de189 /src
parentbb3ca232398658291d4b1eaae12cd6e88dfc96a3 (diff)
downloadpostgresql-598e0114a3b16bb7edcaba6784b3794f816889ae.tar.gz
postgresql-598e0114a3b16bb7edcaba6784b3794f816889ae.zip
Fix code for probing availability of AVX-512.
This commit fixes a few things: * Instead of checking for CPU support of the "xsave" extension, we need to check for OS support of XGETBV instructions via the "osxsave" flag. * We must check that additional XCR0 bits are set to be sure the ZMM registers are fully enabled. * We should use the recommended ordering of steps. Specifically, we need to check that the ZMM registers are enabled prior to checking for AVX-512 via CPUID. In passing, split this code into separate functions to improve readability. Reported-by: Andrew Kane Reviewed-by: Akash Shankaran, Raghuveer Devulapalli Discussion: https://postgr.es/m/20240418024459.GA3385227%40nathanxps13
Diffstat (limited to 'src')
-rw-r--r--src/port/pg_popcount_avx512_choose.c80
1 files changed, 47 insertions, 33 deletions
diff --git a/src/port/pg_popcount_avx512_choose.c b/src/port/pg_popcount_avx512_choose.c
index ae3fa3d3067..b37107803a8 100644
--- a/src/port/pg_popcount_avx512_choose.c
+++ b/src/port/pg_popcount_avx512_choose.c
@@ -34,39 +34,13 @@
#ifdef TRY_POPCNT_FAST
/*
- * Returns true if the CPU supports the instructions required for the AVX-512
- * pg_popcount() implementation.
+ * Does CPUID say there's support for XSAVE instructions?
*/
-bool
-pg_popcount_avx512_available(void)
+static inline bool
+xsave_available(void)
{
unsigned int exx[4] = {0, 0, 0, 0};
- /* Does CPUID say there's support for AVX-512 popcount instructions? */
-#if defined(HAVE__GET_CPUID_COUNT)
- __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
- __cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
- if ((exx[2] & (1 << 14)) == 0) /* avx512-vpopcntdq */
- return false;
-
- /* Does CPUID say there's support for AVX-512 byte and word instructions? */
- memset(exx, 0, sizeof(exx));
-#if defined(HAVE__GET_CPUID_COUNT)
- __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
- __cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
- if ((exx[1] & (1 << 30)) == 0) /* avx512-bw */
- return false;
-
- /* Does CPUID say there's support for XSAVE instructions? */
- memset(exx, 0, sizeof(exx));
#if defined(HAVE__GET_CPUID)
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUID)
@@ -74,15 +48,55 @@ pg_popcount_avx512_available(void)
#else
#error cpuid instruction not available
#endif
- if ((exx[2] & (1 << 26)) == 0) /* xsave */
- return false;
+ return (exx[2] & (1 << 27)) != 0; /* osxsave */
+}
- /* Does XGETBV say the ZMM registers are enabled? */
+/*
+ * Does XGETBV say the ZMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+static inline bool
+zmm_regs_available(void)
+{
#ifdef HAVE_XSAVE_INTRINSICS
- return (_xgetbv(0) & 0xe0) != 0;
+ return (_xgetbv(0) & 0xe6) == 0xe6;
#else
return false;
#endif
}
+/*
+ * Does CPUID say there's support for AVX-512 popcount and byte-and-word
+ * instructions?
+ */
+static inline bool
+avx512_popcnt_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID_COUNT)
+ __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+ __cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
+ (exx[1] & (1 << 30)) != 0; /* avx512-bw */
+}
+
+/*
+ * Returns true if the CPU supports the instructions required for the AVX-512
+ * pg_popcount() implementation.
+ */
+bool
+pg_popcount_avx512_available(void)
+{
+ return xsave_available() &&
+ zmm_regs_available() &&
+ avx512_popcnt_available();
+}
+
#endif /* TRY_POPCNT_FAST */