/*------------------------------------------------------------------------- * * pg_lfind.h * Optimized linear search routines using SIMD intrinsics where * available. * * Copyright (c) 2022-2025, PostgreSQL Global Development Group * * IDENTIFICATION * src/include/port/pg_lfind.h * *------------------------------------------------------------------------- */ #ifndef PG_LFIND_H #define PG_LFIND_H #include "port/simd.h" /* * pg_lfind8 * * Return true if there is an element in 'base' that equals 'key', otherwise * return false. */ static inline bool pg_lfind8(uint8 key, uint8 *base, uint32 nelem) { uint32 i; /* round down to multiple of vector length */ uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1); Vector8 chunk; for (i = 0; i < tail_idx; i += sizeof(Vector8)) { vector8_load(&chunk, &base[i]); if (vector8_has(chunk, key)) return true; } /* Process the remaining elements one at a time. */ for (; i < nelem; i++) { if (key == base[i]) return true; } return false; } /* * pg_lfind8_le * * Return true if there is an element in 'base' that is less than or equal to * 'key', otherwise return false. */ static inline bool pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem) { uint32 i; /* round down to multiple of vector length */ uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1); Vector8 chunk; for (i = 0; i < tail_idx; i += sizeof(Vector8)) { vector8_load(&chunk, &base[i]); if (vector8_has_le(chunk, key)) return true; } /* Process the remaining elements one at a time. */ for (; i < nelem; i++) { if (base[i] <= key) return true; } return false; } /* * pg_lfind32_one_by_one_helper * * Searches the array of integers one-by-one. The caller is responsible for * ensuring that there are at least "nelem" integers in the array. */ static inline bool pg_lfind32_one_by_one_helper(uint32 key, const uint32 *base, uint32 nelem) { for (uint32 i = 0; i < nelem; i++) { if (key == base[i]) return true; } return false; } #ifndef USE_NO_SIMD /* * pg_lfind32_simd_helper * * Searches one 4-register-block of integers. The caller is responsible for * ensuring that there are at least 4-registers-worth of integers remaining. */ static inline bool pg_lfind32_simd_helper(const Vector32 keys, const uint32 *base) { const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32); Vector32 vals1, vals2, vals3, vals4, result1, result2, result3, result4, tmp1, tmp2, result; /* load the next block into 4 registers */ vector32_load(&vals1, base); vector32_load(&vals2, &base[nelem_per_vector]); vector32_load(&vals3, &base[nelem_per_vector * 2]); vector32_load(&vals4, &base[nelem_per_vector * 3]); /* compare each value to the key */ result1 = vector32_eq(keys, vals1); result2 = vector32_eq(keys, vals2); result3 = vector32_eq(keys, vals3); result4 = vector32_eq(keys, vals4); /* combine the results into a single variable */ tmp1 = vector32_or(result1, result2); tmp2 = vector32_or(result3, result4); result = vector32_or(tmp1, tmp2); /* return whether there was a match */ return vector32_is_highbit_set(result); } #endif /* ! USE_NO_SIMD */ /* * pg_lfind32 * * Return true if there is an element in 'base' that equals 'key', otherwise * return false. */ static inline bool pg_lfind32(uint32 key, const uint32 *base, uint32 nelem) { #ifndef USE_NO_SIMD uint32 i = 0; /* * For better instruction-level parallelism, each loop iteration operates * on a block of four registers. */ const Vector32 keys = vector32_broadcast(key); /* load copies of key */ const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32); const uint32 nelem_per_iteration = 4 * nelem_per_vector; /* round down to multiple of elements per iteration */ const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1); #if defined(USE_ASSERT_CHECKING) bool assert_result = pg_lfind32_one_by_one_helper(key, base, nelem); #endif /* * If there aren't enough elements for the SIMD code, use the standard * one-by-one linear search code. */ if (nelem < nelem_per_iteration) return pg_lfind32_one_by_one_helper(key, base, nelem); /* * Process as many elements as possible with a block of 4 registers. */ do { if (pg_lfind32_simd_helper(keys, &base[i])) { Assert(assert_result == true); return true; } i += nelem_per_iteration; } while (i < tail_idx); /* * Process the last 'nelem_per_iteration' elements in the array with a * 4-register block. This will cause us to check a subset of the elements * more than once, but that won't affect correctness, and testing has * demonstrated that this helps more cases than it harms. */ Assert(assert_result == pg_lfind32_simd_helper(keys, &base[nelem - nelem_per_iteration])); return pg_lfind32_simd_helper(keys, &base[nelem - nelem_per_iteration]); #else /* Process the elements one at a time. */ return pg_lfind32_one_by_one_helper(key, base, nelem); #endif } #endif /* PG_LFIND_H */