// Compile with -C opt-level=3 -C target-cpu=native to see autovectorization

/// Wrapper that raises the alignment of the wrapped value to at least 64 bytes.
///
/// `T` may be unsized so that a reference to `Aligned<[i32; N]>` can coerce to
/// a reference to `Aligned<[i32]>`.
#[repr(align(64))]
pub struct Aligned<T: ?Sized>(T);

/// Sums every element of `input`.
///
/// The slice starts on a 64-byte boundary (guaranteed by [`Aligned`]) and its
/// length must be a multiple of 64 elements.
///
/// # Panics
///
/// Panics if `input`'s length is not a multiple of 64. Integer overflow during
/// the summation follows `Iterator::sum` for `i32` (it panics when overflow
/// checks are enabled).
pub fn sum_array(input: &Aligned<[i32]>) -> i32 {
    let values = &input.0;

    // One up-front check instead of `unreachable_unchecked`: a safe function
    // must not be able to reach undefined behaviour on any input. On the
    // fall-through path the optimizer still knows the length is a multiple of
    // 64, which is the fact the original hint was meant to convey.
    assert!(
        values.len() % 64 == 0,
        "sum_array: input length must be a multiple of 64"
    );

    // Slice iteration carries no per-element bounds check, so no raw-pointer
    // indexing is needed.
    values.iter().sum()
}