#define MAX(x, y)((x > y) ? x : y) __global__ void find_max(const int * array, int * max, int * mutex, unsigned int n) { unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; unsigned int stride = gridDim.x * blockDim.x; unsigned int input_idx = idx; __shared__ int partial_res[256]; int local_max = array[0]; while (input_idx < n) { local_max = MAX(local_max, array[input_idx]); input_idx += stride; } partial_res[threadIdx.x] = local_max; __syncthreads(); // reduction unsigned int i = blockDim.x / 2; while (i != 0) { if (threadIdx.x < i) { partial_res[threadIdx.x] = MAX(partial_res[threadIdx.x], partial_res[threadIdx.x + i]); } __syncthreads(); i /= 2; } if (threadIdx.x == 0) { while (atomicCAS(mutex, 0, 1) != 0); * max = MAX( * max, partial_res[0]); atomicExch(mutex, 0); } }