aboutsummaryrefslogtreecommitdiff
path: root/examples/cuda/Max_array.cu
blob: 08e353a14165f41b4b4ad22f7366948b278bbdff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// Larger of two values. Each argument is fully parenthesized so operands that
// contain lower-precedence operators (e.g. `a & b`, `c ? d : e`) compare and
// select correctly. Both arguments may be evaluated twice, so do not pass
// expressions with side effects.
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

/*
 * find_max - raise *max to the largest value found in array[0 .. n).
 *
 * Each thread scans the array starting at its global index and advancing by
 * the total number of launched threads, then the block combines its
 * per-thread maxima in shared memory and thread 0 merges the block result
 * into *max with a single atomicMax.
 *
 * Parameters:
 *   array  input values; at least n readable ints.
 *   max    result cell. It is only ever raised, never reset, so it must hold
 *          a valid starting value before the launch.
 *   mutex  kept for interface compatibility; no longer read or written,
 *          because the merge into *max is done by one atomic operation.
 *   n      number of elements in array. n == 0 leaves *max untouched.
 *
 * Launch layout: only the x dimension of the grid and block is used. Any
 * blockDim.x is accepted (it does not have to be a power of two or <= 256).
 * Shared memory: 256 ints, statically allocated.
 */
__global__ void find_max(const int * array, int * max, int * mutex, unsigned int n) {
    // Number of shared scratch slots; wider blocks are folded into them in
    // several passes below instead of indexing past the end.
    const unsigned int kSlots = 256;
    __shared__ int partial_res[kSlots];

    (void) mutex;  // unused: atomicMax replaces the lock-protected update

    // An empty array has no element to seed from. n is the same for every
    // thread, so the whole block leaves here before reaching any barrier.
    if (n == 0) {
        return;
    }

    const unsigned int tid = threadIdx.x;

    // 64-bit index arithmetic so that `i += stride` cannot wrap around when
    // n is close to the 32-bit limit.
    const unsigned long long stride = (unsigned long long) gridDim.x * blockDim.x;

    // array[0] is a member of the array, so seeding with it never changes
    // the final maximum, even for threads that scan no elements.
    int local_max = array[0];
    for (unsigned long long i = (unsigned long long) blockIdx.x * blockDim.x + tid;
         i < n; i += stride) {
        local_max = MAX(local_max, array[i]);
    }

    // Fold the per-thread maxima into the shared slots, kSlots threads per
    // pass. The first pass stores; later passes (only when blockDim.x exceeds
    // kSlots) combine with what is already there. Each slot is touched by
    // exactly one thread per pass, and the loop bounds are block-uniform, so
    // every thread reaches every barrier.
    for (unsigned int base = 0; base < blockDim.x; base += kSlots) {
        if (tid >= base && tid - base < kSlots) {
            const unsigned int slot = tid - base;
            if (base == 0) {
                partial_res[slot] = local_max;
            } else {
                partial_res[slot] = MAX(partial_res[slot], local_max);
            }
        }
        __syncthreads();
    }

    // Tree reduction over the occupied slots. The upper half is folded onto
    // the lower half using a rounded-up midpoint, so an odd count keeps its
    // middle element for the next round instead of dropping it.
    unsigned int active = (blockDim.x < kSlots) ? blockDim.x : kSlots;
    while (active > 1) {
        const unsigned int half = (active + 1) / 2;
        if (tid + half < active) {
            partial_res[tid] = MAX(partial_res[tid], partial_res[tid + half]);
        }

        __syncthreads();
        active = half;
    }

    // One atomic read-modify-write per block merges the block maximum into
    // the global result; no separate lock or fence is required.
    if (tid == 0) {
        atomicMax(max, partial_res[0]);
    }
}