hillslopes.clΒΆ

///
/// @file hillslopes.cl
///
/// Kernels to (re)map thin channels, branching structure, and single outflow directions.
///
/// @author CPS
/// @bug No known bugs
///

#ifdef KERNEL_HILLSLOPES
///
/// TBD
///
/// Compiled if KERNEL_HILLSLOPES is defined.
///
/// @param[in]     seed_point_array: list of initial streamline point vectors,
///                                  one allotted to each kernel instance
/// @param[in]     mask_array: grid pixel mask (padded),
///                            with @p true = masked, @p false = good
/// @param[in]     uv_array: flow unit velocity vector grid (padded)
/// @param[in,out] mapping_array: flag grid recording status of each pixel (padded)
/// @param[in,out] count_array: counter grid recording number of pixel steps
///                             downstream from dominant channel head (padded)
/// @param[in,out] link_array: link grid providing the grid array index of the next
///                             downstream pixel (padded)
///
/// @returns void
///
/// @ingroup structure
///
__kernel void hillslopes(
        __global const float2 *seed_point_array,
        __global const bool   *mask_array,
        __global const float2 *uv_array,
        __global       uint   *mapping_array,
        __global const uint   *count_array,
        __global       uint   *link_array
   )
{
    const uint global_id = get_global_id(0u)+get_global_id(1u)*get_global_size(0u);
    // For every hillslope aka non-thin-channel pixel...
    if (global_id>=N_SEED_POINTS) {
        // This is a "padding" seed, so let's bail
#ifdef DEBUG
        printf("Bailing @ %d !in [%d-%d]\n",
                global_id,get_global_offset(0u),N_SEED_POINTS-1);
#endif
        return;
    }

#ifdef VERBOSE
    // Report how kernel instances are distributed
    if (global_id==0 || global_id==get_global_offset(0u)) {
        printf("\n  >>> on GPU/OpenCL device: id=%d offset=%d ",
                get_global_id(0u),
                get_global_offset(0u));
        printf("#workitems=%d x #workgroups=%d = %d=%d\n",
                get_local_size(0u), get_num_groups(0u),
                get_local_size(0u)*get_num_groups(0u),
                get_global_size(0u));
    }
#endif
    __private uint idx, prev_idx, n_steps=0u;
    __private float dl=0.0f, dt=DT_MAX;
    __private float2 uv1_vec, uv2_vec, dxy1_vec, dxy2_vec,
                     vec = seed_point_array[global_id], next_vec;

    // Remember here
    idx = get_array_idx(vec);
    prev_idx = idx;
    // Integrate downstream one pixel
    // HACK: factor 100x
    while (prev_idx==idx && !mask_array[idx] && n_steps<100*(MAX_N_STEPS)) {
        prev_idx = idx;
        compute_step_vec(dt, uv_array, &dxy1_vec, &dxy2_vec, &uv1_vec, &uv2_vec,
                         vec, &next_vec, &idx);
        if (countlink_runge_kutta_step(&dt, &dl, &dxy1_vec, &dxy2_vec,
                                       &vec, &next_vec, &idx, mapping_array)) {
#ifdef DEBUG
            printf("Link hillslopes (%d): stuck @ %d ...bailing\n",global_id,idx);
#endif
            break;
        }
        n_steps++;
    }
    // If we're on a new pixel, link previous pixel to here
    if (prev_idx!=idx && !mask_array[idx]) {
        atomic_xchg(&link_array[prev_idx],idx);
    }
    return;
}
#endif