segment.cl

///
/// @file segment.cl
///
/// Kernels to (sub)segment landscape into smallish patches from channels to ridges
///
/// @author CPS
/// @bug No known bugs
///

///
/// @defgroup segmentation Channel and hillslope segmentation
/// Segment and subsegment hillslopes and adjacent channels into smallish zones
///
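
//
// Note: each kernel below is compiled only when its corresponding KERNEL_*
// macro is defined. The host build is presumably expected to supply that
// macro together with the grid/integration constants referenced here
// (N_SEED_POINTS, NY_PADDED, SEGMENTATION_THRESHOLD, MAX_N_STEPS, DT_MAX,
// and the IS_* / LEFT_FLANK_ADDITION flag masks), e.g. via OpenCL compiler
// options of the (illustrative) form:
//
//    -D KERNEL_SEGMENT_DOWNCHANNELS -D N_SEED_POINTS=... -D NY_PADDED=...
//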

#ifdef KERNEL_SEGMENT_DOWNCHANNELS
///
/// Segment channels downstream: starting from each channel head, label every
/// channel pixel with the array index of its subsegment head pixel, splitting
/// each channel into subsegments roughly @p SEGMENTATION_THRESHOLD pixels long
/// and flagging each split point as a subsegment head.
///
/// Compiled if KERNEL_SEGMENT_DOWNCHANNELS is defined.
///
/// @param[in]     seed_point_array: list of initial streamline point vectors,
///                                  one allotted to each kernel instance
/// @param[in]     mask_array: grid pixel mask (padded),
///                            with @p true = masked, @p false = good
/// @param[in]     uv_array: flow unit velocity vector grid (padded)
/// @param[in,out] mapping_array: flag grid recording status of each pixel (padded)
/// @param[in]     count_array: counter grid recording number of pixel steps
///                             downstream from dominant channel head (padded)
/// @param[in]     link_array: link grid providing the grid array index of the next
///                             downstream pixel (padded)
/// @param[in,out] label_array: label grid giving the ID of the subsegment to which
///                             this pixel belongs (padded); the MSB is set if left flank
///
/// @returns void
///
/// @ingroup segmentation
///
__kernel void segment_downchannels(
        __global const float2 *seed_point_array,
        __global const bool   *mask_array,
        __global const float2 *uv_array,
        __global       uint   *mapping_array,
        __global const uint   *count_array,
        __global const uint   *link_array,
        __global       uint   *label_array
   )
{
    // For every channel head pixel...

    // Flatten the 2D NDRange position into a linear seed-point index
    const uint global_id = get_global_id(0u)+get_global_id(1u)*get_global_size(0u);
#ifdef VERBOSE
    // Report how kernel instances are distributed
    if (global_id==0 || global_id==get_global_offset(0u)) {
        printf("\n  >>> on GPU/OpenCL device: id=%d offset=%d ",
                get_global_id(0u),
                get_global_offset(0u));
        printf("#workitems=%d x #workgroups=%d = %d=%d\n",
                get_local_size(0u), get_num_groups(0u),
                get_local_size(0u)*get_num_groups(0u),
                get_global_size(0u));
    }
#endif
    if (global_id>=N_SEED_POINTS) {
        // This is a "padding" seed, so let's bail
        return;
    }
    __private uint idx, prev_idx, segment_label=0u,
                   segmentation_counter=0u;
    __private float2 vec = seed_point_array[global_id];

    // Remember here
    prev_idx = get_array_idx(vec);
    // Label this stream segment, starting with the head pixel
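    //    (the head pixel's padded-array index doubles as a unique subsegment ID)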
    segment_label = prev_idx;
    atomic_xchg(&label_array[prev_idx],segment_label);
    atomic_or(&mapping_array[prev_idx],IS_SUBSEGMENTHEAD);
    // Step downstream
    idx = link_array[prev_idx];
    // Continue stepping downstream until a dominant confluence
    //    or a masked pixel is reached
    while (!mask_array[idx] && prev_idx!=idx
            && (mapping_array[idx] & IS_SUBSEGMENTHEAD)==0) {
//        if (  0 & ((mapping_array[idx]) & IS_MAJORCONFLUENCE)
//              && ((mapping_array[prev_idx]) & IS_MAJORINFLOW)
//              && (count_array[idx]>=segmentation_counter) ) {
//            segment_label = idx;
//            segmentation_counter += SEGMENTATION_THRESHOLD;
////            if ((mapping_array[prev_idx]&IS_SUBSEGMENTHEAD)!=0)
//                atomic_or(&mapping_array[idx],IS_SUBSEGMENTHEAD);
//        } else
        if (   (count_array[idx]>=segmentation_counter+SEGMENTATION_THRESHOLD/2)
//            && ((~mapping_array[idx]) & IS_MAJORCONFLUENCE)
//            && ((~mapping_array[prev_idx]) & IS_MAJORINFLOW)
            ) {
                segment_label = idx;
                segmentation_counter = count_array[idx]+SEGMENTATION_THRESHOLD;
                    atomic_or(&mapping_array[idx],IS_SUBSEGMENTHEAD);
        }
        // Label here with the current segment's label
        atomic_xchg(&label_array[idx],segment_label);
        // Continue downstream
        prev_idx = idx;
        idx = link_array[idx];
    }
    return;
}
#endif

#ifdef KERNEL_SEGMENT_HILLSLOPES
///
/// Segment hillslopes: trace a streamline downstream from each hillslope
/// pixel until a thin-channel pixel is reached, then copy that channel
/// pixel's segment label back to the source hillslope pixel.
///
/// Compiled if KERNEL_SEGMENT_HILLSLOPES is defined.
///
/// @param[in]     seed_point_array: list of initial streamline point vectors,
///                                  one allotted to each kernel instance
/// @param[in]     mask_array: grid pixel mask (padded),
///                            with @p true = masked, @p false = good
/// @param[in]     uv_array: flow unit velocity vector grid (padded)
/// @param[in]     mapping_array: flag grid recording status of each pixel (padded)
/// @param[in]     count_array: counter grid recording number of pixel steps
///                             downstream from dominant channel head (padded)
/// @param[in]     link_array: link grid providing the grid array index of the next
///                             downstream pixel (padded)
/// @param[in,out] label_array: label grid giving the ID of the subsegment to which
///                             this pixel belongs (padded); the MSB is set if left flank
///
/// @returns void
///
/// @ingroup segmentation
///
__kernel void segment_hillslopes(
        __global const float2 *seed_point_array,
        __global const bool   *mask_array,
        __global const float2 *uv_array,
        __global const uint   *mapping_array,
        __global const uint   *count_array,
        __global const uint   *link_array,
        __global       uint   *label_array
   )
{
    // For every non-thin-channel (hillslope) pixel...

    const uint global_id = get_global_id(0u)+get_global_id(1u)*get_global_size(0u);
#ifdef VERBOSE
    // Report how kernel instances are distributed
    if (global_id==0 || global_id==get_global_offset(0u)) {
        printf("\n  >>> on GPU/OpenCL device: id=%d offset=%d ",
                get_global_id(0u),
                get_global_offset(0u));
        printf("#workitems=%d x #workgroups=%d = %d=%d\n",
                get_local_size(0u), get_num_groups(0u),
                get_local_size(0u)*get_num_groups(0u),
                get_global_size(0u));
    }
#endif
    if (global_id>=N_SEED_POINTS) {
        // This is a "padding" seed, so let's bail
        return;
    }
    __private uint idx, hillslope_idx, n_steps=0u;
    __private float dl=0.0f, dt=DT_MAX;
    __private float2 uv1_vec, uv2_vec, dxy1_vec, dxy2_vec,
                     vec = seed_point_array[global_id], next_vec;

    // Remember here
    idx = get_array_idx(vec);
    hillslope_idx = idx;
    // Integrate downstream until a channel pixel (or masked pixel) is reached
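    //    (note: the ((~flags)&BIT) idiom used below tests that BIT is *not* set)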
    while (!mask_array[idx] && ((~mapping_array[idx])&IS_THINCHANNEL)
           && n_steps<MAX_N_STEPS) {
        compute_step_vec(dt, uv_array, &dxy1_vec, &dxy2_vec, &uv1_vec, &uv2_vec,
                         vec, &next_vec, &idx);
        if (segment_runge_kutta_step(&dt, &dl, &dxy1_vec, &dxy2_vec,
                                     &vec, &next_vec, &n_steps, &idx))
            break;
    }
    if (mapping_array[idx]&IS_THINCHANNEL) {
        // We've reached a (thin) channel, so grab its label and apply it to
        //   the source hillslope pixel
        atomic_xchg(&label_array[hillslope_idx],label_array[idx]);
    }
    return;
}
#endif

#ifdef KERNEL_SUBSEGMENT_CHANNEL_EDGES
///
/// Rotate the inter-pixel step vector (dx,dy) by 45 degrees to the left
/// (counterclockwise), renormalize its components to lie in {-1,0,+1}, and
/// return the padded-array index of the neighbor pixel it now points to.
///
static inline uint rotate_left(uint prev_x, uint prev_y, char *dx, char *dy) {
    __private char rotated_dx=*dx-*dy, rotated_dy=*dx+*dy;
    *dx = rotated_dx/clamp((char)abs(rotated_dx),(char)1,(char)2);
    *dy = rotated_dy/clamp((char)abs(rotated_dy),(char)1,(char)2);
    return (prev_x+*dx)*NY_PADDED + (prev_y+*dy);
}
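
// For example, starting from (dx,dy)=(0,1) ("N"), successive calls visit the
// NW, W, SW, S, SE, E and NE neighbors in turn: one 45-degree left turn per call.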

///
/// Subsegment channel edges: walk downstream from each subsegment head pixel
/// and, at each step, scan the pixel neighborhood to the left of the flow
/// direction, flagging and labeling each non-channel neighbor as left flank.
///
/// Compiled if KERNEL_SUBSEGMENT_CHANNEL_EDGES is defined.
///
/// @param[in]     seed_point_array: list of initial streamline point vectors,
///                                  one allotted to each kernel instance
/// @param[in]     mask_array: grid pixel mask (padded),
///                            with @p true = masked, @p false = good
/// @param[in]     uv_array: flow unit velocity vector grid (padded)
/// @param[in,out] mapping_array: flag grid recording status of each pixel (padded)
/// @param[in]     channel_label_array: copy of the label grid array
/// @param[in]     link_array: link grid providing the grid array index of the next
///                            downstream pixel (padded)
/// @param[in,out] label_array: label grid giving the ID of the subsegment to which
///                             this pixel belongs (padded); the MSB is set if left flank
///
/// @returns void
///
/// @ingroup segmentation
///
/// @bug Corner-case problem where the thin-channel sequence has a fat kink,
///      leading to mislabeling of a left-flank pixel and a thin hillslope
///      streamline among the right-flank zone
///
__kernel void subsegment_channel_edges(
        __global const float2 *seed_point_array,
        __global const bool   *mask_array,
        __global const float2 *uv_array,
        __global       uint   *mapping_array,
        __global const uint   *channel_label_array,
        __global const uint   *link_array,
        __global       uint   *label_array
   )
{
    // For every subsegment head pixel...

    const uint global_id = get_global_id(0u)+get_global_id(1u)*get_global_size(0u);
#ifdef VERBOSE
    // Report how kernel instances are distributed
    if (global_id==0 || global_id==get_global_offset(0u)) {
        printf("\n  >>> on GPU/OpenCL device: id=%d offset=%d ",
                get_global_id(0u),
                get_global_offset(0u));
        printf("#workitems=%d x #workgroups=%d = %d=%d\n",
                get_local_size(0u), get_num_groups(0u),
                get_local_size(0u)*get_num_groups(0u),
                get_global_size(0u));
    }
#endif
    if (global_id>=N_SEED_POINTS) {
        // This is a "padding" seed, so let's bail
        return;
    }
    __private uint idx, prev_idx, left_idx, segment_label=0u,
            prev_x,prev_y, x,y, n_turns, n_steps;
    __private char dx,dy;
    __private float2 vec = seed_point_array[global_id];
    __private bool do_label_left;

    // Remember here
    prev_idx = get_array_idx(vec);
    segment_label = channel_label_array[prev_idx];
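    //    (read from the pristine copy of the label grid)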
    // Step downstream off subsegment head pixel
    idx = link_array[prev_idx];
    // Even if this pixel is masked, we still need to try to subsegment
    n_steps = 0u;
    while (prev_idx!=idx && n_steps++<MAX_N_STEPS) {
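        // Convert the linear padded-array indices into (x,y) grid coordinates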
        prev_x = prev_idx/NY_PADDED;
        prev_y = prev_idx%NY_PADDED;
        x =  idx/NY_PADDED;
        y =  idx%NY_PADDED;

        // "Scan" progressively left nbr pixels to ensure we are on the left flank
        n_turns = 0;
        left_idx = idx;
        dx = (char)(x-prev_x);
        dy = (char)(y-prev_y);
        // Step across nbrhood pixels by repeatedly rotating the vector
        // (dx,dy) = pixel[prev_idx] -> pixel[idx] (which defines "N").
        // Start with the W nbr, stop at the NE nbr.
        // If a thin channel pixel nbr is reached, check if the rotated vector
        //   points downstream - if so, flag DO NOT label left here
        //   - otherwise, break and continue on to left-flank labeling.
        // This deals with the corner case in which the downstream pixel sequence
        //   has a nasty "bump" turning sharp R then sharp L,
        //   i.e., when this downstream-vector rotation finds itself on the R flank
        //   not the L flank as required here.
        do_label_left = true;
        while (left_idx!=prev_idx && ++n_turns<=7) {
            left_idx = rotate_left(prev_x,prev_y,&dx,&dy);
            if (n_turns>=2 && !mask_array[left_idx]
                          && ((mapping_array[left_idx]) & IS_THINCHANNEL)) {
                if (left_idx==link_array[idx]) {
                    do_label_left = false;
                }
                break;
            }
        }
        if (do_label_left) {
            // "Scan" left-side thin-channel-adjacent pixels and label as "left flank"
            //    until another thin-channel pixel is reached in the scan - then stop
            n_turns = 0;
            left_idx = idx;
            dx = (char)(x-prev_x);
            dy = (char)(y-prev_y);
            // Step across nbrhood pixels by repeatedly rotating the vector
            // (dx,dy) = pixel[prev_idx] -> pixel[idx] (which defines "N").
            // Start with the W nbr, stop at the S nbr.
            // Break if a thin channel pixel nbr is reached.
            // Label as left flank if not a thin channel pixel.
            while (left_idx!=prev_idx && ++n_turns<=4) {
                left_idx = rotate_left(prev_x,prev_y,&dx,&dy);
                if (n_turns>=2 && !mask_array[left_idx]) {
                    if ((mapping_array[left_idx]) & IS_THINCHANNEL) {
                            break;
                    } else {
                        atomic_or(&mapping_array[left_idx],IS_LEFTFLANK);
                        atomic_or(&label_array[left_idx],LEFT_FLANK_ADDITION);
                    }
                }
            }
        }

        // Step further downstream if necessary
        prev_idx = idx;
        idx = link_array[idx];
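        // Stop if the next pixel is masked, or if we've just stepped onto the
        //    next subsegment's head pixel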
        if (mask_array[idx] || (mapping_array[prev_idx] & IS_SUBSEGMENTHEAD) ) {
            break;
        }
    }
    return;
}
#endif

#ifdef KERNEL_SUBSEGMENT_FLANKS
///
/// Subsegment flanks: trace a streamline downstream from each hillslope pixel
/// until a left-flank or thin-channel pixel is reached; if the trace ends on
/// a left-flank pixel, set the left-flank bit in the source pixel's label.
///
/// Compiled if KERNEL_SUBSEGMENT_FLANKS is defined.
///
/// @param[in]  seed_point_array: list of initial streamline point vectors,
///                               one allotted to each kernel instance
/// @param[in]  mask_array: grid pixel mask (padded),
///                         with @p true = masked, @p false = good
/// @param[in]  uv_array: flow unit velocity vector grid (padded)
/// @param[in,out] mapping_array: flag grid recording status of each pixel (padded)
/// @param[in]     count_array: counter grid recording number of pixel steps
///                             downstream from dominant channel head (padded)
/// @param[in]     link_array: link grid providing the grid array index of the next
///                             downstream pixel (padded)
/// @param[in,out] label_array: label grid giving the ID of the subsegment to which
///                             this pixel belongs (padded); the MSB is set if left flank
///
/// @returns void
///
/// @ingroup segmentation
///
__kernel void subsegment_flanks(
        __global const float2 *seed_point_array,
        __global const bool   *mask_array,
        __global const float2 *uv_array,
        __global       uint   *mapping_array,
        __global const uint   *count_array,
        __global const uint   *link_array,
        __global       uint   *label_array
   )
{
    // For every non-left-flank hillslope pixel...

    const uint global_id = get_global_id(0u)+get_global_id(1u)*get_global_size(0u);
#ifdef VERBOSE
    // Report how kernel instances are distributed
    if (global_id==0 || global_id==get_global_offset(0u)) {
        printf("\n  >>> on GPU/OpenCL device: id=%d offset=%d ",
                get_global_id(0u),
                get_global_offset(0u));
        printf("#workitems=%d x #workgroups=%d = %d=%d\n",
                get_local_size(0u), get_num_groups(0u),
                get_local_size(0u)*get_num_groups(0u),
                get_global_size(0u));
    }
#endif
    if (global_id>=N_SEED_POINTS) {
        // This is a "padding" seed, so let's bail
        return;
    }
    __private uint idx, hillslope_idx, n_steps;
    __private float dl=0.0f, dt=DT_MAX;
    __private float2 uv1_vec, uv2_vec, dxy1_vec, dxy2_vec,
                     vec = seed_point_array[global_id], next_vec;

    // Remember here
    idx = get_array_idx(vec);
    hillslope_idx = idx;
    // Integrate downstream until thin channel or left-flank pixel is reached
    n_steps = 0u;
    while (!mask_array[idx] && ((~mapping_array[idx])&IS_LEFTFLANK)
            && ((~mapping_array[idx])&IS_THINCHANNEL) && n_steps<MAX_N_STEPS) {
        compute_step_vec(dt, uv_array, &dxy1_vec, &dxy2_vec, &uv1_vec, &uv2_vec,
                         vec, &next_vec, &idx);
        if (segment_runge_kutta_step(&dt, &dl, &dxy1_vec, &dxy2_vec,
                                     &vec, &next_vec, &n_steps, &idx))
            break;
    }
    if (mapping_array[idx]&IS_LEFTFLANK) {
        // We've reached a left-flank pixel, so set the left-flank bit on the
        //   source hillslope pixel's label
        // No need for atomic here since we're writing to the source pixel
        label_array[hillslope_idx] |= LEFT_FLANK_ADDITION;
    }
    return;
}
#endif


#ifdef KERNEL_FIX_RIGHT_FLANKS
///
/// Fix right-flank labels: re-trace each right-flank hillslope streamline
/// downstream, adopting the downstream subsegment label wherever it disagrees
/// with the source pixel's, and propagating the left-flank bit if the trace
/// ends on a left-flank pixel.
///
/// Compiled if KERNEL_FIX_RIGHT_FLANKS is defined.
///
/// @param[in]  seed_point_array: list of initial streamline point vectors,
///                               one allotted to each kernel instance
/// @param[in]  mask_array: grid pixel mask (padded),
///                         with @p true = masked, @p false = good
/// @param[in]  uv_array: flow unit velocity vector grid (padded)
/// @param[in,out] mapping_array: flag grid recording status of each pixel (padded)
/// @param[in]     count_array: counter grid recording number of pixel steps
///                             downstream from dominant channel head (padded)
/// @param[in]     link_array: link grid providing the grid array index of the next
///                             downstream pixel (padded)
/// @param[in,out] label_array: label grid giving the ID of the subsegment to which
///                             this pixel belongs (padded); the MSB is set if left flank
///
/// @returns void
///
/// @ingroup segmentation
///
__kernel void fix_right_flanks(
        __global const float2 *seed_point_array,
        __global const bool   *mask_array,
        __global const float2 *uv_array,
        __global       uint   *mapping_array,
        __global const uint   *count_array,
        __global const uint   *link_array,
        __global       uint   *label_array
   )
{
    // For every right-flank hillslope pixel...

    const uint global_id = get_global_id(0u)+get_global_id(1u)*get_global_size(0u);
#ifdef VERBOSE
    // Report how kernel instances are distributed
    if (global_id==0 || global_id==get_global_offset(0u)) {
        printf("\n  >>> on GPU/OpenCL device: id=%d offset=%d ",
                get_global_id(0u),
                get_global_offset(0u));
        printf("#workitems=%d x #workgroups=%d = %d=%d\n",
                get_local_size(0u), get_num_groups(0u),
                get_local_size(0u)*get_num_groups(0u),
                get_global_size(0u));
    }
#endif
    if (global_id>=N_SEED_POINTS) {
        // This is a "padding" seed, so let's bail
        return;
    }
    __private uint idx, hillslope_idx, n_steps;
    __private float dl=0.0f, dt=DT_MAX;
    __private float2 uv1_vec, uv2_vec, dxy1_vec, dxy2_vec,
                     vec = seed_point_array[global_id], next_vec;

    // Remember here
    idx = get_array_idx(vec);
    hillslope_idx = idx;
    // Integrate downstream until thin channel or left-flank pixel is reached
    n_steps = 0u;
    while (!mask_array[idx] && ((~label_array[idx])&LEFT_FLANK_ADDITION)
            && ((~mapping_array[idx])&IS_THINCHANNEL) && n_steps<MAX_N_STEPS) {
        compute_step_vec(dt, uv_array, &dxy1_vec, &dxy2_vec, &uv1_vec, &uv2_vec,
                         vec, &next_vec, &idx);
        if ( (label_array[hillslope_idx]&(~LEFT_FLANK_ADDITION))
                    != (label_array[idx]&(~LEFT_FLANK_ADDITION)) ) {
            label_array[hillslope_idx] = label_array[idx];
        }
        if (segment_runge_kutta_step(&dt, &dl, &dxy1_vec, &dxy2_vec,
                                     &vec, &next_vec, &n_steps, &idx))
            break;
    }
    if ( (label_array[hillslope_idx]&(~LEFT_FLANK_ADDITION))
                != (label_array[idx]&(~LEFT_FLANK_ADDITION)) ) {
        label_array[hillslope_idx] = label_array[idx];
    }
    if (label_array[idx]&LEFT_FLANK_ADDITION) {
        // We've reached a left-flank pixel, so propagate the left-flank bit
        //   to the source hillslope pixel
        // No need for atomic here since we're writing to the source pixel
        label_array[hillslope_idx] |= LEFT_FLANK_ADDITION;
    }
    return;
}
#endif

#ifdef KERNEL_FIX_LEFT_FLANKS
///
/// Fix left-flank labels: re-trace each left-flank hillslope streamline
/// downstream, adopting the downstream subsegment label wherever it disagrees
/// with the source pixel's, and clearing the left-flank bit if the trace ends
/// on a right-flank (non-channel) pixel.
///
/// Compiled if KERNEL_FIX_LEFT_FLANKS is defined.
///
/// @param[in]  seed_point_array: list of initial streamline point vectors,
///                               one allotted to each kernel instance
/// @param[in]  mask_array: grid pixel mask (padded),
///                         with @p true = masked, @p false = good
/// @param[in]  uv_array: flow unit velocity vector grid (padded)
/// @param[in,out] mapping_array: flag grid recording status of each pixel (padded)
/// @param[in]     count_array: counter grid recording number of pixel steps
///                             downstream from dominant channel head (padded)
/// @param[in]     link_array: link grid providing the grid array index of the next
///                             downstream pixel (padded)
/// @param[in,out] label_array: label grid giving the ID of the subsegment to which
///                             this pixel belongs (padded); the MSB is set if left flank
///
/// @returns void
///
/// @ingroup segmentation
///
__kernel void fix_left_flanks(
        __global const float2 *seed_point_array,
        __global const bool   *mask_array,
        __global const float2 *uv_array,
        __global       uint   *mapping_array,
        __global const uint   *count_array,
        __global const uint   *link_array,
        __global       uint   *label_array
   )
{
    // For every left-flank hillslope pixel...

    const uint global_id = get_global_id(0u)+get_global_id(1u)*get_global_size(0u);
#ifdef VERBOSE
    // Report how kernel instances are distributed
    if (global_id==0 || global_id==get_global_offset(0u)) {
        printf("\n  >>> on GPU/OpenCL device: id=%u offset=%u ",
                (uint)get_global_id(0u),
                (uint)get_global_offset(0u));
                get_local_size(0u), get_num_groups(0u),
                get_local_size(0u)*get_num_groups(0u),
                get_global_size(0u));
    }
#endif
    if (global_id>=N_SEED_POINTS) {
        // This is a "padding" seed, so let's bail
        return;
    }
    __private uint idx, hillslope_idx, n_steps;
    __private float dl=0.0f, dt=DT_MAX;
    __private float2 uv1_vec, uv2_vec, dxy1_vec, dxy2_vec,
                     vec = seed_point_array[global_id], next_vec;

    // Remember here
    idx = get_array_idx(vec);
    hillslope_idx = idx;
    // Integrate downstream until thin channel or right-flank pixel is reached
    n_steps = 0u;
    while (!mask_array[idx] && ((label_array[idx])&LEFT_FLANK_ADDITION)
            && ((~mapping_array[idx])&IS_THINCHANNEL) && n_steps<MAX_N_STEPS) {
        compute_step_vec(dt, uv_array, &dxy1_vec, &dxy2_vec, &uv1_vec, &uv2_vec,
                         vec, &next_vec, &idx);
        if ( (label_array[hillslope_idx]&(~LEFT_FLANK_ADDITION))
                    != (label_array[idx]&(~LEFT_FLANK_ADDITION)) ) {
            label_array[hillslope_idx] = label_array[idx];
        }
        if (segment_runge_kutta_step(&dt, &dl, &dxy1_vec, &dxy2_vec,
                                     &vec, &next_vec, &n_steps, &idx))
            break;
    }
    if ( (label_array[hillslope_idx]&(~LEFT_FLANK_ADDITION))
                != (label_array[idx]&(~LEFT_FLANK_ADDITION)) ) {
        label_array[hillslope_idx] = label_array[idx];
    }
    if ( ((~label_array[idx])&LEFT_FLANK_ADDITION)
            && ((~mapping_array[idx])&IS_THINCHANNEL) ) {
        // We've reached a right-flank (non-channel) pixel, so clear the
        //   left-flank bit on the source hillslope pixel
        // No need for atomic here since we're writing to the source pixel
        label_array[hillslope_idx] &= ~LEFT_FLANK_ADDITION;
    }
    return;
}
#endif