integratetraj.clΒΆ

///
/// @file integratetraj.cl
///
/// Streamline trajectory integration kernel and related tracing functions.
///
/// @author CPS
///
/// @todo Fix big-DTM crash issue
/// @todo Perhaps use compiler directive volatile where variables not const?
/// @todo Update doc about trajectory integration to describe subpixel seeding & jittering
///
/// @bug Crashes (reported as 'abort 6' by PyOpenCL) occur for very large DTMs.
///      The reason remains obscure: it may be because of GPU timeout, but more likely
///      is because of a memory leakage.
///


#ifdef KERNEL_INTEGRATE_TRAJECTORY
///
/// GPU kernel that drives streamline integration from seed positions
/// given in @p seed_point_array, controlled by the 'flow' vector field
/// given in @p uv_array, and either terminated at pixels masked in
/// mask_array or because a streamline exceeds a threshold
/// distance (length or number of integration points) given by parameters
/// stored in info. Further integration parameters are provided in this struct.
///
/// The kernel acts on one seed point only. It chooses this seed point
/// by computing a global id and using it to index the @p seed_point_array.
/// UPDATE: now doing sub-pixel streamlines as a set per seed point... need to doc here
///
/// Each streamline trajectory is returned in the appropriate location
/// in @p trajectories_array as a list of compressed-into-byte dx,dy values.
///
/// Compiled if KERNEL_INTEGRATE_TRAJECTORY is defined.
///
/// @param[in]  seed_point_array: list of initial streamline point vectors,
///                               one allotted to each kernel instance
/// @param[in]  mask_array: grid pixel mask (padded),
///                         with @p true = masked, @p false = good
/// @param[in]  uv_array: flow unit velocity vector grid (padded)
/// @param[out]  mapping_array: multi-flag array
/// @param[out] trajectories_array: lists of streamline trajectories, stored as
///                                 compressed-into-byte dx,dy vector sequences;
///                                 one list per @p seed_point_array vector
/// @param[out] traj_nsteps_array: list of number of steps along each trajectory;
///                                 one per @p seed_point_array vector
/// @param[out] traj_length_array: list of lengths of each trajectory;
///                                 one per @p seed_point_array vector
///
/// @returns void
///
/// @ingroup integrate
///
__kernel void integrate_trajectory( __global const float2 *seed_point_array,
                                    __global const bool   *mask_array,
                                    __global const float2 *uv_array,
                                    __global       uint   *mapping_array,
                                    __global       char2  *trajectories_array,
                                    __global       ushort *traj_nsteps_array,
                                    __global       float  *traj_length_array )
{
    // global_id plus the chunk SEEDS_CHUNK_OFFSET is a seed point index
    const uint global_id = get_global_id(0u)+get_global_id(1u)*get_global_size(0u),
               seed_idx = (SEEDS_CHUNK_OFFSET)+global_id,
               trajectory_index = global_id*(MAX_N_STEPS);
    __global char2 *trajectory_vec;

//    printf("global ids %d %d size %d offset %d\n",
//            get_global_id(0u),get_global_id(1u),get_global_size(0u),get_global_offset(0u));

#ifdef VERBOSE
    // Report how kernel instances are distributed
    if (global_id==0 || global_id==get_global_offset(0u)) {
        printf("\n  >>> on GPU/OpenCL device: id=%d offset=%d ",
                get_global_id(0u),
                get_global_offset(0u));
        printf("#workitems=%d x #workgroups=%d = %d=%d\n",
                get_local_size(0u), get_num_groups(0u),
                get_local_size(0u)*get_num_groups(0u),
                get_global_size(0u));
    }
#endif
    if (seed_idx>=N_SEED_POINTS) {
        // This is a "padding seed", so let's bail
        return;
    }

    // Bug fix: bail BEFORE reading this element, because trajectories_array
    //   isn't padded and shouldn't be accessed for seed_idx>=N_SEED_POINTS
    trajectory_vec = &trajectories_array[trajectory_index];

    // Trace a "smooth" streamline from the seed point coordinate
    trajectory_record( uv_array, mask_array,
                       mapping_array, traj_nsteps_array, traj_length_array,
                       trajectory_vec, global_id, seed_idx,
                       seed_point_array[seed_idx] );
}
#endif