[Beignet] [PATCH 1/3] Add three copy cl files for Enqueue Copy usage.

Zhigang Gong zhigang.gong at linux.intel.com
Tue Apr 1 22:35:39 PDT 2014


LGTM, pushed, thanks.

On Wed, Mar 26, 2014 at 06:27:48PM +0800, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
> 
> Add these three cl files,
> one for src and dst are not aligned but have same offset to 4.
> second for src's %4 offset is bigger than the dst's
> third for src's %4 offset is small than the dst's
> 
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
>  src/CMakeLists.txt                                 |  4 ++-
>  .../cl_internal_copy_buf_unalign_dst_offset.cl     | 28 +++++++++++++++++++++
>  .../cl_internal_copy_buf_unalign_same_offset.cl    | 19 ++++++++++++++
>  .../cl_internal_copy_buf_unalign_src_offset.cl     | 29 ++++++++++++++++++++++
>  4 files changed, 79 insertions(+), 1 deletion(-)
>  create mode 100644 src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
>  create mode 100644 src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
>  create mode 100644 src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
> 
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index 95ff56f..9db53ad 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -18,7 +18,9 @@ endforeach (KF)
>  endmacro (MakeKernelBinStr)
>  
>  set (KERNEL_STR_FILES)
> -set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 cl_internal_copy_buf_align16)
> +set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4
> +cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
> +cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset)
>  MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
>  
>  set(OPENCL_SRC
> diff --git a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
> new file mode 100644
> index 0000000..13f4162
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
> @@ -0,0 +1,28 @@
> +kernel void __cl_cpy_region_unalign_dst_offset ( global int* src, unsigned int src_offset,
> +                                     global int* dst, unsigned int dst_offset,
> +				     unsigned int size,
> +				     unsigned int first_mask, unsigned int last_mask,
> +				     unsigned int shift, unsigned int dw_mask)
> +{
> +    int i = get_global_id(0);
> +    unsigned int tmp = 0;
> +
> +    if (i > size -1)
> +        return;
> +
> +    /* last dw, need to be careful, not to overflow the source. */
> +    if ((i == size - 1) && ((last_mask & (~(~dw_mask >> shift))) == 0)) {
> +        tmp = ((src[src_offset + i] & ~dw_mask) >> shift);
> +    } else {
> +        tmp = ((src[src_offset + i] & ~dw_mask) >> shift)
> +             | ((src[src_offset + i + 1] & dw_mask) << (32 - shift));
> +    }
> +
> +    if (i == 0) {
> +        dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask));
> +    } else if (i == size - 1) {
> +        dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask));
> +    } else {
> +        dst[i+dst_offset] = tmp;
> +    }
> +}
> diff --git a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
> new file mode 100644
> index 0000000..8510246
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
> @@ -0,0 +1,19 @@
> +kernel void __cl_cpy_region_unalign_same_offset ( global int* src, unsigned int src_offset,
> +                                     global int* dst, unsigned int dst_offset,
> +				     unsigned int size,
> +				     unsigned int first_mask, unsigned int last_mask)
> +{
> +    int i = get_global_id(0);
> +    if (i > size -1)
> +       return;
> +
> +    if (i == 0) {
> +        dst[dst_offset] = (dst[dst_offset] & first_mask)
> +             | (src[src_offset] & (~first_mask));
> +    } else if (i == size - 1) {
> +        dst[i+dst_offset] = (src[i+src_offset] & last_mask)
> +            | (dst[i+dst_offset] & (~last_mask));
> +    } else {
> +        dst[i+dst_offset] = src[i+src_offset];
> +    }
> +}
> diff --git a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
> new file mode 100644
> index 0000000..f98368a
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
> @@ -0,0 +1,29 @@
> +kernel void __cl_cpy_region_unalign_src_offset ( global int* src, unsigned int src_offset,
> +                                     global int* dst, unsigned int dst_offset,
> +				     unsigned int size,
> +				     unsigned int first_mask, unsigned int last_mask,
> +				     unsigned int shift, unsigned int dw_mask, int src_less)
> +{
> +    int i = get_global_id(0);
> +    unsigned int tmp = 0;
> +
> +    if (i > size -1)
> +        return;
> +
> +    if (i == 0) {
> +        tmp = ((src[src_offset + i] & dw_mask) << shift);
> +    } else if (src_less && i == size - 1) { // not exceed the bound of source
> +        tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift));
> +    } else {
> +        tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift))
> +             | ((src[src_offset + i] & dw_mask) << shift);
> +    }
> +
> +    if (i == 0) {
> +        dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask));
> +    } else if (i == size - 1) {
> +        dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask));
> +    } else {
> +        dst[i+dst_offset] = tmp;
> +    }
> +}
> -- 
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list