[Beignet] [PATCH 1/5 V2] libocl: Add the module for work_group functions.

Yang, Rong R rong.r.yang at intel.com
Tue Nov 24 21:02:29 PST 2015


The patchset LGTM, pushed, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> junyan.he at inbox.com
> Sent: Wednesday, November 18, 2015 14:07
> To: beignet at lists.freedesktop.org
> Subject: [Beignet] [PATCH 1/5 V2] libocl: Add the module for work_group
> functions.
> 
> From: Junyan He <junyan.he at linux.intel.com>
> 
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
>  backend/src/libocl/CMakeLists.txt           |    2 +-
>  backend/src/libocl/include/ocl.h            |    1 +
>  backend/src/libocl/include/ocl_work_group.h |  118 +++++++++++++++++++++++++
>  backend/src/libocl/src/ocl_work_group.cl    |  126 +++++++++++++++++++++++++++
>  4 files changed, 246 insertions(+), 1 deletion(-)
>  create mode 100644 backend/src/libocl/include/ocl_work_group.h
>  create mode 100644 backend/src/libocl/src/ocl_work_group.cl
> 
> diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
> index 0fffd9b..1d1ec68 100644
> --- a/backend/src/libocl/CMakeLists.txt
> +++ b/backend/src/libocl/CMakeLists.txt
> @@ -53,7 +53,7 @@ FOREACH(M ${OCL_COPY_HEADERS})
>  ENDFOREACH(M)
> 
>  SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy
> -                      ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image)
> +                      ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group)
>  FOREACH(M ${OCL_COPY_MODULES})
>      COPY_THE_HEADER(${M})
>      COPY_THE_SOURCE(${M})
> diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
> index 3a1f4bf..abb2bd4 100644
> --- a/backend/src/libocl/include/ocl.h
> +++ b/backend/src/libocl/include/ocl.h
> @@ -39,6 +39,7 @@
>  #include "ocl_vload.h"
>  #include "ocl_workitem.h"
>  #include "ocl_simd.h"
> +#include "ocl_work_group.h"
>  #pragma OPENCL EXTENSION cl_khr_fp64 : disable
>  #pragma OPENCL EXTENSION cl_khr_fp16 : disable
>  #endif
> diff --git a/backend/src/libocl/include/ocl_work_group.h b/backend/src/libocl/include/ocl_work_group.h
> new file mode 100644
> index 0000000..ebd264f
> --- /dev/null
> +++ b/backend/src/libocl/include/ocl_work_group.h
> @@ -0,0 +1,118 @@
> +/*
> + * Copyright © 2012 - 2014 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +#ifndef __OCL_WORK_GROUP_H__
> +#define __OCL_WORK_GROUP_H__
> +#include "ocl_types.h"
> +
> +int work_group_all(int predicate);
> +int work_group_any(int predicate);
> +
> +/* broadcast */
> +OVERLOADABLE int work_group_broadcast(int a, size_t local_id);
> +OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id);
> +OVERLOADABLE long work_group_broadcast(long a, size_t local_id);
> +OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id);
> +OVERLOADABLE float work_group_broadcast(float a, size_t local_id);
> +OVERLOADABLE double work_group_broadcast(double a, size_t local_id);
> +
> +OVERLOADABLE int work_group_broadcast(int a, size_t local_id_x, size_t local_id_y);
> +OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id_x, size_t local_id_y);
> +OVERLOADABLE long work_group_broadcast(long a, size_t local_id_x, size_t local_id_y);
> +OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id_x, size_t local_id_y);
> +OVERLOADABLE float work_group_broadcast(float a, size_t local_id_x, size_t local_id_y);
> +OVERLOADABLE double work_group_broadcast(double a, size_t local_id_x, size_t local_id_y);
> +
> +OVERLOADABLE int work_group_broadcast(int a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
> +OVERLOADABLE uint work_group_broadcast(uint a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
> +OVERLOADABLE long work_group_broadcast(long a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
> +OVERLOADABLE ulong work_group_broadcast(ulong a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
> +OVERLOADABLE float work_group_broadcast(float a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
> +OVERLOADABLE double work_group_broadcast(double a, size_t local_id_x, size_t local_id_y, size_t local_id_z);
> +
> +/* reduce add */
> +OVERLOADABLE int work_group_reduce_add(int x);
> +OVERLOADABLE uint work_group_reduce_add(uint x);
> +OVERLOADABLE long work_group_reduce_add(long x);
> +OVERLOADABLE ulong work_group_reduce_add(ulong x);
> +OVERLOADABLE float work_group_reduce_add(float x);
> +OVERLOADABLE double work_group_reduce_add(double x);
> +
> +/* reduce min */
> +OVERLOADABLE int work_group_reduce_min(int x);
> +OVERLOADABLE uint work_group_reduce_min(uint x);
> +OVERLOADABLE long work_group_reduce_min(long x);
> +OVERLOADABLE ulong work_group_reduce_min(ulong x);
> +OVERLOADABLE float work_group_reduce_min(float x);
> +OVERLOADABLE double work_group_reduce_min(double x);
> +
> +/* reduce max */
> +OVERLOADABLE int work_group_reduce_max(int x);
> +OVERLOADABLE uint work_group_reduce_max(uint x);
> +OVERLOADABLE long work_group_reduce_max(long x);
> +OVERLOADABLE ulong work_group_reduce_max(ulong x);
> +OVERLOADABLE float work_group_reduce_max(float x);
> +OVERLOADABLE double work_group_reduce_max(double x);
> +
> +/* scan_inclusive add */
> +OVERLOADABLE int work_group_scan_inclusive_add(int x);
> +OVERLOADABLE uint work_group_scan_inclusive_add(uint x);
> +OVERLOADABLE long work_group_scan_inclusive_add(long x);
> +OVERLOADABLE ulong work_group_scan_inclusive_add(ulong x);
> +OVERLOADABLE float work_group_scan_inclusive_add(float x);
> +OVERLOADABLE double work_group_scan_inclusive_add(double x);
> +
> +/* scan_inclusive min */
> +OVERLOADABLE int work_group_scan_inclusive_min(int x);
> +OVERLOADABLE uint work_group_scan_inclusive_min(uint x);
> +OVERLOADABLE long work_group_scan_inclusive_min(long x);
> +OVERLOADABLE ulong work_group_scan_inclusive_min(ulong x);
> +OVERLOADABLE float work_group_scan_inclusive_min(float x);
> +OVERLOADABLE double work_group_scan_inclusive_min(double x);
> +
> +/* scan_inclusive max */
> +OVERLOADABLE int work_group_scan_inclusive_max(int x);
> +OVERLOADABLE uint work_group_scan_inclusive_max(uint x);
> +OVERLOADABLE long work_group_scan_inclusive_max(long x);
> +OVERLOADABLE ulong work_group_scan_inclusive_max(ulong x);
> +OVERLOADABLE float work_group_scan_inclusive_max(float x);
> +OVERLOADABLE double work_group_scan_inclusive_max(double x);
> +
> +/* scan_exclusive add */
> +OVERLOADABLE int work_group_scan_exclusive_add(int x);
> +OVERLOADABLE uint work_group_scan_exclusive_add(uint x);
> +OVERLOADABLE long work_group_scan_exclusive_add(long x);
> +OVERLOADABLE ulong work_group_scan_exclusive_add(ulong x);
> +OVERLOADABLE float work_group_scan_exclusive_add(float x);
> +OVERLOADABLE double work_group_scan_exclusive_add(double x);
> +
> +/* scan_exclusive min */
> +OVERLOADABLE int work_group_scan_exclusive_min(int x);
> +OVERLOADABLE uint work_group_scan_exclusive_min(uint x);
> +OVERLOADABLE long work_group_scan_exclusive_min(long x);
> +OVERLOADABLE ulong work_group_scan_exclusive_min(ulong x);
> +OVERLOADABLE float work_group_scan_exclusive_min(float x);
> +OVERLOADABLE double work_group_scan_exclusive_min(double x);
> +
> +/* scan_exclusive max */
> +OVERLOADABLE int work_group_scan_exclusive_max(int x);
> +OVERLOADABLE uint work_group_scan_exclusive_max(uint x);
> +OVERLOADABLE long work_group_scan_exclusive_max(long x);
> +OVERLOADABLE ulong work_group_scan_exclusive_max(ulong x);
> +OVERLOADABLE float work_group_scan_exclusive_max(float x);
> +OVERLOADABLE double work_group_scan_exclusive_max(double x);
> +#endif  /* __OCL_WORK_GROUP_H__ */
> diff --git a/backend/src/libocl/src/ocl_work_group.cl b/backend/src/libocl/src/ocl_work_group.cl
> new file mode 100644
> index 0000000..065b223
> --- /dev/null
> +++ b/backend/src/libocl/src/ocl_work_group.cl
> @@ -0,0 +1,126 @@
> +/*
> + * Copyright © 2012 - 2014 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +#include "ocl_work_group.h"
> +
> +int __gen_ocl_work_group_all(int predicate);
> +int work_group_all(int predicate) {
> +  return __gen_ocl_work_group_all(predicate);
> +}
> +
> +int __gen_ocl_work_group_any(int predicate);
> +int work_group_any(int predicate) {
> +  return __gen_ocl_work_group_any(predicate);
> +}
> +
> +/* broadcast */
> +#define BROADCAST_IMPL(GEN_TYPE) \
> +    OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id); \
> +    OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id) { \
> +      return __gen_ocl_work_group_broadcast(a, local_id); \
> +    } \
> +    OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y); \
> +    OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y) { \
> +      return __gen_ocl_work_group_broadcast(a, local_id_x, local_id_y);  \
> +    } \
> +    OVERLOADABLE GEN_TYPE __gen_ocl_work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y, size_t local_id_z); \
> +    OVERLOADABLE GEN_TYPE work_group_broadcast(GEN_TYPE a, size_t local_id_x, size_t local_id_y, size_t local_id_z) { \
> +      return __gen_ocl_work_group_broadcast(a, local_id_x, local_id_y, local_id_z); \
> +    }
> +
> +BROADCAST_IMPL(int)
> +BROADCAST_IMPL(uint)
> +BROADCAST_IMPL(long)
> +BROADCAST_IMPL(ulong)
> +BROADCAST_IMPL(float)
> +BROADCAST_IMPL(double)
> +#undef BROADCAST_IMPL
> +
> +
> +#define RANGE_OP(RANGE, OP, GEN_TYPE) \
> +    OVERLOADABLE GEN_TYPE __gen_ocl_work_group_##RANGE##_##OP(GEN_TYPE x); \
> +    OVERLOADABLE GEN_TYPE work_group_##RANGE##_##OP(GEN_TYPE x) { \
> +      return __gen_ocl_work_group_##RANGE##_##OP(x);  \
> +    }
> +
> +/* reduce add */
> +RANGE_OP(reduce, add, int)
> +RANGE_OP(reduce, add, uint)
> +RANGE_OP(reduce, add, long)
> +RANGE_OP(reduce, add, ulong)
> +RANGE_OP(reduce, add, float)
> +RANGE_OP(reduce, add, double)
> +/* reduce min */
> +RANGE_OP(reduce, min, int)
> +RANGE_OP(reduce, min, uint)
> +RANGE_OP(reduce, min, long)
> +RANGE_OP(reduce, min, ulong)
> +RANGE_OP(reduce, min, float)
> +RANGE_OP(reduce, min, double)
> +/* reduce max */
> +RANGE_OP(reduce, max, int)
> +RANGE_OP(reduce, max, uint)
> +RANGE_OP(reduce, max, long)
> +RANGE_OP(reduce, max, ulong)
> +RANGE_OP(reduce, max, float)
> +RANGE_OP(reduce, max, double)
> +
> +/* scan_inclusive add */
> +RANGE_OP(scan_inclusive, add, int)
> +RANGE_OP(scan_inclusive, add, uint)
> +RANGE_OP(scan_inclusive, add, long)
> +RANGE_OP(scan_inclusive, add, ulong)
> +RANGE_OP(scan_inclusive, add, float)
> +RANGE_OP(scan_inclusive, add, double)
> +/* scan_inclusive min */
> +RANGE_OP(scan_inclusive, min, int)
> +RANGE_OP(scan_inclusive, min, uint)
> +RANGE_OP(scan_inclusive, min, long)
> +RANGE_OP(scan_inclusive, min, ulong)
> +RANGE_OP(scan_inclusive, min, float)
> +RANGE_OP(scan_inclusive, min, double)
> +/* scan_inclusive max */
> +RANGE_OP(scan_inclusive, max, int)
> +RANGE_OP(scan_inclusive, max, uint)
> +RANGE_OP(scan_inclusive, max, long)
> +RANGE_OP(scan_inclusive, max, ulong)
> +RANGE_OP(scan_inclusive, max, float)
> +RANGE_OP(scan_inclusive, max, double)
> +
> +/* scan_exclusive add */
> +RANGE_OP(scan_exclusive, add, int)
> +RANGE_OP(scan_exclusive, add, uint)
> +RANGE_OP(scan_exclusive, add, long)
> +RANGE_OP(scan_exclusive, add, ulong)
> +RANGE_OP(scan_exclusive, add, float)
> +RANGE_OP(scan_exclusive, add, double)
> +/* scan_exclusive min */
> +RANGE_OP(scan_exclusive, min, int)
> +RANGE_OP(scan_exclusive, min, uint)
> +RANGE_OP(scan_exclusive, min, long)
> +RANGE_OP(scan_exclusive, min, ulong)
> +RANGE_OP(scan_exclusive, min, float)
> +RANGE_OP(scan_exclusive, min, double)
> +/* scan_exclusive max */
> +RANGE_OP(scan_exclusive, max, int)
> +RANGE_OP(scan_exclusive, max, uint)
> +RANGE_OP(scan_exclusive, max, long)
> +RANGE_OP(scan_exclusive, max, ulong)
> +RANGE_OP(scan_exclusive, max, float)
> +RANGE_OP(scan_exclusive, max, double)
> +
> +#undef RANGE_OP
> --
> 1.7.9.5
> 
> 
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list