[Beignet] [PATCH 2/4] change the workitem related api to OVERLOABABLE.
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Mar 9 01:29:50 PDT 2015
On Mon, Mar 09, 2015 at 11:24:26AM +0800, xionghu.luo at intel.com wrote:
> From: Luo Xionghu <xionghu.luo at intel.com>
>
> the SPIR header file requirs these functions to be overlable.
> (https://github.com/KhronosGroup/SPIR-Tools/blob/master/headers/opencl_spir.h)
>
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
> backend/src/libocl/include/ocl_async.h | 2 +-
> backend/src/libocl/include/ocl_sync.h | 2 +-
> backend/src/libocl/include/ocl_types.h | 2 --
> backend/src/libocl/include/ocl_workitem.h | 16 ++++++++--------
> backend/src/libocl/src/ocl_async.cl | 2 +-
> backend/src/libocl/src/ocl_barrier.ll | 2 +-
> backend/src/libocl/src/ocl_workitem.cl | 6 +++---
> kernels/compiler_async_copy.cl | 4 ++--
> 8 files changed, 17 insertions(+), 19 deletions(-)
>
> diff --git a/backend/src/libocl/include/ocl_async.h b/backend/src/libocl/include/ocl_async.h
> index dd89942..9d5cc06 100644
> --- a/backend/src/libocl/include/ocl_async.h
> +++ b/backend/src/libocl/include/ocl_async.h
> @@ -45,7 +45,7 @@ DEF(double)
> #undef DEFN
> #undef DEF
>
> -void wait_group_events (int num_events, event_t *event_list);
> +OVERLOADABLE void wait_group_events (int num_events, event_t *event_list);
>
> #define DEFN(TYPE) \
> OVERLOADABLE void prefetch(const global TYPE *p, size_t num);
> diff --git a/backend/src/libocl/include/ocl_sync.h b/backend/src/libocl/include/ocl_sync.h
> index ed7c6e4..18090d5 100644
> --- a/backend/src/libocl/include/ocl_sync.h
> +++ b/backend/src/libocl/include/ocl_sync.h
> @@ -27,7 +27,7 @@
> #define CLK_GLOBAL_MEM_FENCE (1 << 1)
>
> typedef uint cl_mem_fence_flags;
> -void barrier(cl_mem_fence_flags flags);
> +OVERLOADABLE void barrier(cl_mem_fence_flags flags);
> void mem_fence(cl_mem_fence_flags flags);
> void read_mem_fence(cl_mem_fence_flags flags);
> void write_mem_fence(cl_mem_fence_flags flags);
> diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h
> index 487fe68..ae0236b 100644
> --- a/backend/src/libocl/include/ocl_types.h
> +++ b/backend/src/libocl/include/ocl_types.h
> @@ -87,7 +87,5 @@ DEF(double);
> // FIXME:
> // This is a transitional hack to bypass the LLVM 3.3 built-in types.
> // See the Khronos SPIR specification for handling of these types.
> -typedef size_t __event_t;
> -#define event_t __event_t;
This change requires to modify the corresponding utest cases. async_work_group_strided_copy.
It's trivial I will do that before push the whole patchset. All all the other parts LGTM.
Thanks,
Zhigang Gong.
>
> #endif /* __OCL_TYPES_H__ */
> diff --git a/backend/src/libocl/include/ocl_workitem.h b/backend/src/libocl/include/ocl_workitem.h
> index 7534ee8..84bb1fb 100644
> --- a/backend/src/libocl/include/ocl_workitem.h
> +++ b/backend/src/libocl/include/ocl_workitem.h
> @@ -20,13 +20,13 @@
>
> #include "ocl_types.h"
>
> -uint get_work_dim(void);
> -uint get_global_size(uint dimindx);
> -uint get_global_id(uint dimindx);
> -uint get_local_size(uint dimindx);
> -uint get_local_id(uint dimindx);
> -uint get_num_groups(uint dimindx);
> -uint get_group_id(uint dimindx);
> -uint get_global_offset(uint dimindx);
> +OVERLOADABLE uint get_work_dim(void);
> +OVERLOADABLE uint get_global_size(uint dimindx);
> +OVERLOADABLE uint get_global_id(uint dimindx);
> +OVERLOADABLE uint get_local_size(uint dimindx);
> +OVERLOADABLE uint get_local_id(uint dimindx);
> +OVERLOADABLE uint get_num_groups(uint dimindx);
> +OVERLOADABLE uint get_group_id(uint dimindx);
> +OVERLOADABLE uint get_global_offset(uint dimindx);
>
> #endif /* __OCL_WORKITEM_H__ */
> diff --git a/backend/src/libocl/src/ocl_async.cl b/backend/src/libocl/src/ocl_async.cl
> index 041aaf2..10d0aa4 100644
> --- a/backend/src/libocl/src/ocl_async.cl
> +++ b/backend/src/libocl/src/ocl_async.cl
> @@ -66,7 +66,7 @@ DEF(double)
> #undef DEFN
> #undef DEF
>
> -void wait_group_events (int num_events, event_t *event_list) {
> +OVERLOADABLE void wait_group_events (int num_events, event_t *event_list) {
> barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
> }
>
> diff --git a/backend/src/libocl/src/ocl_barrier.ll b/backend/src/libocl/src/ocl_barrier.ll
> index 4e55fcb..dc3579c 100644
> --- a/backend/src/libocl/src/ocl_barrier.ll
> +++ b/backend/src/libocl/src/ocl_barrier.ll
> @@ -10,7 +10,7 @@ declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate
> declare void @__gen_ocl_barrier_global() nounwind alwaysinline noduplicate
> declare void @__gen_ocl_barrier_local_and_global() nounwind alwaysinline noduplicate
>
> -define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
> +define void @_Z7barrierj(i32 %flags) nounwind noduplicate alwaysinline {
> %1 = icmp eq i32 %flags, 3
> br i1 %1, label %barrier_local_global, label %barrier_local_check
>
> diff --git a/backend/src/libocl/src/ocl_workitem.cl b/backend/src/libocl/src/ocl_workitem.cl
> index f4629f8..6ddc406 100644
> --- a/backend/src/libocl/src/ocl_workitem.cl
> +++ b/backend/src/libocl/src/ocl_workitem.cl
> @@ -18,7 +18,7 @@
> #include "ocl_workitem.h"
>
> PURE CONST uint __gen_ocl_get_work_dim(void);
> -uint get_work_dim(void)
> +OVERLOADABLE uint get_work_dim(void)
> {
> return __gen_ocl_get_work_dim();
> }
> @@ -37,7 +37,7 @@ DECL_INTERNAL_WORK_ITEM_FN(get_num_groups)
> #undef DECL_INTERNAL_WORK_ITEM_FN
>
> #define DECL_PUBLIC_WORK_ITEM_FN(NAME, OTHER_RET) \
> -unsigned NAME(unsigned int dim) { \
> +OVERLOADABLE unsigned NAME(unsigned int dim) { \
> if (dim == 0) return __gen_ocl_##NAME##0(); \
> else if (dim == 1) return __gen_ocl_##NAME##1(); \
> else if (dim == 2) return __gen_ocl_##NAME##2(); \
> @@ -52,6 +52,6 @@ DECL_PUBLIC_WORK_ITEM_FN(get_global_offset, 0)
> DECL_PUBLIC_WORK_ITEM_FN(get_num_groups, 1)
> #undef DECL_PUBLIC_WORK_ITEM_FN
>
> -uint get_global_id(uint dim) {
> +OVERLOADABLE uint get_global_id(uint dim) {
> return get_local_id(dim) + get_local_size(dim) * get_group_id(dim) + get_global_offset(dim);
> }
> diff --git a/kernels/compiler_async_copy.cl b/kernels/compiler_async_copy.cl
> index dddde44..4beb436 100644
> --- a/kernels/compiler_async_copy.cl
> +++ b/kernels/compiler_async_copy.cl
> @@ -5,10 +5,10 @@ compiler_async_copy_##TYPE(__global TYPE *dst, __global TYPE *src, __local TYPE
> event_t event; \
> int copiesPerWorkgroup = copiesPerWorkItem * get_local_size(0); \
> int i; \
> - event = async_work_group_copy((__local TYPE*)localBuffer, (__global const TYPE*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, (event_t)0 ); \
> + event = async_work_group_copy((__local TYPE*)localBuffer, (__global const TYPE*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 ); \
> wait_group_events( 1, &event ); \
> \
> - event = async_work_group_copy((__global TYPE*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const TYPE*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 ); \
> + event = async_work_group_copy((__global TYPE*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const TYPE*)localBuffer, (size_t)copiesPerWorkgroup, 0 ); \
> wait_group_events( 1, &event ); \
> }
>
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list