[Beignet] [PATCH 2/4] change the workitem related api to OVERLOABABLE.
xionghu.luo at intel.com
xionghu.luo at intel.com
Sun Mar 8 20:24:26 PDT 2015
From: Luo Xionghu <xionghu.luo at intel.com>
the SPIR header file requirs these functions to be overlable.
(https://github.com/KhronosGroup/SPIR-Tools/blob/master/headers/opencl_spir.h)
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
backend/src/libocl/include/ocl_async.h | 2 +-
backend/src/libocl/include/ocl_sync.h | 2 +-
backend/src/libocl/include/ocl_types.h | 2 --
backend/src/libocl/include/ocl_workitem.h | 16 ++++++++--------
backend/src/libocl/src/ocl_async.cl | 2 +-
backend/src/libocl/src/ocl_barrier.ll | 2 +-
backend/src/libocl/src/ocl_workitem.cl | 6 +++---
kernels/compiler_async_copy.cl | 4 ++--
8 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/backend/src/libocl/include/ocl_async.h b/backend/src/libocl/include/ocl_async.h
index dd89942..9d5cc06 100644
--- a/backend/src/libocl/include/ocl_async.h
+++ b/backend/src/libocl/include/ocl_async.h
@@ -45,7 +45,7 @@ DEF(double)
#undef DEFN
#undef DEF
-void wait_group_events (int num_events, event_t *event_list);
+OVERLOADABLE void wait_group_events (int num_events, event_t *event_list);
#define DEFN(TYPE) \
OVERLOADABLE void prefetch(const global TYPE *p, size_t num);
diff --git a/backend/src/libocl/include/ocl_sync.h b/backend/src/libocl/include/ocl_sync.h
index ed7c6e4..18090d5 100644
--- a/backend/src/libocl/include/ocl_sync.h
+++ b/backend/src/libocl/include/ocl_sync.h
@@ -27,7 +27,7 @@
#define CLK_GLOBAL_MEM_FENCE (1 << 1)
typedef uint cl_mem_fence_flags;
-void barrier(cl_mem_fence_flags flags);
+OVERLOADABLE void barrier(cl_mem_fence_flags flags);
void mem_fence(cl_mem_fence_flags flags);
void read_mem_fence(cl_mem_fence_flags flags);
void write_mem_fence(cl_mem_fence_flags flags);
diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h
index 487fe68..ae0236b 100644
--- a/backend/src/libocl/include/ocl_types.h
+++ b/backend/src/libocl/include/ocl_types.h
@@ -87,7 +87,5 @@ DEF(double);
// FIXME:
// This is a transitional hack to bypass the LLVM 3.3 built-in types.
// See the Khronos SPIR specification for handling of these types.
-typedef size_t __event_t;
-#define event_t __event_t
#endif /* __OCL_TYPES_H__ */
diff --git a/backend/src/libocl/include/ocl_workitem.h b/backend/src/libocl/include/ocl_workitem.h
index 7534ee8..84bb1fb 100644
--- a/backend/src/libocl/include/ocl_workitem.h
+++ b/backend/src/libocl/include/ocl_workitem.h
@@ -20,13 +20,13 @@
#include "ocl_types.h"
-uint get_work_dim(void);
-uint get_global_size(uint dimindx);
-uint get_global_id(uint dimindx);
-uint get_local_size(uint dimindx);
-uint get_local_id(uint dimindx);
-uint get_num_groups(uint dimindx);
-uint get_group_id(uint dimindx);
-uint get_global_offset(uint dimindx);
+OVERLOADABLE uint get_work_dim(void);
+OVERLOADABLE uint get_global_size(uint dimindx);
+OVERLOADABLE uint get_global_id(uint dimindx);
+OVERLOADABLE uint get_local_size(uint dimindx);
+OVERLOADABLE uint get_local_id(uint dimindx);
+OVERLOADABLE uint get_num_groups(uint dimindx);
+OVERLOADABLE uint get_group_id(uint dimindx);
+OVERLOADABLE uint get_global_offset(uint dimindx);
#endif /* __OCL_WORKITEM_H__ */
diff --git a/backend/src/libocl/src/ocl_async.cl b/backend/src/libocl/src/ocl_async.cl
index 041aaf2..10d0aa4 100644
--- a/backend/src/libocl/src/ocl_async.cl
+++ b/backend/src/libocl/src/ocl_async.cl
@@ -66,7 +66,7 @@ DEF(double)
#undef DEFN
#undef DEF
-void wait_group_events (int num_events, event_t *event_list) {
+OVERLOADABLE void wait_group_events (int num_events, event_t *event_list) {
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
}
diff --git a/backend/src/libocl/src/ocl_barrier.ll b/backend/src/libocl/src/ocl_barrier.ll
index 4e55fcb..dc3579c 100644
--- a/backend/src/libocl/src/ocl_barrier.ll
+++ b/backend/src/libocl/src/ocl_barrier.ll
@@ -10,7 +10,7 @@ declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate
declare void @__gen_ocl_barrier_global() nounwind alwaysinline noduplicate
declare void @__gen_ocl_barrier_local_and_global() nounwind alwaysinline noduplicate
-define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
+define void @_Z7barrierj(i32 %flags) nounwind noduplicate alwaysinline {
%1 = icmp eq i32 %flags, 3
br i1 %1, label %barrier_local_global, label %barrier_local_check
diff --git a/backend/src/libocl/src/ocl_workitem.cl b/backend/src/libocl/src/ocl_workitem.cl
index f4629f8..6ddc406 100644
--- a/backend/src/libocl/src/ocl_workitem.cl
+++ b/backend/src/libocl/src/ocl_workitem.cl
@@ -18,7 +18,7 @@
#include "ocl_workitem.h"
PURE CONST uint __gen_ocl_get_work_dim(void);
-uint get_work_dim(void)
+OVERLOADABLE uint get_work_dim(void)
{
return __gen_ocl_get_work_dim();
}
@@ -37,7 +37,7 @@ DECL_INTERNAL_WORK_ITEM_FN(get_num_groups)
#undef DECL_INTERNAL_WORK_ITEM_FN
#define DECL_PUBLIC_WORK_ITEM_FN(NAME, OTHER_RET) \
-unsigned NAME(unsigned int dim) { \
+OVERLOADABLE unsigned NAME(unsigned int dim) { \
if (dim == 0) return __gen_ocl_##NAME##0(); \
else if (dim == 1) return __gen_ocl_##NAME##1(); \
else if (dim == 2) return __gen_ocl_##NAME##2(); \
@@ -52,6 +52,6 @@ DECL_PUBLIC_WORK_ITEM_FN(get_global_offset, 0)
DECL_PUBLIC_WORK_ITEM_FN(get_num_groups, 1)
#undef DECL_PUBLIC_WORK_ITEM_FN
-uint get_global_id(uint dim) {
+OVERLOADABLE uint get_global_id(uint dim) {
return get_local_id(dim) + get_local_size(dim) * get_group_id(dim) + get_global_offset(dim);
}
diff --git a/kernels/compiler_async_copy.cl b/kernels/compiler_async_copy.cl
index dddde44..4beb436 100644
--- a/kernels/compiler_async_copy.cl
+++ b/kernels/compiler_async_copy.cl
@@ -5,10 +5,10 @@ compiler_async_copy_##TYPE(__global TYPE *dst, __global TYPE *src, __local TYPE
event_t event; \
int copiesPerWorkgroup = copiesPerWorkItem * get_local_size(0); \
int i; \
- event = async_work_group_copy((__local TYPE*)localBuffer, (__global const TYPE*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, (event_t)0 ); \
+ event = async_work_group_copy((__local TYPE*)localBuffer, (__global const TYPE*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 ); \
wait_group_events( 1, &event ); \
\
- event = async_work_group_copy((__global TYPE*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const TYPE*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 ); \
+ event = async_work_group_copy((__global TYPE*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const TYPE*)localBuffer, (size_t)copiesPerWorkgroup, 0 ); \
wait_group_events( 1, &event ); \
}
--
1.9.1
More information about the Beignet
mailing list