[Beignet] [PATCH 2/3] Implement async and prefetch built-in.
Zhigang Gong
zhigang.gong at linux.intel.com
Fri Aug 16 00:56:23 PDT 2013
On Thu, Aug 15, 2013 at 05:10:16PM +0800, Yang Rong wrote:
> Using the normal load & store to implement async copy,
> and so wait_group_events is same as barrier.
> Prefetch just define and function is empty.
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> backend/src/ocl_stdlib.tmpl.h | 67 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 67 insertions(+)
>
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index d1cc6aa..9703405 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -37,6 +37,7 @@ typedef __typeof__(sizeof(int)) size_t;
> typedef __typeof__((int *)0-(int *)0) ptrdiff_t;
> typedef signed int intptr_t;
> typedef unsigned int uintptr_t;
> +typedef signed int event_t;
event_t is a keyword since llvm 3.3, so you need to define event_t to another
name before this typedef, otherwise it trigger a compilation failure.
>
> /////////////////////////////////////////////////////////////////////////////
> // OpenCL address space
> @@ -1226,6 +1227,72 @@ INLINE void write_mem_fence(cl_mem_fence_flags flags) {
> }
>
> /////////////////////////////////////////////////////////////////////////////
> +// Async Copies and prefetch
> +/////////////////////////////////////////////////////////////////////////////
> +#define BODY(SRC_STRIDE, DST_STRIDE) \
> + uint size = get_local_size(2) * get_local_size(1) * get_local_size(0); \
> + uint count = num / size; \
> + uint offset = get_local_id(2) * get_local_size(1) + get_local_id(1); \
> + offset = offset * get_local_size(0) + get_local_id(0); \
> + for(uint i=0; i<count; i+=1) { \
> + *(dst + offset * DST_STRIDE) = *(src + offset * SRC_STRIDE); \
> + offset += size; \
> + } \
> + if(offset < num) \
> + *(dst + offset * DST_STRIDE) = *(src + offset * SRC_STRIDE); \
> + return 0;
> +
> +#define DEFN(TYPE) \
> +INLINE_OVERLOADABLE event_t async_work_group_copy (local TYPE *dst, const global TYPE *src, \
> + size_t num, event_t event) { \
> + BODY(1, 1); \
> +} \
> +INLINE_OVERLOADABLE event_t async_work_group_copy (global TYPE *dst, const local TYPE *src, \
> + size_t num, event_t event) { \
> + BODY(1, 1); \
> +} \
> +INLINE_OVERLOADABLE event_t async_work_group_strided_copy (local TYPE *dst, const global TYPE *src, \
> + size_t num, size_t src_stride, event_t event) { \
> + BODY(src_stride, 1); \
> +} \
> +INLINE_OVERLOADABLE event_t async_work_group_strided_copy (global TYPE *dst, const local TYPE *src, \
> + size_t num, size_t dst_stride, event_t event) { \
> + BODY(1, dst_stride); \
> +}
> +#define DEF(TYPE) \
> + DEFN(TYPE); DEFN(TYPE##2); DEFN(TYPE##3); DEFN(TYPE##4); DEFN(TYPE##8); DEFN(TYPE##16);
> +DEF(char)
> +DEF(uchar)
> +DEF(short)
> +DEF(ushort)
> +DEF(int)
> +DEF(uint)
> +DEF(float)
> +#undef BODY
> +#undef DEFN
> +#undef DEF
> +
> +void wait_group_events (int num_events, event_t *event_list) {
This function take a pointer parameter, you need to define it as INLINE function,
otherwise trigger compilation error with LLVM 3.3.
> + barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
> +}
> +
> +#define DEFN(TYPE) \
> +INLINE_OVERLOADABLE void prefetch(const global TYPE *p, size_t num) { }
> +#define DEF(TYPE) \
> +DEFN(TYPE); DEFN(TYPE##2); DEFN(TYPE##3); DEFN(TYPE##4); DEFN(TYPE##8); DEFN(TYPE##16)
> +DEF(char);
> +DEF(uchar);
> +DEF(short);
> +DEF(ushort);
> +DEF(int);
> +DEF(uint);
> +DEF(long);
> +DEF(ulong);
> +DEF(float);
> +#undef DEFN
> +#undef DEF
> +
> +/////////////////////////////////////////////////////////////////////////////
> // Atomic functions
> /////////////////////////////////////////////////////////////////////////////
> OVERLOADABLE uint __gen_ocl_atomic_add(__global uint *p, uint val);
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list