[Beignet] [PATCH V2 1/2] Implement async and prefetch built-in.

Zhigang Gong zhigang.gong at linux.intel.com
Fri Aug 16 01:29:54 PDT 2013


LGTM, pushed, thanks.

On Fri, Aug 16, 2013 at 04:24:08PM +0800, Yang Rong wrote:
> Using the normal load & store to implement async copy,
> and so wait_group_events use barrier.
> Prefetch just define an empty function.
> 
> V2: fix llvm build error.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/ocl_stdlib.tmpl.h | 66 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 66 insertions(+)
> 
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index d1cc6aa..696a6c9 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -1226,6 +1226,72 @@ INLINE void write_mem_fence(cl_mem_fence_flags flags) {
>  }
>  
>  /////////////////////////////////////////////////////////////////////////////
> +// Async Copies and prefetch
> +/////////////////////////////////////////////////////////////////////////////
> +#define BODY(SRC_STRIDE, DST_STRIDE) \
> +  uint size = get_local_size(2) * get_local_size(1) * get_local_size(0); \
> +  uint count = num / size;  \
> +  uint offset = get_local_id(2) * get_local_size(1) + get_local_id(1);  \
> +  offset = offset * get_local_size(0) + get_local_id(0); \
> +  for(uint i=0; i<count; i+=1) { \
> +    *(dst + offset * DST_STRIDE) = *(src + offset * SRC_STRIDE); \
> +    offset += size;                                 \
> +  } \
> +  if(offset < num) \
> +    *(dst + offset * DST_STRIDE) = *(src + offset * SRC_STRIDE); \
> +  return 0;
> +
> +#define DEFN(TYPE) \
> +INLINE_OVERLOADABLE event_t async_work_group_copy (local TYPE *dst,  const global TYPE *src, \
> +										    size_t num, event_t event) { \
> +  BODY(1, 1); \
> +} \
> +INLINE_OVERLOADABLE event_t async_work_group_copy (global TYPE *dst,  const local TYPE *src, \
> +										    size_t num, event_t event) { \
> +  BODY(1, 1); \
> +} \
> +INLINE_OVERLOADABLE event_t async_work_group_strided_copy (local TYPE *dst,  const global TYPE *src, \
> +										            size_t num, size_t src_stride, event_t event) { \
> +  BODY(src_stride, 1); \
> +} \
> +INLINE_OVERLOADABLE event_t async_work_group_strided_copy (global TYPE *dst,  const local TYPE *src, \
> +										            size_t num, size_t dst_stride, event_t event) { \
> +  BODY(1, dst_stride); \
> +}
> +#define DEF(TYPE) \
> +  DEFN(TYPE); DEFN(TYPE##2); DEFN(TYPE##3); DEFN(TYPE##4); DEFN(TYPE##8); DEFN(TYPE##16);
> +DEF(char)
> +DEF(uchar)
> +DEF(short)
> +DEF(ushort)
> +DEF(int)
> +DEF(uint)
> +DEF(float)
> +#undef BODY
> +#undef DEFN
> +#undef DEF
> +
> +INLINE void wait_group_events (int num_events, event_t *event_list) {
> +  barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
> +}
> +
> +#define DEFN(TYPE) \
> +INLINE_OVERLOADABLE void prefetch(const global TYPE *p, size_t num) { }
> +#define DEF(TYPE) \
> +DEFN(TYPE); DEFN(TYPE##2); DEFN(TYPE##3); DEFN(TYPE##4); DEFN(TYPE##8); DEFN(TYPE##16)
> +DEF(char);
> +DEF(uchar);
> +DEF(short);
> +DEF(ushort);
> +DEF(int);
> +DEF(uint);
> +DEF(long);
> +DEF(ulong);
> +DEF(float);
> +#undef DEFN
> +#undef DEF
> +
> +/////////////////////////////////////////////////////////////////////////////
>  // Atomic functions
>  /////////////////////////////////////////////////////////////////////////////
>  OVERLOADABLE uint __gen_ocl_atomic_add(__global uint *p, uint val);
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list