[Beignet] [PATCH V2 1/2] Implement async and prefetch built-in.
Zhigang Gong
zhigang.gong at linux.intel.com
Fri Aug 16 01:29:54 PDT 2013
LGTM, pushed, thanks.
On Fri, Aug 16, 2013 at 04:24:08PM +0800, Yang Rong wrote:
> Implement async copy using normal loads & stores,
> so wait_group_events only needs to use a barrier.
> Prefetch is just defined as an empty function.
>
> V2: fix llvm build error.
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> backend/src/ocl_stdlib.tmpl.h | 66 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 66 insertions(+)
>
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index d1cc6aa..696a6c9 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -1226,6 +1226,72 @@ INLINE void write_mem_fence(cl_mem_fence_flags flags) {
> }
>
> /////////////////////////////////////////////////////////////////////////////
> +// Async Copies and prefetch
> +/////////////////////////////////////////////////////////////////////////////
> +#define BODY(SRC_STRIDE, DST_STRIDE) \
> + uint size = get_local_size(2) * get_local_size(1) * get_local_size(0); \
> + uint count = num / size; \
> + uint offset = get_local_id(2) * get_local_size(1) + get_local_id(1); \
> + offset = offset * get_local_size(0) + get_local_id(0); \
> + for(uint i=0; i<count; i+=1) { \
> + *(dst + offset * DST_STRIDE) = *(src + offset * SRC_STRIDE); \
> + offset += size; \
> + } \
> + if(offset < num) \
> + *(dst + offset * DST_STRIDE) = *(src + offset * SRC_STRIDE); \
> + return 0;
> +
> +#define DEFN(TYPE) \
> +INLINE_OVERLOADABLE event_t async_work_group_copy (local TYPE *dst, const global TYPE *src, \
> + size_t num, event_t event) { \
> + BODY(1, 1); \
> +} \
> +INLINE_OVERLOADABLE event_t async_work_group_copy (global TYPE *dst, const local TYPE *src, \
> + size_t num, event_t event) { \
> + BODY(1, 1); \
> +} \
> +INLINE_OVERLOADABLE event_t async_work_group_strided_copy (local TYPE *dst, const global TYPE *src, \
> + size_t num, size_t src_stride, event_t event) { \
> + BODY(src_stride, 1); \
> +} \
> +INLINE_OVERLOADABLE event_t async_work_group_strided_copy (global TYPE *dst, const local TYPE *src, \
> + size_t num, size_t dst_stride, event_t event) { \
> + BODY(1, dst_stride); \
> +}
> +#define DEF(TYPE) \
> + DEFN(TYPE); DEFN(TYPE##2); DEFN(TYPE##3); DEFN(TYPE##4); DEFN(TYPE##8); DEFN(TYPE##16);
> +DEF(char)
> +DEF(uchar)
> +DEF(short)
> +DEF(ushort)
> +DEF(int)
> +DEF(uint)
> +DEF(float)
> +#undef BODY
> +#undef DEFN
> +#undef DEF
> +
> +INLINE void wait_group_events (int num_events, event_t *event_list) {
> + barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
> +}
> +
> +#define DEFN(TYPE) \
> +INLINE_OVERLOADABLE void prefetch(const global TYPE *p, size_t num) { }
> +#define DEF(TYPE) \
> +DEFN(TYPE); DEFN(TYPE##2); DEFN(TYPE##3); DEFN(TYPE##4); DEFN(TYPE##8); DEFN(TYPE##16)
> +DEF(char);
> +DEF(uchar);
> +DEF(short);
> +DEF(ushort);
> +DEF(int);
> +DEF(uint);
> +DEF(long);
> +DEF(ulong);
> +DEF(float);
> +#undef DEFN
> +#undef DEF
> +
> +/////////////////////////////////////////////////////////////////////////////
> // Atomic functions
> /////////////////////////////////////////////////////////////////////////////
> OVERLOADABLE uint __gen_ocl_atomic_add(__global uint *p, uint val);
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list