[Beignet] [PATCH 05/14] Libocl: Add sub group broadcast short builtin function

Xiuli Pan xiuli.pan at intel.com
Wed Oct 12 08:56:35 UTC 2016


From: Pan Xiuli <xiuli.pan at intel.com>

Add sub_group_broadcast and intel_sub_group_broadcast overloads for the short and ushort types.

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 9 +++++++++
 backend/src/libocl/tmpl/ocl_simd.tmpl.h  | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index 245ce8a..d1bcfa3 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -50,8 +50,17 @@ BROADCAST_IMPL(ulong)
 BROADCAST_IMPL(half)
 BROADCAST_IMPL(float)
 BROADCAST_IMPL(double)
+BROADCAST_IMPL(short)
+BROADCAST_IMPL(ushort)
 #undef BROADCAST_IMPL
 
+OVERLOADABLE short intel_sub_group_broadcast(short a, uint local_id) { /* short overload of the intel_-prefixed broadcast */
+  return __gen_ocl_sub_group_broadcast(a, local_id); /* forwards a and local_id unchanged; callee is declared outside this hunk */
+}
+
+OVERLOADABLE ushort intel_sub_group_broadcast(ushort a, uint local_id) { /* ushort overload of the intel_-prefixed broadcast */
+  return __gen_ocl_sub_group_broadcast(a, local_id); /* forwards a and local_id unchanged; callee is declared outside this hunk */
+}
 
 #define RANGE_OP(RANGE, OP, GEN_TYPE, SIGN) \
     OVERLOADABLE GEN_TYPE __gen_ocl_sub_group_##RANGE##_##OP(bool sign, GEN_TYPE x); \
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index e8dc6f4..c609c2e 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -42,7 +42,11 @@ OVERLOADABLE ulong sub_group_broadcast(ulong a, uint local_id);
 OVERLOADABLE half sub_group_broadcast(half a, uint local_id);
 OVERLOADABLE float sub_group_broadcast(float a, uint local_id);
 OVERLOADABLE double sub_group_broadcast(double a, uint local_id);
+OVERLOADABLE short sub_group_broadcast(short a, uint local_id);
+OVERLOADABLE ushort sub_group_broadcast(ushort a, uint local_id);
 
+OVERLOADABLE short intel_sub_group_broadcast(short a, uint local_id);
+OVERLOADABLE ushort intel_sub_group_broadcast(ushort a, uint local_id);
 /* reduce add */
 OVERLOADABLE int sub_group_reduce_add(int x);
 OVERLOADABLE uint sub_group_reduce_add(uint x);
-- 
2.7.4



More information about the Beignet mailing list