[Beignet] [PATCH V2 05/15] Libocl: Add sub group broadcast short builtin function
Xiuli Pan
xiuli.pan at intel.com
Wed Oct 19 06:37:14 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
Add the sub_group_broadcast and intel_sub_group_broadcast builtins for the short and ushort types.
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 9 +++++++++
backend/src/libocl/tmpl/ocl_simd.tmpl.h | 4 ++++
2 files changed, 13 insertions(+)
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index 245ce8a..d1bcfa3 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -50,8 +50,17 @@ BROADCAST_IMPL(ulong)
BROADCAST_IMPL(half)
BROADCAST_IMPL(float)
BROADCAST_IMPL(double)
+BROADCAST_IMPL(short)
+BROADCAST_IMPL(ushort)
#undef BROADCAST_IMPL
+OVERLOADABLE short intel_sub_group_broadcast(short a, uint local_id) {
+ return __gen_ocl_sub_group_broadcast(a, local_id);
+}
+
+OVERLOADABLE ushort intel_sub_group_broadcast(ushort a, uint local_id) {
+ return __gen_ocl_sub_group_broadcast(a, local_id);
+}
#define RANGE_OP(RANGE, OP, GEN_TYPE, SIGN) \
OVERLOADABLE GEN_TYPE __gen_ocl_sub_group_##RANGE##_##OP(bool sign, GEN_TYPE x); \
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index e8dc6f4..c609c2e 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -42,7 +42,11 @@ OVERLOADABLE ulong sub_group_broadcast(ulong a, uint local_id);
OVERLOADABLE half sub_group_broadcast(half a, uint local_id);
OVERLOADABLE float sub_group_broadcast(float a, uint local_id);
OVERLOADABLE double sub_group_broadcast(double a, uint local_id);
+OVERLOADABLE short sub_group_broadcast(short a,uint local_id);
+OVERLOADABLE ushort sub_group_broadcast(ushort a, uint local_id);
+OVERLOADABLE short intel_sub_group_broadcast(short a, uint local_id);
+OVERLOADABLE ushort intel_sub_group_broadcast(ushort a, uint local_id);
/* reduce add */
OVERLOADABLE int sub_group_reduce_add(int x);
OVERLOADABLE uint sub_group_reduce_add(uint x);
--
2.7.4
More information about the Beignet mailing list