[Beignet] [PATCH V2 09/14] Backend: Add short sub group builtin functions
Xiuli Pan
xiuli.pan at intel.com
Fri Oct 14 03:34:12 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
Add the Intel sub-group builtin functions for the short and ushort types.
V2: Add the gen8 backend code.
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
backend/src/backend/gen8_context.cpp | 12 +++++++++
backend/src/backend/gen_context.cpp | 12 +++++++++
backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 44 ++++++++++++++++++++++++++++++++
backend/src/libocl/tmpl/ocl_simd.tmpl.h | 36 ++++++++++++++++++++++++++
4 files changed, 104 insertions(+)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 09b38b2..42736e1 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -1351,6 +1351,10 @@ namespace gbe
p->MOV(dataReg, GenRegister::immint64(0x0));
else if (dataReg.type == GEN_TYPE_UL)
p->MOV(dataReg, GenRegister::immuint64(0x0));
+ else if (dataReg.type == GEN_TYPE_W)
+ p->MOV(dataReg, GenRegister::immw(0x0));
+ else if (dataReg.type == GEN_TYPE_UW)
+ p->MOV(dataReg, GenRegister::immuw(0x0));
else
GBE_ASSERT(0); /* unsupported data-type */
}
@@ -1371,6 +1375,10 @@ namespace gbe
p->MOV(dataReg, GenRegister::immint64(0x7FFFFFFFFFFFFFFFL));
else if (dataReg.type == GEN_TYPE_UL)
p->MOV(dataReg, GenRegister::immuint64(0xFFFFFFFFFFFFFFFFL));
+ else if (dataReg.type == GEN_TYPE_W)
+ p->MOV(dataReg, GenRegister::immw(0x7FFF));
+ else if (dataReg.type == GEN_TYPE_UW)
+ p->MOV(dataReg, GenRegister::immuw(0xFFFF));
else
GBE_ASSERT(0); /* unsupported data-type */
}
@@ -1391,6 +1399,10 @@ namespace gbe
p->MOV(dataReg, GenRegister::immint64(0x8000000000000000L));
else if (dataReg.type == GEN_TYPE_UL)
p->MOV(dataReg, GenRegister::immuint64(0x0));
+ else if (dataReg.type == GEN_TYPE_W)
+ p->MOV(dataReg, GenRegister::immw(0x8000));
+ else if (dataReg.type == GEN_TYPE_UW)
+ p->MOV(dataReg, GenRegister::immuw(0x0));
else
GBE_ASSERT(0); /* unsupported data-type */
}
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index e907931..a1ae5ea 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2878,6 +2878,10 @@ namespace gbe
p->MOV(dataReg, GenRegister::immint64(0x0));
else if (dataReg.type == GEN_TYPE_UL)
p->MOV(dataReg, GenRegister::immuint64(0x0));
+ else if (dataReg.type == GEN_TYPE_W)
+ p->MOV(dataReg, GenRegister::immw(0x0));
+ else if (dataReg.type == GEN_TYPE_UW)
+ p->MOV(dataReg, GenRegister::immuw(0x0));
else
GBE_ASSERT(0); /* unsupported data-type */
}
@@ -2896,6 +2900,10 @@ namespace gbe
p->MOV(dataReg, GenRegister::immint64(0x7FFFFFFFFFFFFFFFL));
else if (dataReg.type == GEN_TYPE_UL)
p->MOV(dataReg, GenRegister::immuint64(0xFFFFFFFFFFFFFFFFL));
+ else if (dataReg.type == GEN_TYPE_W)
+ p->MOV(dataReg, GenRegister::immw(0x7FFF));
+ else if (dataReg.type == GEN_TYPE_UW)
+ p->MOV(dataReg, GenRegister::immuw(0xFFFF));
else
GBE_ASSERT(0); /* unsupported data-type */
}
@@ -2914,6 +2922,10 @@ namespace gbe
p->MOV(dataReg, GenRegister::immint64(0x8000000000000000L));
else if (dataReg.type == GEN_TYPE_UL)
p->MOV(dataReg, GenRegister::immuint64(0x0));
+ else if (dataReg.type == GEN_TYPE_W)
+ p->MOV(dataReg, GenRegister::immw(0x8000));
+ else if (dataReg.type == GEN_TYPE_UW)
+ p->MOV(dataReg, GenRegister::immuw(0x0));
else
GBE_ASSERT(0); /* unsupported data-type */
}
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index d1bcfa3..90c7cc2 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -76,6 +76,8 @@ RANGE_OP(reduce, add, ulong, false)
RANGE_OP(reduce, add, half, true)
RANGE_OP(reduce, add, float, true)
RANGE_OP(reduce, add, double, true)
+RANGE_OP(reduce, add, short, true)
+RANGE_OP(reduce, add, ushort, false)
/* reduce min */
RANGE_OP(reduce, min, int, true)
RANGE_OP(reduce, min, uint, false)
@@ -84,6 +86,8 @@ RANGE_OP(reduce, min, ulong, false)
RANGE_OP(reduce, min, half, true)
RANGE_OP(reduce, min, float, true)
RANGE_OP(reduce, min, double, true)
+RANGE_OP(reduce, min, short, true)
+RANGE_OP(reduce, min, ushort, false)
/* reduce max */
RANGE_OP(reduce, max, int, true)
RANGE_OP(reduce, max, uint, false)
@@ -92,6 +96,8 @@ RANGE_OP(reduce, max, ulong, false)
RANGE_OP(reduce, max, half, true)
RANGE_OP(reduce, max, float, true)
RANGE_OP(reduce, max, double, true)
+RANGE_OP(reduce, max, short, true)
+RANGE_OP(reduce, max, ushort, false)
/* scan_inclusive add */
RANGE_OP(scan_inclusive, add, int, true)
@@ -101,6 +107,8 @@ RANGE_OP(scan_inclusive, add, ulong, false)
RANGE_OP(scan_inclusive, add, half, true)
RANGE_OP(scan_inclusive, add, float, true)
RANGE_OP(scan_inclusive, add, double, true)
+RANGE_OP(scan_inclusive, add, short, true)
+RANGE_OP(scan_inclusive, add, ushort, false)
/* scan_inclusive min */
RANGE_OP(scan_inclusive, min, int, true)
RANGE_OP(scan_inclusive, min, uint, false)
@@ -109,6 +117,8 @@ RANGE_OP(scan_inclusive, min, ulong, false)
RANGE_OP(scan_inclusive, min, half, true)
RANGE_OP(scan_inclusive, min, float, true)
RANGE_OP(scan_inclusive, min, double, true)
+RANGE_OP(scan_inclusive, min, short, true)
+RANGE_OP(scan_inclusive, min, ushort, false)
/* scan_inclusive max */
RANGE_OP(scan_inclusive, max, int, true)
RANGE_OP(scan_inclusive, max, uint, false)
@@ -117,6 +127,8 @@ RANGE_OP(scan_inclusive, max, ulong, false)
RANGE_OP(scan_inclusive, max, half, true)
RANGE_OP(scan_inclusive, max, float, true)
RANGE_OP(scan_inclusive, max, double, true)
+RANGE_OP(scan_inclusive, max, short, true)
+RANGE_OP(scan_inclusive, max, ushort, false)
/* scan_exclusive add */
RANGE_OP(scan_exclusive, add, int, true)
@@ -126,6 +138,8 @@ RANGE_OP(scan_exclusive, add, ulong, false)
RANGE_OP(scan_exclusive, add, half, true)
RANGE_OP(scan_exclusive, add, float, true)
RANGE_OP(scan_exclusive, add, double, true)
+RANGE_OP(scan_exclusive, add, short, true)
+RANGE_OP(scan_exclusive, add, ushort, false)
/* scan_exclusive min */
RANGE_OP(scan_exclusive, min, int, true)
RANGE_OP(scan_exclusive, min, uint, false)
@@ -134,6 +148,8 @@ RANGE_OP(scan_exclusive, min, ulong, false)
RANGE_OP(scan_exclusive, min, half, true)
RANGE_OP(scan_exclusive, min, float, true)
RANGE_OP(scan_exclusive, min, double, true)
+RANGE_OP(scan_exclusive, min, short, true)
+RANGE_OP(scan_exclusive, min, ushort, false)
/* scan_exclusive max */
RANGE_OP(scan_exclusive, max, int, true)
RANGE_OP(scan_exclusive, max, uint, false)
@@ -142,8 +158,36 @@ RANGE_OP(scan_exclusive, max, ulong, false)
RANGE_OP(scan_exclusive, max, half, true)
RANGE_OP(scan_exclusive, max, float, true)
RANGE_OP(scan_exclusive, max, double, true)
+RANGE_OP(scan_exclusive, max, short, true)
+RANGE_OP(scan_exclusive, max, ushort, false)
#undef RANGE_OP
+/* Define the intel_sub_group_<RANGE>_<OP> overload for GEN_TYPE; it passes SIGN and x to __gen_ocl_sub_group_<RANGE>_<OP>. */
+#define INTEL_RANGE_OP(RANGE, OP, GEN_TYPE, SIGN) \
+ OVERLOADABLE GEN_TYPE intel_sub_group_##RANGE##_##OP(GEN_TYPE x) { \
+ return __gen_ocl_sub_group_##RANGE##_##OP(SIGN, x); \
+ }
+
+INTEL_RANGE_OP(reduce, add, short, true)
+INTEL_RANGE_OP(reduce, add, ushort, false)
+INTEL_RANGE_OP(reduce, min, short, true)
+INTEL_RANGE_OP(reduce, min, ushort, false)
+INTEL_RANGE_OP(reduce, max, short, true)
+INTEL_RANGE_OP(reduce, max, ushort, false)
+INTEL_RANGE_OP(scan_inclusive, add, short, true)
+INTEL_RANGE_OP(scan_inclusive, add, ushort, false)
+INTEL_RANGE_OP(scan_inclusive, min, short, true)
+INTEL_RANGE_OP(scan_inclusive, min, ushort, false)
+INTEL_RANGE_OP(scan_inclusive, max, short, true)
+INTEL_RANGE_OP(scan_inclusive, max, ushort, false)
+INTEL_RANGE_OP(scan_exclusive, add, short, true)
+INTEL_RANGE_OP(scan_exclusive, add, ushort, false)
+INTEL_RANGE_OP(scan_exclusive, min, short, true)
+INTEL_RANGE_OP(scan_exclusive, min, ushort, false)
+INTEL_RANGE_OP(scan_exclusive, max, short, true)
+INTEL_RANGE_OP(scan_exclusive, max, ushort, false)
+
+#undef INTEL_RANGE_OP
PURE CONST uint __gen_ocl_sub_group_block_read_mem(const global uint* p);
PURE CONST uint2 __gen_ocl_sub_group_block_read_mem2(const global uint* p);
PURE CONST uint4 __gen_ocl_sub_group_block_read_mem4(const global uint* p);
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index c609c2e..d0f06d1 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -55,6 +55,10 @@ OVERLOADABLE ulong sub_group_reduce_add(ulong x);
OVERLOADABLE half sub_group_reduce_add(half x);
OVERLOADABLE float sub_group_reduce_add(float x);
OVERLOADABLE double sub_group_reduce_add(double x);
+OVERLOADABLE short sub_group_reduce_add(short x);
+OVERLOADABLE ushort sub_group_reduce_add(ushort x);
+OVERLOADABLE short intel_sub_group_reduce_add(short x);
+OVERLOADABLE ushort intel_sub_group_reduce_add(ushort x);
/* reduce min */
OVERLOADABLE int sub_group_reduce_min(int x);
@@ -64,6 +68,10 @@ OVERLOADABLE ulong sub_group_reduce_min(ulong x);
OVERLOADABLE half sub_group_reduce_min(half x);
OVERLOADABLE float sub_group_reduce_min(float x);
OVERLOADABLE double sub_group_reduce_min(double x);
+OVERLOADABLE short sub_group_reduce_min(short x);
+OVERLOADABLE ushort sub_group_reduce_min(ushort x);
+OVERLOADABLE short intel_sub_group_reduce_min(short x);
+OVERLOADABLE ushort intel_sub_group_reduce_min(ushort x);
/* reduce max */
OVERLOADABLE int sub_group_reduce_max(int x);
@@ -73,6 +81,10 @@ OVERLOADABLE ulong sub_group_reduce_max(ulong x);
OVERLOADABLE half sub_group_reduce_max(half x);
OVERLOADABLE float sub_group_reduce_max(float x);
OVERLOADABLE double sub_group_reduce_max(double x);
+OVERLOADABLE short sub_group_reduce_max(short x);
+OVERLOADABLE ushort sub_group_reduce_max(ushort x);
+OVERLOADABLE short intel_sub_group_reduce_max(short x);
+OVERLOADABLE ushort intel_sub_group_reduce_max(ushort x);
/* scan_inclusive add */
OVERLOADABLE int sub_group_scan_inclusive_add(int x);
@@ -82,6 +94,10 @@ OVERLOADABLE ulong sub_group_scan_inclusive_add(ulong x);
OVERLOADABLE half sub_group_scan_inclusive_add(half x);
OVERLOADABLE float sub_group_scan_inclusive_add(float x);
OVERLOADABLE double sub_group_scan_inclusive_add(double x);
+OVERLOADABLE short sub_group_scan_inclusive_add(short x);
+OVERLOADABLE ushort sub_group_scan_inclusive_add(ushort x);
+OVERLOADABLE short intel_sub_group_scan_inclusive_add(short x);
+OVERLOADABLE ushort intel_sub_group_scan_inclusive_add(ushort x);
/* scan_inclusive min */
OVERLOADABLE int sub_group_scan_inclusive_min(int x);
@@ -91,6 +107,10 @@ OVERLOADABLE ulong sub_group_scan_inclusive_min(ulong x);
OVERLOADABLE half sub_group_scan_inclusive_min(half x);
OVERLOADABLE float sub_group_scan_inclusive_min(float x);
OVERLOADABLE double sub_group_scan_inclusive_min(double x);
+OVERLOADABLE short sub_group_scan_inclusive_min(short x);
+OVERLOADABLE ushort sub_group_scan_inclusive_min(ushort x);
+OVERLOADABLE short intel_sub_group_scan_inclusive_min(short x);
+OVERLOADABLE ushort intel_sub_group_scan_inclusive_min(ushort x);
/* scan_inclusive max */
OVERLOADABLE int sub_group_scan_inclusive_max(int x);
@@ -100,6 +120,10 @@ OVERLOADABLE ulong sub_group_scan_inclusive_max(ulong x);
OVERLOADABLE half sub_group_scan_inclusive_max(half x);
OVERLOADABLE float sub_group_scan_inclusive_max(float x);
OVERLOADABLE double sub_group_scan_inclusive_max(double x);
+OVERLOADABLE short sub_group_scan_inclusive_max(short x);
+OVERLOADABLE ushort sub_group_scan_inclusive_max(ushort x);
+OVERLOADABLE short intel_sub_group_scan_inclusive_max(short x);
+OVERLOADABLE ushort intel_sub_group_scan_inclusive_max(ushort x);
/* scan_exclusive add */
OVERLOADABLE int sub_group_scan_exclusive_add(int x);
@@ -109,6 +133,10 @@ OVERLOADABLE ulong sub_group_scan_exclusive_add(ulong x);
OVERLOADABLE half sub_group_scan_exclusive_add(half x);
OVERLOADABLE float sub_group_scan_exclusive_add(float x);
OVERLOADABLE double sub_group_scan_exclusive_add(double x);
+OVERLOADABLE short sub_group_scan_exclusive_add(short x);
+OVERLOADABLE ushort sub_group_scan_exclusive_add(ushort x);
+OVERLOADABLE short intel_sub_group_scan_exclusive_add(short x);
+OVERLOADABLE ushort intel_sub_group_scan_exclusive_add(ushort x);
/* scan_exclusive min */
OVERLOADABLE int sub_group_scan_exclusive_min(int x);
@@ -118,6 +146,10 @@ OVERLOADABLE ulong sub_group_scan_exclusive_min(ulong x);
OVERLOADABLE half sub_group_scan_exclusive_min(half x);
OVERLOADABLE float sub_group_scan_exclusive_min(float x);
OVERLOADABLE double sub_group_scan_exclusive_min(double x);
+OVERLOADABLE short sub_group_scan_exclusive_min(short x);
+OVERLOADABLE ushort sub_group_scan_exclusive_min(ushort x);
+OVERLOADABLE short intel_sub_group_scan_exclusive_min(short x);
+OVERLOADABLE ushort intel_sub_group_scan_exclusive_min(ushort x);
/* scan_exclusive max */
OVERLOADABLE int sub_group_scan_exclusive_max(int x);
@@ -127,6 +159,10 @@ OVERLOADABLE ulong sub_group_scan_exclusive_max(ulong x);
OVERLOADABLE half sub_group_scan_exclusive_max(half x);
OVERLOADABLE float sub_group_scan_exclusive_max(float x);
OVERLOADABLE double sub_group_scan_exclusive_max(double x);
+OVERLOADABLE short sub_group_scan_exclusive_max(short x);
+OVERLOADABLE ushort sub_group_scan_exclusive_max(ushort x);
+OVERLOADABLE short intel_sub_group_scan_exclusive_max(short x);
+OVERLOADABLE ushort intel_sub_group_scan_exclusive_max(ushort x);
/* shuffle */
OVERLOADABLE half intel_sub_group_shuffle(half x, uint c);
--
2.7.4
More information about the Beignet
mailing list