[Beignet] [PATCH 1/2] add builtin function "shuffle"
Homer Hsing
homer.xing at intel.com
Wed Jul 17 00:03:03 PDT 2013
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/ocl_stdlib.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index bcbb41c..b33fbca 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -5455,6 +5455,91 @@ INLINE_OVERLOADABLE float4 cross(float4 v0, float4 v1) {
}
/////////////////////////////////////////////////////////////////////////////
+// Miscellaneous Vector Functions (see 6.11.12 of OCL 1.1 spec)
+/////////////////////////////////////////////////////////////////////////////
+#define DEC2(TYPE, UTYPE) /* emits the 2-component shuffle() overload: input/result TYPE##2, mask UTYPE##2 */ \
+ INLINE_OVERLOADABLE TYPE##2 shuffle(TYPE##2 x, UTYPE##2 mask) { \
+ TYPE##2 y; \
+ y.s0 = ((TYPE *) &x)[(int)mask.s0 & 1]; /* x is indexed as an array of TYPE; & 1 keeps the index in 0..1 */ \
+ y.s1 = ((TYPE *) &x)[(int)mask.s1 & 1]; \
+ return y; \
+ }
+
+#define DEC3(TYPE, UTYPE) /* emits the 3-component shuffle() overload: input/result TYPE##3, mask UTYPE##3 */ \
+ INLINE_OVERLOADABLE TYPE##3 shuffle(TYPE##3 x, UTYPE##3 mask) { \
+ TYPE##3 y; \
+ y.s0 = ((TYPE *) &x)[(int)mask.s0 & 3]; /* & 3 keeps two mask bits, so index 3 (past .s2) is reachable - NOTE(review): confirm intended */ \
+ y.s1 = ((TYPE *) &x)[(int)mask.s1 & 3]; \
+ y.s2 = ((TYPE *) &x)[(int)mask.s2 & 3]; \
+ return y; \
+ }
+
+#define DEC4(TYPE, UTYPE) /* emits the 4-component shuffle() overload: input/result TYPE##4, mask UTYPE##4 */ \
+ INLINE_OVERLOADABLE TYPE##4 shuffle(TYPE##4 x, UTYPE##4 mask) { \
+ TYPE##4 y; \
+ y.s0 = ((TYPE *) &x)[(int)mask.s0 & 3]; /* x is indexed as an array of TYPE; & 3 keeps the index in 0..3 */ \
+ y.s1 = ((TYPE *) &x)[(int)mask.s1 & 3]; \
+ y.s2 = ((TYPE *) &x)[(int)mask.s2 & 3]; \
+ y.s3 = ((TYPE *) &x)[(int)mask.s3 & 3]; \
+ return y; \
+ }
+
+#define DEC8(TYPE, UTYPE) /* emits the 8-component shuffle() overload: input/result TYPE##8, mask UTYPE##8 */ \
+ INLINE_OVERLOADABLE TYPE##8 shuffle(TYPE##8 x, UTYPE##8 mask) { \
+ TYPE##8 y; \
+ y.s0 = ((TYPE *) &x)[(int)mask.s0 & 7]; /* x is indexed as an array of TYPE; & 7 keeps the index in 0..7 */ \
+ y.s1 = ((TYPE *) &x)[(int)mask.s1 & 7]; \
+ y.s2 = ((TYPE *) &x)[(int)mask.s2 & 7]; \
+ y.s3 = ((TYPE *) &x)[(int)mask.s3 & 7]; \
+ y.s4 = ((TYPE *) &x)[(int)mask.s4 & 7]; \
+ y.s5 = ((TYPE *) &x)[(int)mask.s5 & 7]; \
+ y.s6 = ((TYPE *) &x)[(int)mask.s6 & 7]; \
+ y.s7 = ((TYPE *) &x)[(int)mask.s7 & 7]; \
+ return y; \
+ }
+
+#define DEC16(TYPE, UTYPE) /* emits the 16-component shuffle() overload: input/result TYPE##16, mask UTYPE##16 */ \
+ INLINE_OVERLOADABLE TYPE##16 shuffle(TYPE##16 x, UTYPE##16 mask) { \
+ TYPE##16 y; \
+ y.s0 = ((TYPE *) &x)[(int)mask.s0 & 15]; /* x is indexed as an array of TYPE; & 15 keeps the index in 0..15 */ \
+ y.s1 = ((TYPE *) &x)[(int)mask.s1 & 15]; \
+ y.s2 = ((TYPE *) &x)[(int)mask.s2 & 15]; \
+ y.s3 = ((TYPE *) &x)[(int)mask.s3 & 15]; \
+ y.s4 = ((TYPE *) &x)[(int)mask.s4 & 15]; \
+ y.s5 = ((TYPE *) &x)[(int)mask.s5 & 15]; \
+ y.s6 = ((TYPE *) &x)[(int)mask.s6 & 15]; \
+ y.s7 = ((TYPE *) &x)[(int)mask.s7 & 15]; \
+ y.s8 = ((TYPE *) &x)[(int)mask.s8 & 15]; \
+ y.s9 = ((TYPE *) &x)[(int)mask.s9 & 15]; \
+ y.sa = ((TYPE *) &x)[(int)mask.sa & 15]; \
+ y.sb = ((TYPE *) &x)[(int)mask.sb & 15]; \
+ y.sc = ((TYPE *) &x)[(int)mask.sc & 15]; \
+ y.sd = ((TYPE *) &x)[(int)mask.sd & 15]; \
+ y.se = ((TYPE *) &x)[(int)mask.se & 15]; \
+ y.sf = ((TYPE *) &x)[(int)mask.sf & 15]; \
+ return y; \
+ }
+
+#define DEF(TYPE, UTYPE) /* instantiates shuffle() for every vector width (2, 3, 4, 8, 16) of TYPE with a UTYPE mask */ \
+ DEC2(TYPE, UTYPE) \
+ DEC3(TYPE, UTYPE) \
+ DEC4(TYPE, UTYPE) \
+ DEC8(TYPE, UTYPE) \
+ DEC16(TYPE, UTYPE)
+DEF(char, uchar)
+DEF(uchar, uchar)
+DEF(short, ushort)
+DEF(ushort, ushort)
+DEF(int, uint)
+DEF(uint, uint)
+DEF(float, uint) /* the mask is the unsigned integer type of the same element size, so float pairs with uint */
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+/////////////////////////////////////////////////////////////////////////////
// Vector loads and stores
/////////////////////////////////////////////////////////////////////////////
--
1.8.1.2
More information about the Beignet
mailing list