[Beignet] [PATCH v9 1/4] add builtin function "shuffle"

Homer Hsing homer.xing at intel.com
Tue Jul 23 22:11:01 PDT 2013


Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/ocl_stdlib.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index bcbb41c..646b653 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -5455,6 +5455,91 @@ INLINE_OVERLOADABLE float4 cross(float4 v0, float4 v1) {
 }
 
 /////////////////////////////////////////////////////////////////////////////
+// Miscellaneous Vector Functions (see 6.11.12 of OCL 1.1 spec)
+/////////////////////////////////////////////////////////////////////////////
+#define DEC2(TYPE, XTYPE) /* Emits shuffle(XTYPE, uint2) returning TYPE##2: each result lane is the element of x selected by the matching mask lane. */ \
+  INLINE_OVERLOADABLE TYPE##2 shuffle(XTYPE x, uint2 mask) { \
+    TYPE##2 y; \
+    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; /* x is read as an array of TYPE; the AND with vec_step(x) - 1 limits the index (presumably relies on vec_step(x) being a power of two - confirm) */ \
+    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
+    return y; \
+  }
+
+#define DEC3(TYPE, XTYPE) /* Emits shuffle(XTYPE, uint3) returning TYPE##3: each result lane is the element of x selected by the matching mask lane. */ \
+  INLINE_OVERLOADABLE TYPE##3 shuffle(XTYPE x, uint3 mask) { \
+    TYPE##3 y; \
+    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; /* x is read as an array of TYPE; the AND with vec_step(x) - 1 limits the index (presumably relies on vec_step(x) being a power of two - confirm) */ \
+    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
+    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
+    return y; \
+  }
+
+#define DEC4(TYPE, XTYPE) /* Emits shuffle(XTYPE, uint4) returning TYPE##4: each result lane is the element of x selected by the matching mask lane. */ \
+  INLINE_OVERLOADABLE TYPE##4 shuffle(XTYPE x, uint4 mask) { \
+    TYPE##4 y; \
+    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; /* x is read as an array of TYPE; the AND with vec_step(x) - 1 limits the index (presumably relies on vec_step(x) being a power of two - confirm) */ \
+    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
+    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
+    y.s3 = ((TYPE *) &x)[mask.s3 & (vec_step(x) - 1)]; \
+    return y; \
+  }
+
+#define DEC8(TYPE, XTYPE) /* Emits shuffle(XTYPE, uint8) returning TYPE##8: each result lane is the element of x selected by the matching mask lane. */ \
+  INLINE_OVERLOADABLE TYPE##8 shuffle(XTYPE x, uint8 mask) { \
+    TYPE##8 y; \
+    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; /* x is read as an array of TYPE; the AND with vec_step(x) - 1 limits the index (presumably relies on vec_step(x) being a power of two - confirm) */ \
+    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
+    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
+    y.s3 = ((TYPE *) &x)[mask.s3 & (vec_step(x) - 1)]; \
+    y.s4 = ((TYPE *) &x)[mask.s4 & (vec_step(x) - 1)]; \
+    y.s5 = ((TYPE *) &x)[mask.s5 & (vec_step(x) - 1)]; \
+    y.s6 = ((TYPE *) &x)[mask.s6 & (vec_step(x) - 1)]; \
+    y.s7 = ((TYPE *) &x)[mask.s7 & (vec_step(x) - 1)]; \
+    return y; \
+  }
+
+#define DEC16(TYPE, XTYPE) /* Emits shuffle(XTYPE, uint16) returning TYPE##16: each result lane is the element of x selected by the matching mask lane. */ \
+  INLINE_OVERLOADABLE TYPE##16 shuffle(XTYPE x, uint16 mask) { \
+    TYPE##16 y; \
+    y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; /* x is read as an array of TYPE; the AND with vec_step(x) - 1 limits the index (presumably relies on vec_step(x) being a power of two - confirm) */ \
+    y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
+    y.s2 = ((TYPE *) &x)[mask.s2 & (vec_step(x) - 1)]; \
+    y.s3 = ((TYPE *) &x)[mask.s3 & (vec_step(x) - 1)]; \
+    y.s4 = ((TYPE *) &x)[mask.s4 & (vec_step(x) - 1)]; \
+    y.s5 = ((TYPE *) &x)[mask.s5 & (vec_step(x) - 1)]; \
+    y.s6 = ((TYPE *) &x)[mask.s6 & (vec_step(x) - 1)]; \
+    y.s7 = ((TYPE *) &x)[mask.s7 & (vec_step(x) - 1)]; \
+    y.s8 = ((TYPE *) &x)[mask.s8 & (vec_step(x) - 1)]; \
+    y.s9 = ((TYPE *) &x)[mask.s9 & (vec_step(x) - 1)]; \
+    y.sa = ((TYPE *) &x)[mask.sa & (vec_step(x) - 1)]; /* lanes after s9 continue with the component names sa..sf */ \
+    y.sb = ((TYPE *) &x)[mask.sb & (vec_step(x) - 1)]; \
+    y.sc = ((TYPE *) &x)[mask.sc & (vec_step(x) - 1)]; \
+    y.sd = ((TYPE *) &x)[mask.sd & (vec_step(x) - 1)]; \
+    y.se = ((TYPE *) &x)[mask.se & (vec_step(x) - 1)]; \
+    y.sf = ((TYPE *) &x)[mask.sf & (vec_step(x) - 1)]; \
+    return y; \
+  }
+
+#define DEF(TYPE) /* For one element TYPE, emits 25 shuffle overloads: one row per result width (2/3/4/8/16), each row covering source widths 2/3/4/8/16. */ \
+  DEC2(TYPE, TYPE##2); DEC2(TYPE, TYPE##3); DEC2(TYPE, TYPE##4); DEC2(TYPE, TYPE##8); DEC2(TYPE, TYPE##16) \
+  DEC3(TYPE, TYPE##2); DEC3(TYPE, TYPE##3); DEC3(TYPE, TYPE##4); DEC3(TYPE, TYPE##8); DEC3(TYPE, TYPE##16) \
+  DEC4(TYPE, TYPE##2); DEC4(TYPE, TYPE##3); DEC4(TYPE, TYPE##4); DEC4(TYPE, TYPE##8); DEC4(TYPE, TYPE##16) \
+  DEC8(TYPE, TYPE##2); DEC8(TYPE, TYPE##3); DEC8(TYPE, TYPE##4); DEC8(TYPE, TYPE##8); DEC8(TYPE, TYPE##16) \
+  DEC16(TYPE, TYPE##2); DEC16(TYPE, TYPE##3); DEC16(TYPE, TYPE##4); DEC16(TYPE, TYPE##8); DEC16(TYPE, TYPE##16)
+DEF(char) // NOTE(review): every overload takes a uintN mask, also for 8- and 16-bit element types; the OpenCL spec lists the mask as ugentypen - confirm this is intended
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(float)
+#undef DEF // the generator macros are only needed for the instantiations above
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+/////////////////////////////////////////////////////////////////////////////
 // Vector loads and stores
 /////////////////////////////////////////////////////////////////////////////
 
-- 
1.8.1.2



More information about the Beignet mailing list