[Beignet] [PATCH 1/2] Add the other unsigned integer types as mask types for shuffle and shuffle2.

Yang Rong rong.r.yang at intel.com
Tue Nov 12 01:17:13 PST 2013


Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 backend/src/ocl_stdlib.tmpl.h | 103 ++++++++++++++++++++++++------------------
 1 file changed, 59 insertions(+), 44 deletions(-)

diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 50795ef..df663ea 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -1933,16 +1933,16 @@ DECL_UNTYPED_RW_ALL(double)
 /////////////////////////////////////////////////////////////////////////////
 // Miscellaneous Vector Functions (see 6.11.12 of OCL 1.1 spec)
 /////////////////////////////////////////////////////////////////////////////
-#define DEC2(TYPE, XTYPE) \
-  INLINE_OVERLOADABLE TYPE##2 shuffle(XTYPE x, uint2 mask) { \
+#define DEC2(TYPE, XTYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##2 shuffle(XTYPE x, MASKTYPE##2 mask) { \
     TYPE##2 y; \
     y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
     y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
     return y; \
   }
 
-#define DEC4(TYPE, XTYPE) \
-  INLINE_OVERLOADABLE TYPE##4 shuffle(XTYPE x, uint4 mask) { \
+#define DEC4(TYPE, XTYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##4 shuffle(XTYPE x, MASKTYPE##4 mask) { \
     TYPE##4 y; \
     y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
     y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
@@ -1951,8 +1951,8 @@ DECL_UNTYPED_RW_ALL(double)
     return y; \
   }
 
-#define DEC8(TYPE, XTYPE) \
-  INLINE_OVERLOADABLE TYPE##8 shuffle(XTYPE x, uint8 mask) { \
+#define DEC8(TYPE, XTYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##8 shuffle(XTYPE x, MASKTYPE##8 mask) { \
     TYPE##8 y; \
     y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
     y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
@@ -1965,8 +1965,8 @@ DECL_UNTYPED_RW_ALL(double)
     return y; \
   }
 
-#define DEC16(TYPE, XTYPE) \
-  INLINE_OVERLOADABLE TYPE##16 shuffle(XTYPE x, uint16 mask) { \
+#define DEC16(TYPE, XTYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##16 shuffle(XTYPE x, MASKTYPE##16 mask) { \
     TYPE##16 y; \
     y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
     y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
@@ -1987,11 +1987,18 @@ DECL_UNTYPED_RW_ALL(double)
     return y; \
   }
 
+#define DEFMASK(TYPE, MASKTYPE) \
+  DEC2(TYPE, TYPE##2, MASKTYPE); DEC2(TYPE, TYPE##4, MASKTYPE); DEC2(TYPE, TYPE##8, MASKTYPE); DEC2(TYPE, TYPE##16, MASKTYPE) \
+  DEC4(TYPE, TYPE##2, MASKTYPE); DEC4(TYPE, TYPE##4, MASKTYPE); DEC4(TYPE, TYPE##8, MASKTYPE); DEC4(TYPE, TYPE##16, MASKTYPE) \
+  DEC8(TYPE, TYPE##2, MASKTYPE); DEC8(TYPE, TYPE##4, MASKTYPE); DEC8(TYPE, TYPE##8, MASKTYPE); DEC8(TYPE, TYPE##16, MASKTYPE) \
+  DEC16(TYPE, TYPE##2, MASKTYPE); DEC16(TYPE, TYPE##4, MASKTYPE); DEC16(TYPE, TYPE##8, MASKTYPE); DEC16(TYPE, TYPE##16, MASKTYPE)
+
 #define DEF(TYPE) \
-  DEC2(TYPE, TYPE##2); DEC2(TYPE, TYPE##4); DEC2(TYPE, TYPE##8); DEC2(TYPE, TYPE##16) \
-  DEC4(TYPE, TYPE##2); DEC4(TYPE, TYPE##4); DEC4(TYPE, TYPE##8); DEC4(TYPE, TYPE##16) \
-  DEC8(TYPE, TYPE##2); DEC8(TYPE, TYPE##4); DEC8(TYPE, TYPE##8); DEC8(TYPE, TYPE##16) \
-  DEC16(TYPE, TYPE##2); DEC16(TYPE, TYPE##4); DEC16(TYPE, TYPE##8); DEC16(TYPE, TYPE##16)
+  DEFMASK(TYPE, uchar) \
+  DEFMASK(TYPE, ushort) \
+  DEFMASK(TYPE, uint) \
+  DEFMASK(TYPE, ulong)
+
 DEF(char)
 DEF(uchar)
 DEF(short)
@@ -2002,31 +2009,32 @@ DEF(float)
 DEF(long)
 DEF(ulong)
 #undef DEF
+#undef DEFMASK
 #undef DEC2
 #undef DEC4
 #undef DEC8
 #undef DEC16
 
-#define DEC2(TYPE, ARGTYPE, TEMPTYPE) \
-  INLINE_OVERLOADABLE TYPE##2 shuffle2(ARGTYPE x, ARGTYPE y, uint2 mask) { \
+#define DEC2(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##2 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##2 mask) { \
     return shuffle((TEMPTYPE)(x, y), mask); \
   }
 
-#define DEC2X(TYPE) \
-  INLINE_OVERLOADABLE TYPE##2 shuffle2(TYPE##16 x, TYPE##16 y, uint2 mask) { \
+#define DEC2X(TYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##2 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##2 mask) { \
     TYPE##2 z; \
     z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
     z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
     return z; \
   }
 
-#define DEC4(TYPE, ARGTYPE, TEMPTYPE) \
-  INLINE_OVERLOADABLE TYPE##4 shuffle2(ARGTYPE x, ARGTYPE y, uint4 mask) { \
+#define DEC4(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##4 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##4 mask) { \
     return shuffle((TEMPTYPE)(x, y), mask); \
   }
 
-#define DEC4X(TYPE) \
-  INLINE_OVERLOADABLE TYPE##4 shuffle2(TYPE##16 x, TYPE##16 y, uint4 mask) { \
+#define DEC4X(TYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##4 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##4 mask) { \
     TYPE##4 z; \
     z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
     z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
@@ -2035,13 +2043,13 @@ DEF(ulong)
     return z; \
   }
 
-#define DEC8(TYPE, ARGTYPE, TEMPTYPE) \
-  INLINE_OVERLOADABLE TYPE##8 shuffle2(ARGTYPE x, ARGTYPE y, uint8 mask) { \
+#define DEC8(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##8 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##8 mask) { \
     return shuffle((TEMPTYPE)(x, y), mask); \
   }
 
-#define DEC8X(TYPE) \
-  INLINE_OVERLOADABLE TYPE##8 shuffle2(TYPE##16 x, TYPE##16 y, uint8 mask) { \
+#define DEC8X(TYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##8 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##8 mask) { \
     TYPE##8 z; \
     z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
     z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
@@ -2054,13 +2062,13 @@ DEF(ulong)
     return z; \
   }
 
-#define DEC16(TYPE, ARGTYPE, TEMPTYPE) \
-  INLINE_OVERLOADABLE TYPE##16 shuffle2(ARGTYPE x, ARGTYPE y, uint16 mask) { \
+#define DEC16(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##16 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##16 mask) { \
     return shuffle((TEMPTYPE)(x, y), mask); \
   }
 
-#define DEC16X(TYPE) \
-  INLINE_OVERLOADABLE TYPE##16 shuffle2(TYPE##16 x, TYPE##16 y, uint16 mask) { \
+#define DEC16X(TYPE, MASKTYPE) \
+  INLINE_OVERLOADABLE TYPE##16 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##16 mask) { \
     TYPE##16 z; \
     z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
     z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
@@ -2081,23 +2089,29 @@ DEF(ulong)
     return z; \
   }
 
+#define DEFMASK(TYPE, MASKTYPE) \
+  DEC2(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC2(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC2(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC2X(TYPE, MASKTYPE) \
+  DEC4(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC4(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC4(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC4X(TYPE, MASKTYPE) \
+  DEC8(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC8(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC8(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC8X(TYPE, MASKTYPE) \
+  DEC16(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+  DEC16(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+  DEC16(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+  DEC16X(TYPE, MASKTYPE)
+
 #define DEF(TYPE) \
-  DEC2(TYPE, TYPE##2, TYPE##4) \
-  DEC2(TYPE, TYPE##4, TYPE##8) \
-  DEC2(TYPE, TYPE##8, TYPE##16) \
-  DEC2X(TYPE) \
-  DEC4(TYPE, TYPE##2, TYPE##4) \
-  DEC4(TYPE, TYPE##4, TYPE##8) \
-  DEC4(TYPE, TYPE##8, TYPE##16) \
-  DEC4X(TYPE) \
-  DEC8(TYPE, TYPE##2, TYPE##4) \
-  DEC8(TYPE, TYPE##4, TYPE##8) \
-  DEC8(TYPE, TYPE##8, TYPE##16) \
-  DEC8X(TYPE) \
-  DEC16(TYPE, TYPE##2, TYPE##4) \
-  DEC16(TYPE, TYPE##4, TYPE##8) \
-  DEC16(TYPE, TYPE##8, TYPE##16) \
-  DEC16X(TYPE)
+  DEFMASK(TYPE, uchar) \
+  DEFMASK(TYPE, ushort) \
+  DEFMASK(TYPE, uint) \
+  DEFMASK(TYPE, ulong)
 
 DEF(char)
 DEF(uchar)
@@ -2109,6 +2123,7 @@ DEF(float)
 DEF(long)
 DEF(ulong)
 #undef DEF
+#undef DEFMASK
 #undef DEC2
 #undef DEC2X
 #undef DEC4
-- 
1.8.1.2



More information about the Beignet mailing list