[Beignet] [PATCH 09/18] Add the misc functions into lib ocl
junyan.he at inbox.com
junyan.he at inbox.com
Tue Aug 12 00:32:15 PDT 2014
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/libocl/Makefile.in | 2 +-
backend/src/libocl/include/ocl_misc.h | 114 +++++++++++++++++++
backend/src/libocl/lib/ocl_misc.cl | 201 ++++++++++++++++++++++++++++++++++
3 files changed, 316 insertions(+), 1 deletion(-)
create mode 100644 backend/src/libocl/include/ocl_misc.h
create mode 100644 backend/src/libocl/lib/ocl_misc.cl
diff --git a/backend/src/libocl/Makefile.in b/backend/src/libocl/Makefile.in
index c89050e..4467f05 100644
--- a/backend/src/libocl/Makefile.in
+++ b/backend/src/libocl/Makefile.in
@@ -7,7 +7,7 @@ GENERATED_FILES=ocl_as.cl ocl_convert.cl ocl_common.cl ocl_relational.cl
GENERATED_HEADERS=ocl_defines.h ocl_as.h ocl_convert.h ocl_common.h ocl_relational.h
GENERATED_CL_SRCS=$(addprefix lib/, $(GENERATED_FILES))
GENERATED_CL_HEADERS=$(addprefix include/, $(GENERATED_HEADERS))
-CL_FILE_NAMES=ocl_workitem.cl ocl_atom.cl ocl_async.cl ocl_sync.cl $(GENERATED_FILES)
+CL_FILE_NAMES=ocl_workitem.cl ocl_atom.cl ocl_async.cl ocl_sync.cl ocl_misc.cl $(GENERATED_FILES)
LL_FILE_NAMES=
CL_SRCS=$(addprefix lib/, $(CL_FILE_NAMES))
LL_SRCS=$(addprefix lib/, $(LL_FILE_NAMES))
diff --git a/backend/src/libocl/include/ocl_misc.h b/backend/src/libocl/include/ocl_misc.h
new file mode 100644
index 0000000..46612fb
--- /dev/null
+++ b/backend/src/libocl/include/ocl_misc.h
@@ -0,0 +1,114 @@
+#ifndef __OCL_MISC_H__
+#define __OCL_MISC_H__
+
+#include "ocl_types.h"
+
+#define DEC2(TYPE, XTYPE, MASKTYPE) /* Prototype generator: shuffle() taking an XTYPE source and a MASKTYPE##2 mask, returning TYPE##2. */ \
+ OVERLOADABLE TYPE##2 shuffle(XTYPE x, MASKTYPE##2 mask);
+/* Same prototype generator for a MASKTYPE##4 mask and a TYPE##4 result. */
+#define DEC4(TYPE, XTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##4 shuffle(XTYPE x, MASKTYPE##4 mask);
+/* Same prototype generator for a MASKTYPE##8 mask and a TYPE##8 result. */
+#define DEC8(TYPE, XTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##8 shuffle(XTYPE x, MASKTYPE##8 mask);
+/* Same prototype generator for a MASKTYPE##16 mask and a TYPE##16 result. */
+#define DEC16(TYPE, XTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##16 shuffle(XTYPE x, MASKTYPE##16 mask);
+
+#define DEFMASK(TYPE, MASKTYPE) /* All shuffle() prototypes for one element/mask type pair: one row per result width, one entry per source width. Each DECn already ends in ';', so none is added here. */ \
+ DEC2(TYPE, TYPE##2, MASKTYPE) DEC2(TYPE, TYPE##4, MASKTYPE) DEC2(TYPE, TYPE##8, MASKTYPE) DEC2(TYPE, TYPE##16, MASKTYPE) \
+ DEC4(TYPE, TYPE##2, MASKTYPE) DEC4(TYPE, TYPE##4, MASKTYPE) DEC4(TYPE, TYPE##8, MASKTYPE) DEC4(TYPE, TYPE##16, MASKTYPE) \
+ DEC8(TYPE, TYPE##2, MASKTYPE) DEC8(TYPE, TYPE##4, MASKTYPE) DEC8(TYPE, TYPE##8, MASKTYPE) DEC8(TYPE, TYPE##16, MASKTYPE) \
+ DEC16(TYPE, TYPE##2, MASKTYPE) DEC16(TYPE, TYPE##4, MASKTYPE) DEC16(TYPE, TYPE##8, MASKTYPE) DEC16(TYPE, TYPE##16, MASKTYPE)
+/* DEF(TYPE): repeat DEFMASK for each of the four unsigned mask element types. */
+#define DEF(TYPE) \
+ DEFMASK(TYPE, uchar) \
+ DEFMASK(TYPE, ushort) \
+ DEFMASK(TYPE, uint) \
+ DEFMASK(TYPE, ulong)
+/* Emit the shuffle() prototypes for every element type listed below. */
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(float)
+DEF(long)
+DEF(ulong)
+#undef DEF /* the helper names are reused for shuffle2 below, so drop them here */
+#undef DEFMASK
+#undef DEC2
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
+#define DEC2(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) /* Prototype generator: shuffle2() over two ARGTYPE sources with a MASKTYPE##2 mask. TEMPTYPE is not used by the prototype. */ \
+ OVERLOADABLE TYPE##2 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##2 mask);
+/* DEC2X: the same prototype with both sources fixed to TYPE##16. */
+#define DEC2X(TYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##2 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##2 mask);
+/* DEC4/DEC4X, DEC8/DEC8X and DEC16/DEC16X repeat the pair above for 4-, 8- and 16-lane masks and results. */
+#define DEC4(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##4 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##4 mask);
+
+#define DEC4X(TYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##4 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##4 mask);
+
+#define DEC8(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##8 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##8 mask);
+
+#define DEC8X(TYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##8 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##8 mask);
+
+#define DEC16(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##16 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##16 mask);
+
+#define DEC16X(TYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##16 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##16 mask);
+
+#define DEFMASK(TYPE, MASKTYPE) /* All shuffle2() prototypes for one element/mask type pair: per result width, sources of 2, 4 and 8 lanes via DECn and 16 lanes via DECnX. */ \
+ DEC2(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+ DEC2(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+ DEC2(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+ DEC2X(TYPE, MASKTYPE) \
+ DEC4(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+ DEC4(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+ DEC4(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+ DEC4X(TYPE, MASKTYPE) \
+ DEC8(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+ DEC8(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+ DEC8(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+ DEC8X(TYPE, MASKTYPE) \
+ DEC16(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+ DEC16(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+ DEC16(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+ DEC16X(TYPE, MASKTYPE)
+/* DEF(TYPE): repeat DEFMASK for each of the four unsigned mask element types. */
+#define DEF(TYPE) \
+ DEFMASK(TYPE, uchar) \
+ DEFMASK(TYPE, ushort) \
+ DEFMASK(TYPE, uint) \
+ DEFMASK(TYPE, ulong)
+/* Emit the shuffle2() prototypes for every element type listed below. */
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(float)
+DEF(long)
+DEF(ulong)
+#undef DEF /* remove every helper macro so none of them leaks out of this header */
+#undef DEFMASK
+#undef DEC2
+#undef DEC2X
+#undef DEC4
+#undef DEC4X
+#undef DEC8
+#undef DEC8X
+#undef DEC16
+#undef DEC16X
+
+#endif
diff --git a/backend/src/libocl/lib/ocl_misc.cl b/backend/src/libocl/lib/ocl_misc.cl
new file mode 100644
index 0000000..fbcc94a
--- /dev/null
+++ b/backend/src/libocl/lib/ocl_misc.cl
@@ -0,0 +1,201 @@
+#include "ocl_misc.h"
+
+#define DEC2(TYPE, XTYPE, MASKTYPE) /* shuffle(): build a TYPE##2 from the lanes of x named by mask; each mask lane is reduced with & (vec_step(x) - 1). */ \
+ OVERLOADABLE TYPE##2 shuffle(XTYPE x, MASKTYPE##2 mask) { \
+ const TYPE *lane = (const TYPE *) &x; /* address the source vector as a scalar array */ \
+ return (TYPE##2)( \
+ lane[mask.s0 & (vec_step(x) - 1)], \
+ lane[mask.s1 & (vec_step(x) - 1)]); \
+ }
+/* 4-lane variant of the same selection. */
+#define DEC4(TYPE, XTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##4 shuffle(XTYPE x, MASKTYPE##4 mask) { \
+ const TYPE *lane = (const TYPE *) &x; \
+ return (TYPE##4)( \
+ lane[mask.s0 & (vec_step(x) - 1)], \
+ lane[mask.s1 & (vec_step(x) - 1)], \
+ lane[mask.s2 & (vec_step(x) - 1)], \
+ lane[mask.s3 & (vec_step(x) - 1)]); \
+ }
+/* 8-lane variant of the same selection. */
+#define DEC8(TYPE, XTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##8 shuffle(XTYPE x, MASKTYPE##8 mask) { \
+ const TYPE *lane = (const TYPE *) &x; \
+ return (TYPE##8)( \
+ lane[mask.s0 & (vec_step(x) - 1)], \
+ lane[mask.s1 & (vec_step(x) - 1)], \
+ lane[mask.s2 & (vec_step(x) - 1)], \
+ lane[mask.s3 & (vec_step(x) - 1)], \
+ lane[mask.s4 & (vec_step(x) - 1)], \
+ lane[mask.s5 & (vec_step(x) - 1)], \
+ lane[mask.s6 & (vec_step(x) - 1)], \
+ lane[mask.s7 & (vec_step(x) - 1)]); \
+ }
+/* 16-lane variant of the same selection. */
+#define DEC16(TYPE, XTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##16 shuffle(XTYPE x, MASKTYPE##16 mask) { \
+ const TYPE *lane = (const TYPE *) &x; \
+ return (TYPE##16)( \
+ lane[mask.s0 & (vec_step(x) - 1)], \
+ lane[mask.s1 & (vec_step(x) - 1)], \
+ lane[mask.s2 & (vec_step(x) - 1)], \
+ lane[mask.s3 & (vec_step(x) - 1)], \
+ lane[mask.s4 & (vec_step(x) - 1)], \
+ lane[mask.s5 & (vec_step(x) - 1)], \
+ lane[mask.s6 & (vec_step(x) - 1)], \
+ lane[mask.s7 & (vec_step(x) - 1)], \
+ lane[mask.s8 & (vec_step(x) - 1)], \
+ lane[mask.s9 & (vec_step(x) - 1)], \
+ lane[mask.sa & (vec_step(x) - 1)], \
+ lane[mask.sb & (vec_step(x) - 1)], \
+ lane[mask.sc & (vec_step(x) - 1)], \
+ lane[mask.sd & (vec_step(x) - 1)], \
+ lane[mask.se & (vec_step(x) - 1)], \
+ lane[mask.sf & (vec_step(x) - 1)]); \
+ }
+
+#define DEFMASK(TYPE, MASKTYPE) /* All shuffle() definitions for one element/mask type pair: one row per result width, one entry per source width. Each DECn expands to a complete function body, so no ';' follows it. */ \
+ DEC2(TYPE, TYPE##2, MASKTYPE) DEC2(TYPE, TYPE##4, MASKTYPE) DEC2(TYPE, TYPE##8, MASKTYPE) DEC2(TYPE, TYPE##16, MASKTYPE) \
+ DEC4(TYPE, TYPE##2, MASKTYPE) DEC4(TYPE, TYPE##4, MASKTYPE) DEC4(TYPE, TYPE##8, MASKTYPE) DEC4(TYPE, TYPE##16, MASKTYPE) \
+ DEC8(TYPE, TYPE##2, MASKTYPE) DEC8(TYPE, TYPE##4, MASKTYPE) DEC8(TYPE, TYPE##8, MASKTYPE) DEC8(TYPE, TYPE##16, MASKTYPE) \
+ DEC16(TYPE, TYPE##2, MASKTYPE) DEC16(TYPE, TYPE##4, MASKTYPE) DEC16(TYPE, TYPE##8, MASKTYPE) DEC16(TYPE, TYPE##16, MASKTYPE)
+/* DEF(TYPE): repeat DEFMASK for each of the four unsigned mask element types. */
+#define DEF(TYPE) \
+ DEFMASK(TYPE, uchar) \
+ DEFMASK(TYPE, ushort) \
+ DEFMASK(TYPE, uint) \
+ DEFMASK(TYPE, ulong)
+/* Emit the shuffle() definitions for every element type listed below. */
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(float)
+DEF(long)
+DEF(ulong)
+#undef DEF /* the helper names are reused for shuffle2 below, so drop them here */
+#undef DEFMASK
+#undef DEC2
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
+#define DEC2(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) /* shuffle2() for sources narrower than 16 lanes: join x and y into one TEMPTYPE vector and delegate to shuffle(). */ \
+ OVERLOADABLE TYPE##2 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##2 mask) { \
+ return shuffle((TEMPTYPE)(x, y), mask); \
+ }
+/* DECnX: shuffle2() for 16-lane sources. Only the low 5 mask bits count: bit 4 picks y over x, bits 0-3 pick the lane; higher bits are ignored. */
+#define DEC2X(TYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##2 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##2 mask) { \
+ TYPE##2 z; \
+ z.s0 = (mask.s0 & 16) ? ((TYPE *)&y)[mask.s0 & 15] : ((TYPE *)&x)[mask.s0 & 15]; \
+ z.s1 = (mask.s1 & 16) ? ((TYPE *)&y)[mask.s1 & 15] : ((TYPE *)&x)[mask.s1 & 15]; \
+ return z; \
+ }
+/* 4-lane result from sources narrower than 16 lanes. */
+#define DEC4(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##4 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##4 mask) { \
+ return shuffle((TEMPTYPE)(x, y), mask); \
+ }
+/* 4-lane result from 16-lane sources; same 5-bit mask rule as DEC2X. */
+#define DEC4X(TYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##4 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##4 mask) { \
+ TYPE##4 z; \
+ z.s0 = (mask.s0 & 16) ? ((TYPE *)&y)[mask.s0 & 15] : ((TYPE *)&x)[mask.s0 & 15]; \
+ z.s1 = (mask.s1 & 16) ? ((TYPE *)&y)[mask.s1 & 15] : ((TYPE *)&x)[mask.s1 & 15]; \
+ z.s2 = (mask.s2 & 16) ? ((TYPE *)&y)[mask.s2 & 15] : ((TYPE *)&x)[mask.s2 & 15]; \
+ z.s3 = (mask.s3 & 16) ? ((TYPE *)&y)[mask.s3 & 15] : ((TYPE *)&x)[mask.s3 & 15]; \
+ return z; \
+ }
+/* 8-lane result from sources narrower than 16 lanes. */
+#define DEC8(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##8 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##8 mask) { \
+ return shuffle((TEMPTYPE)(x, y), mask); \
+ }
+/* 8-lane result from 16-lane sources; same 5-bit mask rule as DEC2X. */
+#define DEC8X(TYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##8 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##8 mask) { \
+ TYPE##8 z; \
+ z.s0 = (mask.s0 & 16) ? ((TYPE *)&y)[mask.s0 & 15] : ((TYPE *)&x)[mask.s0 & 15]; \
+ z.s1 = (mask.s1 & 16) ? ((TYPE *)&y)[mask.s1 & 15] : ((TYPE *)&x)[mask.s1 & 15]; \
+ z.s2 = (mask.s2 & 16) ? ((TYPE *)&y)[mask.s2 & 15] : ((TYPE *)&x)[mask.s2 & 15]; \
+ z.s3 = (mask.s3 & 16) ? ((TYPE *)&y)[mask.s3 & 15] : ((TYPE *)&x)[mask.s3 & 15]; \
+ z.s4 = (mask.s4 & 16) ? ((TYPE *)&y)[mask.s4 & 15] : ((TYPE *)&x)[mask.s4 & 15]; \
+ z.s5 = (mask.s5 & 16) ? ((TYPE *)&y)[mask.s5 & 15] : ((TYPE *)&x)[mask.s5 & 15]; \
+ z.s6 = (mask.s6 & 16) ? ((TYPE *)&y)[mask.s6 & 15] : ((TYPE *)&x)[mask.s6 & 15]; \
+ z.s7 = (mask.s7 & 16) ? ((TYPE *)&y)[mask.s7 & 15] : ((TYPE *)&x)[mask.s7 & 15]; \
+ return z; \
+ }
+/* 16-lane result from sources narrower than 16 lanes. */
+#define DEC16(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##16 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##16 mask) { \
+ return shuffle((TEMPTYPE)(x, y), mask); \
+ }
+/* 16-lane result from 16-lane sources; same 5-bit mask rule as DEC2X. */
+#define DEC16X(TYPE, MASKTYPE) \
+ OVERLOADABLE TYPE##16 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##16 mask) { \
+ TYPE##16 z; \
+ z.s0 = (mask.s0 & 16) ? ((TYPE *)&y)[mask.s0 & 15] : ((TYPE *)&x)[mask.s0 & 15]; \
+ z.s1 = (mask.s1 & 16) ? ((TYPE *)&y)[mask.s1 & 15] : ((TYPE *)&x)[mask.s1 & 15]; \
+ z.s2 = (mask.s2 & 16) ? ((TYPE *)&y)[mask.s2 & 15] : ((TYPE *)&x)[mask.s2 & 15]; \
+ z.s3 = (mask.s3 & 16) ? ((TYPE *)&y)[mask.s3 & 15] : ((TYPE *)&x)[mask.s3 & 15]; \
+ z.s4 = (mask.s4 & 16) ? ((TYPE *)&y)[mask.s4 & 15] : ((TYPE *)&x)[mask.s4 & 15]; \
+ z.s5 = (mask.s5 & 16) ? ((TYPE *)&y)[mask.s5 & 15] : ((TYPE *)&x)[mask.s5 & 15]; \
+ z.s6 = (mask.s6 & 16) ? ((TYPE *)&y)[mask.s6 & 15] : ((TYPE *)&x)[mask.s6 & 15]; \
+ z.s7 = (mask.s7 & 16) ? ((TYPE *)&y)[mask.s7 & 15] : ((TYPE *)&x)[mask.s7 & 15]; \
+ z.s8 = (mask.s8 & 16) ? ((TYPE *)&y)[mask.s8 & 15] : ((TYPE *)&x)[mask.s8 & 15]; \
+ z.s9 = (mask.s9 & 16) ? ((TYPE *)&y)[mask.s9 & 15] : ((TYPE *)&x)[mask.s9 & 15]; \
+ z.sa = (mask.sa & 16) ? ((TYPE *)&y)[mask.sa & 15] : ((TYPE *)&x)[mask.sa & 15]; \
+ z.sb = (mask.sb & 16) ? ((TYPE *)&y)[mask.sb & 15] : ((TYPE *)&x)[mask.sb & 15]; \
+ z.sc = (mask.sc & 16) ? ((TYPE *)&y)[mask.sc & 15] : ((TYPE *)&x)[mask.sc & 15]; \
+ z.sd = (mask.sd & 16) ? ((TYPE *)&y)[mask.sd & 15] : ((TYPE *)&x)[mask.sd & 15]; \
+ z.se = (mask.se & 16) ? ((TYPE *)&y)[mask.se & 15] : ((TYPE *)&x)[mask.se & 15]; \
+ z.sf = (mask.sf & 16) ? ((TYPE *)&y)[mask.sf & 15] : ((TYPE *)&x)[mask.sf & 15]; \
+ return z; \
+ }
+
+#define DEFMASK(TYPE, MASKTYPE) /* All shuffle2() definitions for one element/mask type pair. For DECn the third argument is the vector type with twice the lanes of the second; 16-lane sources go through DECnX. */ \
+ DEC2(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+ DEC2(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+ DEC2(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+ DEC2X(TYPE, MASKTYPE) \
+ DEC4(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+ DEC4(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+ DEC4(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+ DEC4X(TYPE, MASKTYPE) \
+ DEC8(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+ DEC8(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+ DEC8(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+ DEC8X(TYPE, MASKTYPE) \
+ DEC16(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
+ DEC16(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
+ DEC16(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
+ DEC16X(TYPE, MASKTYPE)
+/* DEF(TYPE): repeat DEFMASK for each of the four unsigned mask element types. */
+#define DEF(TYPE) \
+ DEFMASK(TYPE, uchar) \
+ DEFMASK(TYPE, ushort) \
+ DEFMASK(TYPE, uint) \
+ DEFMASK(TYPE, ulong)
+/* Emit the shuffle2() definitions for every element type listed below. */
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(float)
+DEF(long)
+DEF(ulong)
+#undef DEF /* remove every helper macro once the definitions have been generated */
+#undef DEFMASK
+#undef DEC2
+#undef DEC2X
+#undef DEC4
+#undef DEC4X
+#undef DEC8
+#undef DEC8X
+#undef DEC16
+#undef DEC16X
--
1.8.3.2
More information about the Beignet
mailing list