[Beignet] [PATCH 05/18] Add the atomic functions into libocl.
junyan.he at inbox.com
junyan.he at inbox.com
Tue Aug 12 00:31:45 PDT 2014
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/libocl/Makefile.in | 15 +-
backend/src/libocl/include/ocl_atom.h | 84 ++++
backend/src/libocl/lib/ocl_atom.cl | 122 ++++++
backend/src/libocl/script/gen_convert.sh | 653 +++++++++++++++++++++++++++++++
4 files changed, 871 insertions(+), 3 deletions(-)
create mode 100644 backend/src/libocl/include/ocl_atom.h
create mode 100644 backend/src/libocl/lib/ocl_atom.cl
create mode 100755 backend/src/libocl/script/gen_convert.sh
diff --git a/backend/src/libocl/Makefile.in b/backend/src/libocl/Makefile.in
index 0264391..06adc08 100644
--- a/backend/src/libocl/Makefile.in
+++ b/backend/src/libocl/Makefile.in
@@ -3,11 +3,11 @@
HEADER_INSTALL_PREFIX=@OCL_HEADER_DIR@
BITCODE_INSTALL_PREFIX=@OCL_BITCODE_DIR@
-GENERATED_FILES=ocl_as.cl
-GENERATED_HEADERS=ocl_defines.h ocl_as.h
+GENERATED_FILES=ocl_as.cl ocl_convert.cl
+GENERATED_HEADERS=ocl_defines.h ocl_as.h ocl_convert.h
GENERATED_CL_SRCS=$(addprefix lib/, $(GENERATED_FILES))
GENERATED_CL_HEADERS=$(addprefix include/, $(GENERATED_HEADERS))
-CL_FILE_NAMES=ocl_workitem.cl $(GENERATED_FILES)
+CL_FILE_NAMES=ocl_workitem.cl ocl_atom.cl $(GENERATED_FILES)
LL_FILE_NAMES=
CL_SRCS=$(addprefix lib/, $(CL_FILE_NAMES))
LL_SRCS=$(addprefix lib/, $(LL_FILE_NAMES))
@@ -30,6 +30,15 @@ include/ocl_as.h:script/gen_common.inc
@echo "Generate the header: $@"
@script/gen_as.sh -p > $@
+# Regenerate the conversion source/header whenever the generator script or the shared gen_common.inc changes.
+lib/ocl_convert.cl:script/gen_convert.sh script/gen_common.inc
+ @echo "Generate the source: $@"
+ @script/gen_convert.sh > $@
+
+include/ocl_convert.h:script/gen_convert.sh script/gen_common.inc
+ @echo "Generate the header: $@"
+ @script/gen_convert.sh -p > $@
+
include/ocl_defines.h:include/ocl_defines.inh
@echo "Generate the header: $@"
@rm -f $@
diff --git a/backend/src/libocl/include/ocl_atom.h b/backend/src/libocl/include/ocl_atom.h
new file mode 100644
index 0000000..b2cfcbf
--- /dev/null
+++ b/backend/src/libocl/include/ocl_atom.h
@@ -0,0 +1,84 @@
+#ifndef __OCL_ATOM_H__
+#define __OCL_ATOM_H__
+#include "ocl_types.h"
+
+/////////////////////////////////////////////////////////////////////////////
+// Atomic functions
+/////////////////////////////////////////////////////////////////////////////
+
+OVERLOADABLE uint atomic_add(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_add(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_add(volatile __global int *p, int val);
+OVERLOADABLE int atomic_add(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_sub(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_sub(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_sub(volatile __global int *p, int val);
+OVERLOADABLE int atomic_sub(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_and(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_and(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_and(volatile __global int *p, int val);
+OVERLOADABLE int atomic_and(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_or(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_or(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_or(volatile __global int *p, int val);
+OVERLOADABLE int atomic_or(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_xor(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_xor(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_xor(volatile __global int *p, int val);
+OVERLOADABLE int atomic_xor(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_xchg(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_xchg(volatile __local uint *p, uint val);
+OVERLOADABLE int atomic_xchg(volatile __global int *p, int val);
+OVERLOADABLE int atomic_xchg(volatile __local int *p, int val);
+
+OVERLOADABLE int atomic_min(volatile __global int *p, int val);
+OVERLOADABLE int atomic_min(volatile __local int *p, int val);
+
+OVERLOADABLE int atomic_max(volatile __global int *p, int val);
+OVERLOADABLE int atomic_max(volatile __local int *p, int val);
+
+OVERLOADABLE uint atomic_min(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_min(volatile __local uint *p, uint val);
+
+OVERLOADABLE uint atomic_max(volatile __global uint *p, uint val);
+OVERLOADABLE uint atomic_max(volatile __local uint *p, uint val);
+
+OVERLOADABLE float atomic_xchg (volatile __global float *p, float val);
+OVERLOADABLE float atomic_xchg (volatile __local float *p, float val);
+
+OVERLOADABLE uint atomic_inc (volatile __global uint *p);
+OVERLOADABLE uint atomic_inc (volatile __local uint *p);
+OVERLOADABLE int atomic_inc (volatile __global int *p);
+OVERLOADABLE int atomic_inc (volatile __local int *p);
+
+OVERLOADABLE uint atomic_dec (volatile __global uint *p);
+OVERLOADABLE uint atomic_dec (volatile __local uint *p);
+OVERLOADABLE int atomic_dec (volatile __global int *p);
+OVERLOADABLE int atomic_dec (volatile __local int *p);
+
+OVERLOADABLE uint atomic_cmpxchg (volatile __global uint *p, uint cmp, uint val);
+OVERLOADABLE uint atomic_cmpxchg (volatile __local uint *p, uint cmp, uint val);
+OVERLOADABLE int atomic_cmpxchg (volatile __global int *p, int cmp, int val);
+OVERLOADABLE int atomic_cmpxchg (volatile __local int *p, int cmp, int val);
+
+// XXX for conformance test
+// The atom_xxx names below are the OpenCL 1.0 spellings; the conformance test suite still uses them, so each one is aliased to its atomic_xxx counterpart.
+#define atom_add atomic_add
+#define atom_sub atomic_sub
+#define atom_and atomic_and
+#define atom_or atomic_or
+#define atom_xor atomic_xor
+#define atom_xchg atomic_xchg
+#define atom_min atomic_min
+#define atom_max atomic_max
+#define atom_inc atomic_inc
+#define atom_dec atomic_dec
+#define atom_cmpxchg atomic_cmpxchg
+
+
+#endif /* __OCL_ATOM_H__ */
diff --git a/backend/src/libocl/lib/ocl_atom.cl b/backend/src/libocl/lib/ocl_atom.cl
new file mode 100644
index 0000000..ad09d9e
--- /dev/null
+++ b/backend/src/libocl/lib/ocl_atom.cl
@@ -0,0 +1,122 @@
+#include "ocl_atom.h"
+#include "ocl_as.h"
+// Prototypes only: no bodies for these __gen_ocl_atomic_* functions appear in this file (presumably the backend supplies them - TODO confirm). All of them take non-volatile uint pointers and uint values.
+OVERLOADABLE uint __gen_ocl_atomic_add(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_add(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_sub(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_sub(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_and(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_and(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_or(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_or(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xor(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xor(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xchg(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_xchg(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_inc(__global uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_inc(__local uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_dec(__global uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_dec(__local uint *p);
+OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__global uint *p, uint cmp, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__local uint *p, uint cmp, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imin(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imin(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imax(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_imax(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umin(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umin(__local uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umax(__global uint *p, uint val);
+OVERLOADABLE uint __gen_ocl_atomic_umax(__local uint *p, uint val);
+// atomic_<NAME>(p, val) for TYPE in address space SPACE: forwards to __gen_ocl_<PREFIX><NAME> with p cast to a SPACE uint pointer, then casts the result back to TYPE.
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE, PREFIX) \
+ OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE val) { \
+ return (TYPE)__gen_ocl_##PREFIX##NAME((SPACE uint *)p, val); \
+ }
+// Emits the wrapper for both the __global and the __local address space.
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE, PREFIX) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global, PREFIX) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local, PREFIX)
+// Emits the wrapper for both uint and int, using the plain atomic_ prefix.
+#define DECL_ATOMIC_OP(NAME) \
+ DECL_ATOMIC_OP_TYPE(NAME, uint, atomic_) \
+ DECL_ATOMIC_OP_TYPE(NAME, int, atomic_)
+// add/sub/and/or/xor/xchg use one prefix for both types; min/max pick atomic_i (int) or atomic_u (uint).
+DECL_ATOMIC_OP(add)
+DECL_ATOMIC_OP(sub)
+DECL_ATOMIC_OP(and)
+DECL_ATOMIC_OP(or)
+DECL_ATOMIC_OP(xor)
+DECL_ATOMIC_OP(xchg)
+DECL_ATOMIC_OP_TYPE(min, int, atomic_i)
+DECL_ATOMIC_OP_TYPE(max, int, atomic_i)
+DECL_ATOMIC_OP_TYPE(min, uint, atomic_u)
+DECL_ATOMIC_OP_TYPE(max, uint, atomic_u)
+
+#undef DECL_ATOMIC_OP_SPACE
+// Float variant: val is reinterpreted with as_uint for the uint built-in and the returned old value is reinterpreted with as_float. Only xchg is instantiated for float.
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE, PREFIX) \
+ OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE val) { \
+ return as_float(__gen_ocl_##PREFIX##NAME((SPACE uint *)p, as_uint(val))); \
+ }
+DECL_ATOMIC_OP_SPACE(xchg, float, __global, atomic_)
+DECL_ATOMIC_OP_SPACE(xchg, float, __local, atomic_)
+
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+// Operand-less atomics: atomic_<NAME>(p) forwards to __gen_ocl_atomic_<NAME> with p cast to a SPACE uint pointer and casts the result back to TYPE.
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE) \
+ OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p) { \
+ return (TYPE)__gen_ocl_atomic_##NAME((SPACE uint *)p); \
+ }
+// Emits the wrapper for both the __global and the __local address space.
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local)
+// Emits the wrapper for both uint and int.
+#define DECL_ATOMIC_OP(NAME) \
+ DECL_ATOMIC_OP_TYPE(NAME, uint) \
+ DECL_ATOMIC_OP_TYPE(NAME, int)
+// Instantiated for inc and dec only.
+DECL_ATOMIC_OP(inc)
+DECL_ATOMIC_OP(dec)
+
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+// Compare-exchange: atomic_<NAME>(p, cmp, val) forwards to __gen_ocl_atomic_<NAME> with p cast to a SPACE uint pointer and cmp/val cast to uint; the result is cast back to TYPE.
+#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE) \
+ OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE cmp, TYPE val) { \
+ return (TYPE)__gen_ocl_atomic_##NAME((SPACE uint *)p, (uint)cmp, (uint)val); \
+ }
+// Emits the wrapper for both the __global and the __local address space.
+#define DECL_ATOMIC_OP_TYPE(NAME, TYPE) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global) \
+ DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local)
+// Emits the wrapper for both uint and int.
+#define DECL_ATOMIC_OP(NAME) \
+ DECL_ATOMIC_OP_TYPE(NAME, uint) \
+ DECL_ATOMIC_OP_TYPE(NAME, int)
+// Instantiated for cmpxchg only.
+DECL_ATOMIC_OP(cmpxchg)
+
+#undef DECL_ATOMIC_OP
+#undef DECL_ATOMIC_OP_TYPE
+#undef DECL_ATOMIC_OP_SPACE
+
+// XXX for conformance test
+// The following atom_xxx APIs come from the OpenCL 1.0 spec.
+// The conformance test suite still tests them, so they are aliased to atomic_xxx.
+#define atom_add atomic_add
+#define atom_sub atomic_sub
+#define atom_and atomic_and
+#define atom_or atomic_or
+#define atom_xor atomic_xor
+#define atom_xchg atomic_xchg
+#define atom_min atomic_min
+#define atom_max atomic_max
+#define atom_inc atomic_inc
+#define atom_dec atomic_dec
+#define atom_cmpxchg atomic_cmpxchg
+
+
diff --git a/backend/src/libocl/script/gen_convert.sh b/backend/src/libocl/script/gen_convert.sh
new file mode 100755
index 0000000..a6d1a06
--- /dev/null
+++ b/backend/src/libocl/script/gen_convert.sh
@@ -0,0 +1,653 @@
+#! /bin/sh -e
+# Usage: gen_convert.sh [-p] -- writes to stdout. With -p the header (include guard + prototypes) is produced, otherwise the .cl source.
+if [ $1"a" = "-pa" ]; then # the appended a keeps the test well-formed when no argument is given
+ echo "#ifndef __OCL_CONVERT_H__"
+ echo "#define __OCL_CONVERT_H__"
+ echo "#include \"ocl_types.h\""
+ echo
+else
+ echo "#include \"ocl_convert.h\""
+ echo
+fi
+# VECTOR_LENGTHS, TYPES and ROUNDING_MODES are not set in this script; presumably gen_common.inc defines them - TODO confirm.
+# Must be run from the parent directory of script/, because the include below is sourced by relative path.
+. ./script/gen_common.inc
+
+# Plain convert_<type>[n]: one function per (source, destination) pair from $TYPES, for every length in $VECTOR_LENGTHS.
+for vector_length in $VECTOR_LENGTHS; do
+ if test $vector_length -eq 1; then # scalar case: the body is a C-style cast
+ for ftype in $TYPES; do
+ fbasetype=`IFS=:; set -- dummy $ftype; echo $2` # first colon-separated field of the entry
+ for ttype in $TYPES; do
+ tbasetype=`IFS=:; set -- dummy $ttype; echo $2` # first colon-separated field of the entry
+ if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE $tbasetype convert_$tbasetype($fbasetype v);"
+ else
+ echo "OVERLOADABLE $tbasetype convert_$tbasetype($fbasetype v) {"
+ echo " return ($tbasetype)v;"
+ echo "}"
+ echo
+ fi
+ done
+ done
+ else # vector case
+ for ftype in $TYPES; do
+ fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
+ for ttype in $TYPES; do
+ tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
+ if test $fbasetype = $tbasetype; then # same element type: identity conversion
+ if test $vector_length -gt 1; then # NOTE(review): looks always true here unless VECTOR_LENGTHS holds a value below 1 - confirm
+ fvectortype=$fbasetype$vector_length
+ tvectortype=$tbasetype$vector_length
+ if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE $tvectortype convert_$tvectortype($fvectortype v);"
+ else
+ echo "OVERLOADABLE $tvectortype convert_$tvectortype($fvectortype v) { return v; }"
+ fi
+ else
+ if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE $tbasetype convert_$tbasetype($fbasetype v);"
+ else
+ echo "OVERLOADABLE $tbasetype convert_$tbasetype($fbasetype v) { return v; }"
+ fi
+ fi
+ continue # nothing more to emit for this pair
+ fi
+ fvectortype=$fbasetype$vector_length
+ tvectortype=$tbasetype$vector_length
+ construct="($tbasetype)(v.s0)" # comma-separated per-component casts, grown below according to the length
+ if test $vector_length -gt 1; then
+ construct="$construct, ($tbasetype)(v.s1)"
+ fi
+ if test $vector_length -gt 2; then
+ construct="$construct, ($tbasetype)(v.s2)"
+ fi
+ if test $vector_length -gt 3; then
+ construct="$construct, ($tbasetype)(v.s3)"
+ fi
+ if test $vector_length -gt 4; then # any length above 4 gets s4..s7 at once
+ construct="$construct, ($tbasetype)(v.s4)"
+ construct="$construct, ($tbasetype)(v.s5)"
+ construct="$construct, ($tbasetype)(v.s6)"
+ construct="$construct, ($tbasetype)(v.s7)"
+ fi
+ if test $vector_length -gt 8; then # any length above 8 gets s8..sF at once
+ construct="$construct, ($tbasetype)(v.s8)"
+ construct="$construct, ($tbasetype)(v.s9)"
+ construct="$construct, ($tbasetype)(v.sA)"
+ construct="$construct, ($tbasetype)(v.sB)"
+ construct="$construct, ($tbasetype)(v.sC)"
+ construct="$construct, ($tbasetype)(v.sD)"
+ construct="$construct, ($tbasetype)(v.sE)"
+ construct="$construct, ($tbasetype)(v.sF)"
+ fi
+
+ if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE $tvectortype convert_$tvectortype($fvectortype v);"
+ else
+ echo "OVERLOADABLE $tvectortype convert_$tvectortype($fvectortype v) {"
+ echo " return ($tvectortype)($construct);"
+ echo "}"
+ echo
+ fi
+ done
+ done
+ fi
+done
+# Emitted in both modes: this DEF expands to a prototype only, and no scalar body for these pairs is generated anywhere in this script.
+echo '
+#define DEF(DSTTYPE, SRCTYPE) \
+OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x);
+DEF(char, uchar);
+DEF(char, short);
+DEF(char, ushort);
+DEF(char, int);
+DEF(char, uint);
+DEF(char, float);
+DEF(uchar, char);
+DEF(uchar, short);
+DEF(uchar, ushort);
+DEF(uchar, int);
+DEF(uchar, uint);
+DEF(uchar, float);
+DEF(short, ushort);
+DEF(short, int);
+DEF(short, uint);
+DEF(short, float);
+DEF(ushort, short);
+DEF(ushort, int);
+DEF(ushort, uint);
+DEF(ushort, float);
+DEF(int, uint);
+DEF(int, float);
+DEF(uint, int);
+DEF(uint, float);
+#undef DEF
+'
+# Saturation clamped on both sides: with -p DEF is a prototype, otherwise a body clamping x to [MIN, MAX]. The DEF uses are emitted in both modes.
+if [ $1"a" = "-pa" ]; then
+ echo "#define DEF(DSTTYPE, SRCTYPE, MIN, MAX) OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x);"
+else
+ echo '
+#define DEF(DSTTYPE, SRCTYPE, MIN, MAX) \
+OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \
+ return x >= MAX ? (DSTTYPE)MAX : x <= MIN ? (DSTTYPE)MIN : x; \
+}
+'
+fi
+
+echo '
+DEF(char, long, -128, 127);
+DEF(uchar, long, 0, 255);
+DEF(short, long, -32768, 32767);
+DEF(ushort, long, 0, 65535);
+DEF(int, long, -0x7fffffff-1, 0x7fffffff);
+DEF(uint, long, 0, 0xffffffffu);
+DEF(long, float, -9.223372036854776e+18f, 9.223372036854776e+18f);
+DEF(ulong, float, 0, 1.8446744073709552e+19f);
+#undef DEF
+'
+# Saturation from ulong sources: the body only tests x >= MAX, so only an upper clamp is applied.
+if [ $1"a" = "-pa" ]; then
+ echo "#define DEF(DSTTYPE, SRCTYPE, MAX) OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x);"
+else
+ echo '
+#define DEF(DSTTYPE, SRCTYPE, MAX) \
+OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \
+ return x >= MAX ? (DSTTYPE)MAX : x; \
+}
+'
+fi
+
+echo '
+DEF(char, ulong, 127);
+DEF(uchar, ulong, 255);
+DEF(short, ulong, 32767);
+DEF(ushort, ulong, 65535);
+DEF(int, ulong, 0x7fffffff);
+DEF(uint, ulong, 0xffffffffu);
+#undef DEF
+'
+# convert_long_sat(ulong) is written out by hand: values at or above 0x7fffffffffffffff are clamped to that constant.
+if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE long convert_long_sat(ulong x);"
+else
+ echo '
+OVERLOADABLE long convert_long_sat(ulong x) {
+ ulong MAX = 0x7ffffffffffffffful;
+ return x >= MAX ? MAX : x;
+}
+'
+fi
+# Saturation with a lower clamp only: the body returns 0 for x <= 0 and x otherwise. All pairs listed have a signed source and an unsigned destination.
+if [ $1"a" = "-pa" ]; then
+ echo "#define DEF(DSTTYPE, SRCTYPE) OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x);"
+else
+ echo '
+#define DEF(DSTTYPE, SRCTYPE) \
+ OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \
+ return x <= 0 ? 0 : x; \
+}
+'
+fi
+
+echo '
+ DEF(ushort, char);
+ DEF(uint, char);
+ DEF(uint, short);
+ DEF(ulong, char);
+ DEF(ulong, short);
+ DEF(ulong, int);
+ DEF(ulong, long);
+ #undef DEF
+'
+# Saturation without any clamp: the body is a plain return x for every pair listed below.
+if [ $1"a" = "-pa" ]; then
+ echo "#define DEF(DSTTYPE, SRCTYPE) OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x);"
+else
+ echo '
+#define DEF(DSTTYPE, SRCTYPE) \
+ OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x) { \
+ return x; \
+}
+'
+fi
+
+echo '
+DEF(char, char);
+DEF(uchar, uchar);
+DEF(short, char);
+DEF(short, uchar);
+DEF(short, short);
+DEF(ushort, uchar);
+DEF(ushort, ushort);
+DEF(int, char);
+DEF(int, uchar);
+DEF(int, short);
+DEF(int, ushort);
+DEF(int, int);
+DEF(uint, uchar);
+DEF(uint, ushort);
+DEF(uint, uint);
+DEF(long, char);
+DEF(long, uchar);
+DEF(long, short);
+DEF(long, ushort);
+DEF(long, int);
+DEF(long, uint);
+DEF(long, long);
+DEF(ulong, uchar);
+DEF(ulong, ushort);
+DEF(ulong, uint);
+DEF(ulong, ulong);
+#undef DEF
+'
+
+# Vector convert_<type><n>_sat: applies the scalar convert_<type>_sat to every component. Length 1 is skipped (scalars are emitted above).
+for vector_length in $VECTOR_LENGTHS; do
+ if test $vector_length -eq 1; then continue; fi
+
+ for ftype in $TYPES; do
+ fbasetype=`IFS=:; set -- dummy $ftype; echo $2` # first colon-separated field of the entry
+ if test $fbasetype = "double"; then continue; fi # no double sources
+
+ for ttype in $TYPES; do
+ tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
+ if test $tbasetype = "double" -o $tbasetype = "float"; then continue; fi # no double or float destinations
+
+ fvectortype=$fbasetype$vector_length
+ tvectortype=$tbasetype$vector_length
+ conv="convert_${tbasetype}_sat"
+
+ construct="$conv(v.s0)" # comma-separated per-component calls, grown below according to the length
+ if test $vector_length -gt 1; then
+ construct="$construct, $conv(v.s1)"
+ fi
+ if test $vector_length -gt 2; then
+ construct="$construct, $conv(v.s2)"
+ fi
+ if test $vector_length -gt 3; then
+ construct="$construct, $conv(v.s3)"
+ fi
+ if test $vector_length -gt 4; then # any length above 4 gets s4..s7 at once
+ construct="$construct, $conv(v.s4)"
+ construct="$construct, $conv(v.s5)"
+ construct="$construct, $conv(v.s6)"
+ construct="$construct, $conv(v.s7)"
+ fi
+ if test $vector_length -gt 8; then # any length above 8 gets s8..sF at once
+ construct="$construct, $conv(v.s8)"
+ construct="$construct, $conv(v.s9)"
+ construct="$construct, $conv(v.sA)"
+ construct="$construct, $conv(v.sB)"
+ construct="$construct, $conv(v.sC)"
+ construct="$construct, $conv(v.sD)"
+ construct="$construct, $conv(v.sE)"
+ construct="$construct, $conv(v.sF)"
+ fi
+
+ if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE $tvectortype convert_${tvectortype}_sat($fvectortype v);"
+ else
+ echo "OVERLOADABLE $tvectortype convert_${tvectortype}_sat($fvectortype v) {"
+ echo " return ($tvectortype)($construct);"
+ echo "}"
+ echo
+ fi
+ done
+ done
+done
+
+if [ $1"a" != "-pa" ]; then
+echo '
+float __gen_ocl_rndz(float x);
+float __gen_ocl_rnde(float x);
+float __gen_ocl_rndu(float x);
+float __gen_ocl_rndd(float x);
+OVERLOADABLE float __convert_float_rtz(long x)
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x;
+ long l = u.f;
+ if((l > x && x > 0) || x >= 0x7fffffc000000000 ||
+ (l < x && x < 0)) {
+ u.u -= 1;
+ }
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtp(long x)
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x;
+ long l = u.f; //can not use u.f < x
+ if(l < x && x < 0x7fffffc000000000) {
+ if(x > 0)
+ u.u = u.u + 1;
+ else
+ u.u = u.u - 1;
+ }
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtn(long x)
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x;
+ long l = u.f; //avoid overflow
+ if(l > x || x >= 0x7fffffc000000000) {
+ if(x > 0)
+ u.u = u.u - 1;
+ else
+ u.u = u.u + 1;
+ }
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtz(ulong x)
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x;
+ ulong l = u.f;
+ if(l > x || x >= 0xffffff8000000000)
+ u.u -= 1;
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtp(ulong x)
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x;
+ ulong l = u.f; //can not use u.f < x
+ if(l < x && x < 0xffffff8000000000)
+ u.u = u.u + 1;
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtn(ulong x)
+{
+ return __convert_float_rtz(x);
+}
+OVERLOADABLE float __convert_float_rtz(int x)
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x;
+ long i = u.f;
+ if((i > x && x > 0) ||
+ (i < x && x < 0)) {
+ u.u -= 1;
+ }
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtp(int x) // int to float, rounding toward +infinity
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x; // default conversion; the result may be below or above x
+ long i = u.f; // long, not int: u.f can be 2^31 after rounding up, which does not fit in int
+ if(i < x) { // result fell below x: move one representable step up
+ if(x > 0)
+ u.u += 1; // positive value: next larger magnitude
+ else
+ u.u -= 1; // negative value: next smaller magnitude
+ }
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtn(int x)
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x;
+ long i = u.f; //avoid overflow
+ if(i > x) {
+ if(x > 0)
+ u.u = u.u - 1;
+ else
+ u.u = u.u + 1;
+ }
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtz(uint x)
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x;
+ ulong i = u.f;
+ if(i > x)
+ u.u -= 1;
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtp(uint x) // uint to float, rounding toward +infinity
+{
+ union {
+ uint u;
+ float f;
+ } u;
+ u.f = x; // default conversion; the result may be below or above x
+ ulong i = u.f; // ulong, not uint: u.f can be 2^32 after rounding up, which does not fit in uint
+ if(i < x) // result fell below x: move one representable step up
+ u.u += 1;
+ return u.f;
+}
+OVERLOADABLE float __convert_float_rtn(uint x)
+{
+ return __convert_float_rtz(x);
+}
+'
+fi
+
+# convert_<type>[n]_<rounding>: scalars get the four rte/rtz/rtp/rtn variants written out; vectors call the scalar variant per component for each entry of $ROUNDING_MODES.
+for vector_length in $VECTOR_LENGTHS; do
+ for ftype in $TYPES; do
+ fbasetype=`IFS=:; set -- dummy $ftype; echo $2` # first colon-separated field of the entry
+ if test $fbasetype = "double"; then continue; fi # no double sources
+
+ for ttype in $TYPES; do
+ tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
+ if test $tbasetype = "double"; then continue; fi # no double destinations
+
+ if test $vector_length -eq 1; then # scalar case
+ if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_rte($fbasetype x);"
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_rtz($fbasetype x);"
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_rtp($fbasetype x);"
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_rtn($fbasetype x);"
+ else
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_rte($fbasetype x)"
+ if test $fbasetype = "float" -a $tbasetype != "float"; then # float to non-float: round first with __gen_ocl_rnde
+ echo "{ return __gen_ocl_rnde(x); }"
+ else
+ echo "{ return x; }"
+ fi
+
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_rtz($fbasetype x)"
+ if test $fbasetype = "float" -a $tbasetype != "float"; then
+ echo "{ return __gen_ocl_rndz(x); }"
+ elif [ "$fbasetype" = "int" -o "$fbasetype" = "uint" -o "$fbasetype" = "long" -o "$fbasetype" = "ulong" ] && [ "$tbasetype" = "float" ]; then # 32/64-bit integer to float: use the __convert_float_* helper
+ echo "{ return __convert_${tbasetype}_rtz(x); }"
+ else
+ echo "{ return x; }"
+ fi
+
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_rtp($fbasetype x)"
+ if test $fbasetype = "float" -a $tbasetype != "float"; then
+ echo "{ return __gen_ocl_rndu(x); }"
+ elif [ "$fbasetype" = "int" -o "$fbasetype" = "uint" -o "$fbasetype" = "long" -o "$fbasetype" = "ulong" ] && [ "$tbasetype" = "float" ]; then
+ echo "{ return __convert_${tbasetype}_rtp(x); }"
+ else
+ echo "{ return x; }"
+ fi
+
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_rtn($fbasetype x)"
+ if test $fbasetype = "float" -a $tbasetype != "float"; then
+ echo "{ return __gen_ocl_rndd(x); }"
+ elif [ "$fbasetype" = "int" -o "$fbasetype" = "uint" -o "$fbasetype" = "long" -o "$fbasetype" = "ulong" ] && [ "$tbasetype" = "float" ]; then
+ echo "{ return __convert_${tbasetype}_rtn(x); }"
+ else
+ echo "{ return x; }"
+ fi
+ fi
+
+ continue # scalars are done; the vector code below is skipped
+ fi
+
+ for rounding in $ROUNDING_MODES; do
+ fvectortype=$fbasetype$vector_length
+ tvectortype=$tbasetype$vector_length
+ conv="convert_${tbasetype}_${rounding}"
+
+ construct="$conv(v.s0)" # comma-separated per-component calls, grown below according to the length
+ if test $vector_length -gt 1; then
+ construct="$construct, $conv(v.s1)"
+ fi
+ if test $vector_length -gt 2; then
+ construct="$construct, $conv(v.s2)"
+ fi
+ if test $vector_length -gt 3; then
+ construct="$construct, $conv(v.s3)"
+ fi
+ if test $vector_length -gt 4; then # any length above 4 gets s4..s7 at once
+ construct="$construct, $conv(v.s4)"
+ construct="$construct, $conv(v.s5)"
+ construct="$construct, $conv(v.s6)"
+ construct="$construct, $conv(v.s7)"
+ fi
+ if test $vector_length -gt 8; then # any length above 8 gets s8..sF at once
+ construct="$construct, $conv(v.s8)"
+ construct="$construct, $conv(v.s9)"
+ construct="$construct, $conv(v.sA)"
+ construct="$construct, $conv(v.sB)"
+ construct="$construct, $conv(v.sC)"
+ construct="$construct, $conv(v.sD)"
+ construct="$construct, $conv(v.sE)"
+ construct="$construct, $conv(v.sF)"
+ fi
+
+ if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE $tvectortype convert_${tvectortype}_${rounding}($fvectortype v);"
+ else
+ echo "OVERLOADABLE $tvectortype convert_${tvectortype}_${rounding}($fvectortype v) {"
+ echo " return ($tvectortype)($construct);"
+ echo "}"
+ echo
+ fi
+ done
+ done
+ done
+done
+
+# convert_<type>[n]_sat_<rounding>: scalars round float sources with __gen_ocl_rnd* and then call convert_<type>_sat; vectors call the scalar variant per component.
+for vector_length in $VECTOR_LENGTHS; do
+ for ftype in $TYPES; do
+ fbasetype=`IFS=:; set -- dummy $ftype; echo $2` # first colon-separated field of the entry
+ if test $fbasetype = "double"; then continue; fi # no double sources
+
+ for ttype in $TYPES; do
+ tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
+ if test $tbasetype = "double" -o $tbasetype = "float"; then continue; fi # no double or float destinations
+
+ if test $vector_length -eq 1; then # scalar case
+ if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_sat_rte($fbasetype x);"
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_sat_rtz($fbasetype x);"
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_sat_rtp($fbasetype x);"
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_sat_rtn($fbasetype x);"
+ else
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_sat_rte($fbasetype x)"
+ if test $fbasetype = "float"; then # only float sources are rounded before saturating
+ echo "{ return convert_${tbasetype}_sat(__gen_ocl_rnde(x)); }"
+ else
+ echo "{ return convert_${tbasetype}_sat(x); }"
+ fi
+
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_sat_rtz($fbasetype x)"
+ if test $fbasetype = "float"; then
+ echo "{ return convert_${tbasetype}_sat(__gen_ocl_rndz(x)); }"
+ else
+ echo "{ return convert_${tbasetype}_sat(x); }"
+ fi
+
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_sat_rtp($fbasetype x)"
+ if test $fbasetype = "float"; then
+ echo "{ return convert_${tbasetype}_sat(__gen_ocl_rndu(x)); }"
+ else
+ echo "{ return convert_${tbasetype}_sat(x); }"
+ fi
+
+ echo "OVERLOADABLE $tbasetype convert_${tbasetype}_sat_rtn($fbasetype x)"
+ if test $fbasetype = "float"; then
+ echo "{ return convert_${tbasetype}_sat(__gen_ocl_rndd(x)); }"
+ else
+ echo "{ return convert_${tbasetype}_sat(x); }"
+ fi
+ fi
+ continue # scalars are done; the vector code below is skipped
+ fi
+
+ for rounding in $ROUNDING_MODES; do
+ fvectortype=$fbasetype$vector_length
+ tvectortype=$tbasetype$vector_length
+ conv="convert_${tbasetype}_sat_${rounding}"
+
+ construct="$conv(v.s0)" # comma-separated per-component calls, grown below according to the length
+ if test $vector_length -gt 1; then
+ construct="$construct, $conv(v.s1)"
+ fi
+ if test $vector_length -gt 2; then
+ construct="$construct, $conv(v.s2)"
+ fi
+ if test $vector_length -gt 3; then
+ construct="$construct, $conv(v.s3)"
+ fi
+ if test $vector_length -gt 4; then # any length above 4 gets s4..s7 at once
+ construct="$construct, $conv(v.s4)"
+ construct="$construct, $conv(v.s5)"
+ construct="$construct, $conv(v.s6)"
+ construct="$construct, $conv(v.s7)"
+ fi
+ if test $vector_length -gt 8; then # any length above 8 gets s8..sF at once
+ construct="$construct, $conv(v.s8)"
+ construct="$construct, $conv(v.s9)"
+ construct="$construct, $conv(v.sA)"
+ construct="$construct, $conv(v.sB)"
+ construct="$construct, $conv(v.sC)"
+ construct="$construct, $conv(v.sD)"
+ construct="$construct, $conv(v.sE)"
+ construct="$construct, $conv(v.sF)"
+ fi
+
+ if [ $1"a" = "-pa" ]; then
+ echo "OVERLOADABLE $tvectortype convert_${tvectortype}_sat_${rounding}($fvectortype v);"
+ else
+ echo "OVERLOADABLE $tvectortype convert_${tvectortype}_sat_${rounding}($fvectortype v) {"
+ echo " return ($tvectortype)($construct);"
+ echo "}"
+ echo
+ fi
+ done
+ done
+ done
+done
+
+if [ $1"a" = "-pa" ]; then # header mode only: close the __OCL_CONVERT_H__ include guard opened at the top
+ echo "#endif /* __OCL_CONVERT_H__ */"
+fi
--
1.8.3.2
More information about the Beignet
mailing list