[Beignet] [PATCH 2/2] Fix rtz, rtp, rtn when converting int/uint/long/ulong to float.
Yang Rong
rong.r.yang at intel.com
Mon Dec 16 23:32:13 PST 2013
Convert the input to float, then convert that float back to the input type as c. Compare
the input with c; if c does not satisfy the rtz/rtp/rtn requirement, adjust the float by +/- 1 ULP.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/gen_convert.sh | 154 ++++++++++++++++++++++++++++++++++++++++
backend/src/ocl_convert.h | 172 +++++++++++++++++++++++++++++++++++++++++----
2 files changed, 314 insertions(+), 12 deletions(-)
diff --git a/backend/src/gen_convert.sh b/backend/src/gen_convert.sh
index 793ed5b..0aafc3f 100755
--- a/backend/src/gen_convert.sh
+++ b/backend/src/gen_convert.sh
@@ -230,6 +230,154 @@ float __gen_ocl_rndz(float x);
float __gen_ocl_rnde(float x);
float __gen_ocl_rndu(float x);
float __gen_ocl_rndd(float x);
+/* long -> float, rounding toward zero: trim the default conversion by 1 ULP when it overshot in magnitude. */
+INLINE_OVERLOADABLE float __convert_float_rtz(long x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  long back = cvt.val; /* round trip; differs from x only when the conversion was inexact */
+  /* Inputs at or above this bound are always trimmed (presumably they convert to 2^63, beyond long). */
+  int near_max = x >= 0x7fffffc000000000;
+  int grew_pos = back > x && x > 0;
+  int grew_neg = back < x && x < 0;
+  if(grew_pos || near_max || grew_neg)
+    cvt.bits -= 1; /* the raw encoding is sign-magnitude, so -1 moves either sign toward zero */
+  return cvt.val;
+}
+/* long -> float, rounding toward +infinity: nudge the default conversion up by 1 ULP when it fell below x. */
+INLINE_OVERLOADABLE float __convert_float_rtp(long x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Compare in the integer domain; comparing cvt.val < x would convert x to float again. */
+  long back = cvt.val;
+  /* Inputs at or above the bound are left alone (presumably they already convert to 2^63, */
+  /* where the round trip through long is not reliable). */
+  if(back >= x || x >= 0x7fffffc000000000)
+    return cvt.val;
+  /* Raw-bit +1 grows the magnitude, -1 shrinks it; pick whichever moves the value upward. */
+  if(x > 0) cvt.bits += 1;
+  else cvt.bits -= 1;
+  return cvt.val;
+}
+/* long -> float, rounding toward -infinity: nudge the default conversion down by 1 ULP when it landed above x. */
+INLINE_OVERLOADABLE float __convert_float_rtn(long x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  long back = cvt.val; /* round trip through long so the comparison is done on integers */
+  /* The explicit bound catches inputs for which the round trip is presumably out of range */
+  /* for long (they would convert to 2^63). */
+  int too_high = back > x || x >= 0x7fffffc000000000;
+  if(too_high) {
+    /* Moving downward means a smaller magnitude for positive x and a larger one otherwise. */
+    cvt.bits = (x > 0) ? cvt.bits - 1
+                       : cvt.bits + 1;
+  }
+  return cvt.val;
+}
+/* ulong -> float, rounding toward zero: step the default conversion down 1 ULP when it overshot x. */
+INLINE_OVERLOADABLE float __convert_float_rtz(ulong x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  ulong back = cvt.val; /* round trip; exceeds x only if the conversion rounded up */
+  /* Inputs at or above the bound are always stepped down (presumably they convert to 2^64, */
+  /* which no ulong round trip can represent). */
+  if(x >= 0xffffff8000000000 || back > x)
+    cvt.bits -= 1;
+  return cvt.val;
+}
+/* ulong -> float, rounding toward +infinity: step the default conversion up 1 ULP when it fell short of x. */
+INLINE_OVERLOADABLE float __convert_float_rtp(ulong x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Compare as integers; cvt.val < x would convert x to float again and hide the rounding error. */
+  ulong back = cvt.val;
+  /* Inputs at or above the bound are left alone (presumably already rounded up to 2^64). */
+  if(x < 0xffffff8000000000 && back < x)
+    cvt.bits += 1;
+  return cvt.val;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(ulong x) // ulong -> float, rounding toward -infinity
+{
+ return __convert_float_rtz(x); // x is never negative, so rounding down equals rounding toward zero
+}
+/* int -> float, rounding toward zero: trim the default conversion by 1 ULP when it overshot in magnitude. */
+INLINE_OVERLOADABLE float __convert_float_rtz(int x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as long rather than int, presumably so a value rounded up to 2^31 still fits. */
+  long back = cvt.val;
+  int grew_pos = x > 0 && back > x;
+  int grew_neg = x < 0 && back < x;
+  if(grew_pos || grew_neg) {
+    cvt.bits -= 1; /* the raw encoding is sign-magnitude, so -1 moves either sign toward zero */
+  }
+  return cvt.val;
+}
+/* int -> float, rounding toward +infinity: nudge the default conversion up by 1 ULP when it fell below x. */
+INLINE_OVERLOADABLE float __convert_float_rtp(int x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as long, not int: inputs near INT_MAX convert to 2^31, which is out of range for int. */
+  long back = cvt.val;
+  if(back < x) {
+    /* Raw-bit +1 grows the magnitude, -1 shrinks it; pick whichever moves the value upward. */
+    if(x > 0)
+      cvt.bits += 1;
+    else
+      cvt.bits -= 1;
+  }
+  return cvt.val;
+}
+/* int -> float, rounding toward -infinity: nudge the default conversion down by 1 ULP when it landed above x. */
+INLINE_OVERLOADABLE float __convert_float_rtn(int x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as long rather than int so the round trip cannot overflow. */
+  long back = cvt.val;
+  if(back <= x)
+    return cvt.val; /* not above x: nothing to correct */
+  /* Moving downward means a smaller magnitude for positive x and a larger one otherwise. */
+  if(x > 0)
+    cvt.bits -= 1;
+  else
+    cvt.bits += 1;
+  return cvt.val;
+}
+/* uint -> float, rounding toward zero: step the default conversion down 1 ULP when it overshot x. */
+INLINE_OVERLOADABLE float __convert_float_rtz(uint x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as ulong: inputs near UINT_MAX may convert to 2^32, which does not fit in uint. */
+  ulong back = cvt.val;
+  if(back > x)
+    cvt.bits -= 1;
+  /* Returns float, matching every other __convert_float_* overload. */
+  return cvt.val;
+}
+/* uint -> float, rounding toward +infinity: step the default conversion up 1 ULP when it fell short of x. */
+INLINE_OVERLOADABLE float __convert_float_rtp(uint x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as ulong, not uint: inputs near UINT_MAX convert to 2^32, which is out of range for uint. */
+  ulong back = cvt.val;
+  if(back < x)
+    cvt.bits += 1;
+  /* Returns float, matching every other __convert_float_* overload. */
+  return cvt.val;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(uint x) // uint -> float, rounding toward -infinity
+{
+ return __convert_float_rtz(x); // x is never negative, so rounding down equals rounding toward zero
+}
'
# convert_DSTTYPE_ROUNDING function
@@ -253,6 +401,8 @@ for vector_length in $VECTOR_LENGTHS; do
echo "INLINE_OVERLOADABLE $tbasetype convert_${tbasetype}_rtz($fbasetype x)"
if test $fbasetype = "float" -a $tbasetype != "float"; then
echo "{ return __gen_ocl_rndz(x); }"
+ elif [ "$fbasetype" = "int" -o "$fbasetype" = "uint" -o "$fbasetype" = "long" -o "$fbasetype" = "ulong" ] && [ "$tbasetype" = "float" ]; then
+ echo "{ return __convert_${tbasetype}_rtz(x); }"
else
echo "{ return x; }"
fi
@@ -260,6 +410,8 @@ for vector_length in $VECTOR_LENGTHS; do
echo "INLINE_OVERLOADABLE $tbasetype convert_${tbasetype}_rtp($fbasetype x)"
if test $fbasetype = "float" -a $tbasetype != "float"; then
echo "{ return __gen_ocl_rndu(x); }"
+ elif [ "$fbasetype" = "int" -o "$fbasetype" = "uint" -o "$fbasetype" = "long" -o "$fbasetype" = "ulong" ] && [ "$tbasetype" = "float" ]; then
+ echo "{ return __convert_${tbasetype}_rtp(x); }"
else
echo "{ return x; }"
fi
@@ -267,6 +419,8 @@ for vector_length in $VECTOR_LENGTHS; do
echo "INLINE_OVERLOADABLE $tbasetype convert_${tbasetype}_rtn($fbasetype x)"
if test $fbasetype = "float" -a $tbasetype != "float"; then
echo "{ return __gen_ocl_rndd(x); }"
+ elif [ "$fbasetype" = "int" -o "$fbasetype" = "uint" -o "$fbasetype" = "long" -o "$fbasetype" = "ulong" ] && [ "$tbasetype" = "float" ]; then
+ echo "{ return __convert_${tbasetype}_rtn(x); }"
else
echo "{ return x; }"
fi
diff --git a/backend/src/ocl_convert.h b/backend/src/ocl_convert.h
index e37cecb..9ef8bd0 100644
--- a/backend/src/ocl_convert.h
+++ b/backend/src/ocl_convert.h
@@ -3800,6 +3800,154 @@ float __gen_ocl_rndz(float x);
float __gen_ocl_rnde(float x);
float __gen_ocl_rndu(float x);
float __gen_ocl_rndd(float x);
+/* long -> float, rounding toward zero: trim the default conversion by 1 ULP when it overshot in magnitude. */
+INLINE_OVERLOADABLE float __convert_float_rtz(long x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  long back = cvt.val; /* round trip; differs from x only when the conversion was inexact */
+  /* Inputs at or above this bound are always trimmed (presumably they convert to 2^63, beyond long). */
+  int near_max = x >= 0x7fffffc000000000;
+  int grew_pos = back > x && x > 0;
+  int grew_neg = back < x && x < 0;
+  if(grew_pos || near_max || grew_neg)
+    cvt.bits -= 1; /* the raw encoding is sign-magnitude, so -1 moves either sign toward zero */
+  return cvt.val;
+}
+/* long -> float, rounding toward +infinity: nudge the default conversion up by 1 ULP when it fell below x. */
+INLINE_OVERLOADABLE float __convert_float_rtp(long x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Compare in the integer domain; comparing cvt.val < x would convert x to float again. */
+  long back = cvt.val;
+  /* Inputs at or above the bound are left alone (presumably they already convert to 2^63, */
+  /* where the round trip through long is not reliable). */
+  if(back >= x || x >= 0x7fffffc000000000)
+    return cvt.val;
+  /* Raw-bit +1 grows the magnitude, -1 shrinks it; pick whichever moves the value upward. */
+  if(x > 0) cvt.bits += 1;
+  else cvt.bits -= 1;
+  return cvt.val;
+}
+/* long -> float, rounding toward -infinity: nudge the default conversion down by 1 ULP when it landed above x. */
+INLINE_OVERLOADABLE float __convert_float_rtn(long x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  long back = cvt.val; /* round trip through long so the comparison is done on integers */
+  /* The explicit bound catches inputs for which the round trip is presumably out of range */
+  /* for long (they would convert to 2^63). */
+  int too_high = back > x || x >= 0x7fffffc000000000;
+  if(too_high) {
+    /* Moving downward means a smaller magnitude for positive x and a larger one otherwise. */
+    cvt.bits = (x > 0) ? cvt.bits - 1
+                       : cvt.bits + 1;
+  }
+  return cvt.val;
+}
+/* ulong -> float, rounding toward zero: step the default conversion down 1 ULP when it overshot x. */
+INLINE_OVERLOADABLE float __convert_float_rtz(ulong x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  ulong back = cvt.val; /* round trip; exceeds x only if the conversion rounded up */
+  /* Inputs at or above the bound are always stepped down (presumably they convert to 2^64, */
+  /* which no ulong round trip can represent). */
+  if(x >= 0xffffff8000000000 || back > x)
+    cvt.bits -= 1;
+  return cvt.val;
+}
+/* ulong -> float, rounding toward +infinity: step the default conversion up 1 ULP when it fell short of x. */
+INLINE_OVERLOADABLE float __convert_float_rtp(ulong x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Compare as integers; cvt.val < x would convert x to float again and hide the rounding error. */
+  ulong back = cvt.val;
+  /* Inputs at or above the bound are left alone (presumably already rounded up to 2^64). */
+  if(x < 0xffffff8000000000 && back < x)
+    cvt.bits += 1;
+  return cvt.val;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(ulong x) // ulong -> float, rounding toward -infinity
+{
+ return __convert_float_rtz(x); // x is never negative, so rounding down equals rounding toward zero
+}
+/* int -> float, rounding toward zero: trim the default conversion by 1 ULP when it overshot in magnitude. */
+INLINE_OVERLOADABLE float __convert_float_rtz(int x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as long rather than int, presumably so a value rounded up to 2^31 still fits. */
+  long back = cvt.val;
+  int grew_pos = x > 0 && back > x;
+  int grew_neg = x < 0 && back < x;
+  if(grew_pos || grew_neg) {
+    cvt.bits -= 1; /* the raw encoding is sign-magnitude, so -1 moves either sign toward zero */
+  }
+  return cvt.val;
+}
+/* int -> float, rounding toward +infinity: nudge the default conversion up by 1 ULP when it fell below x. */
+INLINE_OVERLOADABLE float __convert_float_rtp(int x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as long, not int: inputs near INT_MAX convert to 2^31, which is out of range for int. */
+  long back = cvt.val;
+  if(back < x) {
+    /* Raw-bit +1 grows the magnitude, -1 shrinks it; pick whichever moves the value upward. */
+    if(x > 0)
+      cvt.bits += 1;
+    else
+      cvt.bits -= 1;
+  }
+  return cvt.val;
+}
+/* int -> float, rounding toward -infinity: nudge the default conversion down by 1 ULP when it landed above x. */
+INLINE_OVERLOADABLE float __convert_float_rtn(int x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as long rather than int so the round trip cannot overflow. */
+  long back = cvt.val;
+  if(back <= x)
+    return cvt.val; /* not above x: nothing to correct */
+  /* Moving downward means a smaller magnitude for positive x and a larger one otherwise. */
+  if(x > 0)
+    cvt.bits -= 1;
+  else
+    cvt.bits += 1;
+  return cvt.val;
+}
+/* uint -> float, rounding toward zero: step the default conversion down 1 ULP when it overshot x. */
+INLINE_OVERLOADABLE float __convert_float_rtz(uint x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as ulong: inputs near UINT_MAX may convert to 2^32, which does not fit in uint. */
+  ulong back = cvt.val;
+  if(back > x)
+    cvt.bits -= 1;
+  /* Returns float, matching every other __convert_float_* overload. */
+  return cvt.val;
+}
+/* uint -> float, rounding toward +infinity: step the default conversion up 1 ULP when it fell short of x. */
+INLINE_OVERLOADABLE float __convert_float_rtp(uint x)
+{
+  union { uint bits; float val; } cvt;
+  cvt.val = x;
+  /* Read back as ulong, not uint: inputs near UINT_MAX convert to 2^32, which is out of range for uint. */
+  ulong back = cvt.val;
+  if(back < x)
+    cvt.bits += 1;
+  /* Returns float, matching every other __convert_float_* overload. */
+  return cvt.val;
+}
+INLINE_OVERLOADABLE float __convert_float_rtn(uint x) // uint -> float, rounding toward -infinity
+{
+ return __convert_float_rtz(x); // x is never negative, so rounding down equals rounding toward zero
+}
INLINE_OVERLOADABLE long convert_long_rte(long x)
{ return x; }
@@ -3868,11 +4016,11 @@ INLINE_OVERLOADABLE uchar convert_uchar_rtn(long x)
INLINE_OVERLOADABLE float convert_float_rte(long x)
{ return x; }
INLINE_OVERLOADABLE float convert_float_rtz(long x)
-{ return x; }
+{ return __convert_float_rtz(x); }
INLINE_OVERLOADABLE float convert_float_rtp(long x)
-{ return x; }
+{ return __convert_float_rtp(x); }
INLINE_OVERLOADABLE float convert_float_rtn(long x)
-{ return x; }
+{ return __convert_float_rtn(x); }
INLINE_OVERLOADABLE long convert_long_rte(ulong x)
{ return x; }
INLINE_OVERLOADABLE long convert_long_rtz(ulong x)
@@ -3940,11 +4088,11 @@ INLINE_OVERLOADABLE uchar convert_uchar_rtn(ulong x)
INLINE_OVERLOADABLE float convert_float_rte(ulong x)
{ return x; }
INLINE_OVERLOADABLE float convert_float_rtz(ulong x)
-{ return x; }
+{ return __convert_float_rtz(x); }
INLINE_OVERLOADABLE float convert_float_rtp(ulong x)
-{ return x; }
+{ return __convert_float_rtp(x); }
INLINE_OVERLOADABLE float convert_float_rtn(ulong x)
-{ return x; }
+{ return __convert_float_rtn(x); }
INLINE_OVERLOADABLE long convert_long_rte(int x)
{ return x; }
INLINE_OVERLOADABLE long convert_long_rtz(int x)
@@ -4012,11 +4160,11 @@ INLINE_OVERLOADABLE uchar convert_uchar_rtn(int x)
INLINE_OVERLOADABLE float convert_float_rte(int x)
{ return x; }
INLINE_OVERLOADABLE float convert_float_rtz(int x)
-{ return x; }
+{ return __convert_float_rtz(x); }
INLINE_OVERLOADABLE float convert_float_rtp(int x)
-{ return x; }
+{ return __convert_float_rtp(x); }
INLINE_OVERLOADABLE float convert_float_rtn(int x)
-{ return x; }
+{ return __convert_float_rtn(x); }
INLINE_OVERLOADABLE long convert_long_rte(uint x)
{ return x; }
INLINE_OVERLOADABLE long convert_long_rtz(uint x)
@@ -4084,11 +4232,11 @@ INLINE_OVERLOADABLE uchar convert_uchar_rtn(uint x)
INLINE_OVERLOADABLE float convert_float_rte(uint x)
{ return x; }
INLINE_OVERLOADABLE float convert_float_rtz(uint x)
-{ return x; }
+{ return __convert_float_rtz(x); }
INLINE_OVERLOADABLE float convert_float_rtp(uint x)
-{ return x; }
+{ return __convert_float_rtp(x); }
INLINE_OVERLOADABLE float convert_float_rtn(uint x)
-{ return x; }
+{ return __convert_float_rtn(x); }
INLINE_OVERLOADABLE long convert_long_rte(short x)
{ return x; }
INLINE_OVERLOADABLE long convert_long_rtz(short x)
--
1.8.1.2
More information about the Beignet
mailing list