[Beignet] [PATCH] [PATCH]GBE: improve precision of ldexp
Song, Ruiling
ruiling.song at intel.com
Mon Dec 30 22:04:48 PST 2013
One comment. The patch Tested OK.
-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Lv Meng
Sent: Monday, December 23, 2013 8:21 AM
To: beignet at lists.freedesktop.org
Cc: Lv, Meng
Subject: [Beignet] [PATCH] [PATCH]GBE: improve precision of ldexp
Signed-off-by: Lv Meng <meng.lv at intel.com>
---
backend/src/ocl_stdlib.tmpl.h | 100 +++++++++++++++++++++--------------------
1 file changed, 51 insertions(+), 49 deletions(-)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 2345ecb..6ae7cd8 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -173,6 +173,12 @@ do { \
} while (0)
#endif
+int __ocl_finitef (float x){
It is safe to declare it as "INLINE".
+ unsigned ix;
+ GEN_OCL_GET_FLOAT_WORD (ix, x);
+ return (ix & 0x7fffffff) < 0x7f800000; }
+
#define HUGE_VALF (__ocl_inff())
#define INFINITY (__ocl_inff())
#define NAN (__ocl_nanf())
@@ -1651,6 +1657,14 @@ INLINE_OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); } INLINE_OVERLOADABLE float __gen_ocl_internal_cbrt(float x) {
return __gen_ocl_pow(x, 0.3333333333f); }
+INLINE_OVERLOADABLE float __gen_ocl_internal_copysign(float x, float y)
+{
+ union { unsigned u; float f; } ux, uy;
+ ux.f = x;
+ uy.f = y;
+ ux.u = (ux.u & 0x7fffffff) | (uy.u & 0x80000000u);
+ return ux.f;
+}
+
#define BODY \
*cosval = native_cos(x); \
return native_sin(x);
@@ -1688,6 +1702,37 @@ INLINE float __gen_ocl_asin_util(float x) {
float w = p / q;
return x + x*w;
}
+float __gen_ocl_scalbnf (float x, int n){
+ float two25 = 3.355443200e+07, /* 0x4c000000 */
+ twom25 = 2.9802322388e-08, /* 0x33000000 */
+ huge = 1.0e+30,
+ tiny = 1.0e-30;
+ int k,ix;
+ GEN_OCL_GET_FLOAT_WORD(ix,x);
+ k = (ix&0x7f800000)>>23; /* extract exponent */
+ if (k==0) { /* 0 or subnormal x */
+ if ((ix&0x7fffffff)==0) return x; /* +-0 */
+ x *= two25;
+ GEN_OCL_GET_FLOAT_WORD(ix,x);
+ k = ((ix&0x7f800000)>>23) - 25;
+ }
+ if (k==0xff) return x+x; /* NaN or Inf */
+ if (n< -50000)
+ return tiny*__gen_ocl_internal_copysign(tiny,x); /*underflow*/
+ if (n> 50000 || k+n > 0xfe)
+ return huge*__gen_ocl_internal_copysign(huge,x); /* overflow */
+ /* Now k and n are bounded we know that k = k+n does not overflow. */
+ k = k+n;
+ if (k > 0) { /* normal result */
+ GEN_OCL_SET_FLOAT_WORD(x,(ix&0x807fffff)|(k<<23));
+ return x;
+ }
+ if (k <= -25)
+ return tiny*__gen_ocl_internal_copysign(tiny,x); /*underflow*/
+ k += 25; /* subnormal result */
+ GEN_OCL_SET_FLOAT_WORD(x,(ix&0x807fffff)|(k<<23));
+ return x*twom25;
+}
INLINE_OVERLOADABLE float __gen_ocl_internal_asin(float x) {
uint ix;
@@ -1751,13 +1796,6 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) { INLINE_OVERLOADABLE float __gen_ocl_internal_atanh(float x) {
return 0.5f * native_sqrt((1 + x) / (1 - x)); } -INLINE_OVERLOADABLE float __gen_ocl_internal_copysign(float x, float y) {
- union { unsigned u; float f; } ux, uy;
- ux.f = x;
- uy.f = y;
- ux.u = (ux.u & 0x7fffffff) | (uy.u & 0x80000000u);
- return ux.f;
-}
INLINE_OVERLOADABLE float __gen_ocl_internal_erf(float x) {
return M_2_SQRTPI_F * (x - __gen_ocl_pow(x, 3) / 3 + __gen_ocl_pow(x, 5) / 10 - __gen_ocl_pow(x, 7) / 42 + __gen_ocl_pow(x, 9) / 216); } @@ -2308,6 +2346,11 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_remainder(float x, float p){
return x;
}
+INLINE_OVERLOADABLE float __gen_ocl_internal_ldexp(float x, int n) {
+ if(!__ocl_finitef(x)||x==(float)0.0) return x;
+ x = __gen_ocl_scalbnf(x,n);
+ return x;
+}
// TODO use llvm intrinsics definitions #define cos native_cos @@ -2338,6 +2381,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_remainder(float x, float p){ #define erfc __gen_ocl_internal_erfc #define fmod __gen_ocl_internal_fmod #define remainder __gen_ocl_internal_remainder
+#define ldexp __gen_ocl_internal_ldexp
PURE CONST float __gen_ocl_mad(float a, float b, float c); INLINE_OVERLOADABLE float mad(float a, float b, float c) {
return __gen_ocl_mad(a, b, c);
@@ -2551,48 +2595,6 @@ INLINE_OVERLOADABLE float remquo(float x, float y, local int *quo) { BODY; } INLINE_OVERLOADABLE float remquo(float x, float y, private int *quo) { BODY; } #undef BODY INLINE_OVERLOADABLE float native_divide(float x, float y) { return x/y; } -INLINE_OVERLOADABLE float ldexp(float x, int n) {
- union { float f; unsigned u; } u;
- u.f = x;
- unsigned s = u.u & 0x80000000u, v = u.u & 0x7fffffff, d = 0;
- if(v >= 0x7f800000)
- return x;
- if(v == 0)
- return x;
- int e = v >> 23;
- v &= 0x7fffff;
- if(e >= 1)
- v |= 0x800000;
- else {
- v <<= 1;
- while(v < 0x800000) {
- v <<= 1;
- e --;
- }
- }
- e = add_sat(e, n);
- if(e >= 255) {
- u.u = s | 0x7f800000;
- return u.f;
- }
- if(e > 0) {
- u.u = s | (e << 23) | (v & 0x7fffff);
- return u.f;
- }
- if(e <= -23) {
- u.u = s;
- return u.f;
- }
- while(e <= 0) {
- d = (d >> 1) | (v << 31);
- v >>= 1;
- e ++;
- }
- if(d > 0x80000000u)
- v ++;
- u.u = s | v;
- return u.f;
-}
INLINE_OVERLOADABLE float pown(float x, int n) {
if (x == 0 && n == 0)
return 1;
--
1.7.10.4
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list