[Beignet] [PATCH 1/3] libocl: Improve precision of pow/powr.
Zhigang Gong
zhigang.gong at linux.intel.com
Sun Dec 21 20:18:25 PST 2014
The whole patchset LGTM, will push latter. Thanks.
On Mon, Dec 22, 2014 at 10:00:38AM +0800, Ruiling Song wrote:
> pow:
> When splitting a float into two floats. normally, we use 0xfffff000 as
> the mask. This leaves 12bit effective mantissa in high bits. after some
> calculation, it seems lost some bit in high bits. so I change the mask
> to 0xffffe000, which only leave 11bit mantissa in the high bits. Then
> the precision can meet OpenCL Spec requirement.
>
> powr:
> powr() defined different edge case behavior in OpenCL Spec 7.5
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/libocl/tmpl/ocl_math.tmpl.cl | 81 ++++++++++++++++++++++++------
> 1 file changed, 67 insertions(+), 14 deletions(-)
>
> diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> index c0b2076..4d05c07 100644
> --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> @@ -1926,7 +1926,6 @@ OVERLOADABLE float __gen_ocl_internal_round(float x) {
> return y;
> }
> OVERLOADABLE float __gen_ocl_internal_ceil(float x) { return __gen_ocl_rndu(x); }
> -OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); }
> OVERLOADABLE float __gen_ocl_internal_rint(float x) {
> return __gen_ocl_rnde(x);
> }
> @@ -3016,6 +3015,7 @@ OVERLOADABLE float __gen_ocl_internal_pow(float x, float y) {
> }
> /* y==zero: x**0 = 1 */
> if(iy==0) return one;
> + /* pow(+1, y) returns 1 for any y, even a NAN */
> if(hx==0x3f800000) return one;
> /* +-NaN return x+y */
> if(ix > 0x7f800000 || iy > 0x7f800000)
> @@ -3115,6 +3115,7 @@ OVERLOADABLE float __gen_ocl_internal_pow(float x, float y) {
> GEN_OCL_SET_FLOAT_WORD(t_h,is+0x00400000+(k<<21));
> t_l = ax - (t_h-bp[k]);
> s_l = v*((u-s_h*t_h)-s_h*t_l);
> +
> /* compute log(ax) */
> s2 = s*s;
> r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
> @@ -3122,7 +3123,7 @@ OVERLOADABLE float __gen_ocl_internal_pow(float x, float y) {
> s2 = s_h*s_h;
> t_h = 3.0f+s2+r;
> GEN_OCL_GET_FLOAT_WORD(is,t_h);
> - GEN_OCL_SET_FLOAT_WORD(t_h,is&0xfffff000);
> + GEN_OCL_SET_FLOAT_WORD(t_h,is&0xffffe000);
> t_l = r-((t_h-3.0f)-s2);
> /* u+v = s*(1+...) */
> u = s_h*t_h;
> @@ -3130,7 +3131,7 @@ OVERLOADABLE float __gen_ocl_internal_pow(float x, float y) {
> /* 2/(3log2)*(s+...) */
> p_h = u+v;
> GEN_OCL_GET_FLOAT_WORD(is,p_h);
> - GEN_OCL_SET_FLOAT_WORD(p_h,is&0xfffff000);
> + GEN_OCL_SET_FLOAT_WORD(p_h,is&0xffffe000);
> p_l = v-(p_h-u);
> z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */
> z_l = cp_l*p_h+p_l*cp+dp_l[k];
> @@ -3138,13 +3139,13 @@ OVERLOADABLE float __gen_ocl_internal_pow(float x, float y) {
> t = (float)n;
> t1 = (((z_h+z_l)+dp_h[k])+t);
> GEN_OCL_GET_FLOAT_WORD(is,t1);
> - GEN_OCL_SET_FLOAT_WORD(t1,is&0xfffff000);
> + GEN_OCL_SET_FLOAT_WORD(t1,is&0xffffe000);
> t2 = z_l-(((t1-t)-dp_h[k])-z_h);
> }
>
> /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
> GEN_OCL_GET_FLOAT_WORD(is,y);
> - GEN_OCL_SET_FLOAT_WORD(y1,is&0xfffff000);
> + GEN_OCL_SET_FLOAT_WORD(y1,is&0xffffe000);
> p_l = (y-y1)*t1+y*t2;
> p_h = y1*t1;
> z = p_l+p_h;
> @@ -3320,6 +3321,54 @@ OVERLOADABLE float remquo(float x, float y, local int *quo) { BODY; }
> OVERLOADABLE float remquo(float x, float y, private int *quo) { BODY; }
> #undef BODY
>
> +OVERLOADABLE float powr(float x, float y) {
> + unsigned int hx, sx, hy, sy;
> +
> + if (__ocl_math_fastpath_flag)
> + return __gen_ocl_pow(x,y);
> + else {
> + if (isnan(x) || isnan(y)) return NAN;
> + GEN_OCL_GET_FLOAT_WORD(hx,x);
> + GEN_OCL_GET_FLOAT_WORD(hy,y);
> + sx = (hx & 0x80000000) >> 31;
> + sy = (hy & 0x80000000) >> 31;
> +
> + if ((hx&0x7fffffff) < 0x00800000) { /* x < 2**-126 */
> + x = 0.0f;/* Gen does not support subnormal number now */
> + hx = hx &0x80000000;
> + }
> + if ((hy&0x7fffffff) < 0x00800000) { /* y < 2**-126 */
> + y = 0.0;/* Gen does not support subnormal number now */
> + hy = hy &0x80000000;
> + }
> +
> + // (x < 0) ** y = NAN (y!=0)
> + if ((sx && (hx & 0x7fffffff))) return NAN;
> +
> + // +/-0 ** +/-0 = NAN
> + if ( !(hx&0x7fffffff) && !(hy&0x7fffffff)) return NAN;
> +
> + // +inf ** +/-0 = NAN
> + if ( ((hx & 0x7f800000) ==0x7f800000) && !(hy&0x7fffffff)) return NAN;
> +
> + // others except nan/inf/0 ** 0 = 1.0
> + if (!(hy&0x7fffffff)) return 1.0f;
> +
> + // +1 ** inf = NAN; +1 ** finite = 1;
> + if (hx == 0x3f800000) {
> + return isinf(y) ? NAN : 1.0f;
> + }
> +
> + if ( !(hx & 0x7fffffff)) {
> + // +/-0 ** y<0 = +inf
> + // +/-0 ** y>0 = +0
> + return sy ? INFINITY : 0.0f;
> + }
> +
> + return __gen_ocl_internal_pow(x,y);
> + }
> +}
> +
> OVERLOADABLE float pown(float x, int n) {
> if (x == 0.f && n == 0)
> return 1.f;
> @@ -3329,15 +3378,19 @@ OVERLOADABLE float pown(float x, int n) {
> }
>
> OVERLOADABLE float pow(float x, float y) {
> - int n;
> - if (x == 0.f && y == 0.f)
> - return 1.f;
> - if (x >= 0.f)
> - return powr(x, y);
> - n = y;
> - if ((float)n == y)//is exact integer
> - return pown(x, n);
> - return NAN;
> + if (!__ocl_math_fastpath_flag)
> + return __gen_ocl_internal_pow(x,y);
> + else {
> + int n;
> + if (x == 0.f && y == 0.f)
> + return 1.f;
> + if (x >= 0.f)
> + return powr(x, y);
> + n = y;
> + if ((float)n == y)//is exact integer
> + return pown(x, n);
> + return NAN;
> + }
> }
>
> OVERLOADABLE float rootn(float x, int n) {
> --
> 1.7.10.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list