[Beignet] [PATCH] backend: refine hypot function
Yang, Rong R
rong.r.yang at intel.com
Fri Jun 9 08:36:15 UTC 2017
Pushed, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Pan, Xiuli
> Sent: Wednesday, June 7, 2017 15:53
> To: Wang, Rander <rander.wang at intel.com>; beignet at freedesktop.org
> Cc: Wang, Rander <rander.wang at intel.com>
> Subject: Re: [Beignet] [PATCH] backend: refine hypot function
>
> LGTM.
> Only test about correctness performance need recheck.
>
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> rander.wang
> Sent: Thursday, May 18, 2017 16:18
> To: beignet at freedesktop.org
> Cc: Wang, Rander <rander.wang at intel.com>
> Subject: [Beignet] [PATCH] backend: refine hypot function
>
> the test OCL_Magnitude of opencv is slow on beignet because
> of hypot. refine the hypot, change algorithm and remove
> unnecessary code to get 30% up
>
> Signed-off-by: rander.wang <rander.wang at intel.com>
> ---
> backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 75
> ++++++++++++++++++++-----
> 1 file changed, 61 insertions(+), 14 deletions(-)
>
> diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
> b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
> index 6b942db..ab03cb4 100644
> --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
> +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
> @@ -2894,12 +2894,36 @@ float __gen_ocl_internal_pown(float x, int y) {
> return as_float((a & (0x807FFFFFu)) | (u & 0x80000000u) | 0x3F000000);
> float __gen_ocl_internal_frexp(float x, int *exp) { BODY; }
>
> +float __fast_scalbnf (float x, int n){
> + /* copy from fdlibm */
> + float two25 = 3.355443200e+07, /* 0x4c000000 */
> + twom25 = 2.9802322388e-08, /* 0x33000000 */
> + huge = 1.0e+30,
> + tiny = 1.0e-30;
> + int k,ix,t,tmp;
> + float retVal;
> +
> + GEN_OCL_GET_FLOAT_WORD(ix,x);
> + k = (ix&0x7f800000)>>23; /* extract exponent */ t = k; k = k+n; tmp
> + = (ix&0x807fffff); x = as_float(tmp |(k << 23)); retVal = (k > 0)?
> + x:0.0f; retVal = (k > 0xfe)? INFINITY:retVal; retVal = (k <= -25)?
> + 0.0f:retVal; x = as_float(tmp | ((k + 25) << 23)); retVal = ((k > 0)
> + && (k <= 25)) ? x*twom25:retVal; retVal = (t == 0)?
> + 0.0f:retVal;
> +
> + return retVal;
> +}
> +
> OVERLOADABLE float hypot(float x, float y) {
> if (__ocl_math_fastpath_flag)
> return __gen_ocl_internal_fastpath_hypot(x, y);
>
> - //return __gen_ocl_sqrt(x*x + y*y);
> - float a,b,an,bn,cn;
> + float a,b,an,bn,cn, retVal;
> int e;
> if (isfinite (x) && isfinite (y)){ /* Determine absolute values. */
> x = __gen_ocl_fabs (x);
> @@ -2907,19 +2931,42 @@ OVERLOADABLE float hypot(float x, float y) {
> /* Find the bigger and the smaller one. */
> a = max(x,y);
> b = min(x,y);
> - /* Now 0 <= b <= a. */
> - /* Write a = an * 2^e, b = bn * 2^e with 0 <= bn <= an < 1. */
> - an = __gen_ocl_internal_frexp (a, &e);
> - bn = ldexp (b, - e);
> - /* Through the normalization, no unneeded overflow or underflow will
> occur here. */
> - cn = __gen_ocl_sqrt (an * an + bn * bn);
> - return ldexp (cn, e);
> - }else{
> - if (isinf (x) || isinf (y)) /* x or y is infinite. Return +Infinity. */
> - return INFINITY;
> - else /* x or y is NaN. Return NaN. */
> - return x + y;
> +
> + bool skip = false;
> + uint u = as_uint(a);
> + uint x = u;
> + if (x == 0) {
> + e = 0;
> + an = x;
> + skip = true;
> + }
> +
> + if (x >= 0x800000) {
> + e = (x >> 23) - 126;
> + an = as_float((u & (0x807FFFFFu)) | 0x3F000000);
> + skip = true;
> + }
> +
> + if(!skip)
> + {
> + int msbOne = clz(x);
> + x <<= (msbOne -8);
> + e = -117 -msbOne;
> + an = as_float((x & (0x807FFFFFu)) | 0x3F000000);
> + }
> +
> + bn = __fast_scalbnf (b, - e);
> + /* Through the normalization, no unneeded overflow or underflow
> will occur here. */
> + cn = __gen_ocl_sqrt (mad(an, an, bn * bn));
> + retVal = __fast_scalbnf (cn, e);
> }
> + else
> + {
> + retVal = NAN; /* x or y is NaN. Return NaN. */
> + retVal = (isinf (x) || isinf (y)) ? INFINITY:retVal; /* x or y is
> + infinite. Return +Infinity. */ }
> +
> + return retVal;
> }
>
> OVERLOADABLE float powr(float x, float y) {
> --
> 2.7.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list