[Beignet] [PATCH] add address space qualifier to "remquo"

Xing, Homer homer.xing at intel.com
Wed Jul 24 22:22:23 PDT 2013


I thought your script could auto-generate the vector versions of "frexp" and "fract". Now we know the script can't. I will add the address space qualifiers by hand later ...

-----Original Message-----
From: Zhigang Gong [mailto:zhigang.gong at linux.intel.com] 
Sent: Thursday, July 25, 2013 1:18 PM
To: Xing, Homer
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH] add address space qualifier to "remquo"

Those two commits are incomplete compared with your patch for remquo.
You may want to review your previous patch again. I have copied part of the code below, which should make the problem easier to see.

/* float2 overload of frexp: calls frexp on each component of x (s0, s1) and
 * packs the two results into a float2. exp is cast to int * and offset by the
 * component index, so call k is passed a pointer to the k-th int of *exp.
 * NOTE(review): as shown, the exp pointer carries no address space qualifier. */
INLINE_OVERLOADABLE float2 frexp(float2 x, int2 *exp) {
  return (float2)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp)); }

/* float3 overload of frexp: calls frexp on each component of x (s0..s2) and
 * packs the three results into a float3. exp is cast to int * and offset by
 * the component index, so call k is passed a pointer to the k-th int of *exp.
 * NOTE(review): as shown, the exp pointer carries no address space qualifier. */
INLINE_OVERLOADABLE float3 frexp(float3 x, int3 *exp) {
  return (float3)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp)); }

/* float4 overload of frexp: calls frexp on each component of x (s0..s3) and
 * packs the four results into a float4. exp is cast to int * and offset by
 * the component index, so call k is passed a pointer to the k-th int of *exp.
 * NOTE(review): as shown, the exp pointer carries no address space qualifier. */
INLINE_OVERLOADABLE float4 frexp(float4 x, int4 *exp) {
  return (float4)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp), frexp(x.s3, 3 + (int *)exp)); }

/* float8 overload of frexp: calls frexp on each component of x (s0..s7) and
 * packs the eight results into a float8. exp is cast to int * and offset by
 * the component index, so call k is passed a pointer to the k-th int of *exp.
 * NOTE(review): as shown, the exp pointer carries no address space qualifier. */
INLINE_OVERLOADABLE float8 frexp(float8 x, int8 *exp) {
  return (float8)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp), frexp(x.s3, 3 + (int *)exp), frexp(x.s4, 4 + (int *)exp), frexp(x.s5, 5 + (int *)exp), frexp(x.s6, 6 + (int *)exp), frexp(x.s7, 7 + (int *)exp)); }

/* float16 overload of frexp: calls frexp on each component of x (s0..sf) and
 * packs the sixteen results into a float16. exp is cast to int * and offset by
 * the component index (0..15), so call k is passed a pointer to the k-th int
 * of *exp.
 * NOTE(review): as shown, the exp pointer carries no address space qualifier. */
INLINE_OVERLOADABLE float16 frexp(float16 x, int16 *exp) {
  return (float16)(frexp(x.s0, (int *)exp), frexp(x.s1, 1 + (int *)exp), frexp(x.s2, 2 + (int *)exp), frexp(x.s3, 3 + (int *)exp), frexp(x.s4, 4 + (int *)exp), frexp(x.s5, 5 + (int *)exp), frexp(x.s6, 6 + (int *)exp), frexp(x.s7, 7 + (int *)exp), frexp(x.s8, 8 + (int *)exp), frexp(x.s9, 9 + (int *)exp), frexp(x.sa, 10 + (int *)exp), frexp(x.sb, 11 + (int *)exp), frexp(x.sc, 12 + (int *)exp), frexp(x.sd, 13 + (int *)exp), frexp(x.se, 14 + (int *)exp), frexp(x.sf, 15 + (int *)exp)); }

And the fract part is very similar.

On Thu, Jul 25, 2013 at 04:41:54AM +0000, Xing, Homer wrote:
> Frexp and fract already have address space qualifiers; I added them in my
> commit c79b42a7.
> 
> The commit has been pushed to the master branch ...
> 
> -----Original Message-----
> From: beignet-bounces+homer.xing=intel.com at lists.freedesktop.org 
> [mailto:beignet-bounces+homer.xing=intel.com at lists.freedesktop.org] On 
> Behalf Of Zhigang Gong
> Sent: Wednesday, July 24, 2013 5:27 PM
> To: Xing, Homer
> Cc: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH] add address space qualifier to "remquo"
> 
> LGTM, pushed. Thanks.
> 
> You may also need to do the same thing for frexp and fract.
> 
> On Wed, Jul 24, 2013 at 03:20:56PM +0800, Homer Hsing wrote:
> > renamed the original "remquo" to "__gen_ocl_remquo", added new "remquo"
> > with address space qualifier
> > 
> > Signed-off-by: Homer Hsing <homer.xing at intel.com>
> > ---
> >  backend/src/ocl_stdlib.tmpl.h | 36
> > +++++++++++++++++++++++++-----------
> >  1 file changed, 25 insertions(+), 11 deletions(-)
> > 
> > diff --git a/backend/src/ocl_stdlib.tmpl.h 
> > b/backend/src/ocl_stdlib.tmpl.h index 45883af..84f444b 100644
> > --- a/backend/src/ocl_stdlib.tmpl.h
> > +++ b/backend/src/ocl_stdlib.tmpl.h
> > @@ -928,7 +928,7 @@ INLINE_OVERLOADABLE float16 fract(float16 x, float16 *p) {
> >                     fract(x.sf, 15 + (float *)p));  }
> >  
> > -INLINE_OVERLOADABLE float remquo(float x, float y, int *quo) {
> > +INLINE_OVERLOADABLE float __gen_ocl_remquo(float x, float y, int
> > +*quo) {
> >    uint hx = as_uint(x), ix = hx & 0x7FFFFFFF, hy = as_uint(y), iy = hy & 0x7FFFFFFF;
> >    if (ix > 0x7F800000 || iy > 0x7F800000 || ix == 0x7F800000 || iy == 0)
> >      return nan(0u);
> > @@ -942,26 +942,40 @@ INLINE_OVERLOADABLE float remquo(float x, float y, int *quo) {
> >    return as_float(hr);
> >  }
> >  
> > -INLINE_OVERLOADABLE float2 remquo(float2 x, float2 y, int2 *i) {
> > -  return (float2)(remquo(x.s0, y.s0, (int *)i), remquo(x.s1, y.s1, 
> > 1
> > + (int *)i));
> > +INLINE_OVERLOADABLE float2 __gen_ocl_remquo(float2 x, float2 y, 
> > +int2
> > +*i) {
> > +  return (float2)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), 
> > +__gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i));
> >  }
> >  
> > -INLINE_OVERLOADABLE float3 remquo(float3 x, float3 y, int3 *i) {
> > -  return (float3)(remquo(x.s0, y.s0, (int *)i), remquo(x.s1, y.s1, 
> > 1
> > + (int *)i), remquo(x.s2, y.s2, 2 + (int *)i)); INLINE_OVERLOADABLE 
> > +float3 __gen_ocl_remquo(float3 x, float3 y, int3
> > +*i) {
> > +  return (float3)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), 
> > +__gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i), __gen_ocl_remquo(x.s2, 
> > +y.s2, 2 + (int *)i));
> >  }
> >  
> > -INLINE_OVERLOADABLE float4 remquo(float4 x, float4 y, int4 *i) {
> > -  return (float4)(remquo(x.s0, y.s0, (int *)i), remquo(x.s1, y.s1, 
> > 1
> > + (int *)i), remquo(x.s2, y.s2, 2 + (int *)i), remquo(x.s3, y.s3, 3 
> > + +
> > (int *)i));
> > +INLINE_OVERLOADABLE float4 __gen_ocl_remquo(float4 x, float4 y, 
> > +int4
> > +*i) {
> > +  return (float4)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), 
> > +__gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i), __gen_ocl_remquo(x.s2, 
> > +y.s2, 2 + (int *)i), __gen_ocl_remquo(x.s3, y.s3, 3 + (int *)i));
> >  }
> >  
> > -INLINE_OVERLOADABLE float8 remquo(float8 x, float8 y, int8 *i) {
> > -  return (float8)(remquo(x.s0, y.s0, (int *)i), remquo(x.s1, y.s1, 
> > 1
> > + (int *)i), remquo(x.s2, y.s2, 2 + (int *)i), remquo(x.s3, y.s3, 3 
> > + +
> > (int *)i), remquo(x.s4, y.s4, 4 + (int *)i), remquo(x.s5, y.s5, 5 + 
> > (int *)i), remquo(x.s6, y.s6, 6 + (int *)i), remquo(x.s7, y.s7, 7 + 
> > (int *)i));
> > +INLINE_OVERLOADABLE float8 __gen_ocl_remquo(float8 x, float8 y, 
> > +int8
> > +*i) {
> > +  return (float8)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), 
> > +__gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i), __gen_ocl_remquo(x.s2, 
> > +y.s2, 2 + (int *)i), __gen_ocl_remquo(x.s3, y.s3, 3 + (int *)i), 
> > +__gen_ocl_remquo(x.s4, y.s4, 4 + (int *)i), __gen_ocl_remquo(x.s5, 
> > +y.s5, 5 + (int *)i), __gen_ocl_remquo(x.s6, y.s6, 6 + (int *)i), 
> > +__gen_ocl_remquo(x.s7, y.s7, 7 + (int *)i));
> >  }
> >  
> > -INLINE_OVERLOADABLE float16 remquo(float16 x, float16 y, int16 *i) 
> > {
> > -  return (float16)(remquo(x.s0, y.s0, (int *)i), remquo(x.s1, y.s1, 
> > 1
> > + (int *)i), remquo(x.s2, y.s2, 2 + (int *)i), remquo(x.s3, y.s3, 3 
> > + +
> > (int *)i), remquo(x.s4, y.s4, 4 + (int *)i), remquo(x.s5, y.s5, 5 + 
> > (int *)i), remquo(x.s6, y.s6, 6 + (int *)i), remquo(x.s7, y.s7, 7 + 
> > (int *)i), remquo(x.s8, y.s8, 8 + (int *)i), remquo(x.s9, y.s9, 9 + 
> > (int *)i), remquo(x.sa, y.sa, 10 + (int *)i), remquo(x.sb, y.sb, 11 
> > + (int *)i), remquo(x.sc, y.sc, 12 + (int *)i), remquo(x.sd, y.sd, 
> > 13 + (int *)i), remquo(x.se, y.se, 14 + (int *)i), remquo(x.sf, 
> > y.sf, 15 + (int *)i));
> > +INLINE_OVERLOADABLE float16 __gen_ocl_remquo(float16 x, float16 y,
> > +int16 *i) {
> > +  return (float16)(__gen_ocl_remquo(x.s0, y.s0, (int *)i), 
> > +__gen_ocl_remquo(x.s1, y.s1, 1 + (int *)i), __gen_ocl_remquo(x.s2, 
> > +y.s2, 2 + (int *)i), __gen_ocl_remquo(x.s3, y.s3, 3 + (int *)i), 
> > +__gen_ocl_remquo(x.s4, y.s4, 4 + (int *)i), __gen_ocl_remquo(x.s5, 
> > +y.s5, 5 + (int *)i), __gen_ocl_remquo(x.s6, y.s6, 6 + (int *)i), 
> > +__gen_ocl_remquo(x.s7, y.s7, 7 + (int *)i), __gen_ocl_remquo(x.s8, 
> > +y.s8, 8 + (int *)i), __gen_ocl_remquo(x.s9, y.s9, 9 + (int *)i), 
> > +__gen_ocl_remquo(x.sa, y.sa, 10 + (int *)i), __gen_ocl_remquo(x.sb, 
> > +y.sb, 11 + (int *)i), __gen_ocl_remquo(x.sc, y.sc, 12 + (int *)i), 
> > +__gen_ocl_remquo(x.sd, y.sd, 13 + (int *)i), __gen_ocl_remquo(x.se, 
> > +y.se, 14 + (int *)i), __gen_ocl_remquo(x.sf, y.sf, 15 + (int *)i));
> >  }
> >  
> > +INLINE_OVERLOADABLE float remquo(float x, float y, global int *quo) 
> > +{ return __gen_ocl_remquo(x, y, (int *)quo); } INLINE_OVERLOADABLE 
> > +float remquo(float x, float y, local int *quo) { return 
> > +__gen_ocl_remquo(x, y, (int *)quo); } INLINE_OVERLOADABLE float 
> > +remquo(float x, float y, private int *quo) { return 
> > +__gen_ocl_remquo(x, y, (int *)quo); } #define DEF(n) \
> > +  INLINE_OVERLOADABLE float##n remquo(float##n x, float##n y, 
> > +global int##n *quo) { return __gen_ocl_remquo(x, y, (int##n *)quo); 
> > +} \
> > +  INLINE_OVERLOADABLE float##n remquo(float##n x, float##n y, local 
> > +int##n *quo) { return __gen_ocl_remquo(x, y, (int##n *)quo); } \
> > +  INLINE_OVERLOADABLE float##n remquo(float##n x, float##n y, 
> > +private int##n *quo) { return __gen_ocl_remquo(x, y, (int##n 
> > +*)quo); }
> > +DEF(2)
> > +DEF(3)
> > +DEF(4)
> > +DEF(8)
> > +DEF(16)
> > +#undef DEF
> > +
> >  INLINE_OVERLOADABLE float native_divide(float x, float y) { return 
> > x/y; }  INLINE_OVERLOADABLE float ldexp(float x, int n) {
> >    return __gen_ocl_pow(2, n) * x;
> > --
> > 1.8.1.2
> > 
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list