[Beignet] [PATCH] add all versions of built-in function "select"
Sun, Yi
yi.sun at intel.com
Tue Sep 3 02:21:23 PDT 2013
Hi Zhigang,
Your patch introduces a lot of compiler warnings, such as the following:
/home/OpenCL/beignet/backend/src/ocl_stdlib.h:14305:68: warning: shift count >= width of type
{return (long2)(select(param0.s0, param1.s0, (long)(param2.s0 & (1 << (sizeof(long)*8 - 1)))), select(param0.s1, param1.s1, (long)(param2.s1 & (1 << (sizeof(long)*8 - 1))))); }
^ ~~~~~~~~~~~~~~~~~~~~
The detail is attached.
Thanks
--Sun, Yi
> -----Original Message-----
> From: beignet-bounces+yi.sun=intel.com at lists.freedesktop.org
> [mailto:beignet-bounces+yi.sun=intel.com at lists.freedesktop.org] On Behalf Of
> Xing, Homer
> Sent: Tuesday, September 3, 2013 3:40 PM
> To: Zhigang Gong
> Cc: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH] add all versions of built-in function "select"
>
> OK. Your patch looks good to me.
>
> -----Original Message-----
> From: Zhigang Gong [mailto:zhigang.gong at linux.intel.com]
> Sent: Tuesday, September 3, 2013 3:41 PM
> To: Xing, Homer
> Cc: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH] add all versions of built-in function "select"
>
> Homer,
>
> It would be better to enhance our autogeneration script to generate the
> select vector functions. The patch is attached; please review it and give
> your comments. Thanks.
>
>
> On Mon, Sep 02, 2013 at 01:03:30PM +0800, Homer Hsing wrote:
> > Some data types were missing from the vector versions of the built-in
> > function "select". This patch adds the missing versions.
> >
> > Signed-off-by: Homer Hsing <homer.xing at intel.com>
> > ---
> > backend/src/ocl_stdlib.tmpl.h | 107
> > +++++++++++++++++++++++++++++++++---------
> > 1 file changed, 84 insertions(+), 23 deletions(-)
> >
> > diff --git a/backend/src/ocl_stdlib.tmpl.h
> > b/backend/src/ocl_stdlib.tmpl.h index 7d8d480..8bb15e6 100644
> > --- a/backend/src/ocl_stdlib.tmpl.h
> > +++ b/backend/src/ocl_stdlib.tmpl.h
> > @@ -1439,29 +1439,90 @@ DEF(float, int) DEF(float, uint) #undef DEF
> >
> > -// This will be optimized out by LLVM and will output LLVM select
> > instructions -#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \
> > -INLINE_OVERLOADABLE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4
> > cond) { \
> > - TYPE4 dst; \
> > - const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \
> > - const TYPE x1 = src1.x; \
> > - const TYPE y0 = src0.y; \
> > - const TYPE y1 = src1.y; \
> > - const TYPE z0 = src0.z; \
> > - const TYPE z1 = src1.z; \
> > - const TYPE w0 = src0.w; \
> > - const TYPE w1 = src1.w; \
> > - dst.x = (cond.x & MASK) ? x1 : x0; \
> > - dst.y = (cond.y & MASK) ? y1 : y0; \
> > - dst.z = (cond.z & MASK) ? z1 : z0; \
> > - dst.w = (cond.w & MASK) ? w1 : w0; \
> > - return dst; \
> > -}
> > -DECL_SELECT4(int4, int, int4, 0x80000000) -DECL_SELECT4(int4, int,
> > uint4, 0x80000000) -DECL_SELECT4(float4, float, int4, 0x80000000)
> > -DECL_SELECT4(float4, float, uint4, 0x80000000) -#undef DECL_SELECT4
> > +#define DEF2(TYPE, COND_TYPE, MASK) \
> > + INLINE_OVERLOADABLE TYPE##2 select(TYPE##2 x, TYPE##2 y,
> COND_TYPE##2 z) { \
> > + return (TYPE##2)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> > + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK))); \
> > + }
> > +
> > +#define DEF3(TYPE, COND_TYPE, MASK) \
> > + INLINE_OVERLOADABLE TYPE##3 select(TYPE##3 x, TYPE##3 y,
> COND_TYPE##3 z) { \
> > + return (TYPE##3)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> > + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> > + select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK))); \
> > + }
> > +
> > +#define DEF4(TYPE, COND_TYPE, MASK) \
> > + INLINE_OVERLOADABLE TYPE##4 select(TYPE##4 x, TYPE##4 y,
> COND_TYPE##4 z) { \
> > + return (TYPE##4)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> > + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> > + select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> > + select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK))); \
> > + }
> > +
> > +#define DEF8(TYPE, COND_TYPE, MASK) \
> > + INLINE_OVERLOADABLE TYPE##8 select(TYPE##8 x, TYPE##8 y,
> COND_TYPE##8 z) { \
> > + return (TYPE##8)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> > + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> > + select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> > + select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK)), \
> > + select(x.s4, y.s4, (COND_TYPE)(z.s4 & MASK)), \
> > + select(x.s5, y.s5, (COND_TYPE)(z.s5 & MASK)), \
> > + select(x.s6, y.s6, (COND_TYPE)(z.s6 & MASK)), \
> > + select(x.s7, y.s7, (COND_TYPE)(z.s7 & MASK))); \
> > + }
> > +
> > +#define DEF16(TYPE, COND_TYPE, MASK) \
> > + INLINE_OVERLOADABLE TYPE##16 select(TYPE##16 x, TYPE##16 y,
> COND_TYPE##16 z) { \
> > + return (TYPE##16)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> > + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> > + select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> > + select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK)), \
> > + select(x.s4, y.s4, (COND_TYPE)(z.s4 & MASK)), \
> > + select(x.s5, y.s5, (COND_TYPE)(z.s5 & MASK)), \
> > + select(x.s6, y.s6, (COND_TYPE)(z.s6 & MASK)), \
> > + select(x.s7, y.s7, (COND_TYPE)(z.s7 & MASK)), \
> > + select(x.s8, y.s8, (COND_TYPE)(z.s8 & MASK)), \
> > + select(x.s9, y.s9, (COND_TYPE)(z.s9 & MASK)), \
> > + select(x.sa, y.sa, (COND_TYPE)(z.sa & MASK)), \
> > + select(x.sb, y.sb, (COND_TYPE)(z.sb & MASK)), \
> > + select(x.sc, y.sc, (COND_TYPE)(z.sc & MASK)), \
> > + select(x.sd, y.sd, (COND_TYPE)(z.sd & MASK)), \
> > + select(x.se, y.se, (COND_TYPE)(z.se & MASK)), \
> > + select(x.sf, y.sf, (COND_TYPE)(z.sf & MASK))); \
> > + }
> > +
> > +#define DEF(TYPE, COND_TYPE, MASK) \
> > + DEF2(TYPE, COND_TYPE, MASK) \
> > + DEF3(TYPE, COND_TYPE, MASK) \
> > + DEF4(TYPE, COND_TYPE, MASK) \
> > + DEF8(TYPE, COND_TYPE, MASK) \
> > + DEF16(TYPE, COND_TYPE, MASK)
> > +
> > +DEF(char, char, 0x80)
> > +DEF(char, uchar, 0x80)
> > +DEF(uchar, char, 0x80)
> > +DEF(uchar, uchar, 0x80)
> > +DEF(short, short, 0x8000)
> > +DEF(short, ushort, 0x8000)
> > +DEF(ushort, short, 0x8000)
> > +DEF(ushort, ushort, 0x8000)
> > +DEF(int, int, 0x80000000)
> > +DEF(int, uint, 0x80000000)
> > +DEF(uint, int, 0x80000000)
> > +DEF(uint, uint, 0x80000000)
> > +DEF(long, long, 0x8000000000000000UL) DEF(long, ulong,
> > +0x8000000000000000UL) DEF(ulong, long, 0x8000000000000000UL)
> > +DEF(ulong, ulong, 0x8000000000000000UL) DEF(float, int, 0x80000000)
> > +DEF(float, uint, 0x80000000) #undef DEF #undef DEF2 #undef DEF3
> > +#undef DEF4 #undef DEF8 #undef DEF16
> >
> >
> > //////////////////////////////////////////////////////////////////////
> > /////// // Common Functions (see 6.11.4 of OCL 1.1 spec)
> > --
> > 1.8.1.2
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
-------------- next part --------------
A non-text attachment was scrubbed...
Name: out.log
Type: application/octet-stream
Size: 69863 bytes
Desc: out.log
URL: <http://lists.freedesktop.org/archives/beignet/attachments/20130903/ddd48967/attachment-0001.obj>
More information about the Beignet
mailing list