[Beignet] [PATCH] add all versions of built-in function "select"
Xing, Homer
homer.xing at intel.com
Tue Sep 3 00:40:11 PDT 2013
OK. Your patch looks good to me.
-----Original Message-----
From: Zhigang Gong [mailto:zhigang.gong at linux.intel.com]
Sent: Tuesday, September 3, 2013 3:41 PM
To: Xing, Homer
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH] add all versions of built-in function "select"
Homer,
It would be better to enhance our autogeneration script to generate the select vector functions. The patch is attached; please review and give your comments, thanks.
On Mon, Sep 02, 2013 at 01:03:30PM +0800, Homer Hsing wrote:
> Some data types were missing in the vector versions of the built-in function "select".
> This patch adds the missing versions.
>
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
> backend/src/ocl_stdlib.tmpl.h | 107 +++++++++++++++++++++++++++++++++---------
> 1 file changed, 84 insertions(+), 23 deletions(-)
>
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 7d8d480..8bb15e6 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -1439,29 +1439,90 @@ DEF(float, int) DEF(float, uint) #undef DEF
>
> -// This will be optimized out by LLVM and will output LLVM select instructions
> -#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \
> -INLINE_OVERLOADABLE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \
> - TYPE4 dst; \
> - const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \
> - const TYPE x1 = src1.x; \
> - const TYPE y0 = src0.y; \
> - const TYPE y1 = src1.y; \
> - const TYPE z0 = src0.z; \
> - const TYPE z1 = src1.z; \
> - const TYPE w0 = src0.w; \
> - const TYPE w1 = src1.w; \
> - dst.x = (cond.x & MASK) ? x1 : x0; \
> - dst.y = (cond.y & MASK) ? y1 : y0; \
> - dst.z = (cond.z & MASK) ? z1 : z0; \
> - dst.w = (cond.w & MASK) ? w1 : w0; \
> - return dst; \
> -}
> -DECL_SELECT4(int4, int, int4, 0x80000000)
> -DECL_SELECT4(int4, int, uint4, 0x80000000)
> -DECL_SELECT4(float4, float, int4, 0x80000000)
> -DECL_SELECT4(float4, float, uint4, 0x80000000)
> -#undef DECL_SELECT4
> +#define DEF2(TYPE, COND_TYPE, MASK) \
> + INLINE_OVERLOADABLE TYPE##2 select(TYPE##2 x, TYPE##2 y, COND_TYPE##2 z) { \
> + return (TYPE##2)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK))); \
> + }
> +
> +#define DEF3(TYPE, COND_TYPE, MASK) \
> + INLINE_OVERLOADABLE TYPE##3 select(TYPE##3 x, TYPE##3 y, COND_TYPE##3 z) { \
> + return (TYPE##3)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> + select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK))); \
> + }
> +
> +#define DEF4(TYPE, COND_TYPE, MASK) \
> + INLINE_OVERLOADABLE TYPE##4 select(TYPE##4 x, TYPE##4 y, COND_TYPE##4 z) { \
> + return (TYPE##4)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> + select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> + select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK))); \
> + }
> +
> +#define DEF8(TYPE, COND_TYPE, MASK) \
> + INLINE_OVERLOADABLE TYPE##8 select(TYPE##8 x, TYPE##8 y, COND_TYPE##8 z) { \
> + return (TYPE##8)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> + select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> + select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK)), \
> + select(x.s4, y.s4, (COND_TYPE)(z.s4 & MASK)), \
> + select(x.s5, y.s5, (COND_TYPE)(z.s5 & MASK)), \
> + select(x.s6, y.s6, (COND_TYPE)(z.s6 & MASK)), \
> + select(x.s7, y.s7, (COND_TYPE)(z.s7 & MASK))); \
> + }
> +
> +#define DEF16(TYPE, COND_TYPE, MASK) \
> + INLINE_OVERLOADABLE TYPE##16 select(TYPE##16 x, TYPE##16 y, COND_TYPE##16 z) { \
> + return (TYPE##16)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> + select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> + select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> + select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK)), \
> + select(x.s4, y.s4, (COND_TYPE)(z.s4 & MASK)), \
> + select(x.s5, y.s5, (COND_TYPE)(z.s5 & MASK)), \
> + select(x.s6, y.s6, (COND_TYPE)(z.s6 & MASK)), \
> + select(x.s7, y.s7, (COND_TYPE)(z.s7 & MASK)), \
> + select(x.s8, y.s8, (COND_TYPE)(z.s8 & MASK)), \
> + select(x.s9, y.s9, (COND_TYPE)(z.s9 & MASK)), \
> + select(x.sa, y.sa, (COND_TYPE)(z.sa & MASK)), \
> + select(x.sb, y.sb, (COND_TYPE)(z.sb & MASK)), \
> + select(x.sc, y.sc, (COND_TYPE)(z.sc & MASK)), \
> + select(x.sd, y.sd, (COND_TYPE)(z.sd & MASK)), \
> + select(x.se, y.se, (COND_TYPE)(z.se & MASK)), \
> + select(x.sf, y.sf, (COND_TYPE)(z.sf & MASK))); \
> + }
> +
> +#define DEF(TYPE, COND_TYPE, MASK) \
> + DEF2(TYPE, COND_TYPE, MASK) \
> + DEF3(TYPE, COND_TYPE, MASK) \
> + DEF4(TYPE, COND_TYPE, MASK) \
> + DEF8(TYPE, COND_TYPE, MASK) \
> + DEF16(TYPE, COND_TYPE, MASK)
> +
> +DEF(char, char, 0x80)
> +DEF(char, uchar, 0x80)
> +DEF(uchar, char, 0x80)
> +DEF(uchar, uchar, 0x80)
> +DEF(short, short, 0x8000)
> +DEF(short, ushort, 0x8000)
> +DEF(ushort, short, 0x8000)
> +DEF(ushort, ushort, 0x8000)
> +DEF(int, int, 0x80000000)
> +DEF(int, uint, 0x80000000)
> +DEF(uint, int, 0x80000000)
> +DEF(uint, uint, 0x80000000)
> +DEF(long, long, 0x8000000000000000UL)
> +DEF(long, ulong, 0x8000000000000000UL)
> +DEF(ulong, long, 0x8000000000000000UL)
> +DEF(ulong, ulong, 0x8000000000000000UL)
> +DEF(float, int, 0x80000000)
> +DEF(float, uint, 0x80000000)
> +#undef DEF
> +#undef DEF2
> +#undef DEF3
> +#undef DEF4
> +#undef DEF8
> +#undef DEF16
>
>
> //////////////////////////////////////////////////////////////////////
> /////// // Common Functions (see 6.11.4 of OCL 1.1 spec)
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list