[Beignet] [PATCH] add all versions of built-in function "select"

Zhigang Gong zhigang.gong at linux.intel.com
Tue Sep 3 00:40:53 PDT 2013


Homer,

It would be better to enhance our autogeneration script to generate the
vector versions of select. The patch is attached; please review it and
give your comments. Thanks.


On Mon, Sep 02, 2013 at 01:03:30PM +0800, Homer Hsing wrote:
> Some data types were missing in the vector versions of the built-in function "select".
> This patch adds the missing versions.
> 
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
>  backend/src/ocl_stdlib.tmpl.h | 107 +++++++++++++++++++++++++++++++++---------
>  1 file changed, 84 insertions(+), 23 deletions(-)
> 
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 7d8d480..8bb15e6 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -1439,29 +1439,90 @@ DEF(float, int)
>  DEF(float, uint)
>  #undef DEF
>  
> -// This will be optimized out by LLVM and will output LLVM select instructions
> -#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \
> -INLINE_OVERLOADABLE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \
> -  TYPE4 dst; \
> -  const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \
> -  const TYPE x1 = src1.x; \
> -  const TYPE y0 = src0.y; \
> -  const TYPE y1 = src1.y; \
> -  const TYPE z0 = src0.z; \
> -  const TYPE z1 = src1.z; \
> -  const TYPE w0 = src0.w; \
> -  const TYPE w1 = src1.w; \
> -  dst.x = (cond.x & MASK) ? x1 : x0; \
> -  dst.y = (cond.y & MASK) ? y1 : y0; \
> -  dst.z = (cond.z & MASK) ? z1 : z0; \
> -  dst.w = (cond.w & MASK) ? w1 : w0; \
> -  return dst; \
> -}
> -DECL_SELECT4(int4, int, int4, 0x80000000)
> -DECL_SELECT4(int4, int, uint4, 0x80000000)
> -DECL_SELECT4(float4, float, int4, 0x80000000)
> -DECL_SELECT4(float4, float, uint4, 0x80000000)
> -#undef DECL_SELECT4
> +#define DEF2(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##2 select(TYPE##2 x, TYPE##2 y, COND_TYPE##2 z) { \
> +    return (TYPE##2)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK))); \
> +  }
> +
> +#define DEF3(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##3 select(TYPE##3 x, TYPE##3 y, COND_TYPE##3 z) { \
> +    return (TYPE##3)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> +      select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK))); \
> +  }
> +
> +#define DEF4(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##4 select(TYPE##4 x, TYPE##4 y, COND_TYPE##4 z) { \
> +    return (TYPE##4)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> +      select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> +      select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK))); \
> +  }
> +
> +#define DEF8(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##8 select(TYPE##8 x, TYPE##8 y, COND_TYPE##8 z) { \
> +    return (TYPE##8)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> +      select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> +      select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK)), \
> +      select(x.s4, y.s4, (COND_TYPE)(z.s4 & MASK)), \
> +      select(x.s5, y.s5, (COND_TYPE)(z.s5 & MASK)), \
> +      select(x.s6, y.s6, (COND_TYPE)(z.s6 & MASK)), \
> +      select(x.s7, y.s7, (COND_TYPE)(z.s7 & MASK))); \
> +  }
> +
> +#define DEF16(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##16 select(TYPE##16 x, TYPE##16 y, COND_TYPE##16 z) { \
> +    return (TYPE##16)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> +      select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> +      select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK)), \
> +      select(x.s4, y.s4, (COND_TYPE)(z.s4 & MASK)), \
> +      select(x.s5, y.s5, (COND_TYPE)(z.s5 & MASK)), \
> +      select(x.s6, y.s6, (COND_TYPE)(z.s6 & MASK)), \
> +      select(x.s7, y.s7, (COND_TYPE)(z.s7 & MASK)), \
> +      select(x.s8, y.s8, (COND_TYPE)(z.s8 & MASK)), \
> +      select(x.s9, y.s9, (COND_TYPE)(z.s9 & MASK)), \
> +      select(x.sa, y.sa, (COND_TYPE)(z.sa & MASK)), \
> +      select(x.sb, y.sb, (COND_TYPE)(z.sb & MASK)), \
> +      select(x.sc, y.sc, (COND_TYPE)(z.sc & MASK)), \
> +      select(x.sd, y.sd, (COND_TYPE)(z.sd & MASK)), \
> +      select(x.se, y.se, (COND_TYPE)(z.se & MASK)), \
> +      select(x.sf, y.sf, (COND_TYPE)(z.sf & MASK))); \
> +  }
> +
> +#define DEF(TYPE, COND_TYPE, MASK) \
> +  DEF2(TYPE, COND_TYPE, MASK) \
> +  DEF3(TYPE, COND_TYPE, MASK) \
> +  DEF4(TYPE, COND_TYPE, MASK) \
> +  DEF8(TYPE, COND_TYPE, MASK) \
> +  DEF16(TYPE, COND_TYPE, MASK)
> +
> +DEF(char, char, 0x80)
> +DEF(char, uchar, 0x80)
> +DEF(uchar, char, 0x80)
> +DEF(uchar, uchar, 0x80)
> +DEF(short, short, 0x8000)
> +DEF(short, ushort, 0x8000)
> +DEF(ushort, short, 0x8000)
> +DEF(ushort, ushort, 0x8000)
> +DEF(int, int, 0x80000000)
> +DEF(int, uint, 0x80000000)
> +DEF(uint, int, 0x80000000)
> +DEF(uint, uint, 0x80000000)
> +DEF(long, long, 0x8000000000000000UL)
> +DEF(long, ulong, 0x8000000000000000UL)
> +DEF(ulong, long, 0x8000000000000000UL)
> +DEF(ulong, ulong, 0x8000000000000000UL)
> +DEF(float, int, 0x80000000)
> +DEF(float, uint, 0x80000000)
> +#undef DEF
> +#undef DEF2
> +#undef DEF3
> +#undef DEF4
> +#undef DEF8
> +#undef DEF16
>  
>  /////////////////////////////////////////////////////////////////////////////
>  // Common Functions (see 6.11.4 of OCL 1.1 spec)
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-GBE-Support-builtin-vector-functions-for-select-auto.patch
Type: text/x-diff
Size: 3282 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/beignet/attachments/20130903/455d496e/attachment-0001.patch>


More information about the Beignet mailing list