[Beignet] [PATCH 1/2] Add other unsigned integer types as mask types of shuffle and shuffle2.

Zhigang Gong zhigang.gong at linux.intel.com
Wed Nov 13 00:01:35 PST 2013


LGTM, pushed, thanks.

On Tue, Nov 12, 2013 at 05:17:13PM +0800, Yang Rong wrote:
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/ocl_stdlib.tmpl.h | 103 ++++++++++++++++++++++++------------------
>  1 file changed, 59 insertions(+), 44 deletions(-)
> 
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 50795ef..df663ea 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -1933,16 +1933,16 @@ DECL_UNTYPED_RW_ALL(double)
>  /////////////////////////////////////////////////////////////////////////////
>  // Miscellaneous Vector Functions (see 6.11.12 of OCL 1.1 spec)
>  /////////////////////////////////////////////////////////////////////////////
> -#define DEC2(TYPE, XTYPE) \
> -  INLINE_OVERLOADABLE TYPE##2 shuffle(XTYPE x, uint2 mask) { \
> +#define DEC2(TYPE, XTYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##2 shuffle(XTYPE x, MASKTYPE##2 mask) { \
>      TYPE##2 y; \
>      y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
>      y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
>      return y; \
>    }
>  
> -#define DEC4(TYPE, XTYPE) \
> -  INLINE_OVERLOADABLE TYPE##4 shuffle(XTYPE x, uint4 mask) { \
> +#define DEC4(TYPE, XTYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##4 shuffle(XTYPE x, MASKTYPE##4 mask) { \
>      TYPE##4 y; \
>      y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
>      y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
> @@ -1951,8 +1951,8 @@ DECL_UNTYPED_RW_ALL(double)
>      return y; \
>    }
>  
> -#define DEC8(TYPE, XTYPE) \
> -  INLINE_OVERLOADABLE TYPE##8 shuffle(XTYPE x, uint8 mask) { \
> +#define DEC8(TYPE, XTYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##8 shuffle(XTYPE x, MASKTYPE##8 mask) { \
>      TYPE##8 y; \
>      y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
>      y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
> @@ -1965,8 +1965,8 @@ DECL_UNTYPED_RW_ALL(double)
>      return y; \
>    }
>  
> -#define DEC16(TYPE, XTYPE) \
> -  INLINE_OVERLOADABLE TYPE##16 shuffle(XTYPE x, uint16 mask) { \
> +#define DEC16(TYPE, XTYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##16 shuffle(XTYPE x, MASKTYPE##16 mask) { \
>      TYPE##16 y; \
>      y.s0 = ((TYPE *) &x)[mask.s0 & (vec_step(x) - 1)]; \
>      y.s1 = ((TYPE *) &x)[mask.s1 & (vec_step(x) - 1)]; \
> @@ -1987,11 +1987,18 @@ DECL_UNTYPED_RW_ALL(double)
>      return y; \
>    }
>  
> +#define DEFMASK(TYPE, MASKTYPE) \
> +  DEC2(TYPE, TYPE##2, MASKTYPE); DEC2(TYPE, TYPE##4, MASKTYPE); DEC2(TYPE, TYPE##8, MASKTYPE); DEC2(TYPE, TYPE##16, MASKTYPE) \
> +  DEC4(TYPE, TYPE##2, MASKTYPE); DEC4(TYPE, TYPE##4, MASKTYPE); DEC4(TYPE, TYPE##8, MASKTYPE); DEC4(TYPE, TYPE##16, MASKTYPE) \
> +  DEC8(TYPE, TYPE##2, MASKTYPE); DEC8(TYPE, TYPE##4, MASKTYPE); DEC8(TYPE, TYPE##8, MASKTYPE); DEC8(TYPE, TYPE##16, MASKTYPE) \
> +  DEC16(TYPE, TYPE##2, MASKTYPE); DEC16(TYPE, TYPE##4, MASKTYPE); DEC16(TYPE, TYPE##8, MASKTYPE); DEC16(TYPE, TYPE##16, MASKTYPE)
> +
>  #define DEF(TYPE) \
> -  DEC2(TYPE, TYPE##2); DEC2(TYPE, TYPE##4); DEC2(TYPE, TYPE##8); DEC2(TYPE, TYPE##16) \
> -  DEC4(TYPE, TYPE##2); DEC4(TYPE, TYPE##4); DEC4(TYPE, TYPE##8); DEC4(TYPE, TYPE##16) \
> -  DEC8(TYPE, TYPE##2); DEC8(TYPE, TYPE##4); DEC8(TYPE, TYPE##8); DEC8(TYPE, TYPE##16) \
> -  DEC16(TYPE, TYPE##2); DEC16(TYPE, TYPE##4); DEC16(TYPE, TYPE##8); DEC16(TYPE, TYPE##16)
> +  DEFMASK(TYPE, uchar) \
> +  DEFMASK(TYPE, ushort) \
> +  DEFMASK(TYPE, uint) \
> +  DEFMASK(TYPE, ulong)
> +
>  DEF(char)
>  DEF(uchar)
>  DEF(short)
> @@ -2002,31 +2009,32 @@ DEF(float)
>  DEF(long)
>  DEF(ulong)
>  #undef DEF
> +#undef DEFMASK
>  #undef DEC2
>  #undef DEC4
>  #undef DEC8
>  #undef DEC16
>  
> -#define DEC2(TYPE, ARGTYPE, TEMPTYPE) \
> -  INLINE_OVERLOADABLE TYPE##2 shuffle2(ARGTYPE x, ARGTYPE y, uint2 mask) { \
> +#define DEC2(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##2 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##2 mask) { \
>      return shuffle((TEMPTYPE)(x, y), mask); \
>    }
>  
> -#define DEC2X(TYPE) \
> -  INLINE_OVERLOADABLE TYPE##2 shuffle2(TYPE##16 x, TYPE##16 y, uint2 mask) { \
> +#define DEC2X(TYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##2 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##2 mask) { \
>      TYPE##2 z; \
>      z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
>      z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
>      return z; \
>    }
>  
> -#define DEC4(TYPE, ARGTYPE, TEMPTYPE) \
> -  INLINE_OVERLOADABLE TYPE##4 shuffle2(ARGTYPE x, ARGTYPE y, uint4 mask) { \
> +#define DEC4(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##4 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##4 mask) { \
>      return shuffle((TEMPTYPE)(x, y), mask); \
>    }
>  
> -#define DEC4X(TYPE) \
> -  INLINE_OVERLOADABLE TYPE##4 shuffle2(TYPE##16 x, TYPE##16 y, uint4 mask) { \
> +#define DEC4X(TYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##4 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##4 mask) { \
>      TYPE##4 z; \
>      z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
>      z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
> @@ -2035,13 +2043,13 @@ DEF(ulong)
>      return z; \
>    }
>  
> -#define DEC8(TYPE, ARGTYPE, TEMPTYPE) \
> -  INLINE_OVERLOADABLE TYPE##8 shuffle2(ARGTYPE x, ARGTYPE y, uint8 mask) { \
> +#define DEC8(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##8 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##8 mask) { \
>      return shuffle((TEMPTYPE)(x, y), mask); \
>    }
>  
> -#define DEC8X(TYPE) \
> -  INLINE_OVERLOADABLE TYPE##8 shuffle2(TYPE##16 x, TYPE##16 y, uint8 mask) { \
> +#define DEC8X(TYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##8 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##8 mask) { \
>      TYPE##8 z; \
>      z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
>      z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
> @@ -2054,13 +2062,13 @@ DEF(ulong)
>      return z; \
>    }
>  
> -#define DEC16(TYPE, ARGTYPE, TEMPTYPE) \
> -  INLINE_OVERLOADABLE TYPE##16 shuffle2(ARGTYPE x, ARGTYPE y, uint16 mask) { \
> +#define DEC16(TYPE, ARGTYPE, TEMPTYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##16 shuffle2(ARGTYPE x, ARGTYPE y, MASKTYPE##16 mask) { \
>      return shuffle((TEMPTYPE)(x, y), mask); \
>    }
>  
> -#define DEC16X(TYPE) \
> -  INLINE_OVERLOADABLE TYPE##16 shuffle2(TYPE##16 x, TYPE##16 y, uint16 mask) { \
> +#define DEC16X(TYPE, MASKTYPE) \
> +  INLINE_OVERLOADABLE TYPE##16 shuffle2(TYPE##16 x, TYPE##16 y, MASKTYPE##16 mask) { \
>      TYPE##16 z; \
>      z.s0 = mask.s0 < 16 ? ((TYPE *)&x)[mask.s0] : ((TYPE *)&y)[mask.s0 & 15]; \
>      z.s1 = mask.s1 < 16 ? ((TYPE *)&x)[mask.s1] : ((TYPE *)&y)[mask.s1 & 15]; \
> @@ -2081,23 +2089,29 @@ DEF(ulong)
>      return z; \
>    }
>  
> +#define DEFMASK(TYPE, MASKTYPE) \
> +  DEC2(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
> +  DEC2(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
> +  DEC2(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
> +  DEC2X(TYPE, MASKTYPE) \
> +  DEC4(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
> +  DEC4(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
> +  DEC4(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
> +  DEC4X(TYPE, MASKTYPE) \
> +  DEC8(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
> +  DEC8(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
> +  DEC8(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
> +  DEC8X(TYPE, MASKTYPE) \
> +  DEC16(TYPE, TYPE##2, TYPE##4, MASKTYPE) \
> +  DEC16(TYPE, TYPE##4, TYPE##8, MASKTYPE) \
> +  DEC16(TYPE, TYPE##8, TYPE##16, MASKTYPE) \
> +  DEC16X(TYPE, MASKTYPE)
> +
>  #define DEF(TYPE) \
> -  DEC2(TYPE, TYPE##2, TYPE##4) \
> -  DEC2(TYPE, TYPE##4, TYPE##8) \
> -  DEC2(TYPE, TYPE##8, TYPE##16) \
> -  DEC2X(TYPE) \
> -  DEC4(TYPE, TYPE##2, TYPE##4) \
> -  DEC4(TYPE, TYPE##4, TYPE##8) \
> -  DEC4(TYPE, TYPE##8, TYPE##16) \
> -  DEC4X(TYPE) \
> -  DEC8(TYPE, TYPE##2, TYPE##4) \
> -  DEC8(TYPE, TYPE##4, TYPE##8) \
> -  DEC8(TYPE, TYPE##8, TYPE##16) \
> -  DEC8X(TYPE) \
> -  DEC16(TYPE, TYPE##2, TYPE##4) \
> -  DEC16(TYPE, TYPE##4, TYPE##8) \
> -  DEC16(TYPE, TYPE##8, TYPE##16) \
> -  DEC16X(TYPE)
> +  DEFMASK(TYPE, uchar) \
> +  DEFMASK(TYPE, ushort) \
> +  DEFMASK(TYPE, uint) \
> +  DEFMASK(TYPE, ulong)
>  
>  DEF(char)
>  DEF(uchar)
> @@ -2109,6 +2123,7 @@ DEF(float)
>  DEF(long)
>  DEF(ulong)
>  #undef DEF
> +#undef DEFMASK
>  #undef DEC2
>  #undef DEC2X
>  #undef DEC4
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list