[Beignet] [patch v4] libocl: reimplement clz with lzd instruction instead of fbh.

Zhigang Gong zhigang.gong at linux.intel.com
Tue Jan 27 00:16:50 PST 2015


This patch LGTM, will push later, thanks.

On Tue, Jan 27, 2015 at 11:39:21AM +0800, xionghu.luo at intel.com wrote:
> From: Luo Xionghu <xionghu.luo at intel.com>
> 
> the fbh style is inefficient.
> 
> v2: use llvm.ctlz to call llvm intrinsic instead of beignet non-standard
> intrinsic call style; remove the non-standard clz call path.
> 
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
>  backend/src/libocl/CMakeLists.txt           |  2 +-
>  backend/src/libocl/src/ocl_clz.ll           | 44 ++++++++++++++++
>  backend/src/libocl/tmpl/ocl_integer.tmpl.cl | 78 +++++------------------------
>  backend/src/libocl/tmpl/ocl_integer.tmpl.h  |  9 ++++
>  4 files changed, 67 insertions(+), 66 deletions(-)
>  create mode 100644 backend/src/libocl/src/ocl_clz.ll
> 
> diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
> index 314d373..16f00ee 100644
> --- a/backend/src/libocl/CMakeLists.txt
> +++ b/backend/src/libocl/CMakeLists.txt
> @@ -181,7 +181,7 @@ MACRO(ADD_LL_TO_BC_TARGET M)
>  	)
>  ENDMACRO(ADD_LL_TO_BC_TARGET)
>  
> -SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset)
> +SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset ocl_clz)
>  FOREACH(f ${OCL_LL_MODULES})
>      COPY_THE_LL(${f})
>      ADD_LL_TO_BC_TARGET(${f})
> diff --git a/backend/src/libocl/src/ocl_clz.ll b/backend/src/libocl/src/ocl_clz.ll
> new file mode 100644
> index 0000000..0863b6f
> --- /dev/null
> +++ b/backend/src/libocl/src/ocl_clz.ll
> @@ -0,0 +1,44 @@
> +declare i8 @llvm.ctlz.i8(i8, i1)
> +declare i16 @llvm.ctlz.i16(i16, i1)
> +declare i32 @llvm.ctlz.i32(i32, i1)
> +declare i64 @llvm.ctlz.i64(i64, i1)
> +
> +define i8 @clz_s8(i8 %x) nounwind readnone alwaysinline {
> +  %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0)
> +  ret i8 %call
> +}
> +
> +define i8 @clz_u8(i8 %x) nounwind readnone alwaysinline {
> +  %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0)
> +  ret i8 %call
> +}
> +
> +define i16 @clz_s16(i16 %x) nounwind readnone alwaysinline {
> +  %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0)
> +  ret i16 %call
> +}
> +
> +define i16 @clz_u16(i16 %x) nounwind readnone alwaysinline {
> +  %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0)
> +  ret i16 %call
> +}
> +
> +define i32 @clz_s32(i32 %x) nounwind readnone alwaysinline {
> +  %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0)
> +  ret i32 %call
> +}
> +
> +define i32 @clz_u32(i32 %x) nounwind readnone alwaysinline {
> +  %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0)
> +  ret i32 %call
> +}
> +
> +define i64 @clz_s64(i64 %x) nounwind readnone alwaysinline {
> +  %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0)
> +  ret i64 %call
> +}
> +
> +define i64 @clz_u64(i64 %x) nounwind readnone alwaysinline {
> +  %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0)
> +  ret i64 %call
> +}
> diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
> index 6da0bab..a5e1dbc 100644
> --- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
> +++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
> @@ -19,6 +19,8 @@
>  
>  PURE CONST uint __gen_ocl_fbh(uint);
>  PURE CONST uint __gen_ocl_fbl(uint);
> +
> +
>  PURE CONST OVERLOADABLE uint __gen_ocl_cbit(uint);
>  PURE CONST OVERLOADABLE uint __gen_ocl_cbit(int);
>  PURE CONST OVERLOADABLE uint __gen_ocl_cbit(ushort);
> @@ -26,71 +28,17 @@ PURE CONST OVERLOADABLE uint __gen_ocl_cbit(short);
>  PURE CONST OVERLOADABLE uint __gen_ocl_cbit(uchar);
>  PURE CONST OVERLOADABLE uint __gen_ocl_cbit(char);
>  
> -OVERLOADABLE char clz(char x) {
> -  if (x < 0)
> -    return 0;
> -  if (x == 0)
> -    return 8;
> -  return __gen_ocl_fbh(x) - 24;
> -}
> -
> -OVERLOADABLE uchar clz(uchar x) {
> -  if (x == 0)
> -    return 8;
> -  return __gen_ocl_fbh(x) - 24;
> -}
> -
> -OVERLOADABLE short clz(short x) {
> -  if (x < 0)
> -    return 0;
> -  if (x == 0)
> -    return 16;
> -  return __gen_ocl_fbh(x) - 16;
> -}
> -
> -OVERLOADABLE ushort clz(ushort x) {
> -  if (x == 0)
> -    return 16;
> -  return __gen_ocl_fbh(x) - 16;
> -}
> -
> -OVERLOADABLE int clz(int x) {
> -  if (x < 0)
> -    return 0;
> -  if (x == 0)
> -    return 32;
> -  return __gen_ocl_fbh(x);
> -}
> -
> -OVERLOADABLE uint clz(uint x) {
> -  if (x == 0)
> -    return 32;
> -  return __gen_ocl_fbh(x);
> -}
> -
> -OVERLOADABLE long clz(long x) {
> -  union { int i[2]; long x; } u;
> -  u.x = x;
> -  if (u.i[1] & 0x80000000u)
> -    return 0;
> -  if (u.i[1] == 0 && u.i[0] == 0)
> -    return 64;
> -  uint v = clz(u.i[1]);
> -  if(v == 32)
> -    v += clz(u.i[0]);
> -  return v;
> -}
> -
> -OVERLOADABLE ulong clz(ulong x) {
> -  if (x == 0)
> -    return 64;
> -  union { uint i[2]; ulong x; } u;
> -  u.x = x;
> -  uint v = clz(u.i[1]);
> -  if(v == 32)
> -    v += clz(u.i[0]);
> -  return v;
> -}
> +#define SDEF(TYPE, TYPE_NAME, SIZE)        \
> +OVERLOADABLE TYPE clz(TYPE x){ return clz_##TYPE_NAME##SIZE(x);}
> +SDEF(char, s, 8);
> +SDEF(uchar, u, 8);
> +SDEF(short, s, 16);
> +SDEF(ushort, u, 16);
> +SDEF(int, s, 32);
> +SDEF(uint, u, 32);
> +SDEF(long, s, 64);
> +SDEF(ulong, u, 64);
> +#undef SDEF
>  
>  #define SDEF(TYPE)        \
>  OVERLOADABLE TYPE popcount(TYPE x){ return __gen_ocl_cbit(x);}
> diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.h b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
> index f067b8d..4b3b5ae 100644
> --- a/backend/src/libocl/tmpl/ocl_integer.tmpl.h
> +++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
> @@ -45,6 +45,15 @@ OVERLOADABLE uint clz(uint x);
>  OVERLOADABLE long clz(long x);
>  OVERLOADABLE ulong clz(ulong x);
>  
> +char   clz_s8(char);
> +uchar  clz_u8(uchar);
> +short  clz_s16(short);
> +ushort clz_u16(ushort);
> +int    clz_s32(int);
> +uint   clz_u32(uint);
> +long   clz_s64(long);
> +ulong  clz_u64(ulong);
> +
>  OVERLOADABLE char popcount(char x);
>  OVERLOADABLE uchar popcount(uchar x);
>  OVERLOADABLE short popcount(short x);
> -- 
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list