[Beignet] [PATCH] Refined the fmax and fmin builtins.

Zhigang Gong zhigang.gong at linux.intel.com
Mon Mar 24 22:30:54 PDT 2014


LGTM, pushed, thanks.

On Mon, Mar 24, 2014 at 04:27:31PM +0800, Yang Rong wrote:
> GEN's select instruction with cmod .l and .ge already handles the NaN case, so
> use the compare and select instructions in the Gen IR for fmax and fmin. They will
> be optimized into one sel_cmp instruction, so there is no need to check isnan.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/llvm/llvm_gen_backend.cpp      | 18 ++++++++++++++++++
>  backend/src/llvm/llvm_gen_ocl_function.hxx |  2 ++
>  backend/src/ocl_stdlib.tmpl.h              | 10 ++++------
>  3 files changed, 24 insertions(+), 6 deletions(-)
> 
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 49fbc7b..c459f25 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2134,6 +2134,8 @@ namespace gbe
>        case GEN_OCL_UPSAMPLE_INT:
>        case GEN_OCL_UPSAMPLE_LONG:
>        case GEN_OCL_MAD:
> +      case GEN_OCL_FMAX:
> +      case GEN_OCL_FMIN:
>        case GEN_OCL_SADD_SAT_CHAR:
>        case GEN_OCL_SADD_SAT_SHORT:
>        case GEN_OCL_SADD_SAT_INT:
> @@ -2622,6 +2624,22 @@ namespace gbe
>              ctx.MAD(getType(ctx, I.getType()), dst, src0, src1, src2);
>              break;
>            }
> +          case GEN_OCL_FMAX:
> +          case GEN_OCL_FMIN:{
> +            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
> +            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
> +            const ir::Register dst = this->getRegister(&I);
> +            const ir::Register cmp = ctx.reg(ir::FAMILY_BOOL);
> +            //Because cmp's sources are the same as sel's sources, the cmp instruction and
> +            //the sel instruction will be merged into one sel_cmp instruction in the gen
> +            //selection. Emit two instructions here for simplicity.
> +            if(it->second == GEN_OCL_FMAX)
> +              ctx.GE(getType(ctx, I.getType()), cmp, src0, src1);
> +            else
> +              ctx.LT(getType(ctx, I.getType()), cmp, src0, src1);
> +            ctx.SEL(getType(ctx, I.getType()), dst, cmp, src0, src1);
> +            break;
> +          }
>            case GEN_OCL_HADD: {
>              GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
>              GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 00d69f0..5bf794a 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -33,6 +33,8 @@ DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde)
>  DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu)
>  DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd)
>  DECL_LLVM_GEN_FUNCTION(MAD, __gen_ocl_mad)
> +DECL_LLVM_GEN_FUNCTION(FMAX, __gen_ocl_fmax)
> +DECL_LLVM_GEN_FUNCTION(FMIN, __gen_ocl_fmin)
>  
>  // Barrier function
>  DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local)
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index e3ac632..e823b5f 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -3169,6 +3169,8 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_exp10(float x){
>  #define remainder __gen_ocl_internal_remainder
>  #define ldexp __gen_ocl_internal_ldexp
>  PURE CONST float __gen_ocl_mad(float a, float b, float c);
> +PURE CONST float __gen_ocl_fmax(float a, float b);
> +PURE CONST float __gen_ocl_fmin(float a, float b);
>  INLINE_OVERLOADABLE float mad(float a, float b, float c) {
>    return __gen_ocl_mad(a, b, c);
>  }
> @@ -3224,14 +3226,10 @@ DECL_MIN_MAX_CLAMP(long)
>  DECL_MIN_MAX_CLAMP(ulong)
>  #undef DECL_MIN_MAX_CLAMP
>  INLINE_OVERLOADABLE float max(float a, float b) {
> -  if(isnan(b))
> -    return a;
> -  return a > b ? a : b;
> +  return __gen_ocl_fmax(a, b);
>  }
>  INLINE_OVERLOADABLE float min(float a, float b) {
> -  if(isnan(b))
> -    return a;
> -  return a < b ? a : b;
> +  return __gen_ocl_fmin(a, b);
>  }
>  INLINE_OVERLOADABLE float clamp(float v, float l, float u) {
>    return max(min(v, u), l);
> -- 
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list