[Beignet] [PATCH v2 1/4] replace sin/cos with llvm intrinsic.

Zhigang Gong zhigang.gong at linux.intel.com
Thu Feb 5 21:06:42 PST 2015


This version LGTM; I will push it later, thanks.

On Fri, Jan 30, 2015 at 02:24:31PM +0800, xionghu.luo at intel.com wrote:
> From: Luo Xionghu <xionghu.luo at intel.com>
> 
> replace sin/cos non-standard math intrinsic call with llvm intrinsic.
> translate them to llvm.xxx for fast path, refine the calls to call
> emitUnaryCallInst.
> v2: add the file changes (e.g. ocl_math.tmpl.cl and the hxx) that were missed in v1.
> 
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
>  backend/src/libocl/tmpl/ocl_math.tmpl.cl   |  4 +-
>  backend/src/llvm/llvm_gen_backend.cpp      | 59 ++++++++----------------------
>  backend/src/llvm/llvm_gen_ocl_function.hxx |  2 -
>  3 files changed, 18 insertions(+), 47 deletions(-)
> 
> diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> index 49c4efa..8f726ff 100644
> --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> @@ -24,8 +24,8 @@
>  constant int __ocl_math_fastpath_flag = 1;
>  
>  PURE CONST float __gen_ocl_fabs(float x);
> -PURE CONST float __gen_ocl_sin(float x);
> -PURE CONST float __gen_ocl_cos(float x);
> +CONST float __gen_ocl_sin(float x) __asm("llvm.sin" ".f32");
> +CONST float __gen_ocl_cos(float x) __asm("llvm.cos" ".f32");
>  PURE CONST float __gen_ocl_sqrt(float x);
>  PURE CONST float __gen_ocl_rsqrt(float x);
>  PURE CONST float __gen_ocl_log(float x);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index d10d5c0..34c571e 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2794,13 +2794,13 @@ error:
>            case Intrinsic::bswap:
>              this->newRegister(&I);
>            break;
> +          case Intrinsic::fabs:
>            case Intrinsic::sqrt:
>            case Intrinsic::ceil:
>            case Intrinsic::fma:
>            case Intrinsic::trunc:
> -            this->newRegister(&I);
> -          break;
> -          case Intrinsic::fabs:
> +          case Intrinsic::sin:
> +          case Intrinsic::cos:
>              this->newRegister(&I);
>            break;
>            default:
> @@ -2854,8 +2854,6 @@ error:
>        case GEN_OCL_FBH:
>        case GEN_OCL_FBL:
>        case GEN_OCL_CBIT:
> -      case GEN_OCL_COS:
> -      case GEN_OCL_SIN:
>        case GEN_OCL_SQR:
>        case GEN_OCL_RSQ:
>        case GEN_OCL_LOG:
> @@ -3056,6 +3054,13 @@ error:
>      if (Function *F = I.getCalledFunction()) {
>        if (F->getIntrinsicID() != 0) {
>          const ir::Function &fn = ctx.getFunction();
> +
> +        // Get the function arguments
> +        CallSite CS(&I);
> +        CallSite::arg_iterator AI = CS.arg_begin();
> +#if GBE_DEBUG
> +        CallSite::arg_iterator AE = CS.arg_end();
> +#endif /* GBE_DEBUG */
>          switch (F->getIntrinsicID()) {
>            case Intrinsic::stacksave:
>            {
> @@ -3212,29 +3217,6 @@ error:
>              }
>            }
>            break;
> -          case Intrinsic::sqrt:
> -          {
> -            const ir::Register dst = this->getRegister(&I);
> -            const ir::Register src = this->getRegister(I.getOperand(0));
> -            ctx.ALU1(ir::OP_SQR, ir::TYPE_FLOAT, dst, src);
> -          }
> -          break;
> -          case Intrinsic::fabs:
> -          {
> -            ir::Type srcType = getType(ctx, I.getType());
> -            const ir::Register dst = this->getRegister(&I);
> -            const ir::Register src = this->getRegister(I.getOperand(0));
> -            ctx.ALU1(ir::OP_ABS, srcType, dst, src);
> -          }
> -          break;
> -          case Intrinsic::ceil:
> -          {
> -            ir::Type srcType = getType(ctx, I.getType());
> -            const ir::Register dst = this->getRegister(&I);
> -            const ir::Register src = this->getRegister(I.getOperand(0));
> -            ctx.ALU1(ir::OP_RNDU, srcType, dst, src);
> -          }
> -          break;
>            case Intrinsic::ctlz:
>            {
>              Type *llvmDstType = I.getType();
> @@ -3286,19 +3268,12 @@ error:
>              ctx.MAD(srcType, dst, src0, src1, src2);
>            }
>            break;
> -          case Intrinsic::trunc:
> -          {
> -            Type *llvmDstType = I.getType();
> -            Type *llvmSrcType = I.getOperand(0)->getType();
> -            ir::Type dstType = getType(ctx, llvmDstType);
> -            ir::Type srcType = getType(ctx, llvmSrcType);
> -            GBE_ASSERT(srcType == dstType);
> -
> -            const ir::Register dst = this->getRegister(&I);
> -            const ir::Register src = this->getRegister(I.getOperand(0));
> -            ctx.RNDZ(dstType, dst, src);
> -          }
> -          break;
> +          case Intrinsic::sqrt: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
> +          case Intrinsic::ceil: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break;
> +          case Intrinsic::fabs: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
> +          case Intrinsic::trunc: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break;
> +          case Intrinsic::sin: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
> +          case Intrinsic::cos: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
>            default: NOT_IMPLEMENTED;
>          }
>        } else {
> @@ -3367,8 +3342,6 @@ error:
>              ctx.REGION(dst, src, x.getIntegerValue());
>              break;
>            }
> -          case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
> -          case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
>            case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
>            case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break;
>            case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 8ec8336..0ae7ec2 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -20,8 +20,6 @@ DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim)
>  
>  // Math function
>  DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs)
> -DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos)
> -DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)
>  DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt)
>  DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt)
>  DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log)
> -- 
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list