[Beignet] [PATCH v2 1/4] replace sin/cos with llvm intrinsic.
Zhigang Gong
zhigang.gong at linux.intel.com
Thu Feb 5 21:06:42 PST 2015
This version LGTM, will push later, thanks.
On Fri, Jan 30, 2015 at 02:24:31PM +0800, xionghu.luo at intel.com wrote:
> From: Luo Xionghu <xionghu.luo at intel.com>
>
> replace sin/cos non-standard math intrinsic call with llvm intrinsic.
> translate them to llvm.xxx for fast path, refine the calls to call
> emitUnaryCallInst.
> v2: add some file changes (e.g. ocl_math.tmpl.cl and the hxx) that were missed.
>
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
> backend/src/libocl/tmpl/ocl_math.tmpl.cl | 4 +-
> backend/src/llvm/llvm_gen_backend.cpp | 59 ++++++++----------------------
> backend/src/llvm/llvm_gen_ocl_function.hxx | 2 -
> 3 files changed, 18 insertions(+), 47 deletions(-)
>
> diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> index 49c4efa..8f726ff 100644
> --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
> @@ -24,8 +24,8 @@
> constant int __ocl_math_fastpath_flag = 1;
>
> PURE CONST float __gen_ocl_fabs(float x);
> -PURE CONST float __gen_ocl_sin(float x);
> -PURE CONST float __gen_ocl_cos(float x);
> +CONST float __gen_ocl_sin(float x) __asm("llvm.sin" ".f32");
> +CONST float __gen_ocl_cos(float x) __asm("llvm.cos" ".f32");
> PURE CONST float __gen_ocl_sqrt(float x);
> PURE CONST float __gen_ocl_rsqrt(float x);
> PURE CONST float __gen_ocl_log(float x);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index d10d5c0..34c571e 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2794,13 +2794,13 @@ error:
> case Intrinsic::bswap:
> this->newRegister(&I);
> break;
> + case Intrinsic::fabs:
> case Intrinsic::sqrt:
> case Intrinsic::ceil:
> case Intrinsic::fma:
> case Intrinsic::trunc:
> - this->newRegister(&I);
> - break;
> - case Intrinsic::fabs:
> + case Intrinsic::sin:
> + case Intrinsic::cos:
> this->newRegister(&I);
> break;
> default:
> @@ -2854,8 +2854,6 @@ error:
> case GEN_OCL_FBH:
> case GEN_OCL_FBL:
> case GEN_OCL_CBIT:
> - case GEN_OCL_COS:
> - case GEN_OCL_SIN:
> case GEN_OCL_SQR:
> case GEN_OCL_RSQ:
> case GEN_OCL_LOG:
> @@ -3056,6 +3054,13 @@ error:
> if (Function *F = I.getCalledFunction()) {
> if (F->getIntrinsicID() != 0) {
> const ir::Function &fn = ctx.getFunction();
> +
> + // Get the function arguments
> + CallSite CS(&I);
> + CallSite::arg_iterator AI = CS.arg_begin();
> +#if GBE_DEBUG
> + CallSite::arg_iterator AE = CS.arg_end();
> +#endif /* GBE_DEBUG */
> switch (F->getIntrinsicID()) {
> case Intrinsic::stacksave:
> {
> @@ -3212,29 +3217,6 @@ error:
> }
> }
> break;
> - case Intrinsic::sqrt:
> - {
> - const ir::Register dst = this->getRegister(&I);
> - const ir::Register src = this->getRegister(I.getOperand(0));
> - ctx.ALU1(ir::OP_SQR, ir::TYPE_FLOAT, dst, src);
> - }
> - break;
> - case Intrinsic::fabs:
> - {
> - ir::Type srcType = getType(ctx, I.getType());
> - const ir::Register dst = this->getRegister(&I);
> - const ir::Register src = this->getRegister(I.getOperand(0));
> - ctx.ALU1(ir::OP_ABS, srcType, dst, src);
> - }
> - break;
> - case Intrinsic::ceil:
> - {
> - ir::Type srcType = getType(ctx, I.getType());
> - const ir::Register dst = this->getRegister(&I);
> - const ir::Register src = this->getRegister(I.getOperand(0));
> - ctx.ALU1(ir::OP_RNDU, srcType, dst, src);
> - }
> - break;
> case Intrinsic::ctlz:
> {
> Type *llvmDstType = I.getType();
> @@ -3286,19 +3268,12 @@ error:
> ctx.MAD(srcType, dst, src0, src1, src2);
> }
> break;
> - case Intrinsic::trunc:
> - {
> - Type *llvmDstType = I.getType();
> - Type *llvmSrcType = I.getOperand(0)->getType();
> - ir::Type dstType = getType(ctx, llvmDstType);
> - ir::Type srcType = getType(ctx, llvmSrcType);
> - GBE_ASSERT(srcType == dstType);
> -
> - const ir::Register dst = this->getRegister(&I);
> - const ir::Register src = this->getRegister(I.getOperand(0));
> - ctx.RNDZ(dstType, dst, src);
> - }
> - break;
> + case Intrinsic::sqrt: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
> + case Intrinsic::ceil: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break;
> + case Intrinsic::fabs: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
> + case Intrinsic::trunc: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break;
> + case Intrinsic::sin: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
> + case Intrinsic::cos: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
> default: NOT_IMPLEMENTED;
> }
> } else {
> @@ -3367,8 +3342,6 @@ error:
> ctx.REGION(dst, src, x.getIntegerValue());
> break;
> }
> - case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
> - case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
> case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
> case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break;
> case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 8ec8336..0ae7ec2 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -20,8 +20,6 @@ DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim)
>
> // Math function
> DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs)
> -DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos)
> -DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)
> DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt)
> DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt)
> DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log)
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list