[Beignet] [PATCH 3/3] add llvm intrinsic call translate.
Song, Ruiling
ruiling.song at intel.com
Thu Jan 15 00:43:50 PST 2015
> + case Intrinsic::copysign:
> + {
> + ir::Type srcType = getType(ctx, I.getType());
> + const ir::Register dst = this->getRegister(&I);
> + const ir::Register src0 = this->getRegister(I.getOperand(0));
> + const ir::Register src1 = this->getRegister(I.getOperand(1));
> + const ir::Register cmp = ctx.reg(ir::FAMILY_BOOL);
> +
> + const ir::Register tmp1 = ctx.reg(getFamily(srcType));
> + const ir::Register tmp2 = ctx.reg(getFamily(srcType));
> +
> + const ir::RegisterFamily family = getFamily(srcType);
> + const ir::ImmediateIndex zero =
> ctx.newFloatImmediate((float)0.0);
> + const ir::Register zeroReg = ctx.reg(family);
> + ctx.LOADI(srcType, zeroReg, zero);
> +
> + ctx.GE(srcType, cmp, src1, zeroReg);
> + ctx.ALU1(ir::OP_ABS, srcType, tmp1, src0);
> + ctx.SUB(srcType, tmp2, zeroReg, tmp1);
> + ctx.SEL(srcType, dst, cmp, tmp1, tmp2);
For the copysign implementation, I would still prefer that you use the current implementation in backend/src/libocl/tmpl/ocl_math.tmpl.cl.
Gen hardware does not support denormal numbers, so when the input y is a denormal number, the GE comparison may not return the correct result.
The current implementation in libocl is robust enough for all corner cases. What do you think?
> + }
> + break;
> default: NOT_IMPLEMENTED;
> }
> } else {
> +
> + LibFunc::Func Func;
> + if(isLibFuncFunc(&I, Func))
> + {
> + switch (Func) {
> + case LibFunc::copysignf:
> + {
> + ir::Type srcType = getType(ctx, I.getType());
> + const ir::Register dst = this->getRegister(&I);
> + const ir::Register src0 =
> this->getRegister(I.getOperand(0));
> + const ir::Register src1 =
> this->getRegister(I.getOperand(1));
> + const ir::Register cmp = ctx.reg(ir::FAMILY_BOOL);
> +
> + const ir::Register tmp1 = ctx.reg(getFamily(srcType));
> + const ir::Register tmp2 = ctx.reg(getFamily(srcType));
> +
> + const ir::RegisterFamily family = getFamily(srcType);
> + const ir::ImmediateIndex zero =
> ctx.newFloatImmediate((float)0.0);
> + const ir::Register zeroReg = ctx.reg(family);
> + ctx.LOADI(srcType, zeroReg, zero);
> +
> + ctx.GE(srcType, cmp, src1, zeroReg);
> + ctx.ALU1(ir::OP_ABS, srcType, tmp1, src0);
> + ctx.SUB(srcType, tmp2, zeroReg, tmp1);
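The same comment applies here: I would prefer the existing libocl copysign implementation over this GE/ABS/SUB/SEL sequence.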
> + ctx.SEL(srcType, dst, cmp, tmp1, tmp2);
> + }
> + break;
> + default:
> + GBE_ASSERTM(false, "Unsupported libFuncs");
> + }
> + return;
> + }
> +
More information about the Beignet
mailing list