[Beignet] [PATCH 3/3] add llvm intrinsic call translate.

Song, Ruiling ruiling.song at intel.com
Thu Jan 15 00:43:50 PST 2015


> +          case Intrinsic::copysign:
> +          {
> +            ir::Type srcType = getType(ctx, I.getType());
> +            const ir::Register dst = this->getRegister(&I);
> +            const ir::Register src0 = this->getRegister(I.getOperand(0));
> +            const ir::Register src1 = this->getRegister(I.getOperand(1));
> +            const ir::Register cmp = ctx.reg(ir::FAMILY_BOOL);
> +
> +            const ir::Register tmp1 = ctx.reg(getFamily(srcType));
> +            const ir::Register tmp2 = ctx.reg(getFamily(srcType));
> +
> +            const ir::RegisterFamily family = getFamily(srcType);
> +            const ir::ImmediateIndex zero =
> ctx.newFloatImmediate((float)0.0);
> +            const ir::Register zeroReg = ctx.reg(family);
> +            ctx.LOADI(srcType, zeroReg, zero);
> +
> +            ctx.GE(srcType, cmp, src1, zeroReg);
> +            ctx.ALU1(ir::OP_ABS, srcType, tmp1, src0);
> +            ctx.SUB(srcType, tmp2, zeroReg, tmp1);
> +            ctx.SEL(srcType, dst, cmp, tmp1, tmp2);
For the copysign implementation, I would still prefer that you use the current implementation in backend/src/libocl/tmpl/ocl_math.tmpl.cl.
Gen hardware does not support denormal numbers, so when the input y is a denormal number, the GE comparison may not return the exact result.
The current implementation in libocl is robust enough for all corner cases. What do you think?
> +          }
> +          break;
>            default: NOT_IMPLEMENTED;
>          }
>        } else {
> +
> +        LibFunc::Func Func;
> +        if(isLibFuncFunc(&I, Func))
> +        {
> +          switch (Func) {
> +            case LibFunc::copysignf:
> +              {
> +                ir::Type srcType = getType(ctx, I.getType());
> +                const ir::Register dst = this->getRegister(&I);
> +                const ir::Register src0 =
> this->getRegister(I.getOperand(0));
> +                const ir::Register src1 =
> this->getRegister(I.getOperand(1));
> +                const ir::Register cmp = ctx.reg(ir::FAMILY_BOOL);
> +
> +                const ir::Register tmp1 = ctx.reg(getFamily(srcType));
> +                const ir::Register tmp2 = ctx.reg(getFamily(srcType));
> +
> +                const ir::RegisterFamily family = getFamily(srcType);
> +                const ir::ImmediateIndex zero =
> ctx.newFloatImmediate((float)0.0);
> +                const ir::Register zeroReg = ctx.reg(family);
> +                ctx.LOADI(srcType, zeroReg, zero);
> +
> +                ctx.GE(srcType, cmp, src1, zeroReg);
> +                ctx.ALU1(ir::OP_ABS, srcType, tmp1, src0);
> +                ctx.SUB(srcType, tmp2, zeroReg, tmp1);


The same comment applies here, in the LibFunc::copysignf case.

> +                ctx.SEL(srcType, dst, cmp, tmp1, tmp2);
> +              }
> +              break;
> +            default:
> +              GBE_ASSERTM(false, "Unsupported libFuncs");
> +          }
> +          return;
> +        }
> + 



More information about the Beignet mailing list