[Beignet] [PATCH v2] support converting 64-bit integer to 32-bit float
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Sep 16 22:27:46 PDT 2013
Pushed, thanks.
On Fri, Sep 13, 2013 at 09:41:02AM +0800, Homer Hsing wrote:
> version 2:
> improve algorithm to convert signed integer
> fix source operand type in llvm_gen_backend
> enable predicate in addWithCarry
> change test case to test signed integer
>
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 45 +++++++++++++++++++++-
> backend/src/backend/gen_context.hpp | 2 +
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_selection.cpp | 17 ++++++++
> backend/src/backend/gen_insn_selection.hxx | 1 +
> backend/src/llvm/llvm_gen_backend.cpp | 2 +-
> kernels/compiler_long_convert.cl | 5 +++
> utests/compiler_long_convert.cpp | 41 ++++++++++++++++++++
> 8 files changed, 112 insertions(+), 2 deletions(-)
>
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 0d584df..a1df963 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -578,6 +578,49 @@ namespace gbe
> p->pop();
> }
>
> + void GenContext::UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp) {
> + p->MOV(dst, high);
> + p->MUL(dst, dst, GenRegister::immf(65536.f * 65536.f));
> + tmp.type = GEN_TYPE_F;
> + p->MOV(tmp, low);
> + p->ADD(dst, dst, tmp);
> + }
> +
> + void GenContext::emitI64ToFloatInstruction(const SelectionInstruction &insn) {
> + GenRegister src = ra->genReg(insn.src(0));
> + GenRegister dest = ra->genReg(insn.dst(0));
> + GenRegister high = ra->genReg(insn.dst(1));
> + GenRegister low = ra->genReg(insn.dst(2));
> + GenRegister tmp = ra->genReg(insn.dst(3));
> + loadTopHalf(high, src);
> + loadBottomHalf(low, src);
> + if(!src.is_signed_int()) {
> + UnsignedI64ToFloat(dest, high, low, tmp);
> + } else {
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.physicalFlag = 1;
> + p->curr.flag = 1;
> + p->curr.subFlag = 0;
> + p->CMP(GEN_CONDITIONAL_GE, high, GenRegister::immud(0x80000000));
> + p->curr.predicate = GEN_PREDICATE_NORMAL;
> + p->NOT(high, high);
> + p->NOT(low, low);
> + p->MOV(tmp, GenRegister::immud(1));
> + addWithCarry(low, low, tmp);
> + p->ADD(high, high, tmp);
> + p->pop();
> + UnsignedI64ToFloat(dest, high, low, tmp);
> + p->push();
> + p->curr.physicalFlag = 1;
> + p->curr.flag = 1;
> + p->curr.subFlag = 0;
> + dest.type = GEN_TYPE_UD;
> + p->OR(dest, dest, GenRegister::immud(0x80000000));
> + p->pop();
> + }
> + }
> +
> void GenContext::emitI64CompareInstruction(const SelectionInstruction &insn) {
> GenRegister src0 = ra->genReg(insn.src(0));
> GenRegister src1 = ra->genReg(insn.src(1));
> @@ -728,11 +771,11 @@ namespace gbe
> int execWidth = p->curr.execWidth;
> GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
> p->push();
> - p->curr.predicate = GEN_PREDICATE_NONE;
> p->curr.execWidth = 8;
> p->ADDC(dest, src0, src1);
> p->MOV(src1, acc0);
> if (execWidth == 16) {
> + p->curr.quarterControl = 1;
> p->ADDC(GenRegister::suboffset(dest, 8),
> GenRegister::suboffset(src0, 8),
> GenRegister::suboffset(src1, 8));
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 4601242..6b37276 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -88,6 +88,7 @@ namespace gbe
> void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
> void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1);
> void saveFlag(GenRegister dest, int flag, int subFlag);
> + void UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp);
>
> /*! Final Gen ISA emission helper functions */
> void emitLabelInstruction(const SelectionInstruction &insn);
> @@ -99,6 +100,7 @@ namespace gbe
> void emitI64HADDInstruction(const SelectionInstruction &insn);
> void emitI64ShiftInstruction(const SelectionInstruction &insn);
> void emitI64CompareInstruction(const SelectionInstruction &insn);
> + void emitI64ToFloatInstruction(const SelectionInstruction &insn);
> void emitCompareInstruction(const SelectionInstruction &insn);
> void emitJumpInstruction(const SelectionInstruction &insn);
> void emitIndirectMoveInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 445b461..49b3170 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -7,6 +7,7 @@ DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2)
> DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2)
> +DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
> DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 1bb1f46..241164b 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -469,6 +469,8 @@ namespace gbe
> #undef ALU2WithTemp
> #undef ALU3
> #undef I64Shift
> + /*! Convert 64-bit integer to 32-bit float */
> + void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]);
> /*! (x+y)>>1 without mod. overflow */
> void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
> /*! Shift a 64-bit integer */
> @@ -1075,6 +1077,14 @@ namespace gbe
> insn->extra.function = conditional;
> }
>
> + void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 4, 1);
> + insn->dst(0) = dst;
> + insn->src(0) = src;
> + for(int i = 0; i < 3; i ++)
> + insn->dst(i + 1) = tmp[i];
> + }
> +
> void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
> SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2);
> insn->dst(0) = dst;
> @@ -2421,6 +2431,13 @@ namespace gbe
> sel.MOV(dst, unpacked);
> } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
> sel.CONVI64_TO_I(dst, src);
> + } else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) {
> + GenRegister tmp[3];
> + for(int i=0; i<3; i++) {
> + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
> + tmp[i].type = GEN_TYPE_UD;
> + }
> + sel.CONVI64_TO_F(dst, src, tmp);
> } else if (dst.isdf()) {
> ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
> sel.MOV_DF(dst, src, sel.selReg(r));
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index d3f21d6..b411ed2 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -68,3 +68,4 @@ DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
> DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction)
> DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction)
> DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
> +DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction)
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 3c04565..c98f563 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1516,7 +1516,7 @@ namespace gbe
> Type *llvmSrcType = I.getOperand(0)->getType();
> const ir::Type dstType = getType(ctx, llvmDstType);
> ir::Type srcType;
> - if (I.getOpcode() == Instruction::ZExt) {
> + if (I.getOpcode() == Instruction::ZExt || I.getOpcode() == Instruction::UIToFP) {
> srcType = getUnsignedType(ctx, llvmSrcType);
> } else {
> srcType = getType(ctx, llvmSrcType);
> diff --git a/kernels/compiler_long_convert.cl b/kernels/compiler_long_convert.cl
> index 03df147..e5f7939 100644
> --- a/kernels/compiler_long_convert.cl
> +++ b/kernels/compiler_long_convert.cl
> @@ -12,3 +12,8 @@ kernel void compiler_long_convert_2(global char *dst1, global short *dst2, globa
> dst2[i] = src[i];
> dst3[i] = src[i];
> }
> +
> +kernel void compiler_long_convert_to_float(global float *dst, global long *src) {
> + int i = get_global_id(0);
> + dst[i] = src[i];
> +}
> diff --git a/utests/compiler_long_convert.cpp b/utests/compiler_long_convert.cpp
> index fe976be..97f9d62 100644
> --- a/utests/compiler_long_convert.cpp
> +++ b/utests/compiler_long_convert.cpp
> @@ -116,3 +116,44 @@ void compiler_long_convert_2(void)
> }
>
> MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_2);
> +
> +// convert 64-bit integer to 32-bit float
> +void compiler_long_convert_to_float(void)
> +{
> + const size_t n = 16;
> + int64_t src[n];
> +
> + // Setup kernel and buffers
> + OCL_CREATE_KERNEL_FROM_FILE("compiler_long_convert", "compiler_long_convert_to_float");
> + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
> + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
> + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
> + globals[0] = n;
> + locals[0] = 16;
> +
> + // Run random tests
> + for (int32_t i = 0; i < (int32_t) n; ++i) {
> + src[i] = -(int64_t)i;
> + }
> + OCL_MAP_BUFFER(1);
> + memcpy(buf_data[1], src, sizeof(src));
> + OCL_UNMAP_BUFFER(1);
> +
> + // Run the kernel on GPU
> + OCL_NDRANGE(1);
> +
> + // Compare
> + OCL_MAP_BUFFER(0);
> + OCL_MAP_BUFFER(1);
> + float *dst = ((float *)buf_data[0]);
> + for (int32_t i = 0; i < (int32_t) n; ++i) {
> + //printf("%f\n", dst[i]);
> + OCL_ASSERT(dst[i] == src[i]);
> + }
> + OCL_UNMAP_BUFFER(0);
> + OCL_UNMAP_BUFFER(1);
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_to_float);
> +
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list