[Beignet] [PATCH v2] add 64-bit version of "hadd"
Zhigang Gong
zhigang.gong at linux.intel.com
Wed Sep 11 00:59:22 PDT 2013
Pushed, thanks.
On Wed, Sep 11, 2013 at 07:29:20AM +0000, Song, Ruiling wrote:
> LGTM.
>
> -----Original Message-----
> From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Homer Hsing
> Sent: Wednesday, September 11, 2013 11:05 AM
> To: beignet at lists.freedesktop.org
> Subject: [Beignet] [PATCH v2] add 64-bit version of "hadd"
>
> v2:
> keep top carry bit
>
> passed piglit test cases:
>
> piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-hadd-1.0.generated.cl
> piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-hadd-1.0.generated.cl
>
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 27 ++++++++++++++++++++++
> backend/src/backend/gen_context.hpp | 1 +
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_selection.cpp | 19 +++++++++++++++
> backend/src/backend/gen_insn_selection.hxx | 1 +
> backend/src/ir/instruction.cpp | 1 +
> backend/src/ir/instruction.hpp | 2 ++
> backend/src/ir/instruction.hxx | 1 +
> backend/src/llvm/llvm_gen_backend.cpp | 11 +++++++++
> backend/src/llvm/llvm_gen_ocl_function.hxx | 3 ++-
> backend/src/ocl_stdlib.tmpl.h | 15 ++++++++----
> 11 files changed, 77 insertions(+), 5 deletions(-)
>
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index b7a7cd6..84cc094 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -458,6 +458,33 @@ namespace gbe
> p->pop();
> }
>
> + void GenContext::emitI64HADDInstruction(const SelectionInstruction &insn) {
> + GenRegister dest = ra->genReg(insn.dst(0));
> + GenRegister x = ra->genReg(insn.src(0));
> + GenRegister y = ra->genReg(insn.src(1));
> + GenRegister a = ra->genReg(insn.dst(1));
> + GenRegister b = ra->genReg(insn.dst(2));
> + GenRegister c = ra->genReg(insn.dst(3));
> + GenRegister d = ra->genReg(insn.dst(4));
> + a.type = b.type = c.type = d.type = GEN_TYPE_UD;
> + loadBottomHalf(a, x);
> + loadBottomHalf(b, y);
> + loadTopHalf(c, x);
> + loadTopHalf(d, y);
> + addWithCarry(a, a, b);
> + addWithCarry(c, c, b);
> + addWithCarry(c, c, d);
> + p->ADD(b, b, d);
> + p->SHR(a, a, GenRegister::immud(1));
> + p->SHL(d, c, GenRegister::immud(31));
> + p->OR(a, a, d);
> + p->SHR(c, c, GenRegister::immud(1));
> + p->SHL(d, b, GenRegister::immud(31));
> + p->OR(c, c, d);
> + storeBottomHalf(dest, a);
> + storeTopHalf(dest, c);
> + }
> +
> void GenContext::emitI64ShiftInstruction(const SelectionInstruction &insn) {
> GenRegister dest = ra->genReg(insn.dst(0));
> GenRegister x = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 14e4550..4601242 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -96,6 +96,7 @@ namespace gbe
> void emitBinaryInstruction(const SelectionInstruction &insn);
> void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
> void emitTernaryInstruction(const SelectionInstruction &insn);
> + void emitI64HADDInstruction(const SelectionInstruction &insn);
> void emitI64ShiftInstruction(const SelectionInstruction &insn);
> void emitI64CompareInstruction(const SelectionInstruction &insn);
> void emitCompareInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index af2b0ee..445b461 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -6,6 +6,7 @@ DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2)
> +DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
> DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 0f62da6..49ef601 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -470,6 +470,8 @@ namespace gbe
> #undef ALU2WithTemp
> #undef ALU3
> #undef I64Shift
> + /*! (x+y)>>1 without mod. overflow */
> + void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
> /*! Shift a 64-bit integer */
> void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
> /*! Compare 64-bit integer */
> @@ -1074,6 +1076,15 @@ namespace gbe
> insn->extra.function = conditional;
> }
>
> + void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2);
> + insn->dst(0) = dst;
> + insn->src(0) = src0;
> + insn->src(1) = src1;
> + for(int i = 0; i < 4; i ++)
> + insn->dst(i + 1) = tmp[i];
> + }
> +
> void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
> SelectionInstruction *insn = this->appendInsn(opcode, 7, 2);
> insn->dst(0) = dst;
> @@ -1668,6 +1679,14 @@ namespace gbe
> sel.RHADD(dst, src0, src1, temp);
> break;
> }
> + case OP_I64HADD:
> + {
> + GenRegister tmp[4];
> + for(int i=0; i<4; i++)
> + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
> + sel.I64HADD(dst, src0, src1, tmp);
> + break;
> + }
> case OP_UPSAMPLE_SHORT:
> sel.UPSAMPLE_SHORT(dst, src0, src1);
> break;
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index ea19fab..0083f7d 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -62,6 +62,7 @@ DECL_SELECTION_IR(FBH, UnaryInstruction) DECL_SELECTION_IR(FBL, UnaryInstruction) DECL_SELECTION_IR(HADD, BinaryWithTempInstruction) DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
> +DECL_SELECTION_IR(I64HADD, I64HADDInstruction)
> DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction) DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction) DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 115d70e..6bbe37e 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -1348,6 +1348,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
> DECL_EMIT_FUNCTION(AND)
> DECL_EMIT_FUNCTION(HADD)
> DECL_EMIT_FUNCTION(RHADD)
> + DECL_EMIT_FUNCTION(I64HADD)
>
> #undef DECL_EMIT_FUNCTION
>
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 40a3d40..fab6d73 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -533,6 +533,8 @@ namespace ir {
> Instruction HADD(Type type, Register dst, Register src0, Register src1);
> /*! rhadd.type dst src */
> Instruction RHADD(Type type, Register dst, Register src0, Register src1);
> + /*! i64hadd.type dst src */
> + Instruction I64HADD(Type type, Register dst, Register src0, Register
> + src1);
> /*! tan.type dst src */
> Instruction RCP(Type type, Register dst, Register src);
> /*! abs.type dst src */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index c15e912..6af0899 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -77,6 +77,7 @@ DECL_INSN(FBH, UnaryInstruction) DECL_INSN(FBL, UnaryInstruction) DECL_INSN(HADD, BinaryInstruction) DECL_INSN(RHADD, BinaryInstruction)
> +DECL_INSN(I64HADD, BinaryInstruction)
> DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction) DECL_INSN(UPSAMPLE_INT, BinaryInstruction) DECL_INSN(UPSAMPLE_LONG, BinaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index e747d00..3c04565 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1841,6 +1841,7 @@ namespace gbe
> case GEN_OCL_USUB_SAT_LONG:
> case GEN_OCL_HADD:
> case GEN_OCL_RHADD:
> + case GEN_OCL_I64HADD:
> this->newRegister(&I);
> break;
> default:
> @@ -2278,6 +2279,16 @@ namespace gbe
> ctx.HADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
> break;
> }
> + case GEN_OCL_I64HADD:
> + {
> + GBE_ASSERT(AI != AE);
> + const ir::Register src0 = this->getRegister(*(AI++));
> + GBE_ASSERT(AI != AE);
> + const ir::Register src1 = this->getRegister(*(AI++));
> + const ir::Register dst = this->getRegister(&I);
> + ctx.I64HADD(ir::TYPE_U64, dst, src0, src1);
> + break;
> + }
> case GEN_OCL_RHADD: {
> GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
> GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index b712860..13d8f66 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -131,8 +131,9 @@ DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj) DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh) DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl) DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs) -DECL_LLVM_GEN_FUNCTION(HADD, __gen_ocl_hadd)
> +DECL_LLVM_GEN_FUNCTION(HADD, _Z14__gen_ocl_haddjj)
> DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd)
> +DECL_LLVM_GEN_FUNCTION(I64HADD, _Z14__gen_ocl_haddmm)
> DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index a4f61ee..e87fea5 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -462,7 +462,7 @@ INLINE_OVERLOADABLE ulong upsample(uint hi, uint lo) {
> return __gen_ocl_upsample((long)hi, (long)lo); }
>
> -PURE CONST uint __gen_ocl_hadd(uint x, uint y);
> +OVERLOADABLE uint __gen_ocl_hadd(uint x, uint y);
> PURE CONST uint __gen_ocl_rhadd(uint x, uint y); #define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort) #define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; } @@ -472,15 +472,22 @@ DEC DEC #undef DEF #undef DEC -INLINE_OVERLOADABLE int hadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y) >> 1) : __gen_ocl_hadd(x, y); }
> +INLINE_OVERLOADABLE int hadd(int x, int y) {
> + return (x < 0 && y > 0) || (x > 0 && y < 0) ?
> + ((x + y) >> 1) :
> + __gen_ocl_hadd((uint)x, (uint)y); }
> INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); } INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd(x, y); } INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); }
> +OVERLOADABLE ulong __gen_ocl_hadd(ulong x, ulong y);
> INLINE_OVERLOADABLE long hadd(long x, long y) {
> - return 0;
> + return (x < 0 && y > 0) || (x > 0 && y < 0) ?
> + ((x + y) >> 1) :
> + __gen_ocl_hadd((ulong)x, (ulong)y);
> }
> INLINE_OVERLOADABLE ulong hadd(ulong x, ulong y) {
> - return 0;
> + return __gen_ocl_hadd(x, y);
> }
> INLINE_OVERLOADABLE long rhadd(long x, long y) {
> return 0;
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list