[Beignet] [PATCH v2 1/4] support built-in functions "hadd", "rhadd"
Zhigang Gong
zhigang.gong at linux.intel.com
Tue Jul 2 00:30:22 PDT 2013
Thanks for the patch and the review comments.
Pushed.
On Tue, Jul 02, 2013 at 07:11:47AM +0000, Song, Ruiling wrote:
> LGTM, thanks very much for your patch.
>
> -----Original Message-----
> From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Homer Hsing
> Sent: Tuesday, July 02, 2013 2:45 PM
> To: beignet at lists.freedesktop.org
> Cc: Xing, Homer
> Subject: [Beignet] [PATCH v2 1/4] support built-in functions "hadd", "rhadd"
>
> backend now supports GPU opcode "addc".
> add built-in functions "hadd", "rhadd".
>
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
> backend/src/backend/gen/gen_mesa_disasm.c | 1 +
> backend/src/backend/gen_context.cpp | 42 ++++++++++++++++++++++++++++++
> backend/src/backend/gen_defs.hpp | 1 +
> backend/src/backend/gen_encoder.cpp | 7 +++++
> backend/src/backend/gen_encoder.hpp | 1 +
> backend/src/backend/gen_insn_selection.cpp | 12 +++++++++
> backend/src/backend/gen_insn_selection.hxx | 2 ++
> backend/src/ir/instruction.cpp | 2 ++
> backend/src/ir/instruction.hpp | 4 +++
> backend/src/ir/instruction.hxx | 2 ++
> backend/src/llvm/llvm_gen_backend.cpp | 16 ++++++++++++
> backend/src/llvm/llvm_gen_ocl_function.hxx | 2 ++
> backend/src/ocl_stdlib.h | 37 ++++++++++++++++++++++++++
> 13 files changed, 129 insertions(+)
>
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
> index 9a4e283..f65cc30 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -82,6 +82,7 @@ static const struct {
>
> [GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
> [GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
> + [GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
> [GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
> [GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
> [GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 41cab90..62c6378 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -180,6 +180,48 @@ namespace gbe
> const GenRegister src2 = ra->genReg(insn.src(2));
> switch (insn.opcode) {
> case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
> + case SEL_OP_HADD:
> + {
> + int w = p->curr.execWidth;
> + p->push();
> + p->curr.execWidth = 8;
> + p->curr.quarterControl = 0;
> + p->ADDC(dst, src0, src1);
> + p->SHR(dst, dst, GenRegister::immud(1));
> + p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> + p->OR(dst, dst, src2);
> + if (w == 16) {
> + p->curr.quarterControl = 1;
> + p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> + p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> + p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> + p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
> + }
> + p->pop();
> + break;
> + }
> + case SEL_OP_RHADD:
> + {
> + int w = p->curr.execWidth;
> + p->push();
> + p->curr.execWidth = 8;
> + p->curr.quarterControl = 0;
> + p->ADDC(dst, src0, src1);
> + p->ADD(dst, dst, GenRegister::immud(1));
> + p->SHR(dst, dst, GenRegister::immud(1));
> + p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> + p->OR(dst, dst, src2);
> + if (w == 16) {
> + p->curr.quarterControl = 1;
> + p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> + p->ADD(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> + p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> + p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> + p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
> + }
> + p->pop();
> + break;
> + }
> default: NOT_IMPLEMENTED;
> }
> }
> diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index 61412c4..5a9bb2d 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -156,6 +156,7 @@ enum opcode {
> GEN_OPCODE_LZD = 74,
> GEN_OPCODE_FBH = 75,
> GEN_OPCODE_FBL = 76,
> + GEN_OPCODE_ADDC = 78,
> GEN_OPCODE_SAD2 = 80,
> GEN_OPCODE_SADA2 = 81,
> GEN_OPCODE_DP4 = 84,
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index e10a04b..25303b4 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -879,6 +879,13 @@ namespace gbe
> ALU2(MACH)
> ALU3(MAD)
>
> + void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) {
> + push();
> + curr.accWrEnable = 1;
> + alu2(this, GEN_OPCODE_ADDC, dest, src0, src1);
> + pop();
> + }
> +
> void GenEncoder::ADD(GenRegister dest, GenRegister src0, GenRegister src1) {
> if (src0.type == GEN_TYPE_F ||
> (src0.file == GEN_IMMEDIATE_VALUE &&
> diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
> index 3ff8c97..a7cbc89 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -107,6 +107,7 @@ namespace gbe
> ALU2(RSL)
> ALU2(ASR)
> ALU2(ADD)
> + ALU2(ADDC)
> ALU2(MUL)
> ALU1(FRC)
> ALU2(MAC)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 129ee2b..f356b27 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -435,6 +435,8 @@ namespace gbe
> ALU3(MAD)
> ALU1(FBH)
> ALU1(FBL)
> + ALU3(HADD)
> + ALU3(RHADD)
> #undef ALU1
> #undef ALU2
> #undef ALU3
> @@ -1433,6 +1435,16 @@ namespace gbe
> sel.MUL(dst, src0, src1);
> }
> break;
> + case OP_HADD: {
> + GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
> + sel.HADD(dst, src0, src1, temp);
> + break;
> + }
> + case OP_RHADD: {
> + GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
> + sel.RHADD(dst, src0, src1, temp);
> + break;
> + }
> default: NOT_IMPLEMENTED;
> }
> sel.pop();
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index f1a4701..8a81022 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -44,3 +44,5 @@ DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
> DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
> DECL_SELECTION_IR(FBH, UnaryInstruction)
> DECL_SELECTION_IR(FBL, UnaryInstruction)
> +DECL_SELECTION_IR(HADD, TernaryInstruction)
> +DECL_SELECTION_IR(RHADD, TernaryInstruction)
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index c55774f..bd854a4 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -1339,6 +1339,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
> DECL_EMIT_FUNCTION(OR)
> DECL_EMIT_FUNCTION(XOR)
> DECL_EMIT_FUNCTION(AND)
> + DECL_EMIT_FUNCTION(HADD)
> + DECL_EMIT_FUNCTION(RHADD)
>
> #undef DECL_EMIT_FUNCTION
>
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 8aefc92..3389ee0 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -521,6 +521,10 @@ namespace ir {
> Instruction FBH(Type type, Register dst, Register src);
> /*! fbl.type dst src */
> Instruction FBL(Type type, Register dst, Register src);
> + /*! hadd.type dst src */
> + Instruction HADD(Type type, Register dst, Register src0, Register src1);
> + /*! rhadd.type dst src */
> + Instruction RHADD(Type type, Register dst, Register src0, Register src1);
> /*! tan.type dst src */
> Instruction RCP(Type type, Register dst, Register src);
> /*! abs.type dst src */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index 9c4be2e..8df393b 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -74,3 +74,5 @@ DECL_INSN(LABEL, LabelInstruction)
> DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
> DECL_INSN(FBH, UnaryInstruction)
> DECL_INSN(FBL, UnaryInstruction)
> +DECL_INSN(HADD, BinaryInstruction)
> +DECL_INSN(RHADD, BinaryInstruction)
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index edd912d..88d2dd8 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1791,6 +1791,8 @@ namespace gbe
> case GEN_OCL_USUB_SAT_SHORT:
> case GEN_OCL_USUB_SAT_INT:
> case GEN_OCL_USUB_SAT_LONG:
> + case GEN_OCL_HADD:
> + case GEN_OCL_RHADD:
> this->newRegister(&I);
> break;
> default:
> @@ -2182,6 +2184,20 @@ namespace gbe
> ctx.SUBSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1);
> break;
> }
> + case GEN_OCL_HADD: {
> + GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
> + GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
> + const ir::Register dst = this->getRegister(&I);
> + ctx.HADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
> + break;
> + }
> + case GEN_OCL_RHADD: {
> + GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
> + GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
> + const ir::Register dst = this->getRegister(&I);
> + ctx.RHADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
> + break;
> + }
> default: break;
> }
> }
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 685d504..89b57fc 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -129,3 +129,5 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
> DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
> DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
> DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
> +DECL_LLVM_GEN_FUNCTION(HADD, __gen_ocl_hadd)
> +DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd)
> diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
> index 016d469..27e6af6 100644
> --- a/backend/src/ocl_stdlib.h
> +++ b/backend/src/ocl_stdlib.h
> @@ -4388,6 +4388,43 @@ DEF(16)
> #undef DEC8
> #undef DEC16
>
> +PURE CONST uint __gen_ocl_hadd(uint x, uint y);
> +PURE CONST uint __gen_ocl_rhadd(uint x, uint y);
> +#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
> +#define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; }
> +DEC
> +#undef DEF
> +#define DEF(type) INLINE_OVERLOADABLE type rhadd(type x, type y) { return (x + y + 1) >> 1; }
> +DEC
> +#undef DEF
> +#undef DEC
> +INLINE_OVERLOADABLE int hadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y) >> 1) : __gen_ocl_hadd(x, y); }
> +INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); }
> +INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd(x, y); }
> +INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); }
> +#define DEC2(func, type) INLINE_OVERLOADABLE type##2 func(type##2 a, type##2 b) { return (func(a.s0, b.s0), func(a.s1, b.s1)); }
> +#define DEC3(func, type) INLINE_OVERLOADABLE type##3 func(type##3 a, type##3 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2)); }
> +#define DEC4(func, type) INLINE_OVERLOADABLE type##4 func(type##4 a, type##4 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3)); }
> +#define DEC8(func, type) INLINE_OVERLOADABLE type##8 func(type##8 a, type##8 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7)); }
> +#define DEC16(func, type) INLINE_OVERLOADABLE type##16 func(type##16 a, type##16 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7), func(a.s8, b.s8), func(a.s9, b.s9), func(a.sa, b.sa), func(a.sb, b.sb), func(a.sc, b.sc), func(a.sd, b.sd), func(a.se, b.se), func(a.sf, b.sf)); }
> +#define DEF(func, n) DEC##n(func, char); DEC##n(func, uchar); DEC##n(func, short); DEC##n(func, ushort); DEC##n(func, int); DEC##n(func, uint)
> +DEF(hadd, 2)
> +DEF(hadd, 3)
> +DEF(hadd, 4)
> +DEF(hadd, 8)
> +DEF(hadd, 16)
> +DEF(rhadd, 2)
> +DEF(rhadd, 3)
> +DEF(rhadd, 4)
> +DEF(rhadd, 8)
> +DEF(rhadd, 16)
> +#undef DEF
> +#undef DEC2
> +#undef DEC3
> +#undef DEC4
> +#undef DEC8
> +#undef DEC16
> +
> int __gen_ocl_abs(int x);
> #define ABS_I(I, CVT) (CVT)__gen_ocl_abs(x.s##I)
> #define ABS_VEC1(CVT) (CVT)__gen_ocl_abs(x)
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list