[Beignet] [PATCH v2 1/4] support built-in functions "hadd", "rhadd"
Song, Ruiling
ruiling.song at intel.com
Tue Jul 2 00:11:47 PDT 2013
LGTM, thanks very much for your patch.
-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Homer Hsing
Sent: Tuesday, July 02, 2013 2:45 PM
To: beignet at lists.freedesktop.org
Cc: Xing, Homer
Subject: [Beignet] [PATCH v2 1/4] support built-in functions "hadd", "rhadd"
The backend now supports the GPU opcode "addc".
Add built-in functions "hadd" and "rhadd".
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen/gen_mesa_disasm.c | 1 +
backend/src/backend/gen_context.cpp | 42 ++++++++++++++++++++++++++++++
backend/src/backend/gen_defs.hpp | 1 +
backend/src/backend/gen_encoder.cpp | 7 +++++
backend/src/backend/gen_encoder.hpp | 1 +
backend/src/backend/gen_insn_selection.cpp | 12 +++++++++
backend/src/backend/gen_insn_selection.hxx | 2 ++
backend/src/ir/instruction.cpp | 2 ++
backend/src/ir/instruction.hpp | 4 +++
backend/src/ir/instruction.hxx | 2 ++
backend/src/llvm/llvm_gen_backend.cpp | 16 ++++++++++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 2 ++
backend/src/ocl_stdlib.h | 37 ++++++++++++++++++++++++++
13 files changed, 129 insertions(+)
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 9a4e283..f65cc30 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -82,6 +82,7 @@ static const struct {
[GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 41cab90..62c6378 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -180,6 +180,48 @@ namespace gbe
const GenRegister src2 = ra->genReg(insn.src(2));
switch (insn.opcode) {
case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
+ case SEL_OP_HADD:
+ {
+ int w = p->curr.execWidth;
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = 0;
+ p->ADDC(dst, src0, src1);
+ p->SHR(dst, dst, GenRegister::immud(1));
+ p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+ p->OR(dst, dst, src2);
+ if (w == 16) {
+ p->curr.quarterControl = 1;
+ p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+ p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
+ p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+ p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
+ }
+ p->pop();
+ break;
+ }
+ case SEL_OP_RHADD:
+ {
+ int w = p->curr.execWidth;
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = 0;
+ p->ADDC(dst, src0, src1);
+ p->ADD(dst, dst, GenRegister::immud(1));
+ p->SHR(dst, dst, GenRegister::immud(1));
+ p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+ p->OR(dst, dst, src2);
+ if (w == 16) {
+ p->curr.quarterControl = 1;
+ p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+ p->ADD(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
+ p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
+ p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+ p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
+ }
+ p->pop();
+ break;
+ }
default: NOT_IMPLEMENTED;
}
}
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 61412c4..5a9bb2d 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -156,6 +156,7 @@ enum opcode {
GEN_OPCODE_LZD = 74,
GEN_OPCODE_FBH = 75,
GEN_OPCODE_FBL = 76,
+ GEN_OPCODE_ADDC = 78,
GEN_OPCODE_SAD2 = 80,
GEN_OPCODE_SADA2 = 81,
GEN_OPCODE_DP4 = 84,
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index e10a04b..25303b4 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -879,6 +879,13 @@ namespace gbe
ALU2(MACH)
ALU3(MAD)
+ void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) {
+ push();
+ curr.accWrEnable = 1;
+ alu2(this, GEN_OPCODE_ADDC, dest, src0, src1);
+ pop();
+ }
+
void GenEncoder::ADD(GenRegister dest, GenRegister src0, GenRegister src1) {
if (src0.type == GEN_TYPE_F ||
(src0.file == GEN_IMMEDIATE_VALUE &&
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 3ff8c97..a7cbc89 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -107,6 +107,7 @@ namespace gbe
ALU2(RSL)
ALU2(ASR)
ALU2(ADD)
+ ALU2(ADDC)
ALU2(MUL)
ALU1(FRC)
ALU2(MAC)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 129ee2b..f356b27 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -435,6 +435,8 @@ namespace gbe
ALU3(MAD)
ALU1(FBH)
ALU1(FBL)
+ ALU3(HADD)
+ ALU3(RHADD)
#undef ALU1
#undef ALU2
#undef ALU3
@@ -1433,6 +1435,16 @@ namespace gbe
sel.MUL(dst, src0, src1);
}
break;
+ case OP_HADD: {
+ GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
+ sel.HADD(dst, src0, src1, temp);
+ break;
+ }
+ case OP_RHADD: {
+ GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
+ sel.RHADD(dst, src0, src1, temp);
+ break;
+ }
default: NOT_IMPLEMENTED;
}
sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index f1a4701..8a81022 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -44,3 +44,5 @@ DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
DECL_SELECTION_IR(FBH, UnaryInstruction)
DECL_SELECTION_IR(FBL, UnaryInstruction)
+DECL_SELECTION_IR(HADD, TernaryInstruction)
+DECL_SELECTION_IR(RHADD, TernaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index c55774f..bd854a4 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1339,6 +1339,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
DECL_EMIT_FUNCTION(OR)
DECL_EMIT_FUNCTION(XOR)
DECL_EMIT_FUNCTION(AND)
+ DECL_EMIT_FUNCTION(HADD)
+ DECL_EMIT_FUNCTION(RHADD)
#undef DECL_EMIT_FUNCTION
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 8aefc92..3389ee0 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -521,6 +521,10 @@ namespace ir {
Instruction FBH(Type type, Register dst, Register src);
/*! fbl.type dst src */
Instruction FBL(Type type, Register dst, Register src);
+ /*! hadd.type dst src */
+ Instruction HADD(Type type, Register dst, Register src0, Register src1);
+ /*! rhadd.type dst src */
+ Instruction RHADD(Type type, Register dst, Register src0, Register src1);
/*! tan.type dst src */
Instruction RCP(Type type, Register dst, Register src);
/*! abs.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 9c4be2e..8df393b 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -74,3 +74,5 @@ DECL_INSN(LABEL, LabelInstruction)
DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
DECL_INSN(FBH, UnaryInstruction)
DECL_INSN(FBL, UnaryInstruction)
+DECL_INSN(HADD, BinaryInstruction)
+DECL_INSN(RHADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index edd912d..88d2dd8 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1791,6 +1791,8 @@ namespace gbe
case GEN_OCL_USUB_SAT_SHORT:
case GEN_OCL_USUB_SAT_INT:
case GEN_OCL_USUB_SAT_LONG:
+ case GEN_OCL_HADD:
+ case GEN_OCL_RHADD:
this->newRegister(&I);
break;
default:
@@ -2182,6 +2184,20 @@ namespace gbe
ctx.SUBSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1);
break;
}
+ case GEN_OCL_HADD: {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.HADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+ break;
+ }
+ case GEN_OCL_RHADD: {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.RHADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+ break;
+ }
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 685d504..89b57fc 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -129,3 +129,5 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
+DECL_LLVM_GEN_FUNCTION(HADD, __gen_ocl_hadd)
+DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 016d469..27e6af6 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4388,6 +4388,43 @@ DEF(16)
#undef DEC8
#undef DEC16
+PURE CONST uint __gen_ocl_hadd(uint x, uint y);
+PURE CONST uint __gen_ocl_rhadd(uint x, uint y);
+#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
+#define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; }
+DEC
+#undef DEF
+#define DEF(type) INLINE_OVERLOADABLE type rhadd(type x, type y) { return (x + y + 1) >> 1; }
+DEC
+#undef DEF
+#undef DEC
+INLINE_OVERLOADABLE int hadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y) >> 1) : __gen_ocl_hadd(x, y); }
+INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); }
+INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd(x, y); }
+INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); }
+#define DEC2(func, type) INLINE_OVERLOADABLE type##2 func(type##2 a, type##2 b) { return (func(a.s0, b.s0), func(a.s1, b.s1)); }
+#define DEC3(func, type) INLINE_OVERLOADABLE type##3 func(type##3 a, type##3 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2)); }
+#define DEC4(func, type) INLINE_OVERLOADABLE type##4 func(type##4 a, type##4 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3)); }
+#define DEC8(func, type) INLINE_OVERLOADABLE type##8 func(type##8 a, type##8 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7)); }
+#define DEC16(func, type) INLINE_OVERLOADABLE type##16 func(type##16 a, type##16 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7), func(a.s8, b.s8), func(a.s9, b.s9), func(a.sa, b.sa), func(a.sb, b.sb), func(a.sc, b.sc), func(a.sd, b.sd), func(a.se, b.se), func(a.sf, b.sf)); }
+#define DEF(func, n) DEC##n(func, char); DEC##n(func, uchar); DEC##n(func, short); DEC##n(func, ushort); DEC##n(func, int); DEC##n(func, uint)
+DEF(hadd, 2)
+DEF(hadd, 3)
+DEF(hadd, 4)
+DEF(hadd, 8)
+DEF(hadd, 16)
+DEF(rhadd, 2)
+DEF(rhadd, 3)
+DEF(rhadd, 4)
+DEF(rhadd, 8)
+DEF(rhadd, 16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
int __gen_ocl_abs(int x);
#define ABS_I(I, CVT) (CVT)__gen_ocl_abs(x.s##I)
#define ABS_VEC1(CVT) (CVT)__gen_ocl_abs(x)
--
1.8.1.2
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list