[Beignet] [PATCH 1/2] support built-in functions "hadd", "rhadd"
Homer Hsing
homer.xing at intel.com
Mon Jul 1 19:25:55 PDT 2013
The backend now supports the GPU opcode "addc".
Add the built-in functions "hadd" and "rhadd".
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen/gen_mesa_disasm.c | 1 +
backend/src/backend/gen_context.cpp | 42 ++++++++++++++++++++++++++++++
backend/src/backend/gen_defs.hpp | 1 +
backend/src/backend/gen_encoder.cpp | 7 +++++
backend/src/backend/gen_encoder.hpp | 1 +
backend/src/backend/gen_insn_selection.cpp | 12 +++++++++
backend/src/backend/gen_insn_selection.hxx | 2 ++
backend/src/ir/instruction.cpp | 2 ++
backend/src/ir/instruction.hpp | 4 +++
backend/src/ir/instruction.hxx | 2 ++
backend/src/llvm/llvm_gen_backend.cpp | 16 ++++++++++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 2 ++
backend/src/ocl_stdlib.h | 37 ++++++++++++++++++++++++++
13 files changed, 129 insertions(+)
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 9a4e283..f65cc30 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -82,6 +82,7 @@ static const struct {
[GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 41cab90..62c6378 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -180,6 +180,48 @@ namespace gbe
const GenRegister src2 = ra->genReg(insn.src(2));
switch (insn.opcode) {
case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
+ case SEL_OP_HADD: // halving add: (src0 + src1) >> 1 rebuilt from the ADDC sum plus the value ADDC leaves in acc
+ {
+ int w = p->curr.execWidth; // remember the requested width before it is forced to 8 below
+ p->push();
+ p->curr.execWidth = 8; // emit at width 8, one quarter at a time
+ p->curr.quarterControl = 0;
+ p->ADDC(dst, src0, src1); // dst = sum; acc is written too (ADDC sets accWrEnable) — presumably the carry-out
+ p->SHR(dst, dst, GenRegister::immud(1));
+ p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31)); // src2 is the scratch register: acc value moved up to bit 31
+ p->OR(dst, dst, src2); // merge that bit into the top of the halved sum
+ if (w == 16) { // 16-wide: repeat the same sequence on the second quarter, Qn(reg, 1)
+ p->curr.quarterControl = 1;
+ p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+ p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
+ p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+ p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
+ }
+ p->pop();
+ break;
+ }
+ case SEL_OP_RHADD: // rounding halving add: (src0 + src1 + 1) >> 1, same scheme as SEL_OP_HADD with an extra +1
+ {
+ int w = p->curr.execWidth; // remember the requested width before it is forced to 8 below
+ p->push();
+ p->curr.execWidth = 8; // emit at width 8, one quarter at a time
+ p->curr.quarterControl = 0;
+ p->ADDC(dst, src0, src1); // dst = sum; acc is written too (ADDC sets accWrEnable) — presumably the carry-out
+ p->ADD(dst, dst, GenRegister::immud(1)); // NOTE(review): if the ADDC sum is 0xFFFFFFFF this +1 wraps to 0 and that carry is not the value read from acc below — verify rhadd(0xFFFFFFFF, 0)
+ p->SHR(dst, dst, GenRegister::immud(1));
+ p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31)); // src2 is the scratch register: acc value moved up to bit 31
+ p->OR(dst, dst, src2); // merge that bit into the top of the halved sum
+ if (w == 16) { // 16-wide: repeat the same sequence on the second quarter, Qn(reg, 1)
+ p->curr.quarterControl = 1;
+ p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+ p->ADD(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1)); // NOTE(review): same possible wrap-around as in the first quarter
+ p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
+ p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+ p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
+ }
+ p->pop();
+ break;
+ }
default: NOT_IMPLEMENTED;
}
}
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 61412c4..5a9bb2d 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -156,6 +156,7 @@ enum opcode {
GEN_OPCODE_LZD = 74,
GEN_OPCODE_FBH = 75,
GEN_OPCODE_FBL = 76,
+ GEN_OPCODE_ADDC = 78,
GEN_OPCODE_SAD2 = 80,
GEN_OPCODE_SADA2 = 81,
GEN_OPCODE_DP4 = 84,
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index e10a04b..25303b4 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -879,6 +879,13 @@ namespace gbe
ALU2(MACH)
ALU3(MAD)
+ void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) { // emit GEN_OPCODE_ADDC with accumulator write enabled
+ push(); // push()/pop() bracket the accWrEnable change — presumably save/restore of `curr`
+ curr.accWrEnable = 1; // the HADD/RHADD emitters in this patch read GenRegister::acc() right after ADDC
+ alu2(this, GEN_OPCODE_ADDC, dest, src0, src1);
+ pop();
+ }
+
void GenEncoder::ADD(GenRegister dest, GenRegister src0, GenRegister src1) {
if (src0.type == GEN_TYPE_F ||
(src0.file == GEN_IMMEDIATE_VALUE &&
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 3ff8c97..a7cbc89 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -107,6 +107,7 @@ namespace gbe
ALU2(RSL)
ALU2(ASR)
ALU2(ADD)
+ ALU2(ADDC)
ALU2(MUL)
ALU1(FRC)
ALU2(MAC)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 129ee2b..f356b27 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -435,6 +435,8 @@ namespace gbe
ALU3(MAD)
ALU1(FBH)
ALU1(FBL)
+ ALU3(HADD)
+ ALU3(RHADD)
#undef ALU1
#undef ALU2
#undef ALU3
@@ -1433,6 +1435,16 @@ namespace gbe
sel.MUL(dst, src0, src1);
}
break;
+ case OP_HADD: {
+ GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
+ sel.HADD(dst, src0, src1, temp);
+ break;
+ }
+ case OP_RHADD: {
+ GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
+ sel.RHADD(dst, src0, src1, temp);
+ break;
+ }
default: NOT_IMPLEMENTED;
}
sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index f1a4701..8a81022 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -44,3 +44,5 @@ DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
DECL_SELECTION_IR(FBH, UnaryInstruction)
DECL_SELECTION_IR(FBL, UnaryInstruction)
+DECL_SELECTION_IR(HADD, TernaryInstruction)
+DECL_SELECTION_IR(RHADD, TernaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index c55774f..bd854a4 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1339,6 +1339,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
DECL_EMIT_FUNCTION(OR)
DECL_EMIT_FUNCTION(XOR)
DECL_EMIT_FUNCTION(AND)
+ DECL_EMIT_FUNCTION(HADD)
+ DECL_EMIT_FUNCTION(RHADD)
#undef DECL_EMIT_FUNCTION
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 8aefc92..3389ee0 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -521,6 +521,10 @@ namespace ir {
Instruction FBH(Type type, Register dst, Register src);
/*! fbl.type dst src */
Instruction FBL(Type type, Register dst, Register src);
+ /*! hadd.type dst src0 src1 */
+ Instruction HADD(Type type, Register dst, Register src0, Register src1);
+ /*! rhadd.type dst src0 src1 */
+ Instruction RHADD(Type type, Register dst, Register src0, Register src1);
/*! tan.type dst src */
Instruction RCP(Type type, Register dst, Register src);
/*! abs.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 9c4be2e..8df393b 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -74,3 +74,5 @@ DECL_INSN(LABEL, LabelInstruction)
DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
DECL_INSN(FBH, UnaryInstruction)
DECL_INSN(FBL, UnaryInstruction)
+DECL_INSN(HADD, BinaryInstruction)
+DECL_INSN(RHADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index edd912d..88d2dd8 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1791,6 +1791,8 @@ namespace gbe
case GEN_OCL_USUB_SAT_SHORT:
case GEN_OCL_USUB_SAT_INT:
case GEN_OCL_USUB_SAT_LONG:
+ case GEN_OCL_HADD:
+ case GEN_OCL_RHADD:
this->newRegister(&I);
break;
default:
@@ -2182,6 +2184,20 @@ namespace gbe
ctx.SUBSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1);
break;
}
+ case GEN_OCL_HADD: {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.HADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+ break;
+ }
+ case GEN_OCL_RHADD: {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.RHADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+ break;
+ }
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 685d504..89b57fc 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -129,3 +129,5 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
+DECL_LLVM_GEN_FUNCTION(HADD, __gen_ocl_hadd)
+DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 016d469..27e6af6 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4388,6 +4388,43 @@ DEF(16)
#undef DEC8
#undef DEC16
+PURE CONST uint __gen_ocl_hadd(uint x, uint y);
+PURE CONST uint __gen_ocl_rhadd(uint x, uint y);
+#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
+#define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; }
+DEC
+#undef DEF
+#define DEF(type) INLINE_OVERLOADABLE type rhadd(type x, type y) { return (x + y + 1) >> 1; }
+DEC
+#undef DEF
+#undef DEC
+INLINE_OVERLOADABLE int hadd(int x, int y) { return (x >> 1) + (y >> 1) + (x & y & 1); } /* signed floor((x + y) / 2) without overflow; the uint builtin is wrong for mixed-sign inputs, e.g. hadd(-1, 1) */
+INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); }
+INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x >> 1) + (y >> 1) + ((x | y) & 1); } /* signed floor((x + y + 1) / 2) without overflow; relies on >> being arithmetic for signed int */
+INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); }
+#define DEC2(func, type) INLINE_OVERLOADABLE type##2 func(type##2 a, type##2 b) { return (type##2)(func(a.s0, b.s0), func(a.s1, b.s1)); } /* (type##N)(...) is a vector literal; a bare parenthesised list is a comma expression that keeps only the last value */
+#define DEC3(func, type) INLINE_OVERLOADABLE type##3 func(type##3 a, type##3 b) { return (type##3)(func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2)); }
+#define DEC4(func, type) INLINE_OVERLOADABLE type##4 func(type##4 a, type##4 b) { return (type##4)(func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3)); }
+#define DEC8(func, type) INLINE_OVERLOADABLE type##8 func(type##8 a, type##8 b) { return (type##8)(func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7)); }
+#define DEC16(func, type) INLINE_OVERLOADABLE type##16 func(type##16 a, type##16 b) { return (type##16)(func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7), func(a.s8, b.s8), func(a.s9, b.s9), func(a.sa, b.sa), func(a.sb, b.sb), func(a.sc, b.sc), func(a.sd, b.sd), func(a.se, b.se), func(a.sf, b.sf)); }
+#define DEF(func, n) DEC##n(func, char); DEC##n(func, uchar); DEC##n(func, short); DEC##n(func, ushort); DEC##n(func, int); DEC##n(func, uint)
+DEF(hadd, 2)
+DEF(hadd, 3)
+DEF(hadd, 4)
+DEF(hadd, 8)
+DEF(hadd, 16)
+DEF(rhadd, 2)
+DEF(rhadd, 3)
+DEF(rhadd, 4)
+DEF(rhadd, 8)
+DEF(rhadd, 16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
int __gen_ocl_abs(int x);
#define ABS_I(I, CVT) (CVT)__gen_ocl_abs(x.s##I)
#define ABS_VEC1(CVT) (CVT)__gen_ocl_abs(x)
--
1.8.1.2
More information about the Beignet
mailing list