[Beignet] [PATCH V2 1/2] Backend: refine mix with hardware lrp function
Pan Xiuli
xiuli.pan at intel.com
Wed Oct 21 20:21:21 PDT 2015
The EU supports an lrp function that is similar to mix, but only
for float, so refine only the float-related mix with lrp.
There will be small errors with mix now that it uses lrp.
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
backend/src/backend/gen/gen_mesa_disasm.c | 1 +
backend/src/backend/gen_context.cpp | 1 +
backend/src/backend/gen_defs.hpp | 1 +
backend/src/backend/gen_encoder.cpp | 1 +
backend/src/backend/gen_encoder.hpp | 1 +
backend/src/backend/gen_insn_selection.cpp | 6 ++++++
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/ir/context.hpp | 1 +
backend/src/ir/instruction.cpp | 4 ++++
backend/src/ir/instruction.hpp | 2 ++
backend/src/ir/instruction.hxx | 1 +
backend/src/libocl/tmpl/ocl_common.tmpl.cl | 3 ++-
backend/src/llvm/llvm_gen_backend.cpp | 13 +++++++++++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 3 +++
14 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 5220233..8824f3a 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -77,6 +77,7 @@ static const struct {
[GEN_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
+ [GEN_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
[GEN_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index baf3897..cff56c9 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1795,6 +1795,7 @@ namespace gbe
const GenRegister src2 = ra->genReg(insn.src(2));
switch (insn.opcode) {
case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
+ case SEL_OP_LRP: p->LRP(dst, src0, src1, src2); break;
default: NOT_IMPLEMENTED;
}
}
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 1ca148c..31f2666 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -174,6 +174,7 @@ enum opcode {
GEN_OPCODE_LINE = 89,
GEN_OPCODE_PLN = 90,
GEN_OPCODE_MAD = 91,
+ GEN_OPCODE_LRP = 92,
GEN_OPCODE_NOP = 126,
};
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index cac29e8..4fbc31b 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -879,6 +879,7 @@ namespace gbe
ALU2(PLN)
ALU2(MACH)
ALU3(MAD)
+ ALU3(LRP)
// ALU2(BRC)
// ALU1(ENDIF)
// ALU1(IF)
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 79e7b6e..c720bdf 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -125,6 +125,7 @@ namespace gbe
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
+ ALU3(LRP)
//ALU2(MOV_DF);
ALU2(BRC)
ALU1(BRD)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 6833457..ce936df 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -546,6 +546,7 @@ namespace gbe
ALU2(MACH)
ALU1(LZD)
ALU3(MAD)
+ ALU3(LRP)
ALU2WithTemp(MUL_HI)
ALU1(FBH)
ALU1(FBL)
@@ -4889,6 +4890,11 @@ namespace gbe
sel.MAD(dst, src2, src0, src1);
break;
}
+ case OP_LRP:
+ {
+ sel.LRP(dst, src0, src1, src2);
+ break;
+ }
default:
NOT_IMPLEMENTED;
}
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index adbb137..cf6bb1f 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -45,6 +45,7 @@ DECL_SELECTION_IR(CMP, CompareInstruction)
DECL_SELECTION_IR(I64CMP, I64CompareInstruction)
DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
DECL_SELECTION_IR(MAD, TernaryInstruction)
+DECL_SELECTION_IR(LRP, TernaryInstruction)
DECL_SELECTION_IR(JMPI, JumpInstruction)
DECL_SELECTION_IR(EOT, EotInstruction)
DECL_SELECTION_IR(INDIRECT_MOVE, IndirectMoveInstruction)
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index 0f7ded4..cf9d62b 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -175,6 +175,7 @@ namespace ir {
DECL_THREE_SRC_INSN(SEL);
DECL_THREE_SRC_INSN(I64MADSAT);
DECL_THREE_SRC_INSN(MAD);
+ DECL_THREE_SRC_INSN(LRP);
#undef DECL_THREE_SRC_INSN
/*! For all nullary functions */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index f93c528..fc30f16 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1800,6 +1800,10 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
Instruction MAD(Type type, Register dst, Tuple src) {
return internal::TernaryInstruction(OP_MAD, type, dst, src).convert();
}
+
+ Instruction LRP(Type type, Register dst, Tuple src) {
+ return internal::TernaryInstruction(OP_LRP, type, dst, src).convert();
+ }
// All compare functions
#define DECL_EMIT_FUNCTION(NAME) \
Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 3f3c655..16aabda 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -616,6 +616,8 @@ namespace ir {
Instruction I64MADSAT(Type type, Register dst, Tuple src);
/*! mad.type dst src */
Instruction MAD(Type type, Register dst, Tuple src);
+ /*! lrp.type dst src */
+ Instruction LRP(Type type, Register dst, Tuple src);
/*! upsample_short.type dst src */
Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
/*! upsample_int.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 81548c9..09afa7b 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -102,6 +102,7 @@ DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
DECL_INSN(UPSAMPLE_LONG, BinaryInstruction)
DECL_INSN(I64MADSAT, TernaryInstruction)
DECL_INSN(MAD, TernaryInstruction)
+DECL_INSN(LRP, TernaryInstruction)
DECL_INSN(IF, BranchInstruction)
DECL_INSN(ENDIF, BranchInstruction)
DECL_INSN(ELSE, BranchInstruction)
diff --git a/backend/src/libocl/tmpl/ocl_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
index b6b09b5..0b6a8fb 100644
--- a/backend/src/libocl/tmpl/ocl_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
@@ -24,6 +24,7 @@
/////////////////////////////////////////////////////////////////////////////
PURE CONST OVERLOADABLE float __gen_ocl_fmax(float a, float b);
PURE CONST OVERLOADABLE float __gen_ocl_fmin(float a, float b);
+PURE CONST OVERLOADABLE float __gen_ocl_lrp(float a, float b, float c);
OVERLOADABLE float step(float edge, float x) {
return x < edge ? 0.0 : 1.0;
@@ -36,7 +37,7 @@ OVERLOADABLE float min(float a, float b) {
return __gen_ocl_fmin(a, b);
}
OVERLOADABLE float mix(float x, float y, float a) {
- return x + (y-x)*a;
+ return __gen_ocl_lrp(a,y,x); //The lrp using a different order with mix
}
OVERLOADABLE float clamp(float v, float l, float u) {
return max(min(v, u), l);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 3d76265..863b1f2 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3541,6 +3541,7 @@ namespace gbe
case GEN_OCL_REGION:
case GEN_OCL_SIMD_ID:
case GEN_OCL_SIMD_SHUFFLE:
+ case GEN_OCL_LRP:
this->newRegister(&I);
break;
case GEN_OCL_PRINTF:
@@ -4249,6 +4250,18 @@ namespace gbe
ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1);
break;
}
+ case GEN_OCL_LRP:
+ {
+ const ir::Register dst = this->getRegister(&I);
+ GBE_ASSERT(AI != AE);
+ const ir::Register src0 = this->getRegister(*(AI++));
+ GBE_ASSERT(AI != AE);
+ const ir::Register src1 = this->getRegister(*(AI++));
+ GBE_ASSERT(AI != AE);
+ const ir::Register src2 = this->getRegister(*(AI++));
+ ctx.LRP(ir::TYPE_FLOAT, dst, src0, src1, src2);
+ break;
+ }
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index cabb225..221ca8e 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -170,3 +170,6 @@ DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
// printf function
DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
+
+// common function
+DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp)
--
2.1.4
More information about the Beignet
mailing list