[Beignet] [PATCH V2 1/2] Backend: refine mix with hardware lrp function

Pan Xiuli xiuli.pan at intel.com
Tue Nov 24 19:00:03 PST 2015


The EU supports an lrp function that is similar to mix, but only
for float, so refine only the float version of mix to use lrp.
There will be small errors in mix results now that it uses lrp.

V2:
Rebase the patch

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 backend/src/backend/gen/gen_mesa_disasm.c  |  1 +
 backend/src/backend/gen_context.cpp        |  1 +
 backend/src/backend/gen_encoder.cpp        |  1 +
 backend/src/backend/gen_encoder.hpp        |  1 +
 backend/src/backend/gen_insn_selection.cpp |  6 ++++++
 backend/src/backend/gen_insn_selection.hxx |  1 +
 backend/src/ir/context.hpp                 |  1 +
 backend/src/ir/instruction.cpp             |  4 ++++
 backend/src/ir/instruction.hpp             |  2 ++
 backend/src/ir/instruction.hxx             |  1 +
 backend/src/libocl/tmpl/ocl_common.tmpl.cl |  3 ++-
 backend/src/llvm/llvm_gen_backend.cpp      | 13 +++++++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  3 +++
 13 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 3198da7..94bae48 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -77,6 +77,7 @@ static const struct {
   [GEN_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
+  [GEN_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
   [GEN_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 43fa7fa..09a41aa 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1795,6 +1795,7 @@ namespace gbe
     const GenRegister src2 = ra->genReg(insn.src(2));
     switch (insn.opcode) {
       case SEL_OP_MAD:  p->MAD(dst, src0, src1, src2); break;
+      case SEL_OP_LRP:  p->LRP(dst, src0, src1, src2); break;
       default: NOT_IMPLEMENTED;
     }
   }
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 7c4357a..d82b75b 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -767,6 +767,7 @@ namespace gbe
   ALU2(PLN)
   ALU2(MACH)
   ALU3(MAD)
+  ALU3(LRP)
  // ALU2(BRC)
  // ALU1(ENDIF)
  //  ALU1(IF)
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index e4f5ff4..aea7a30 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -125,6 +125,7 @@ namespace gbe
     ALU2(LINE)
     ALU2(PLN)
     ALU3(MAD)
+    ALU3(LRP)
     ALU2(BRC)
     ALU1(BRD)
 #undef ALU1
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ed7514c..9e60d34 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -547,6 +547,7 @@ namespace gbe
     ALU2(MACH)
     ALU1(LZD)
     ALU3(MAD)
+    ALU3(LRP)
     ALU2WithTemp(MUL_HI)
     ALU1(FBH)
     ALU1(FBL)
@@ -5214,6 +5215,11 @@ namespace gbe
           sel.MAD(dst, src2, src0, src1);
           break;
          }
+        case OP_LRP:
+         {
+          sel.LRP(dst, src0, src1, src2);
+          break;
+         }
         default:
           NOT_IMPLEMENTED;
       }
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index bc09522..5611a4f 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -43,6 +43,7 @@ DECL_SELECTION_IR(CMP, CompareInstruction)
 DECL_SELECTION_IR(I64CMP, I64CompareInstruction)
 DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
 DECL_SELECTION_IR(MAD, TernaryInstruction)
+DECL_SELECTION_IR(LRP, TernaryInstruction)
 DECL_SELECTION_IR(JMPI, JumpInstruction)
 DECL_SELECTION_IR(EOT, EotInstruction)
 DECL_SELECTION_IR(INDIRECT_MOVE, IndirectMoveInstruction)
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index ab0d8b5..b2ea2fb 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -175,6 +175,7 @@ namespace ir {
     DECL_THREE_SRC_INSN(SEL);
     DECL_THREE_SRC_INSN(I64MADSAT);
     DECL_THREE_SRC_INSN(MAD);
+    DECL_THREE_SRC_INSN(LRP);
 #undef DECL_THREE_SRC_INSN
 
     /*! For all nullary functions */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index c7facfb..f9b4992 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -2024,6 +2024,10 @@ DECL_MEM_FN(MemInstruction, void,     setBtiReg(Register reg), setBtiReg(reg))
   Instruction MAD(Type type, Register dst, Tuple src) {
     return internal::TernaryInstruction(OP_MAD, type, dst, src).convert();
   }
+
+  Instruction LRP(Type type, Register dst, Tuple src) {
+    return internal::TernaryInstruction(OP_LRP, type, dst, src).convert();
+  }
   // All compare functions
 #define DECL_EMIT_FUNCTION(NAME) \
   Instruction NAME(Type type, Register dst,  Register src0, Register src1) { \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 76ffd77..9c1b076 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -651,6 +651,8 @@ namespace ir {
   Instruction I64MADSAT(Type type, Register dst, Tuple src);
   /*! mad.type dst src */
   Instruction MAD(Type type, Register dst, Tuple src);
+  /*! lrp.type dst src */
+  Instruction LRP(Type type, Register dst, Tuple src);
   /*! upsample_short.type dst src */
   Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
   /*! upsample_int.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index efdd4c5..e904ead 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -103,6 +103,7 @@ DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
 DECL_INSN(UPSAMPLE_LONG, BinaryInstruction)
 DECL_INSN(I64MADSAT, TernaryInstruction)
 DECL_INSN(MAD, TernaryInstruction)
+DECL_INSN(LRP, TernaryInstruction)
 DECL_INSN(IF, BranchInstruction)
 DECL_INSN(ENDIF, BranchInstruction)
 DECL_INSN(ELSE, BranchInstruction)
diff --git a/backend/src/libocl/tmpl/ocl_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
index b6b09b5..0b6a8fb 100644
--- a/backend/src/libocl/tmpl/ocl_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
@@ -24,6 +24,7 @@
 /////////////////////////////////////////////////////////////////////////////
 PURE CONST OVERLOADABLE float __gen_ocl_fmax(float a, float b);
 PURE CONST OVERLOADABLE float __gen_ocl_fmin(float a, float b);
+PURE CONST OVERLOADABLE float __gen_ocl_lrp(float a, float b, float c);
 
 OVERLOADABLE float step(float edge, float x) {
   return x < edge ? 0.0 : 1.0;
@@ -36,7 +37,7 @@ OVERLOADABLE float min(float a, float b) {
   return __gen_ocl_fmin(a, b);
 }
 OVERLOADABLE float mix(float x, float y, float a) {
-  return x + (y-x)*a;
+  return __gen_ocl_lrp(a,y,x); //The lrp using a different order with mix
 }
 OVERLOADABLE float clamp(float v, float l, float u) {
   return max(min(v, u), l);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index a0b2262..2b0e13c 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3596,6 +3596,7 @@ namespace gbe
       case GEN_OCL_SIMD_ID:
       case GEN_OCL_SIMD_SHUFFLE:
       case GEN_OCL_VME:
+      case GEN_OCL_LRP:
         this->newRegister(&I);
         break;
       case GEN_OCL_PRINTF:
@@ -4400,6 +4401,18 @@ namespace gbe
             ctx.WAIT();
             break;
           }
+          case GEN_OCL_LRP:
+          {
+            const ir::Register dst  = this->getRegister(&I);
+            GBE_ASSERT(AI != AE);
+            const ir::Register src0 = this->getRegister(*(AI++));
+            GBE_ASSERT(AI != AE);
+            const ir::Register src1 = this->getRegister(*(AI++));
+            GBE_ASSERT(AI != AE);
+            const ir::Register src2 = this->getRegister(*(AI++));
+            ctx.LRP(ir::TYPE_FLOAT, dst, src0, src1, src2);
+            break;
+          }
           default: break;
         }
       }
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index d0e3614..b126669 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -180,3 +180,6 @@ DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, __gen_ocl_store_profiling)
 
 // debug wait function
 DECL_LLVM_GEN_FUNCTION(DEBUGWAIT, __gen_ocl_debugwait)
+
+// common function
+DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp)
-- 
2.1.4



More information about the Beignet mailing list