[Beignet] [PATCH 2/2] GBE: use ISA mad for mad() builtin function.

Ruiling Song ruiling.song at intel.com
Tue Nov 5 00:37:13 PST 2013


Directly map the mad() builtin to the ISA mad instruction, so that mad()
has better performance and less precision loss.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |    5 +++++
 backend/src/ir/context.hpp                 |    1 +
 backend/src/ir/instruction.cpp             |    3 +++
 backend/src/ir/instruction.hpp             |    2 ++
 backend/src/ir/instruction.hxx             |    1 +
 backend/src/llvm/llvm_gen_backend.cpp      |    9 +++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |    1 +
 backend/src/ocl_stdlib.tmpl.h              |    3 ++-
 8 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 1f25f0e..6430ee8 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2707,6 +2707,11 @@ namespace gbe
           sel.I64MADSAT(dst, src0, src1, src2, tmp);
           break;
          }
+        case OP_MAD:
+         {
+          sel.MAD(dst, src2, src0, src1);
+          break;
+         }
         default:
           NOT_IMPLEMENTED;
       }
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index 7bb6e5a..242beaa 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -149,6 +149,7 @@ namespace ir {
     }
     DECL_THREE_SRC_INSN(SEL);
     DECL_THREE_SRC_INSN(I64MADSAT);
+    DECL_THREE_SRC_INSN(MAD);
 #undef DECL_THREE_SRC_INSN
 
     /*! For all unary functions */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 9b3e699..d86c3c0 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1449,6 +1449,9 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
     return internal::TernaryInstruction(OP_I64MADSAT, type, dst, src).convert();
   }
 
+  Instruction MAD(Type type, Register dst, Tuple src) {
+    return internal::TernaryInstruction(OP_MAD, type, dst, src).convert();
+  }
   // All compare functions
 #define DECL_EMIT_FUNCTION(NAME) \
   Instruction NAME(Type type, Register dst,  Register src0, Register src1) { \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 90c819b..ae45a63 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -539,6 +539,8 @@ namespace ir {
   Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1);
   /*! i64madsat.type dst src */
   Instruction I64MADSAT(Type type, Register dst, Tuple src);
+  /*! mad.type dst src */
+  Instruction MAD(Type type, Register dst, Tuple src);
   /*! upsample_short.type dst src */
   Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
   /*! upsample_int.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index cd60349..67dc682 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -86,3 +86,4 @@ DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction)
 DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
 DECL_INSN(UPSAMPLE_LONG, BinaryInstruction)
 DECL_INSN(I64MADSAT, TernaryInstruction)
+DECL_INSN(MAD, TernaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index b824bf9..aae52d8 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2016,6 +2016,7 @@ namespace gbe
       case GEN_OCL_UPSAMPLE_SHORT:
       case GEN_OCL_UPSAMPLE_INT:
       case GEN_OCL_UPSAMPLE_LONG:
+      case GEN_OCL_MAD:
       case GEN_OCL_SADD_SAT_CHAR:
       case GEN_OCL_SADD_SAT_SHORT:
       case GEN_OCL_SADD_SAT_INT:
@@ -2533,6 +2534,14 @@ namespace gbe
             ctx.I64MADSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1, src2);
             break;
            }
+          case GEN_OCL_MAD: {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.MAD(getType(ctx, I.getType()), dst, src0, src1, src2);
+            break;
+          }
           case GEN_OCL_HADD: {
             GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 3f44be8..71034ab 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -31,6 +31,7 @@ DECL_LLVM_GEN_FUNCTION(RNDZ, __gen_ocl_rndz)
 DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde)
 DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu)
 DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd)
+DECL_LLVM_GEN_FUNCTION(MAD, __gen_ocl_mad)
 
 // Barrier function
 DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index a1f365c..09c36d5 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -1558,8 +1558,9 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
 #define erf __gen_ocl_internal_erf
 #define erfc __gen_ocl_internal_erfc
 
+PURE CONST float __gen_ocl_mad(float a, float b, float c);
 INLINE_OVERLOADABLE float mad(float a, float b, float c) {
-  return a*b+c;
+  return __gen_ocl_mad(a, b, c);
 }
 
 #define DEF(TYPE1, TYPE2) \
-- 
1.7.9.5



More information about the Beignet mailing list