[Beignet] [PATCH version 2] add 64-bit version of "mad_sat"

Homer Hsing homer.xing at intel.com
Sat Sep 21 19:48:33 PDT 2013


tested by piglit:
   piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-mad_sat-1.0.generated.cl
   piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-mad_sat-1.0.generated.cl

version 2:
   the temporary flag register is now allocated by the register allocator (RA)

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp                | 127 +++++++++++++++++++++
 backend/src/backend/gen_context.hpp                |   1 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |   1 +
 backend/src/backend/gen_insn_selection.cpp         |  43 +++++++
 backend/src/backend/gen_insn_selection.hxx         |   1 +
 backend/src/ir/context.hpp                         |   1 +
 backend/src/ir/instruction.cpp                     |  56 +++++++++
 backend/src/ir/instruction.hpp                     |   9 ++
 backend/src/ir/instruction.hxx                     |   1 +
 backend/src/llvm/llvm_gen_backend.cpp              |  20 ++++
 backend/src/llvm/llvm_gen_ocl_function.hxx         |   3 +
 backend/src/ocl_stdlib.tmpl.h                      |   7 +-
 12 files changed, 268 insertions(+), 2 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index ea03a45..b7c5a19 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -525,6 +525,133 @@ namespace gbe
     storeBottomHalf(dest, f);
   }
 
+  void GenContext::emitI64MADSATInstruction(const SelectionInstruction &insn) {  // emit the saturated 64-bit x*y + z sequence; src(0..2) = x, y, z, dst(0) = result
+    GenRegister dest = ra->genReg(insn.dst(0));
+    GenRegister x = ra->genReg(insn.src(0));
+    GenRegister y = ra->genReg(insn.src(1));
+    GenRegister z = ra->genReg(insn.src(2));
+    GenRegister a = ra->genReg(insn.dst(1));  // dst(1)..dst(9): scratch registers a..i supplied by instruction selection
+    GenRegister b = ra->genReg(insn.dst(2));
+    GenRegister c = ra->genReg(insn.dst(3));
+    GenRegister d = ra->genReg(insn.dst(4));
+    GenRegister e = ra->genReg(insn.dst(5));
+    GenRegister f = ra->genReg(insn.dst(6));
+    GenRegister g = ra->genReg(insn.dst(7));
+    GenRegister h = ra->genReg(insn.dst(8));
+    GenRegister i = ra->genReg(insn.dst(9));
+    GenRegister flagReg = ra->genReg(insn.dst(10));  // dst(10): flag register used by every CMP / predicated sequence below
+    GenRegister zero = GenRegister::immud(0), one = GenRegister::immud(1);
+    loadTopHalf(a, x);  // a:b <- top:bottom halves of x
+    loadBottomHalf(b, x);
+    loadTopHalf(c, y);  // c:d <- top:bottom halves of y
+    loadBottomHalf(d, y);
+    if(x.type == GEN_TYPE_UL) {  // unsigned path
+      I64FullMult(e, f, g, h, a, b, c, d);  // presumably e:f:g:h = full product of a:b and c:d, e most significant - confirm against I64FullMult
+      loadTopHalf(c, z);  // c:d are reused for z
+      loadBottomHalf(d, z);
+      addWithCarry(h, h, d);  // h += z.bottom; addWithCarry presumably leaves the carry-out in its last operand (d) - confirm
+      addWithCarry(g, g, d);  // ripple that carry through g, f and e
+      addWithCarry(f, f, d);
+      p->ADD(e, e, d);
+      addWithCarry(g, g, c);  // g += z.top, then ripple its carry (now in c) through f and e
+      addWithCarry(f, f, c);
+      p->ADD(e, e, c);
+      p->OR(a, e, f);  // a != 0 when anything ended up above the g:h words
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.subnr);
+      p->CMP(GEN_CONDITIONAL_NZ, a, zero);  // flag <- (a != 0)
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(g, GenRegister::immd(-1));  // predicated: saturate g:h to all ones
+      p->MOV(h, GenRegister::immd(-1));
+      p->pop();
+    } else {  // signed path
+      I64ABS(e, a, b, i, flagReg);  // presumably a:b <- |x| with a sign indicator in e - confirm against I64ABS
+      I64ABS(f, c, d, i, flagReg);  // likewise c:d <- |y|, indicator in f
+      p->XOR(i, e, f);  // i = e ^ f; tested below to decide whether the product is negated
+      I64FullMult(e, f, g, h, a, b, c, d);  // e:f:g:h <- product of the two magnitudes (see hedge on the unsigned path)
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.subnr);
+      p->CMP(GEN_CONDITIONAL_NZ, i, zero);  // flag <- (i != 0)
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->NOT(e, e);  // predicated two's-complement negate of e:f:g:h: invert every word ...
+      p->NOT(f, f);
+      p->NOT(g, g);
+      p->NOT(h, h);
+      p->MOV(i, one);  // ... then add 1 at h and ripple the carry upwards (i carries it)
+      addWithCarry(h, h, i);
+      addWithCarry(g, g, i);
+      addWithCarry(f, f, i);
+      p->ADD(e, e, i);
+      p->pop();
+      loadTopHalf(c, z);  // c:d are reused for z
+      loadBottomHalf(d, z);
+      p->ASR(GenRegister::retype(b, GEN_TYPE_D), GenRegister::retype(c, GEN_TYPE_D), GenRegister::immd(31));  // b = z.top >> 31 (arithmetic): sign-fill word of z
+      p->MOV(a, b);  // a = second copy of the sign-fill word
+      addWithCarry(h, h, d);  // h += z.bottom, carry rippled through g, f, e
+      addWithCarry(g, g, d);
+      addWithCarry(f, f, d);
+      p->ADD(e, e, d);
+      addWithCarry(g, g, c);  // g += z.top, carry rippled through f, e
+      addWithCarry(f, f, c);
+      p->ADD(e, e, c);
+      addWithCarry(f, f, b);  // f += sign fill, its carry (now in b) goes into e
+      p->ADD(e, e, b);
+      p->ADD(e, e, a);  // e += sign fill
+      p->MOV(b, zero);  // b is reused as the "clamp to maximum" marker, starting cleared
+      p->push();
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.subnr);
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_NZ, e, zero);  // marker set where e != 0 ...
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(b, one);
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_NZ, f, zero);  // ... or f != 0 ...
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(b, one);
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_G, g, GenRegister::immud(0x7FFFFFFF));  // ... or g > 0x7FFFFFFF
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(b, one);
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->SHR(a, e, GenRegister::immud(31));  // a = e >> 31; kept and tested again in the second check below
+      p->CMP(GEN_CONDITIONAL_NZ, a, zero);
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(b, zero);  // marker cleared again where a != 0
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_NZ, b, zero);
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(g, GenRegister::immud(0x7FFFFFFF));  // where marked: g:h = 0x7FFFFFFF:0xFFFFFFFF (largest signed 64-bit value)
+      p->MOV(h, GenRegister::immud(0xFFFFFFFFu));
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->MOV(b, zero);  // b now becomes the "clamp to minimum" marker
+      p->CMP(GEN_CONDITIONAL_NEQ, e, GenRegister::immud(0xFFFFFFFFu));  // marker set where e != 0xFFFFFFFF ...
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(b, one);
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_NEQ, f, GenRegister::immud(0xFFFFFFFFu));  // ... or f != 0xFFFFFFFF ...
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(b, one);
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_LE, g, GenRegister::immud(0x7FFFFFFF));  // ... or g <= 0x7FFFFFFF
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(b, one);
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_Z, a, zero);
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(b, zero);  // marker cleared where a == 0
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_NZ, b, zero);
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(g, GenRegister::immud(0x80000000u));  // where marked: g:h = 0x80000000:0 (smallest signed 64-bit value)
+      p->MOV(h, zero);
+      p->pop();
+    }
+    storeTopHalf(dest, g);  // g:h (possibly clamped) become the top:bottom halves of the result
+    storeBottomHalf(dest, h);
+  }
+
   void GenContext::emitI64HADDInstruction(const SelectionInstruction &insn) {
     GenRegister dest = ra->genReg(insn.dst(0));
     GenRegister x = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 6fe71c5..c9b74eb 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -101,6 +101,7 @@ namespace gbe
     void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     void emitTernaryInstruction(const SelectionInstruction &insn);
     void emitI64MULHIInstruction(const SelectionInstruction &insn);
+    void emitI64MADSATInstruction(const SelectionInstruction &insn);
     void emitI64HADDInstruction(const SelectionInstruction &insn);
     void emitI64RHADDInstruction(const SelectionInstruction &insn);
     void emitI64ShiftInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index a420cfc..46537c6 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -10,6 +10,7 @@ DECL_GEN7_SCHEDULE(I64HADD,         20,        4,        2)
 DECL_GEN7_SCHEDULE(I64RHADD,        20,        4,        2)
 DECL_GEN7_SCHEDULE(I64ToFloat,      20,        4,        2)
 DECL_GEN7_SCHEDULE(I64MULHI,        20,        4,        2)
+DECL_GEN7_SCHEDULE(I64MADSAT,       20,        4,        2)
 DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
 DECL_GEN7_SCHEDULE(I64Compare,      20,        4,        2)
 DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 2791a0e..0691a58 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -471,6 +471,8 @@ namespace gbe
 #undef I64Shift
     /*! Convert 64-bit integer to 32-bit float */
     void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]);
+    /*! Saturated 64bit x*y + z */
+    void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]);
     /*! High 64bit of x*y */
     void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]);
     /*! (x+y)>>1 without mod. overflow */
@@ -1089,6 +1091,16 @@ namespace gbe
       insn->dst(i + 1) = tmp[i];
   }
 
+  void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]) {  // append a SEL_OP_I64MADSAT selection instruction (saturated 64-bit src0*src1 + src2)
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 11, 3);  // 11 and 3 match the dst(0..10) / src(0..2) slots filled below
+    insn->dst(0) = dst;  // slot 0 is the real result
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+    insn->src(2) = src2;
+    for(int i = 0; i < 10; i ++)
+      insn->dst(i + 1) = tmp[i];  // the ten temporaries occupy dst(1)..dst(10)
+  }
+
   void Selection::Opaque::I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_I64_MUL_HI, 11, 2);
     insn->dst(0) = dst;
@@ -2586,6 +2598,36 @@ namespace gbe
     }
   };
 
+  DECL_PATTERN(TernaryInstruction)  // selection pattern for ir::TernaryInstruction; OP_I64MADSAT is the only opcode handled
+   {
+    INLINE bool emitOne(Selection::Opaque &sel, const ir::TernaryInstruction &insn) const {
+      using namespace ir;
+      const Type type = insn.getType();  // one type is applied to the destination and all three sources
+      const GenRegister dst = sel.selReg(insn.getDst(0), type),
+                        src0 = sel.selReg(insn.getSrc(0), type),
+                        src1 = sel.selReg(insn.getSrc(1), type),
+                        src2 = sel.selReg(insn.getSrc(2), type);
+      switch(insn.getOpcode()) {
+        case OP_I64MADSAT:
+         {
+          GenRegister tmp[10];
+          for(int i=0; i<9; i++) {  // tmp[0..8]: FAMILY_DWORD registers retyped to GEN_TYPE_UD
+            tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+            tmp[i].type = GEN_TYPE_UD;
+          }
+          tmp[9] = sel.selReg(sel.reg(FAMILY_BOOL));  // tmp[9]: FAMILY_BOOL register, lands in dst(10) which the emitter reads as its flag register
+          sel.I64MADSAT(dst, src0, src1, src2, tmp);
+          break;
+         }
+        default:
+          NOT_IMPLEMENTED;  // any other ternary opcode is unsupported here
+      }
+      return true;
+    }
+
+    DECL_CTOR(TernaryInstruction, 1, 1);
+   };
+
   /*! Label instruction pattern */
   DECL_PATTERN(LabelInstruction)
   {
@@ -2876,6 +2918,7 @@ namespace gbe
     this->insert<CompareInstructionPattern>();
     this->insert<ConvertInstructionPattern>();
     this->insert<AtomicInstructionPattern>();
+    this->insert<TernaryInstructionPattern>();
     this->insert<LabelInstructionPattern>();
     this->insert<BranchInstructionPattern>();
     this->insert<Int32x32MulInstructionPattern>();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 86d1756..63ad810 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -71,3 +71,4 @@ DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction)
 DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction)
 DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
 DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction)
+DECL_SELECTION_IR(I64MADSAT, I64MADSATInstruction)
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index c286f1d..a7337e6 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -142,6 +142,7 @@ namespace ir {
       this->NAME(type, dst, index); \
     }
     DECL_THREE_SRC_INSN(SEL);
+    DECL_THREE_SRC_INSN(I64MADSAT);
 #undef DECL_THREE_SRC_INSN
 
     /*! For all unary functions */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 8130b8b..2af2de3 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -173,6 +173,30 @@ namespace ir {
       }
     };
 
+    class ALIGNED_INSTRUCTION TernaryInstruction :  // typed instruction with one destination and a tuple of three sources
+      public BasePolicy,
+      public NDstPolicy<TernaryInstruction, 1>,
+      public TupleSrcPolicy<TernaryInstruction>
+    {
+     public:
+      TernaryInstruction(Opcode opcode,
+                         Type type,
+                         Register dst,
+                         Tuple src) {
+        this->opcode = opcode;
+        this->type = type;
+        this->dst[0] = dst;
+        this->src = src;
+      }
+      Type getType(void) const { return type; }
+      bool wellFormed(const Function &fn, std::string &whyNot) const;
+      INLINE void out(std::ostream &out, const Function &fn) const;
+      Type type;  // type used to validate the destination and every source in wellFormed
+      Register dst[1];  // single destination register
+      Tuple src;  // tuple handle through which the three sources are looked up
+      static const uint32_t srcNum = 3;  // number of sources held in the tuple
+    };
+
     /*! Three sources mean we need a tuple to encode it */
     class ALIGNED_INSTRUCTION SelectInstruction :
       public BasePolicy,
@@ -788,6 +812,25 @@ namespace ir {
       return true;
     }
 
+    INLINE bool TernaryInstruction::wellFormed(const Function &fn, std::string &whyNot) const  // validate the instruction; on failure return false (whyNot is set here for the bounds failure, and passed to the helper checks)
+    {
+      const RegisterFamily family = getFamily(this->type);  // register family that dst and all sources are checked against
+      if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false))
+        return false;
+      if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false))
+        return false;
+      if (UNLIKELY(src + 3u > fn.tupleNum())) {  // the three tuple entries starting at src must lie within the function's tuple count
+        whyNot = "Out-of-bound index for ternary instruction";
+        return false;
+      }
+      for (uint32_t srcID = 0; srcID < 3; ++srcID) {  // every source must match the family derived from the instruction type
+        const Register regID = fn.getRegister(src, srcID);
+        if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false))
+          return false;
+      }
+      return true;
+    }
+
     /*! Loads and stores follow the same restrictions */
     template <typename T>
     INLINE bool wellFormedLoadStore(const T &insn, const Function &fn, std::string &whyNot)
@@ -934,6 +977,10 @@ namespace ir {
       ternaryOrSelectOut(*this, out, fn);
     }
 
+    INLINE void TernaryInstruction::out(std::ostream &out, const Function &fn) const {  // print this instruction to the given stream
+      ternaryOrSelectOut(*this, out, fn);  // all formatting is delegated to the ternaryOrSelectOut helper
+    }
+
     INLINE void AtomicInstruction::out(std::ostream &out, const Function &fn) const {
       this->outOpcode(out);
       out << "." << addrSpace;
@@ -1077,6 +1124,10 @@ START_INTROSPECTION(SelectInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(SelectInstruction)
 
+START_INTROSPECTION(TernaryInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(TernaryInstruction)
+
 START_INTROSPECTION(BranchInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(BranchInstruction)
@@ -1259,6 +1310,7 @@ DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType())
 DECL_MEM_FN(BinaryInstruction, Type, getType(void), getType())
 DECL_MEM_FN(BinaryInstruction, bool, commutes(void), commutes())
 DECL_MEM_FN(SelectInstruction, Type, getType(void), getType())
+DECL_MEM_FN(TernaryInstruction, Type, getType(void), getType())
 DECL_MEM_FN(CompareInstruction, Type, getType(void), getType())
 DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType())
 DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType())
@@ -1359,6 +1411,10 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
     return internal::SelectInstruction(type, dst, src).convert();
   }
 
+  Instruction I64MADSAT(Type type, Register dst, Tuple src) {  // build an OP_I64MADSAT instruction from a type, a destination and a source tuple
+    return internal::TernaryInstruction(OP_I64MADSAT, type, dst, src).convert();  // construct the internal form, then convert() it to the returned Instruction
+  }
+
   // All compare functions
 #define DECL_EMIT_FUNCTION(NAME) \
   Instruction NAME(Type type, Register dst,  Register src0, Register src1) { \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index f165595..0f7df58 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -206,6 +206,13 @@ namespace ir {
     static bool isClassOf(const Instruction &insn);
   };
 
+  /*! Ternary instructions are typed. dst and sources share the same type */
+  class TernaryInstruction : public Instruction {
+   public:
+    Type getType(void) const;  // the single type shared by dst and the sources
+    static bool isClassOf(const Instruction &insn);
+  };
+
   /*! Select instructions writes src0 to dst if cond is true. Otherwise, it
    *  writes src1
    */
@@ -521,6 +528,8 @@ namespace ir {
   Instruction MUL_HI(Type type, Register dst, Register src0, Register src1);
   /*! i64_mul_hi.type dst src */
   Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1);
+  /*! i64madsat.type dst src */
+  Instruction I64MADSAT(Type type, Register dst, Tuple src);
   /*! upsample_short.type dst src */
   Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
   /*! upsample_int.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 135dc82..f3f2db6 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -83,3 +83,4 @@ DECL_INSN(I64RHADD, BinaryInstruction)
 DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction)
 DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
 DECL_INSN(UPSAMPLE_LONG, BinaryInstruction)
+DECL_INSN(I64MADSAT, TernaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index acb9848..fd42dca 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1903,6 +1903,8 @@ namespace gbe
       case GEN_OCL_RHADD:
       case GEN_OCL_I64HADD:
       case GEN_OCL_I64RHADD:
+      case GEN_OCL_I64_MAD_SAT:
+      case GEN_OCL_I64_MAD_SATU:
         this->newRegister(&I);
         break;
       default:
@@ -2341,6 +2343,24 @@ namespace gbe
             ctx.SUBSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1);
             break;
           }
+          case GEN_OCL_I64_MAD_SAT:
+           {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.I64MADSAT(getType(ctx, I.getType()), dst, src0, src1, src2);
+            break;
+           }
+          case GEN_OCL_I64_MAD_SATU:
+           {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.I64MADSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1, src2);
+            break;
+           }
           case GEN_OCL_HADD: {
             GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 7b5a2d3..5ea879c 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -125,6 +125,9 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_SHORT, _Z12ocl_usub_sattt)
 DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj)
 DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
 
+DECL_LLVM_GEN_FUNCTION(I64_MAD_SAT, _Z17__gen_ocl_mad_satlll)
+DECL_LLVM_GEN_FUNCTION(I64_MAD_SATU, _Z17__gen_ocl_mad_satmmm)
+
 // integer built-in functions
 DECL_LLVM_GEN_FUNCTION(MUL_HI_INT, _Z16__gen_ocl_mul_hiii)
 DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index f21ba4f..abbb1a6 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -423,12 +423,15 @@ INLINE_OVERLOADABLE uint mad_sat(uint a, uint b, uint c) {
   return (uint)x;
 }
 
+OVERLOADABLE long __gen_ocl_mad_sat(long a, long b, long c);
+OVERLOADABLE ulong __gen_ocl_mad_sat(ulong a, ulong b, ulong c);
+
 INLINE_OVERLOADABLE long mad_sat(long a, long b, long c) {
-  return 0;
+  return __gen_ocl_mad_sat(a, b, c);
 }
 
 INLINE_OVERLOADABLE ulong mad_sat(ulong a, ulong b, ulong c) {
-  return 0;
+  return __gen_ocl_mad_sat(a, b, c);
 }
 
 INLINE_OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }
-- 
1.8.1.2



More information about the Beignet mailing list