[Beignet] [PATCH version 3] add 64-bit version of "mad_sat"
Homer Hsing
homer.xing at intel.com
Sat Sep 21 23:18:01 PDT 2013
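Implement the long and ulong overloads of mad_sat, which were previously stubs returning 0, by adding a new I64MADSAT instruction.
Tested with piglit: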
piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-mad_sat-1.0.generated.cl
piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-mad_sat-1.0.generated.cl
Changes in version 2:
The temporary flag register is now allocated by the register allocator (RA).
Changes in version 3:
Divide the subnr of the flag register by typesize.
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen_context.cpp | 127 +++++++++++++++++++++
backend/src/backend/gen_context.hpp | 1 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 43 +++++++
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/ir/context.hpp | 1 +
backend/src/ir/instruction.cpp | 56 +++++++++
backend/src/ir/instruction.hpp | 9 ++
backend/src/ir/instruction.hxx | 1 +
backend/src/llvm/llvm_gen_backend.cpp | 20 ++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 3 +
backend/src/ocl_stdlib.tmpl.h | 7 +-
12 files changed, 268 insertions(+), 2 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index b064c94..fa635de 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -525,6 +525,133 @@ namespace gbe
storeBottomHalf(dest, f);
}
+ void GenContext::emitI64MADSATInstruction(const SelectionInstruction &insn) {
+ GenRegister dest = ra->genReg(insn.dst(0));
+ GenRegister x = ra->genReg(insn.src(0));
+ GenRegister y = ra->genReg(insn.src(1));
+ GenRegister z = ra->genReg(insn.src(2));
+ GenRegister a = ra->genReg(insn.dst(1));
+ GenRegister b = ra->genReg(insn.dst(2));
+ GenRegister c = ra->genReg(insn.dst(3));
+ GenRegister d = ra->genReg(insn.dst(4));
+ GenRegister e = ra->genReg(insn.dst(5));
+ GenRegister f = ra->genReg(insn.dst(6));
+ GenRegister g = ra->genReg(insn.dst(7));
+ GenRegister h = ra->genReg(insn.dst(8));
+ GenRegister i = ra->genReg(insn.dst(9));
+ GenRegister flagReg = ra->genReg(insn.dst(10));
+ GenRegister zero = GenRegister::immud(0), one = GenRegister::immud(1);
+ loadTopHalf(a, x);
+ loadBottomHalf(b, x);
+ loadTopHalf(c, y);
+ loadBottomHalf(d, y);
+ if(x.type == GEN_TYPE_UL) {
+ I64FullMult(e, f, g, h, a, b, c, d);
+ loadTopHalf(c, z);
+ loadBottomHalf(d, z);
+ addWithCarry(h, h, d);
+ addWithCarry(g, g, d);
+ addWithCarry(f, f, d);
+ p->ADD(e, e, d);
+ addWithCarry(g, g, c);
+ addWithCarry(f, f, c);
+ p->ADD(e, e, c);
+ p->OR(a, e, f);
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+ p->CMP(GEN_CONDITIONAL_NZ, a, zero);
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(g, GenRegister::immd(-1));
+ p->MOV(h, GenRegister::immd(-1));
+ p->pop();
+ } else {
+ I64ABS(e, a, b, i, flagReg);
+ I64ABS(f, c, d, i, flagReg);
+ p->XOR(i, e, f);
+ I64FullMult(e, f, g, h, a, b, c, d);
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+ p->CMP(GEN_CONDITIONAL_NZ, i, zero);
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->NOT(e, e);
+ p->NOT(f, f);
+ p->NOT(g, g);
+ p->NOT(h, h);
+ p->MOV(i, one);
+ addWithCarry(h, h, i);
+ addWithCarry(g, g, i);
+ addWithCarry(f, f, i);
+ p->ADD(e, e, i);
+ p->pop();
+ loadTopHalf(c, z);
+ loadBottomHalf(d, z);
+ p->ASR(GenRegister::retype(b, GEN_TYPE_D), GenRegister::retype(c, GEN_TYPE_D), GenRegister::immd(31));
+ p->MOV(a, b);
+ addWithCarry(h, h, d);
+ addWithCarry(g, g, d);
+ addWithCarry(f, f, d);
+ p->ADD(e, e, d);
+ addWithCarry(g, g, c);
+ addWithCarry(f, f, c);
+ p->ADD(e, e, c);
+ addWithCarry(f, f, b);
+ p->ADD(e, e, b);
+ p->ADD(e, e, a);
+ p->MOV(b, zero);
+ p->push();
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_NZ, e, zero);
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(b, one);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_NZ, f, zero);
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(b, one);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_G, g, GenRegister::immud(0x7FFFFFFF));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(b, one);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->SHR(a, e, GenRegister::immud(31));
+ p->CMP(GEN_CONDITIONAL_NZ, a, zero);
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(b, zero);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_NZ, b, zero);
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(g, GenRegister::immud(0x7FFFFFFF));
+ p->MOV(h, GenRegister::immud(0xFFFFFFFFu));
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->MOV(b, zero);
+ p->CMP(GEN_CONDITIONAL_NEQ, e, GenRegister::immud(0xFFFFFFFFu));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(b, one);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_NEQ, f, GenRegister::immud(0xFFFFFFFFu));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(b, one);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_LE, g, GenRegister::immud(0x7FFFFFFF));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(b, one);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_Z, a, zero);
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(b, zero);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_NZ, b, zero);
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(g, GenRegister::immud(0x80000000u));
+ p->MOV(h, zero);
+ p->pop();
+ }
+ storeTopHalf(dest, g);
+ storeBottomHalf(dest, h);
+ }
+
void GenContext::emitI64HADDInstruction(const SelectionInstruction &insn) {
GenRegister dest = ra->genReg(insn.dst(0));
GenRegister x = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 6fe71c5..c9b74eb 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -101,6 +101,7 @@ namespace gbe
void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
void emitTernaryInstruction(const SelectionInstruction &insn);
void emitI64MULHIInstruction(const SelectionInstruction &insn);
+ void emitI64MADSATInstruction(const SelectionInstruction &insn);
void emitI64HADDInstruction(const SelectionInstruction &insn);
void emitI64RHADDInstruction(const SelectionInstruction &insn);
void emitI64ShiftInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index a420cfc..46537c6 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -10,6 +10,7 @@ DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64RHADD, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64MULHI, 20, 4, 2)
+DECL_GEN7_SCHEDULE(I64MADSAT, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 2791a0e..0691a58 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -471,6 +471,8 @@ namespace gbe
#undef I64Shift
/*! Convert 64-bit integer to 32-bit float */
void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]);
+ /*! Saturated 64bit x*y + z */
+ void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]);
/*! High 64bit of x*y */
void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]);
/*! (x+y)>>1 without mod. overflow */
@@ -1089,6 +1091,16 @@ namespace gbe
insn->dst(i + 1) = tmp[i];
}
+ void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 11, 3);
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->src(2) = src2;
+ for(int i = 0; i < 10; i ++)
+ insn->dst(i + 1) = tmp[i];
+ }
+
void Selection::Opaque::I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_I64_MUL_HI, 11, 2);
insn->dst(0) = dst;
@@ -2586,6 +2598,36 @@ namespace gbe
}
};
+ DECL_PATTERN(TernaryInstruction)
+ {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::TernaryInstruction &insn) const {
+ using namespace ir;
+ const Type type = insn.getType();
+ const GenRegister dst = sel.selReg(insn.getDst(0), type),
+ src0 = sel.selReg(insn.getSrc(0), type),
+ src1 = sel.selReg(insn.getSrc(1), type),
+ src2 = sel.selReg(insn.getSrc(2), type);
+ switch(insn.getOpcode()) {
+ case OP_I64MADSAT:
+ {
+ GenRegister tmp[10];
+ for(int i=0; i<9; i++) {
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+ tmp[i].type = GEN_TYPE_UD;
+ }
+ tmp[9] = sel.selReg(sel.reg(FAMILY_BOOL));
+ sel.I64MADSAT(dst, src0, src1, src2, tmp);
+ break;
+ }
+ default:
+ NOT_IMPLEMENTED;
+ }
+ return true;
+ }
+
+ DECL_CTOR(TernaryInstruction, 1, 1);
+ };
+
/*! Label instruction pattern */
DECL_PATTERN(LabelInstruction)
{
@@ -2876,6 +2918,7 @@ namespace gbe
this->insert<CompareInstructionPattern>();
this->insert<ConvertInstructionPattern>();
this->insert<AtomicInstructionPattern>();
+ this->insert<TernaryInstructionPattern>();
this->insert<LabelInstructionPattern>();
this->insert<BranchInstructionPattern>();
this->insert<Int32x32MulInstructionPattern>();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 86d1756..63ad810 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -71,3 +71,4 @@ DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction)
DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction)
DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction)
+DECL_SELECTION_IR(I64MADSAT, I64MADSATInstruction)
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index c286f1d..a7337e6 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -142,6 +142,7 @@ namespace ir {
this->NAME(type, dst, index); \
}
DECL_THREE_SRC_INSN(SEL);
+ DECL_THREE_SRC_INSN(I64MADSAT);
#undef DECL_THREE_SRC_INSN
/*! For all unary functions */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 8130b8b..2af2de3 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -173,6 +173,30 @@ namespace ir {
}
};
+ class ALIGNED_INSTRUCTION TernaryInstruction :
+ public BasePolicy,
+ public NDstPolicy<TernaryInstruction, 1>,
+ public TupleSrcPolicy<TernaryInstruction>
+ {
+ public:
+ TernaryInstruction(Opcode opcode,
+ Type type,
+ Register dst,
+ Tuple src) {
+ this->opcode = opcode;
+ this->type = type;
+ this->dst[0] = dst;
+ this->src = src;
+ }
+ Type getType(void) const { return type; }
+ bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Type type;
+ Register dst[1];
+ Tuple src;
+ static const uint32_t srcNum = 3;
+ };
+
/*! Three sources mean we need a tuple to encode it */
class ALIGNED_INSTRUCTION SelectInstruction :
public BasePolicy,
@@ -788,6 +812,25 @@ namespace ir {
return true;
}
+ INLINE bool TernaryInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const RegisterFamily family = getFamily(this->type);
+ if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(src + 3u > fn.tupleNum())) {
+ whyNot = "Out-of-bound index for ternary instruction";
+ return false;
+ }
+ for (uint32_t srcID = 0; srcID < 3; ++srcID) {
+ const Register regID = fn.getRegister(src, srcID);
+ if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false))
+ return false;
+ }
+ return true;
+ }
+
/*! Loads and stores follow the same restrictions */
template <typename T>
INLINE bool wellFormedLoadStore(const T &insn, const Function &fn, std::string &whyNot)
@@ -934,6 +977,10 @@ namespace ir {
ternaryOrSelectOut(*this, out, fn);
}
+ INLINE void TernaryInstruction::out(std::ostream &out, const Function &fn) const {
+ ternaryOrSelectOut(*this, out, fn);
+ }
+
INLINE void AtomicInstruction::out(std::ostream &out, const Function &fn) const {
this->outOpcode(out);
out << "." << addrSpace;
@@ -1077,6 +1124,10 @@ START_INTROSPECTION(SelectInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(SelectInstruction)
+START_INTROSPECTION(TernaryInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(TernaryInstruction)
+
START_INTROSPECTION(BranchInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(BranchInstruction)
@@ -1259,6 +1310,7 @@ DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType())
DECL_MEM_FN(BinaryInstruction, Type, getType(void), getType())
DECL_MEM_FN(BinaryInstruction, bool, commutes(void), commutes())
DECL_MEM_FN(SelectInstruction, Type, getType(void), getType())
+DECL_MEM_FN(TernaryInstruction, Type, getType(void), getType())
DECL_MEM_FN(CompareInstruction, Type, getType(void), getType())
DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType())
DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType())
@@ -1359,6 +1411,10 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
return internal::SelectInstruction(type, dst, src).convert();
}
+ Instruction I64MADSAT(Type type, Register dst, Tuple src) {
+ return internal::TernaryInstruction(OP_I64MADSAT, type, dst, src).convert();
+ }
+
// All compare functions
#define DECL_EMIT_FUNCTION(NAME) \
Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index f165595..0f7df58 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -206,6 +206,13 @@ namespace ir {
static bool isClassOf(const Instruction &insn);
};
+ /*! Ternary instructions are typed. dst and sources share the same type */
+ class TernaryInstruction : public Instruction {
+ public:
+ Type getType(void) const;
+ static bool isClassOf(const Instruction &insn);
+ };
+
/*! Select instructions writes src0 to dst if cond is true. Otherwise, it
* writes src1
*/
@@ -521,6 +528,8 @@ namespace ir {
Instruction MUL_HI(Type type, Register dst, Register src0, Register src1);
/*! i64_mul_hi.type dst src */
Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1);
+ /*! i64madsat.type dst src */
+ Instruction I64MADSAT(Type type, Register dst, Tuple src);
/*! upsample_short.type dst src */
Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
/*! upsample_int.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 135dc82..f3f2db6 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -83,3 +83,4 @@ DECL_INSN(I64RHADD, BinaryInstruction)
DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction)
DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
DECL_INSN(UPSAMPLE_LONG, BinaryInstruction)
+DECL_INSN(I64MADSAT, TernaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index acb9848..fd42dca 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1903,6 +1903,8 @@ namespace gbe
case GEN_OCL_RHADD:
case GEN_OCL_I64HADD:
case GEN_OCL_I64RHADD:
+ case GEN_OCL_I64_MAD_SAT:
+ case GEN_OCL_I64_MAD_SATU:
this->newRegister(&I);
break;
default:
@@ -2341,6 +2343,24 @@ namespace gbe
ctx.SUBSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1);
break;
}
+ case GEN_OCL_I64_MAD_SAT:
+ {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.I64MADSAT(getType(ctx, I.getType()), dst, src0, src1, src2);
+ break;
+ }
+ case GEN_OCL_I64_MAD_SATU:
+ {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.I64MADSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1, src2);
+ break;
+ }
case GEN_OCL_HADD: {
GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 7b5a2d3..5ea879c 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -125,6 +125,9 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_SHORT, _Z12ocl_usub_sattt)
DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj)
DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
+DECL_LLVM_GEN_FUNCTION(I64_MAD_SAT, _Z17__gen_ocl_mad_satlll)
+DECL_LLVM_GEN_FUNCTION(I64_MAD_SATU, _Z17__gen_ocl_mad_satmmm)
+
// integer built-in functions
DECL_LLVM_GEN_FUNCTION(MUL_HI_INT, _Z16__gen_ocl_mul_hiii)
DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index f21ba4f..abbb1a6 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -423,12 +423,15 @@ INLINE_OVERLOADABLE uint mad_sat(uint a, uint b, uint c) {
return (uint)x;
}
+OVERLOADABLE long __gen_ocl_mad_sat(long a, long b, long c);
+OVERLOADABLE ulong __gen_ocl_mad_sat(ulong a, ulong b, ulong c);
+
INLINE_OVERLOADABLE long mad_sat(long a, long b, long c) {
- return 0;
+ return __gen_ocl_mad_sat(a, b, c);
}
INLINE_OVERLOADABLE ulong mad_sat(ulong a, ulong b, ulong c) {
- return 0;
+ return __gen_ocl_mad_sat(a, b, c);
}
INLINE_OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }
--
1.8.1.2
More information about the Beignet mailing list