[Beignet] [PATCH v3 3/4] support built-in functions "mul_hi", "mad_hi"

Homer Hsing homer.xing at intel.com
Tue Jul 2 00:51:12 PDT 2013


Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp        | 28 ++++++++++++++++
 backend/src/backend/gen_insn_selection.cpp |  6 ++++
 backend/src/backend/gen_insn_selection.hxx |  1 +
 backend/src/ir/instruction.cpp             |  1 +
 backend/src/ir/instruction.hpp             |  2 ++
 backend/src/ir/instruction.hxx             |  1 +
 backend/src/llvm/llvm_gen_backend.cpp      | 18 +++++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  2 ++
 backend/src/ocl_stdlib.h                   | 52 ++++++++++++++++++++++++++++++
 9 files changed, 111 insertions(+)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 62c6378..acd9c1d 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -179,6 +179,34 @@ namespace gbe
     const GenRegister src1 = ra->genReg(insn.src(1));
     const GenRegister src2 = ra->genReg(insn.src(2));
     switch (insn.opcode) {
+      case SEL_OP_MUL_HI:
+       {
+        int w = p->curr.execWidth;
+        p->push();
+        p->curr.execWidth = 8;
+        p->curr.quarterControl = 0;
+        p->push();
+        p->curr.predicate = GEN_PREDICATE_NONE;
+        p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
+        p->curr.accWrEnable = 1;
+        p->MACH(src2, src0, src1);
+        p->curr.accWrEnable = 0;
+        p->pop();
+        p->MOV(dst, src2);
+        if (w == 16) {
+          p->push();
+          p->curr.predicate = GEN_PREDICATE_NONE;
+          p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+          p->curr.accWrEnable = 1;
+          p->MACH(src2, GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+          p->curr.accWrEnable = 0;
+          p->pop();
+          p->curr.quarterControl = 1;
+          p->MOV(GenRegister::Qn(dst, 1), src2);
+        }
+        p->pop();
+        break;
+       }
       case SEL_OP_MAD:  p->MAD(dst, src0, src1, src2); break;
       case SEL_OP_HADD:
        {
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index f356b27..bbe392d 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -433,6 +433,7 @@ namespace gbe
     ALU2(MACH)
     ALU1(LZD)
     ALU3(MAD)
+    ALU3(MUL_HI)
     ALU1(FBH)
     ALU1(FBL)
     ALU3(HADD)
@@ -1425,6 +1426,11 @@ namespace gbe
         case OP_SHL: sel.SHL(dst, src0, src1); break;
         case OP_SHR: sel.SHR(dst, src0, src1); break;
         case OP_ASR: sel.ASR(dst, src0, src1); break;
+        case OP_MUL_HI: {
+            GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_UD);
+            sel.MUL_HI(dst, src0, src1, temp);
+            break;
+          }
         case OP_MUL:
           if (type == TYPE_U32 || type == TYPE_S32) {
             sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 8a81022..c85d328 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -42,6 +42,7 @@ DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
 DECL_SELECTION_IR(SAMPLE, SampleInstruction)
 DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
 DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
+DECL_SELECTION_IR(MUL_HI, TernaryInstruction)
 DECL_SELECTION_IR(FBH, UnaryInstruction)
 DECL_SELECTION_IR(FBL, UnaryInstruction)
 DECL_SELECTION_IR(HADD, TernaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index bd854a4..21b82ce 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1329,6 +1329,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
   DECL_EMIT_FUNCTION(ADDSAT)
   DECL_EMIT_FUNCTION(SUB)
   DECL_EMIT_FUNCTION(SUBSAT)
+  DECL_EMIT_FUNCTION(MUL_HI)
   DECL_EMIT_FUNCTION(DIV)
   DECL_EMIT_FUNCTION(REM)
   DECL_EMIT_FUNCTION(SHL)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 3389ee0..fc1c984 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -517,6 +517,8 @@ namespace ir {
   Instruction COS(Type type, Register dst, Register src);
   /*! sin.type dst src */
   Instruction SIN(Type type, Register dst, Register src);
+  /*! mul_hi.type dst src */
+  Instruction MUL_HI(Type type, Register dst, Register src0, Register src1);
   /*! fbh.type dst src */
   Instruction FBH(Type type, Register dst, Register src);
   /*! fbl.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 8df393b..0e1c575 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -72,6 +72,7 @@ DECL_INSN(SAMPLE, SampleInstruction)
 DECL_INSN(SYNC, SyncInstruction)
 DECL_INSN(LABEL, LabelInstruction)
 DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
+DECL_INSN(MUL_HI, BinaryInstruction)
 DECL_INSN(FBH, UnaryInstruction)
 DECL_INSN(FBL, UnaryInstruction)
 DECL_INSN(HADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 88d2dd8..8385e21 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1775,6 +1775,8 @@ namespace gbe
         this->newRegister(&I);
         break;
       }
+      case GEN_OCL_MUL_HI_INT:
+      case GEN_OCL_MUL_HI_UINT:
       case GEN_OCL_SADD_SAT_CHAR:
       case GEN_OCL_SADD_SAT_SHORT:
       case GEN_OCL_SADD_SAT_INT:
@@ -2140,6 +2142,22 @@ namespace gbe
             ctx.TYPED_WRITE(srcTuple, srcType, coordType);
             break;
           }
+          case GEN_OCL_MUL_HI_INT:
+          {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.MUL_HI(getType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
+          case GEN_OCL_MUL_HI_UINT:
+          {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
           case GEN_OCL_SADD_SAT_CHAR:
           case GEN_OCL_SADD_SAT_SHORT:
           case GEN_OCL_SADD_SAT_INT:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 89b57fc..f448a50 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -126,6 +126,8 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj)
 DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
 
 // integer built-in functions
+DECL_LLVM_GEN_FUNCTION(MUL_HI_INT, _Z16__gen_ocl_mul_hiii)
+DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj)
 DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
 DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
 DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 27e6af6..cd9a326 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4356,6 +4356,58 @@ DEC(16)
 #undef DEC8
 #undef DEC16
 
+OVERLOADABLE int __gen_ocl_mul_hi(int x, int y);
+OVERLOADABLE uint __gen_ocl_mul_hi(uint x, uint y);
+INLINE_OVERLOADABLE char mul_hi(char x, char y) { return (x * y) >> 8; }
+INLINE_OVERLOADABLE uchar mul_hi(uchar x, uchar y) { return (x * y) >> 8; }
+INLINE_OVERLOADABLE short mul_hi(short x, short y) { return (x * y) >> 16; }
+INLINE_OVERLOADABLE ushort mul_hi(ushort x, ushort y) { return (x * y) >> 16; }
+INLINE_OVERLOADABLE int mul_hi(int x, int y) { return __gen_ocl_mul_hi(x, y); }
+INLINE_OVERLOADABLE uint mul_hi(uint x, uint y) { return __gen_ocl_mul_hi(x, y); }
+#define DEC2(type) INLINE_OVERLOADABLE type##2 mul_hi(type##2 a, type##2 b) { return (mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1)); }
+#define DEC3(type) INLINE_OVERLOADABLE type##3 mul_hi(type##3 a, type##3 b) { return (mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1), mul_hi(a.s2, b.s2)); }
+#define DEC4(type) INLINE_OVERLOADABLE type##4 mul_hi(type##4 a, type##4 b) { return (mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1), mul_hi(a.s2, b.s2), mul_hi(a.s3, b.s3)); }
+#define DEC8(type) INLINE_OVERLOADABLE type##8 mul_hi(type##8 a, type##8 b) { return (mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1), mul_hi(a.s2, b.s2), mul_hi(a.s3, b.s3), mul_hi(a.s4, b.s4), mul_hi(a.s5, b.s5), mul_hi(a.s6, b.s6), mul_hi(a.s7, b.s7)); }
+#define DEC16(type) INLINE_OVERLOADABLE type##16 mul_hi(type##16 a, type##16 b) { return (mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1), mul_hi(a.s2, b.s2), mul_hi(a.s3, b.s3), mul_hi(a.s4, b.s4), mul_hi(a.s5, b.s5), mul_hi(a.s6, b.s6), mul_hi(a.s7, b.s7), mul_hi(a.s8, b.s8), mul_hi(a.s9, b.s9), mul_hi(a.sa, b.sa), mul_hi(a.sb, b.sb), mul_hi(a.sc, b.sc), mul_hi(a.sd, b.sd), mul_hi(a.se, b.se), mul_hi(a.sf, b.sf)); }
+#define DEF(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort); DEC##n(int); DEC##n(uint)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
+#define DEF(type) INLINE_OVERLOADABLE type mad_hi(type a, type b, type c) { return mul_hi(a, b) + c; }
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+#undef DEF
+#define DEC2(type) INLINE_OVERLOADABLE type##2 mad_hi(type##2 a, type##2 b, type##2 c) { return (mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1)); }
+#define DEC3(type) INLINE_OVERLOADABLE type##3 mad_hi(type##3 a, type##3 b, type##3 c) { return (mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1), mad_hi(a.s2, b.s2, c.s2)); }
+#define DEC4(type) INLINE_OVERLOADABLE type##4 mad_hi(type##4 a, type##4 b, type##4 c) { return (mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1), mad_hi(a.s2, b.s2, c.s2), mad_hi(a.s3, b.s3, c.s3)); }
+#define DEC8(type) INLINE_OVERLOADABLE type##8 mad_hi(type##8 a, type##8 b, type##8 c) { return (mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1), mad_hi(a.s2, b.s2, c.s2), mad_hi(a.s3, b.s3, c.s3), mad_hi(a.s4, b.s4, c.s4), mad_hi(a.s5, b.s5, c.s5), mad_hi(a.s6, b.s6, c.s6), mad_hi(a.s7, b.s7, c.s7)); }
+#define DEC16(type) INLINE_OVERLOADABLE type##16 mad_hi(type##16 a, type##16 b, type##16 c) { return (mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1), mad_hi(a.s2, b.s2, c.s2), mad_hi(a.s3, b.s3, c.s3), mad_hi(a.s4, b.s4, c.s4), mad_hi(a.s5, b.s5, c.s5), mad_hi(a.s6, b.s6, c.s6), mad_hi(a.s7, b.s7, c.s7), mad_hi(a.s8, b.s8, c.s8), mad_hi(a.s9, b.s9, c.s9), mad_hi(a.sa, b.sa, c.sa), mad_hi(a.sb, b.sb, c.sb), mad_hi(a.sc, b.sc, c.sc), mad_hi(a.sd, b.sd, c.sd), mad_hi(a.se, b.se, c.se), mad_hi(a.sf, b.sf, c.sf)); }
+#define DEF(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort); DEC##n(int); DEC##n(uint)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
 INLINE_OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }
 INLINE_OVERLOADABLE char __rotate_left(char x, char y) { return __rotate_left((uchar)x, (uchar)y); }
 INLINE_OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x >> (16 - y)); }
-- 
1.8.1.2



More information about the Beignet mailing list