[Beignet] [PATCH v2 1/4] support built-in functions "hadd", "rhadd"

Song, Ruiling ruiling.song at intel.com
Tue Jul 2 00:11:47 PDT 2013


LGTM, thanks very much for your patch.

-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Homer Hsing
Sent: Tuesday, July 02, 2013 2:45 PM
To: beignet at lists.freedesktop.org
Cc: Xing, Homer
Subject: [Beignet] [PATCH v2 1/4] support built-in functions "hadd", "rhadd"

The backend now supports the GPU opcode "addc".
Add the built-in functions "hadd" and "rhadd".

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen/gen_mesa_disasm.c  |  1 +
 backend/src/backend/gen_context.cpp        | 42 ++++++++++++++++++++++++++++++
 backend/src/backend/gen_defs.hpp           |  1 +
 backend/src/backend/gen_encoder.cpp        |  7 +++++
 backend/src/backend/gen_encoder.hpp        |  1 +
 backend/src/backend/gen_insn_selection.cpp | 12 +++++++++
 backend/src/backend/gen_insn_selection.hxx |  2 ++
 backend/src/ir/instruction.cpp             |  2 ++
 backend/src/ir/instruction.hpp             |  4 +++
 backend/src/ir/instruction.hxx             |  2 ++
 backend/src/llvm/llvm_gen_backend.cpp      | 16 ++++++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  2 ++
 backend/src/ocl_stdlib.h                   | 37 ++++++++++++++++++++++++++
 13 files changed, 129 insertions(+)

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 9a4e283..f65cc30 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -82,6 +82,7 @@ static const struct {
 
   [GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+  [GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 41cab90..62c6378 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -180,6 +180,48 @@ namespace gbe
     const GenRegister src2 = ra->genReg(insn.src(2));
     switch (insn.opcode) {
       case SEL_OP_MAD:  p->MAD(dst, src0, src1, src2); break;
+      case SEL_OP_HADD:
+       {
+        int w = p->curr.execWidth;
+        p->push();
+        p->curr.execWidth = 8;
+        p->curr.quarterControl = 0;
+        p->ADDC(dst, src0, src1);
+        p->SHR(dst, dst, GenRegister::immud(1));
+        p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+        p->OR(dst, dst, src2);
+        if (w == 16) {
+          p->curr.quarterControl = 1;
+          p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+          p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
+          p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+          p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
+        }
+        p->pop();
+        break;
+       }
+      case SEL_OP_RHADD:
+       {
+        int w = p->curr.execWidth;
+        p->push();
+        p->curr.execWidth = 8;
+        p->curr.quarterControl = 0;
+        p->ADDC(dst, src0, src1);
+        p->ADD(dst, dst, GenRegister::immud(1));
+        p->SHR(dst, dst, GenRegister::immud(1));
+        p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+        p->OR(dst, dst, src2);
+        if (w == 16) {
+          p->curr.quarterControl = 1;
+          p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+          p->ADD(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
+          p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
+          p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
+          p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
+        }
+        p->pop();
+        break;
+       }
       default: NOT_IMPLEMENTED;
     }
   }
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 61412c4..5a9bb2d 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -156,6 +156,7 @@ enum opcode {
   GEN_OPCODE_LZD = 74,
   GEN_OPCODE_FBH = 75,
   GEN_OPCODE_FBL = 76,
+  GEN_OPCODE_ADDC = 78,
   GEN_OPCODE_SAD2 = 80,
   GEN_OPCODE_SADA2 = 81,
   GEN_OPCODE_DP4 = 84,
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index e10a04b..25303b4 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -879,6 +879,13 @@ namespace gbe
   ALU2(MACH)
   ALU3(MAD)
 
+  void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) {
+    push();
+    curr.accWrEnable = 1;
+    alu2(this, GEN_OPCODE_ADDC, dest, src0, src1);
+    pop();
+  }
+
   void GenEncoder::ADD(GenRegister dest, GenRegister src0, GenRegister src1) {
      if (src0.type == GEN_TYPE_F ||
          (src0.file == GEN_IMMEDIATE_VALUE &&
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 3ff8c97..a7cbc89 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -107,6 +107,7 @@ namespace gbe
     ALU2(RSL)
     ALU2(ASR)
     ALU2(ADD)
+    ALU2(ADDC)
     ALU2(MUL)
     ALU1(FRC)
     ALU2(MAC)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 129ee2b..f356b27 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -435,6 +435,8 @@ namespace gbe
     ALU3(MAD)
     ALU1(FBH)
     ALU1(FBL)
+    ALU3(HADD)
+    ALU3(RHADD)
 #undef ALU1
 #undef ALU2
 #undef ALU3
@@ -1433,6 +1435,16 @@ namespace gbe
             sel.MUL(dst, src0, src1);
           }
         break;
+        case OP_HADD: {
+            GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
+            sel.HADD(dst, src0, src1, temp);
+            break;
+          }
+        case OP_RHADD: {
+            GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
+            sel.RHADD(dst, src0, src1, temp);
+            break;
+          }
         default: NOT_IMPLEMENTED;
       }
       sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index f1a4701..8a81022 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -44,3 +44,5 @@ DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
 DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
 DECL_SELECTION_IR(FBH, UnaryInstruction)
 DECL_SELECTION_IR(FBL, UnaryInstruction)
+DECL_SELECTION_IR(HADD, TernaryInstruction)
+DECL_SELECTION_IR(RHADD, TernaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index c55774f..bd854a4 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1339,6 +1339,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
   DECL_EMIT_FUNCTION(OR)
   DECL_EMIT_FUNCTION(XOR)
   DECL_EMIT_FUNCTION(AND)
+  DECL_EMIT_FUNCTION(HADD)
+  DECL_EMIT_FUNCTION(RHADD)
 
 #undef DECL_EMIT_FUNCTION
 
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 8aefc92..3389ee0 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -521,6 +521,10 @@ namespace ir {
   Instruction FBH(Type type, Register dst, Register src);
   /*! fbl.type dst src */
   Instruction FBL(Type type, Register dst, Register src);
+  /*! hadd.type dst src */
+  Instruction HADD(Type type, Register dst, Register src0, Register src1);
+  /*! rhadd.type dst src */
+  Instruction RHADD(Type type, Register dst, Register src0, Register src1);
   /*! tan.type dst src */
   Instruction RCP(Type type, Register dst, Register src);
   /*! abs.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 9c4be2e..8df393b 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -74,3 +74,5 @@ DECL_INSN(LABEL, LabelInstruction)
 DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
 DECL_INSN(FBH, UnaryInstruction)
 DECL_INSN(FBL, UnaryInstruction)
+DECL_INSN(HADD, BinaryInstruction)
+DECL_INSN(RHADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index edd912d..88d2dd8 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1791,6 +1791,8 @@ namespace gbe
       case GEN_OCL_USUB_SAT_SHORT:
       case GEN_OCL_USUB_SAT_INT:
       case GEN_OCL_USUB_SAT_LONG:
+      case GEN_OCL_HADD:
+      case GEN_OCL_RHADD:
         this->newRegister(&I);
         break;
       default:
@@ -2182,6 +2184,20 @@ namespace gbe
             ctx.SUBSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1);
             break;
           }
+          case GEN_OCL_HADD: {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.HADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
+          case GEN_OCL_RHADD: {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.RHADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
           default: break;
         }
       }
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 685d504..89b57fc 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -129,3 +129,5 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
 DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
 DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
 DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
+DECL_LLVM_GEN_FUNCTION(HADD, __gen_ocl_hadd)
+DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 016d469..27e6af6 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4388,6 +4388,43 @@ DEF(16)
 #undef DEC8
 #undef DEC16
 
+PURE CONST uint __gen_ocl_hadd(uint x, uint y);
+PURE CONST uint __gen_ocl_rhadd(uint x, uint y);
+#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
+#define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; }
+DEC
+#undef DEF
+#define DEF(type) INLINE_OVERLOADABLE type rhadd(type x, type y) { return (x + y + 1) >> 1; }
+DEC
+#undef DEF
+#undef DEC
+INLINE_OVERLOADABLE int hadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y) >> 1) : __gen_ocl_hadd(x, y); }
+INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); }
+INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd(x, y); }
+INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); }
+#define DEC2(func, type) INLINE_OVERLOADABLE type##2 func(type##2 a, type##2 b) { return (func(a.s0, b.s0), func(a.s1, b.s1)); }
+#define DEC3(func, type) INLINE_OVERLOADABLE type##3 func(type##3 a, type##3 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2)); }
+#define DEC4(func, type) INLINE_OVERLOADABLE type##4 func(type##4 a, type##4 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3)); }
+#define DEC8(func, type) INLINE_OVERLOADABLE type##8 func(type##8 a, type##8 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7)); }
+#define DEC16(func, type) INLINE_OVERLOADABLE type##16 func(type##16 a, type##16 b) { return (func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7), func(a.s8, b.s8), func(a.s9, b.s9), func(a.sa, b.sa), func(a.sb, b.sb), func(a.sc, b.sc), func(a.sd, b.sd), func(a.se, b.se), func(a.sf, b.sf)); }
+#define DEF(func, n) DEC##n(func, char); DEC##n(func, uchar); DEC##n(func, short); DEC##n(func, ushort); DEC##n(func, int); DEC##n(func, uint)
+DEF(hadd, 2)
+DEF(hadd, 3)
+DEF(hadd, 4)
+DEF(hadd, 8)
+DEF(hadd, 16)
+DEF(rhadd, 2)
+DEF(rhadd, 3)
+DEF(rhadd, 4)
+DEF(rhadd, 8)
+DEF(rhadd, 16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
 int __gen_ocl_abs(int x);
 #define ABS_I(I, CVT)  (CVT)__gen_ocl_abs(x.s##I)
 #define ABS_VEC1(CVT)  (CVT)__gen_ocl_abs(x)
--
1.8.1.2

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list