[Beignet] [PATCH version 4] add 64-bit version of "mul_hi"

Song, Ruiling ruiling.song at intel.com
Wed Sep 25 22:45:07 PDT 2013


With this modification, the patchset (mul_hi, mad_sat, add_sat, sub_sat) looks good to me. Thanks!

-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Homer Hsing
Sent: Thursday, September 26, 2013 1:43 PM
To: beignet at lists.freedesktop.org
Subject: [Beignet] [PATCH version 4] add 64-bit version of "mul_hi"

passed piglit test cases:
  piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-mul_hi-1.0.generated.cl
  piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-mul_hi-1.0.generated.cl

version 2:
  temp flag register is allocated by RA

version 3:
  divide subnr of flag register by typesize

version 4:
  fix a typo

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp                | 79 ++++++++++++++++++++++
 backend/src/backend/gen_context.hpp                |  4 ++
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
 backend/src/backend/gen_insn_selection.cpp         | 22 ++++++
 backend/src/backend/gen_insn_selection.hpp         |  4 +-
 backend/src/backend/gen_insn_selection.hxx         |  1 +
 backend/src/ir/instruction.cpp                     |  1 +
 backend/src/ir/instruction.hpp                     |  2 +
 backend/src/ir/instruction.hxx                     |  1 +
 backend/src/llvm/llvm_gen_backend.cpp              | 18 +++++
 backend/src/llvm/llvm_gen_ocl_function.hxx         |  2 +
 backend/src/ocl_stdlib.tmpl.h                      |  6 +-
 12 files changed, 137 insertions(+), 4 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 9ccf1bf..4ebd59a 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -444,6 +444,85 @@ namespace gbe
     p->pop();
   }
 
+  void GenContext::I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2) {
+    addWithCarry(low1, low1, low2);
+    addWithCarry(high1, high1, high2);
+    p->ADD(high1, high1, low2);
+  }
+
+  void GenContext::I64FullMult(GenRegister dst1, GenRegister dst2, GenRegister dst3, GenRegister dst4, GenRegister x_high, GenRegister x_low, GenRegister y_high, GenRegister y_low) {
+    GenRegister &e = dst1, &f = dst2, &g = dst3, &h = dst4,
+                &a = x_high, &b = x_low, &c = y_high, &d = y_low;
+    I32FullMult(e, h, b, d);
+    I32FullMult(f, g, a, d);
+    addWithCarry(g, g, e);
+    addWithCarry(f, f, e);
+    I32FullMult(e, d, b, c);
+    I64FullAdd(f, g, e, d);
+    I32FullMult(b, d, a, c);
+    I64FullAdd(e, f, b, d);
+  }
+
+  void GenContext::I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg) {
+    p->SHR(sign, high, GenRegister::immud(31));
+    p->push();
+    p->curr.predicate = GEN_PREDICATE_NONE;
+    p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+    p->CMP(GEN_CONDITIONAL_NZ, sign, GenRegister::immud(0));
+    p->curr.predicate = GEN_PREDICATE_NORMAL;
+    p->NOT(high, high);
+    p->NOT(low, low);
+    p->MOV(tmp, GenRegister::immud(1));
+    addWithCarry(low, low, tmp);
+    p->ADD(high, high, tmp);
+    p->pop();
+  }
+
+  void GenContext::emitI64MULHIInstruction(const SelectionInstruction &insn) {
+    GenRegister dest = ra->genReg(insn.dst(0));
+    GenRegister x = ra->genReg(insn.src(0));
+    GenRegister y = ra->genReg(insn.src(1));
+    GenRegister a = ra->genReg(insn.dst(1));
+    GenRegister b = ra->genReg(insn.dst(2));
+    GenRegister c = ra->genReg(insn.dst(3));
+    GenRegister d = ra->genReg(insn.dst(4));
+    GenRegister e = ra->genReg(insn.dst(5));
+    GenRegister f = ra->genReg(insn.dst(6));
+    GenRegister g = ra->genReg(insn.dst(7));
+    GenRegister h = ra->genReg(insn.dst(8));
+    GenRegister i = ra->genReg(insn.dst(9));
+    GenRegister flagReg = ra->genReg(insn.dst(10));
+    loadTopHalf(a, x);
+    loadBottomHalf(b, x);
+    loadTopHalf(c, y);
+    loadBottomHalf(d, y);
+    if(x.type == GEN_TYPE_UL) {
+      I64FullMult(e, f, g, h, a, b, c, d);
+    } else {
+      I64ABS(e, a, b, i, flagReg);
+      I64ABS(f, c, d, i, flagReg);
+      p->XOR(i, e, f);
+      I64FullMult(e, f, g, h, a, b, c, d);
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_NZ, i, GenRegister::immud(0));
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->NOT(e, e);
+      p->NOT(f, f);
+      p->NOT(g, g);
+      p->NOT(h, h);
+      p->MOV(i, GenRegister::immud(1));
+      addWithCarry(h, h, i);
+      addWithCarry(g, g, i);
+      addWithCarry(f, f, i);
+      p->ADD(e, e, i);
+      p->pop();
+    }
+    storeTopHalf(dest, e);
+    storeBottomHalf(dest, f);
+  }
+
   void GenContext::emitI64HADDInstruction(const SelectionInstruction &insn) {
     GenRegister dest = ra->genReg(insn.dst(0));
     GenRegister x = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 1de0b3d..6fe71c5 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -86,7 +86,10 @@ namespace gbe
 
     void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
     void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
+    void I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg);
+    void I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2);
     void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1);
+    void I64FullMult(GenRegister dst1, GenRegister dst2, GenRegister dst3, GenRegister dst4, GenRegister x_high, GenRegister x_low, GenRegister y_high, GenRegister y_low);
     void saveFlag(GenRegister dest, int flag, int subFlag);
     void UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp);
 
@@ -97,6 +100,7 @@ namespace gbe
     void emitBinaryInstruction(const SelectionInstruction &insn);
     void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     void emitTernaryInstruction(const SelectionInstruction &insn);
+    void emitI64MULHIInstruction(const SelectionInstruction &insn);
     void emitI64HADDInstruction(const SelectionInstruction &insn);
     void emitI64RHADDInstruction(const SelectionInstruction &insn);
     void emitI64ShiftInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index a4ba90b..a420cfc 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -9,6 +9,7 @@ DECL_GEN7_SCHEDULE(I64Shift,        20,        4,        2)
 DECL_GEN7_SCHEDULE(I64HADD,         20,        4,        2)
 DECL_GEN7_SCHEDULE(I64RHADD,        20,        4,        2)
 DECL_GEN7_SCHEDULE(I64ToFloat,      20,        4,        2)
+DECL_GEN7_SCHEDULE(I64MULHI,        20,        4,        2)
 DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
 DECL_GEN7_SCHEDULE(I64Compare,      20,        4,        2)
 DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 06d7382..2791a0e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -471,6 +471,8 @@ namespace gbe
 #undef I64Shift
     /*! Convert 64-bit integer to 32-bit float */
     void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]);
+    /*! High 64bit of x*y */
+    void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]);
     /*! (x+y)>>1 without mod. overflow */
     void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
     /*! (x+y+1)>>1 without mod. overflow */
@@ -1087,6 +1089,15 @@ namespace gbe
       insn->dst(i + 1) = tmp[i];
   }
 
+  void Selection::Opaque::I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64_MUL_HI, 11, 2);
+    insn->dst(0) = dst;
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+    for(int i = 0; i < 10; i ++)
+      insn->dst(i + 1) = tmp[i];
+  }
+
   void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2);
     insn->dst(0) = dst;
@@ -1680,6 +1691,17 @@ namespace gbe
             sel.MUL_HI(dst, src0, src1, temp);
             break;
           }
+        case OP_I64_MUL_HI:
+         {
+          GenRegister temp[10];
+          for(int i=0; i<9; i++) {
+            temp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+            temp[i].type = GEN_TYPE_UD;
+          }
+          temp[9] = sel.selReg(sel.reg(FAMILY_BOOL));
+          sel.I64_MUL_HI(dst, src0, src1, temp);
+          break;
+         }
         case OP_MUL:
           if (type == TYPE_U32 || type == TYPE_S32) {
             sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 73e81de..2422b2b 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -86,8 +86,8 @@ namespace gbe
     const GenRegister &src(uint32_t srcID) const { return regs[dstNum+srcID]; }
     /*! No more than 17 sources (used by typed writes on simd8 mode.) */
     enum { MAX_SRC_NUM = 17 };
-    /*! No more than 4 destinations (used by samples and untyped reads) */
-    enum { MAX_DST_NUM = 4 };
+    /*! No more than 11 destinations (used by samples and untyped reads) */
+    enum { MAX_DST_NUM = 11 };
     /*! State of the instruction (extra fields neeed for the encoding) */
     GenInstructionState state;
     union {
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index c4cf652..86d1756 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -58,6 +58,7 @@ DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
 DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
 DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
 DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
+DECL_SELECTION_IR(I64_MUL_HI, I64MULHIInstruction)
 DECL_SELECTION_IR(FBH, UnaryInstruction)
 DECL_SELECTION_IR(FBL, UnaryInstruction)
 DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 1925d93..8130b8b 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1333,6 +1333,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
   DECL_EMIT_FUNCTION(SUB)
   DECL_EMIT_FUNCTION(SUBSAT)
   DECL_EMIT_FUNCTION(MUL_HI)
+  DECL_EMIT_FUNCTION(I64_MUL_HI)
   DECL_EMIT_FUNCTION(UPSAMPLE_SHORT)
   DECL_EMIT_FUNCTION(UPSAMPLE_INT)
   DECL_EMIT_FUNCTION(UPSAMPLE_LONG)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 2a06f76..f165595 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -519,6 +519,8 @@ namespace ir {
   Instruction SIN(Type type, Register dst, Register src);
   /*! mul_hi.type dst src */
   Instruction MUL_HI(Type type, Register dst, Register src0, Register src1);
+  /*! i64_mul_hi.type dst src */
+  Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1);
   /*! upsample_short.type dst src */
   Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
   /*! upsample_int.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 7ead344..135dc82 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -73,6 +73,7 @@ DECL_INSN(SYNC, SyncInstruction)
 DECL_INSN(LABEL, LabelInstruction)
 DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
 DECL_INSN(MUL_HI, BinaryInstruction)
+DECL_INSN(I64_MUL_HI, BinaryInstruction)
 DECL_INSN(FBH, UnaryInstruction)
 DECL_INSN(FBL, UnaryInstruction)
 DECL_INSN(HADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 5284ce5..3371054 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1878,6 +1878,8 @@ namespace gbe
       }
       case GEN_OCL_MUL_HI_INT:
       case GEN_OCL_MUL_HI_UINT:
+      case GEN_OCL_MUL_HI_I64:
+      case GEN_OCL_MUL_HI_UI64:
       case GEN_OCL_UPSAMPLE_SHORT:
       case GEN_OCL_UPSAMPLE_INT:
       case GEN_OCL_UPSAMPLE_LONG:
@@ -2255,6 +2257,22 @@ namespace gbe
             ctx.MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1);
             break;
           }
+          case GEN_OCL_MUL_HI_I64:
+          {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.I64_MUL_HI(getType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
+          case GEN_OCL_MUL_HI_UI64:
+          {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.I64_MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
           case GEN_OCL_UPSAMPLE_SHORT:
           {
             GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 58df2b0..7b5a2d3 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -128,6 +128,8 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
 // integer built-in functions
 DECL_LLVM_GEN_FUNCTION(MUL_HI_INT, _Z16__gen_ocl_mul_hiii)
 DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj)
+DECL_LLVM_GEN_FUNCTION(MUL_HI_I64, _Z16__gen_ocl_mul_hill) 
+DECL_LLVM_GEN_FUNCTION(MUL_HI_UI64, _Z16__gen_ocl_mul_himm)
 DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
 DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
 DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 9b76ba1..f21ba4f 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -343,6 +343,8 @@ INLINE_OVERLOADABLE ulong clz(ulong x) {
 
 OVERLOADABLE int __gen_ocl_mul_hi(int x, int y);
 OVERLOADABLE uint __gen_ocl_mul_hi(uint x, uint y);
+OVERLOADABLE long __gen_ocl_mul_hi(long x, long y);
+OVERLOADABLE ulong __gen_ocl_mul_hi(ulong x, ulong y);
 INLINE_OVERLOADABLE char mul_hi(char x, char y) { return (x * y) >> 8; }
 INLINE_OVERLOADABLE uchar mul_hi(uchar x, uchar y) { return (x * y) >> 8; }
 INLINE_OVERLOADABLE short mul_hi(short x, short y) { return (x * y) >> 16; }
@@ -350,10 +352,10 @@ INLINE_OVERLOADABLE ushort mul_hi(ushort x, ushort y) { return (x * y) >> 16; }
 INLINE_OVERLOADABLE int mul_hi(int x, int y) { return __gen_ocl_mul_hi(x, y); }
 INLINE_OVERLOADABLE uint mul_hi(uint x, uint y) { return __gen_ocl_mul_hi(x, y); }
 INLINE_OVERLOADABLE long mul_hi(long x, long y) {
-  return 0;
+  return __gen_ocl_mul_hi(x, y);
 }
 INLINE_OVERLOADABLE ulong mul_hi(ulong x, ulong y) {
-  return 0;
+  return __gen_ocl_mul_hi(x, y);
 }
 
 #define DEF(type) INLINE_OVERLOADABLE type mad_hi(type a, type b, type c) { return mul_hi(a, b) + c; }
--
1.8.1.2

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list