[Beignet] [PATCH 1/2] support zero bit counting

Zhigang Gong zhigang.gong at linux.intel.com
Wed Jun 26 00:15:42 PDT 2013


Thanks for the patch and review comments.
Pushed.

On Wed, Jun 26, 2013 at 02:37:11PM +0800, He Junyan wrote:
> Reviewed.
> For piglit, char, short and int are OK.
> long and ulong will crash, but that seems to be beyond the 1.1 spec.
> 
> 
> On 06/26/2013 01:11 PM, Homer Hsing wrote:
> >support the OpenCL built-in function "clz", which returns the number of leading zero bits
> >
> >add the GEN GPU "fbh" and "fbl" instructions, used for counting zero bits
> >
> >Signed-off-by: Homer Hsing <homer.xing at intel.com>
> >---
> >  backend/src/backend/gen/gen_mesa_disasm.c  |  2 +
> >  backend/src/backend/gen_context.cpp        |  2 +
> >  backend/src/backend/gen_defs.hpp           |  2 +
> >  backend/src/backend/gen_encoder.cpp        |  2 +
> >  backend/src/backend/gen_encoder.hpp        |  2 +
> >  backend/src/backend/gen_insn_selection.cpp | 14 ++++++-
> >  backend/src/backend/gen_insn_selection.hxx |  2 +
> >  backend/src/ir/instruction.cpp             |  2 +
> >  backend/src/ir/instruction.hpp             |  4 ++
> >  backend/src/ir/instruction.hxx             |  2 +
> >  backend/src/llvm/llvm_gen_backend.cpp      |  4 ++
> >  backend/src/llvm/llvm_gen_ocl_function.hxx |  4 ++
> >  backend/src/ocl_stdlib.h                   | 66 +++++++++++++++++++++++++++++-
> >  13 files changed, 105 insertions(+), 3 deletions(-)
> >
> >diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
> >index 17fc845..9a4e283 100644
> >--- a/backend/src/backend/gen/gen_mesa_disasm.c
> >+++ b/backend/src/backend/gen/gen_mesa_disasm.c
> >@@ -63,6 +63,8 @@ static const struct {
> >    [GEN_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
> >    [GEN_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
> >    [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
> >+  [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
> >+  [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
> >    [GEN_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
> >    [GEN_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
> >diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> >index 53ba73c..93d3932 100644
> >--- a/backend/src/backend/gen_context.cpp
> >+++ b/backend/src/backend/gen_context.cpp
> >@@ -139,6 +139,8 @@ namespace gbe
> >      const GenRegister src = ra->genReg(insn.src(0));
> >      switch (insn.opcode) {
> >        case SEL_OP_MOV: p->MOV(dst, src); break;
> >+      case SEL_OP_FBH: p->FBH(dst, src); break;
> >+      case SEL_OP_FBL: p->FBL(dst, src); break;
> >        case SEL_OP_NOT: p->NOT(dst, src); break;
> >        case SEL_OP_RNDD: p->RNDD(dst, src); break;
> >        case SEL_OP_RNDU: p->RNDU(dst, src); break;
> >diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> >index 9d8db5b..d1ce6b2 100644
> >--- a/backend/src/backend/gen_defs.hpp
> >+++ b/backend/src/backend/gen_defs.hpp
> >@@ -154,6 +154,8 @@ enum opcode {
> >    GEN_OPCODE_MAC = 72,
> >    GEN_OPCODE_MACH = 73,
> >    GEN_OPCODE_LZD = 74,
> >+  GEN_OPCODE_FBH = 75,
> >+  GEN_OPCODE_FBL = 76,
> >    GEN_OPCODE_SAD2 = 80,
> >    GEN_OPCODE_SADA2 = 81,
> >    GEN_OPCODE_DP4 = 84,
> >diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> >index ae981b2..e96678b 100644
> >--- a/backend/src/backend/gen_encoder.cpp
> >+++ b/backend/src/backend/gen_encoder.cpp
> >@@ -824,6 +824,8 @@ namespace gbe
> >    ALU1(RNDE)
> >    ALU1(RNDD)
> >    ALU1(RNDU)
> >+  ALU1(FBH)
> >+  ALU1(FBL)
> >    ALU2(SEL)
> >    ALU1(NOT)
> >    ALU2(AND)
> >diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
> >index 1a5dcf9..88a3e77 100644
> >--- a/backend/src/backend/gen_encoder.hpp
> >+++ b/backend/src/backend/gen_encoder.hpp
> >@@ -90,6 +90,8 @@ namespace gbe
> >  #define ALU2(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1);
> >  #define ALU3(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1, GenRegister src2);
> >      ALU1(MOV)
> >+    ALU1(FBH)
> >+    ALU1(FBL)
> >      ALU1(RNDZ)
> >      ALU1(RNDE)
> >      ALU1(RNDD)
> >diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> >index 1e5f514..8fb2a80 100644
> >--- a/backend/src/backend/gen_insn_selection.cpp
> >+++ b/backend/src/backend/gen_insn_selection.cpp
> >@@ -431,6 +431,8 @@ namespace gbe
> >      ALU2(MACH)
> >      ALU1(LZD)
> >      ALU3(MAD)
> >+    ALU1(FBH)
> >+    ALU1(FBL)
> >  #undef ALU1
> >  #undef ALU2
> >  #undef ALU3
> >@@ -1211,10 +1213,16 @@ namespace gbe
> >    /*! Unary instruction patterns */
> >    DECL_PATTERN(UnaryInstruction)
> >    {
> >+    static ir::Type getType(const ir::Opcode opcode) {
> >+      if (opcode == ir::OP_FBH || opcode == ir::OP_FBL)
> >+        return ir::TYPE_U32;
> >+      return ir::TYPE_FLOAT;
> >+    }
> >+
> >      INLINE bool emitOne(Selection::Opaque &sel, const ir::UnaryInstruction &insn) const {
> >        const ir::Opcode opcode = insn.getOpcode();
> >-      const GenRegister dst = sel.selReg(insn.getDst(0));
> >-      const GenRegister src = sel.selReg(insn.getSrc(0));
> >+      const GenRegister dst = sel.selReg(insn.getDst(0), getType(opcode));
> >+      const GenRegister src = sel.selReg(insn.getSrc(0), getType(opcode));
> >        switch (opcode) {
> >          case ir::OP_ABS: sel.MOV(dst, GenRegister::abs(src)); break;
> >          case ir::OP_MOV:
> >@@ -1228,6 +1236,8 @@ namespace gbe
> >          case ir::OP_RNDE: sel.RNDE(dst, src); break;
> >          case ir::OP_RNDU: sel.RNDU(dst, src); break;
> >          case ir::OP_RNDZ: sel.RNDZ(dst, src); break;
> >+        case ir::OP_FBH: sel.FBH(dst, src); break;
> >+        case ir::OP_FBL: sel.FBL(dst, src); break;
> >          case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
> >          case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
> >          case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
> >diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> >index 4b5525b..cc2be08 100644
> >--- a/backend/src/backend/gen_insn_selection.hxx
> >+++ b/backend/src/backend/gen_insn_selection.hxx
> >@@ -41,3 +41,5 @@ DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
> >  DECL_SELECTION_IR(SAMPLE, SampleInstruction)
> >  DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
> >  DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
> >+DECL_SELECTION_IR(FBH, UnaryInstruction)
> >+DECL_SELECTION_IR(FBL, UnaryInstruction)
> >diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> >index a57c204..67a4c12 100644
> >--- a/backend/src/ir/instruction.cpp
> >+++ b/backend/src/ir/instruction.cpp
> >@@ -1239,6 +1239,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
> >    }
> >    DECL_EMIT_FUNCTION(MOV)
> >+  DECL_EMIT_FUNCTION(FBH)
> >+  DECL_EMIT_FUNCTION(FBL)
> >    DECL_EMIT_FUNCTION(COS)
> >    DECL_EMIT_FUNCTION(SIN)
> >    DECL_EMIT_FUNCTION(LOG)
> >diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> >index 7662b6a..0f3bd34 100644
> >--- a/backend/src/ir/instruction.hpp
> >+++ b/backend/src/ir/instruction.hpp
> >@@ -485,6 +485,10 @@ namespace ir {
> >    Instruction COS(Type type, Register dst, Register src);
> >    /*! sin.type dst src */
> >    Instruction SIN(Type type, Register dst, Register src);
> >+  /*! fbh.type dst src */
> >+  Instruction FBH(Type type, Register dst, Register src);
> >+  /*! fbl.type dst src */
> >+  Instruction FBL(Type type, Register dst, Register src);
> >    /*! tan.type dst src */
> >    Instruction RCP(Type type, Register dst, Register src);
> >    /*! abs.type dst src */
> >diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> >index 5cf37d2..acfb45a 100644
> >--- a/backend/src/ir/instruction.hxx
> >+++ b/backend/src/ir/instruction.hxx
> >@@ -71,3 +71,5 @@ DECL_INSN(SAMPLE, SampleInstruction)
> >  DECL_INSN(SYNC, SyncInstruction)
> >  DECL_INSN(LABEL, LabelInstruction)
> >  DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
> >+DECL_INSN(FBH, UnaryInstruction)
> >+DECL_INSN(FBL, UnaryInstruction)
> >diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> >index 5b7754c..08500ba 100644
> >--- a/backend/src/llvm/llvm_gen_backend.cpp
> >+++ b/backend/src/llvm/llvm_gen_backend.cpp
> >@@ -1678,6 +1678,8 @@ namespace gbe
> >          regTranslator.newScalarProxy(ir::ocl::goffset2, dst); break;
> >        case GEN_OCL_GET_WORK_DIM:
> >          regTranslator.newScalarProxy(ir::ocl::workdim, dst); break;
> >+      case GEN_OCL_FBH:
> >+      case GEN_OCL_FBL:
> >        case GEN_OCL_COS:
> >        case GEN_OCL_SIN:
> >        case GEN_OCL_SQR:
> >@@ -1842,6 +1844,8 @@ namespace gbe
> >              ctx.POW(ir::TYPE_FLOAT, dst, src0, src1);
> >              break;
> >            }
> >+          case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break;
> >+          case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break;
> >            case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
> >            case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
> >            case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
> >diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> >index 6cd7298..fe19844 100644
> >--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> >+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> >@@ -96,3 +96,7 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_CHAR, _Z12ocl_usub_sathh)
> >  DECL_LLVM_GEN_FUNCTION(USUB_SAT_SHORT, _Z12ocl_usub_sattt)
> >  DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj)
> >  DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
> >+
> >+// integer built-in functions
> >+DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
> >+DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
> >diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
> >index 81a0193..3b191ab 100644
> >--- a/backend/src/ocl_stdlib.h
> >+++ b/backend/src/ocl_stdlib.h
> >@@ -4290,7 +4290,71 @@ DEC(16);
> >  #undef DEC4
> >  #undef DEC8
> >  #undef DEC16
> >-
> >+/////////////////////////////////////////////////////////////////////////////
> >+// Integer built-in functions
> >+/////////////////////////////////////////////////////////////////////////////
> >+PURE CONST uint __gen_ocl_fbh(uint);
> >+PURE CONST uint __gen_ocl_fbl(uint);
> >+
> >+INLINE_OVERLOADABLE char clz(char x) {
> >+  if (x < 0)
> >+    return 0;
> >+  if (x == 0)
> >+    return 8;
> >+  return __gen_ocl_fbl(x) - 24;
> >+}
> >+
> >+INLINE_OVERLOADABLE uchar clz(uchar x) {
> >+  if (x == 0)
> >+    return 8;
> >+  return __gen_ocl_fbl(x) - 24;
> >+}
> >+
> >+INLINE_OVERLOADABLE short clz(short x) {
> >+  if (x < 0)
> >+    return 0;
> >+  if (x == 0)
> >+    return 16;
> >+  return __gen_ocl_fbh(x) - 16;
> >+}
> >+
> >+INLINE_OVERLOADABLE ushort clz(ushort x) {
> >+  if (x == 0)
> >+    return 16;
> >+  return __gen_ocl_fbh(x) - 16;
> >+}
> >+
> >+INLINE_OVERLOADABLE int clz(int x) {
> >+  if (x < 0)
> >+    return 0;
> >+  if (x == 0)
> >+    return 32;
> >+  return __gen_ocl_fbh(x);
> >+}
> >+
> >+INLINE_OVERLOADABLE uint clz(uint x) {
> >+  if (x == 0)
> >+    return 32;
> >+  return __gen_ocl_fbh(x);
> >+}
> >+
> >+#define DEC2(type) INLINE_OVERLOADABLE type##2 clz(type##2 a) { return (clz(a.s0), clz(a.s1)); }
> >+#define DEC3(type) INLINE_OVERLOADABLE type##3 clz(type##3 a) { return (clz(a.s0), clz(a.s1), clz(a.s2)); }
> >+#define DEC4(type) INLINE_OVERLOADABLE type##4 clz(type##4 a) { return (clz(a.s0), clz(a.s1), clz(a.s2), clz(a.s3)); }
> >+#define DEC8(type) INLINE_OVERLOADABLE type##8 clz(type##8 a) { return (clz(a.s0), clz(a.s1), clz(a.s2), clz(a.s3), clz(a.s4), clz(a.s5), clz(a.s6), clz(a.s7)); }
> >+#define DEC16(type) INLINE_OVERLOADABLE type##16 clz(type##16 a) { return (clz(a.s0), clz(a.s1), clz(a.s2), clz(a.s3), clz(a.s4), clz(a.s5), clz(a.s6), clz(a.s7), clz(a.s8), clz(a.s9), clz(a.sa), clz(a.sb), clz(a.sc), clz(a.sd), clz(a.se), clz(a.sf)); }
> >+#define DEC(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort); DEC##n(int); DEC##n(uint)
> >+DEC(2)
> >+DEC(3)
> >+DEC(4)
> >+DEC(8)
> >+DEC(16)
> >+#undef DEC
> >+#undef DEC2
> >+#undef DEC3
> >+#undef DEC4
> >+#undef DEC8
> >+#undef DEC16
> >  /////////////////////////////////////////////////////////////////////////////
> >  // Work Items functions (see 6.11.1 of OCL 1.1 spec)
> >  /////////////////////////////////////////////////////////////////////////////
> 
> 
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list