[Beignet] [PATCH OCL2.0 1/2] Backend: Add built-in ctz function

Pan Xiuli xiuli.pan at intel.com
Thu Dec 3 18:31:04 PST 2015


Gen doesn't have a tailing zero detection function. Use bit field
reverse to reverse the interger first and leading zero detection
to get the number of tailing zeros. Also add some workaroud for
unsupport short and char type to get expected result.

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 backend/src/backend/gen/gen_mesa_disasm.c   |  1 +
 backend/src/backend/gen_context.cpp         |  1 +
 backend/src/backend/gen_defs.hpp            |  1 +
 backend/src/backend/gen_encoder.cpp         |  1 +
 backend/src/backend/gen_encoder.hpp         |  1 +
 backend/src/backend/gen_insn_selection.cpp  |  4 ++-
 backend/src/backend/gen_insn_selection.hxx  |  1 +
 backend/src/ir/instruction.cpp              |  1 +
 backend/src/ir/instruction.hpp              |  2 ++
 backend/src/ir/instruction.hxx              |  1 +
 backend/src/libocl/CMakeLists.txt           |  2 +-
 backend/src/libocl/script/ocl_integer.def   |  1 +
 backend/src/libocl/tmpl/ocl_integer.tmpl.cl | 12 +++++++
 backend/src/libocl/tmpl/ocl_integer.tmpl.h  | 18 +++++++++++
 backend/src/llvm/llvm_gen_backend.cpp       | 50 +++++++++++++++++++++++++++++
 15 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 5b71cfa..13cfb36 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -70,6 +70,7 @@ static const struct {
   [GEN_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
+  [GEN_OPCODE_BFREV] = { .name = "bfrev", .nsrc = 1, .ndst = 1 },
 
   [GEN_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 02d0bfd..b2586d1 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -268,6 +268,7 @@ namespace gbe
       case SEL_OP_F16TO32: p->F16TO32(dst, src); break;
       case SEL_OP_F32TO16: p->F32TO16(dst, src); break;
       case SEL_OP_LOAD_INT64_IMM: p->LOAD_INT64_IMM(dst, src); break;
+      case SEL_OP_BFREV: p->BFREV(dst, src); break;
       case SEL_OP_CONVI64_TO_I:
        {
         p->MOV(dst, src.bottom_half());
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index fb43718..3a5ec0b 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -129,6 +129,7 @@ enum opcode {
   GEN_OPCODE_CMPN = 17,
   GEN_OPCODE_F32TO16 = 19,
   GEN_OPCODE_F16TO32 = 20,
+  GEN_OPCODE_BFREV = 23,
   GEN_OPCODE_JMPI = 32,
   GEN_OPCODE_BRD = 33,
   GEN_OPCODE_IF = 34,
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 7161d49..5bd40da 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -854,6 +854,7 @@ namespace gbe
   ALU2(PLN)
   ALU2(MACH)
   ALU3(MAD)
+  ALU1(BFREV)
  // ALU2(BRC)
  // ALU1(ENDIF)
  //  ALU1(IF)
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index f8d81c9..978fb20 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -128,6 +128,7 @@ namespace gbe
     //ALU2(MOV_DF);
     ALU2(BRC)
     ALU1(BRD)
+    ALU1(BFREV)
 #undef ALU1
 #undef ALU2
 #undef ALU2_MOD
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 63e36ef..f254e39 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -567,6 +567,7 @@ namespace gbe
     I64Shift(I64SHL)
     I64Shift(I64SHR)
     I64Shift(I64ASR)
+    ALU1(BFREV)
 #undef ALU1
 #undef ALU1WithTemp
 #undef ALU2
@@ -2588,7 +2589,7 @@ namespace gbe
         return isSrc ? insnType : ir::TYPE_U32;
       if (insnType == ir::TYPE_S64 || insnType == ir::TYPE_U64 || insnType == ir::TYPE_S8 || insnType == ir::TYPE_U8)
         return insnType;
-      if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == ir::OP_LZD)
+      if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == ir::OP_LZD || opcode == ir::OP_BFREV)
         return ir::TYPE_U32;
       if (opcode == ir::OP_SIMD_ANY || opcode == ir::OP_SIMD_ALL)
         return ir::TYPE_S32;
@@ -2643,6 +2644,7 @@ namespace gbe
           case ir::OP_FBL: sel.FBL(dst, src); break;
           case ir::OP_CBIT: sel.CBIT(dst, src); break;
           case ir::OP_LZD: sel.LZD(dst, src); break;
+          case ir::OP_BFREV: sel.BFREV(dst, src); break;
           case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
           case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
           case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 1fbcb1a..c7343b8 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -97,3 +97,4 @@ DECL_SELECTION_IR(ELSE, UnaryInstruction)
 DECL_SELECTION_IR(READ_ARF, UnaryInstruction)
 DECL_SELECTION_IR(WHILE, UnaryInstruction)
 DECL_SELECTION_IR(F64DIV, F64DIVInstruction)
+DECL_SELECTION_IR(BFREV, UnaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index d403cdd..459ddd7 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1959,6 +1959,7 @@ DECL_MEM_FN(MemInstruction, void,     setBtiReg(Register reg), setBtiReg(reg))
   DECL_EMIT_FUNCTION(RNDE)
   DECL_EMIT_FUNCTION(RNDU)
   DECL_EMIT_FUNCTION(RNDZ)
+  DECL_EMIT_FUNCTION(BFREV)
 
 #undef DECL_EMIT_FUNCTION
 
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 0fc7a44..52b5800 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -685,6 +685,8 @@ namespace ir {
   Instruction RNDZ(Type type, Register dst, Register src);
   /*! bswap.type dst src */
   Instruction BSWAP(Type type, Register dst, Register src);
+  /*! bfrev.type dst src */
+  Instruction BFREV(Type type, Register dst, Register src);
   /*! pow.type dst src0 src1 */
   Instruction POW(Type type, Register dst, Register src0, Register src1);
   /*! mul.type dst src0 src1 */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 080647d..3474c9d 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -107,3 +107,4 @@ DECL_INSN(ENDIF, BranchInstruction)
 DECL_INSN(ELSE, BranchInstruction)
 DECL_INSN(WHILE, BranchInstruction)
 DECL_INSN(WORKGROUP, WorkGroupInstruction)
+DECL_INSN(BFREV, UnaryInstruction)
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt
index e2871b2..f2079dd 100644
--- a/backend/src/libocl/CMakeLists.txt
+++ b/backend/src/libocl/CMakeLists.txt
@@ -182,7 +182,7 @@ MACRO(ADD_LL_TO_BC_TARGET M)
 	)
 ENDMACRO(ADD_LL_TO_BC_TARGET)
 
-SET (OCL_LL_MODULES ocl_barrier ocl_clz)
+SET (OCL_LL_MODULES ocl_barrier ocl_clz ocl_ctz)
 FOREACH(f ${OCL_LL_MODULES})
     COPY_THE_LL(${f})
     ADD_LL_TO_BC_TARGET(${f})
diff --git a/backend/src/libocl/script/ocl_integer.def b/backend/src/libocl/script/ocl_integer.def
index c35c242..5e41c34 100644
--- a/backend/src/libocl/script/ocl_integer.def
+++ b/backend/src/libocl/script/ocl_integer.def
@@ -7,6 +7,7 @@ gentype rhadd (gentype x, gentype y)
 gentype clamp (gentype x, gentype minval, gentype maxval)
 gentype clamp (gentype x, sgentype minval, sgentype maxval)
 gentype clz (gentype x)
+gentype ctz (gentype x)
 gentype mad_hi (gentype a, gentype b, gentype c)
 gentype mad_sat (gentype a, gentype b, gentype c)
 gentype max (gentype x,  gentype y)
diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
index 12408eb..292cc63 100644
--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
@@ -40,6 +40,18 @@ SDEF(long, s, 64);
 SDEF(ulong, u, 64);
 #undef SDEF
 
+#define SDEF(TYPE, TYPE_NAME, SIZE)        \
+OVERLOADABLE TYPE ctz(TYPE x){ return ctz_##TYPE_NAME##SIZE(x);}
+SDEF(char, s, 8);
+SDEF(uchar, u, 8);
+SDEF(short, s, 16);
+SDEF(ushort, u, 16);
+SDEF(int, s, 32);
+SDEF(uint, u, 32);
+SDEF(long, s, 64);
+SDEF(ulong, u, 64);
+#undef SDEF
+
 #define SDEF(TYPE)        \
 OVERLOADABLE TYPE popcount(TYPE x){ return __gen_ocl_cbit(x);}
 SDEF(char);
diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.h b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
index 4b3b5ae..ac1800b 100644
--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
@@ -54,6 +54,24 @@ uint   clz_u32(uint);
 long   clz_s64(long);
 ulong  clz_u64(ulong);
 
+OVERLOADABLE char ctz(char x);
+OVERLOADABLE uchar ctz(uchar x);
+OVERLOADABLE short ctz(short x);
+OVERLOADABLE ushort ctz(ushort x);
+OVERLOADABLE int ctz(int x);
+OVERLOADABLE uint ctz(uint x);
+OVERLOADABLE long ctz(long x);
+OVERLOADABLE ulong ctz(ulong x);
+
+char   ctz_s8(char);
+uchar  ctz_u8(uchar);
+short  ctz_s16(short);
+ushort ctz_u16(ushort);
+int    ctz_s32(int);
+uint   ctz_u32(uint);
+long   ctz_s64(long);
+ulong  ctz_u64(ulong);
+
 OVERLOADABLE char popcount(char x);
 OVERLOADABLE uchar popcount(uchar x);
 OVERLOADABLE short popcount(short x);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 85ca4c1..53394fe 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3384,6 +3384,7 @@ namespace gbe
             this->newRegister(&I);
           break;
           case Intrinsic::ctlz:
+          case Intrinsic::cttz:
           case Intrinsic::bswap:
             this->newRegister(&I);
           break;
@@ -3862,6 +3863,55 @@ namespace gbe
             }
           }
           break;
+          case Intrinsic::cttz:
+          {
+            Type *llvmDstType = I.getType();
+            ir::Type dstType = getType(ctx, llvmDstType);
+            Type *llvmSrcType = I.getOperand(0)->getType();
+            ir::Type srcType = getUnsignedType(ctx, llvmSrcType);
+
+            //the llvm.ctlz.i64 is lowered to two llvm.cttz.i32 call in ocl_ctz.ll
+            GBE_ASSERT(srcType != ir::TYPE_U64);
+
+            const ir::Register dst = this->getRegister(&I);
+            const ir::Register src = this->getRegister(I.getOperand(0));
+
+            uint32_t imm_value = 0;
+            if(srcType == ir::TYPE_U16) {
+              imm_value = 0xFFFF0000;
+            }else if(srcType == ir::TYPE_U8) {
+              imm_value = 0xFFFFFF00;
+            }
+            if(srcType == ir::TYPE_U16 || srcType == ir::TYPE_U8) {
+              ir::ImmediateIndex imm;
+              ir::Type tmpType = ir::TYPE_S32;
+              ir::Type revType = ir::TYPE_U32;
+              imm = ctx.newIntegerImmediate(imm_value, revType);
+              const ir::RegisterFamily family = getFamily(revType);
+              const ir::Register immReg = ctx.reg(family);
+              ctx.LOADI(ir::TYPE_U32, immReg, imm);
+
+              ir::Register tmp0 = ctx.reg(getFamily(tmpType));
+              ir::Register tmp1 = ctx.reg(getFamily(revType));
+              ir::Register tmp2 = ctx.reg(getFamily(revType));
+              ir::Register revTmp = ctx.reg(getFamily(revType));
+
+              ctx.CVT(tmpType, srcType, tmp0, src);
+              //gen does not have 'tzd', so reverse first
+              ctx.ADD(revType, tmp1, tmp0, immReg);
+              ctx.ALU1(ir::OP_BFREV, revType, revTmp, tmp1);
+              ctx.ALU1(ir::OP_LZD, tmpType, tmp2, revTmp);
+              ctx.CVT(dstType, tmpType, dst, tmp2);
+            }
+            else
+            {
+              ir::Type revType = ir::TYPE_U32;
+              ir::Register revTmp = ctx.reg(getFamily(revType));
+              ctx.ALU1(ir::OP_BFREV, revType, revTmp, src);
+              ctx.ALU1(ir::OP_LZD, dstType, dst, revTmp);
+            }
+          }
+          break;
           case Intrinsic::fma:
           {
             ir::Type srcType = getType(ctx, I.getType());
-- 
2.1.4



More information about the Beignet mailing list