[Beignet] [PATCH 1/2] add opencl-1.2 builtin function popcount.

xionghu.luo at intel.com xionghu.luo at intel.com
Thu Oct 9 20:05:04 PDT 2014


From: Luo <xionghu.luo at intel.com>

The popcount function returns the number of non-zero bits in its input.
Use the GEN instruction cbit (Count Bits Set) to implement it.

Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 backend/src/backend/gen/gen_mesa_disasm.c   |    1 +
 backend/src/backend/gen_context.cpp         |    1 +
 backend/src/backend/gen_defs.hpp            |    1 +
 backend/src/backend/gen_encoder.cpp         |    1 +
 backend/src/backend/gen_encoder.hpp         |    1 +
 backend/src/backend/gen_insn_selection.cpp  |    4 +++-
 backend/src/backend/gen_insn_selection.hxx  |    1 +
 backend/src/ir/instruction.cpp              |    1 +
 backend/src/ir/instruction.hpp              |    2 ++
 backend/src/ir/instruction.hxx              |    1 +
 backend/src/libocl/script/ocl_integer.def   |    3 +--
 backend/src/libocl/tmpl/ocl_integer.tmpl.cl |   30 +++++++++++++++++++++++++++
 backend/src/libocl/tmpl/ocl_integer.tmpl.h  |    9 ++++++++
 backend/src/llvm/llvm_gen_backend.cpp       |    2 ++
 backend/src/llvm/llvm_gen_ocl_function.hxx  |    1 +
 15 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 266b501..330dffb 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -66,6 +66,7 @@ static const struct {
   [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
+  [GEN_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
   [GEN_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
 
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 8844233..c37d2ee 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -205,6 +205,7 @@ namespace gbe
       case SEL_OP_READ_ARF: p->MOV(dst, src); break;
       case SEL_OP_FBH: p->FBH(dst, src); break;
       case SEL_OP_FBL: p->FBL(dst, src); break;
+      case SEL_OP_CBIT: p->CBIT(dst, src); break;
       case SEL_OP_NOT: p->NOT(dst, src); break;
       case SEL_OP_RNDD: p->RNDD(dst, src); break;
       case SEL_OP_RNDU: p->RNDU(dst, src); break;
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 19aad95..3faacde 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -159,6 +159,7 @@ enum opcode {
   GEN_OPCODE_LZD = 74,
   GEN_OPCODE_FBH = 75,
   GEN_OPCODE_FBL = 76,
+  GEN_OPCODE_CBIT = 77,
   GEN_OPCODE_ADDC = 78,
   GEN_OPCODE_SUBB = 79,
   GEN_OPCODE_SAD2 = 80,
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 295e11d..bd6204a 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -901,6 +901,7 @@ namespace gbe
   ALU1(RNDU)
   ALU1(FBH)
   ALU1(FBL)
+  ALU1(CBIT)
   ALU1(F16TO32)
   ALU1(F32TO16)
   ALU2(SEL)
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 2c999ce..3f486d7 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -101,6 +101,7 @@ namespace gbe
     ALU1(MOV)
     ALU1(FBH)
     ALU1(FBL)
+    ALU1(CBIT)
     ALU2(SUBB)
     ALU2(UPSAMPLE_SHORT)
     ALU2(UPSAMPLE_INT)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e3ee35d..e05a0a6 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -493,6 +493,7 @@ namespace gbe
     ALU2WithTemp(MUL_HI)
     ALU1(FBH)
     ALU1(FBL)
+    ALU1(CBIT)
     ALU2WithTemp(HADD)
     ALU2WithTemp(RHADD)
     ALU2(UPSAMPLE_SHORT)
@@ -1861,7 +1862,7 @@ namespace gbe
     static ir::Type getType(const ir::Opcode opcode, const ir::Type insnType) {
       if (insnType == ir::TYPE_S64 || insnType == ir::TYPE_U64 || insnType == ir::TYPE_S8 || insnType == ir::TYPE_U8)
         return insnType;
-      if (opcode == ir::OP_FBH || opcode == ir::OP_FBL)
+      if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == ir::OP_CBIT)
         return ir::TYPE_U32;
       if (insnType == ir::TYPE_S16 || insnType == ir::TYPE_U16)
         return insnType;
@@ -1915,6 +1916,7 @@ namespace gbe
           case ir::OP_RNDZ: sel.RNDZ(dst, src); break;
           case ir::OP_FBH: sel.FBH(dst, src); break;
           case ir::OP_FBL: sel.FBL(dst, src); break;
+          case ir::OP_CBIT: sel.CBIT(dst, src); break;
           case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
           case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
           case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 7511b84..d80dc58 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -68,6 +68,7 @@ DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
 DECL_SELECTION_IR(I64_MUL_HI, I64MULHIInstruction)
 DECL_SELECTION_IR(FBH, UnaryInstruction)
 DECL_SELECTION_IR(FBL, UnaryInstruction)
+DECL_SELECTION_IR(CBIT, UnaryInstruction)
 DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
 DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
 DECL_SELECTION_IR(I64HADD, I64HADDInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index e4e30ed..6c37f29 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1594,6 +1594,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
   DECL_EMIT_FUNCTION(MOV)
   DECL_EMIT_FUNCTION(FBH)
   DECL_EMIT_FUNCTION(FBL)
+  DECL_EMIT_FUNCTION(CBIT)
   DECL_EMIT_FUNCTION(COS)
   DECL_EMIT_FUNCTION(SIN)
   DECL_EMIT_FUNCTION(LOG)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 6807615..1c31171 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -585,6 +585,8 @@ namespace ir {
   Instruction FBH(Type type, Register dst, Register src);
   /*! fbl.type dst src */
   Instruction FBL(Type type, Register dst, Register src);
+  /*! cbit.type dst src */
+  Instruction CBIT(Type type, Register dst, Register src);
   /*! hadd.type dst src */
   Instruction HADD(Type type, Register dst, Register src0, Register src1);
   /*! rhadd.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 5fed286..9a89069 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -86,6 +86,7 @@ DECL_INSN(MUL_HI, BinaryInstruction)
 DECL_INSN(I64_MUL_HI, BinaryInstruction)
 DECL_INSN(FBH, UnaryInstruction)
 DECL_INSN(FBL, UnaryInstruction)
+DECL_INSN(CBIT, UnaryInstruction)
 DECL_INSN(HADD, BinaryInstruction)
 DECL_INSN(RHADD, BinaryInstruction)
 DECL_INSN(I64HADD, BinaryInstruction)
diff --git a/backend/src/libocl/script/ocl_integer.def b/backend/src/libocl/script/ocl_integer.def
index ec9177a..c35c242 100644
--- a/backend/src/libocl/script/ocl_integer.def
+++ b/backend/src/libocl/script/ocl_integer.def
@@ -23,8 +23,7 @@ uintn upsample (ushortn hi, ushortn lo)
 longn upsample (intn hi, uintn lo)
 ulongn upsample (uintn hi, uintn lo)
 
-# XXX not implemented
-#gentype popcount (gentype x)
+gentype popcount (gentype x)
 
 ##fast_integer
 gentype mad24 (gentype x, gentype y, gentype z)
diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
index 9230604..375a40f 100644
--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
@@ -19,6 +19,7 @@
 
 PURE CONST uint __gen_ocl_fbh(uint);
 PURE CONST uint __gen_ocl_fbl(uint);
+PURE CONST uint __gen_ocl_cbit(uint);
 
 OVERLOADABLE char clz(char x) {
   if (x < 0)
@@ -86,6 +87,35 @@ OVERLOADABLE ulong clz(ulong x) {
   return v;
 }
 
+OVERLOADABLE char popcount(char x) { // popcount overload for char, built on __gen_ocl_cbit(uint)
+  return x == 0 ? 0 : x < 0?__gen_ocl_cbit(x) - 24 : __gen_ocl_cbit(x); // negative x: 24 is subtracted, presumably the set bits added when char is sign-extended to uint
+}
+OVERLOADABLE short popcount(short x) { // popcount overload for short, built on __gen_ocl_cbit(uint)
+  return x == 0 ? 0 : x < 0?__gen_ocl_cbit(x) - 16 : __gen_ocl_cbit(x); // negative x: 16 is subtracted, presumably the set bits added when short is sign-extended to uint
+}
+#define SDEF(TYPE)        \
+OVERLOADABLE TYPE popcount(TYPE x){ return x == 0? 0:__gen_ocl_cbit(x);}
+SDEF(uchar); // each SDEF(T) expands to a popcount(T) overload that returns 0 for 0, else __gen_ocl_cbit(x)
+SDEF(ushort);
+SDEF(int);
+SDEF(uint);
+#undef SDEF // SDEF is defined again below for the sat helpers
+
+OVERLOADABLE long popcount(long x) { // popcount overload for long: sums the counts of its two int halves
+  union { int i[2]; long x; } u; // overlay the long with two ints so each half can be read separately
+  u.x = x;
+  uint v = popcount(u.i[1]); // count one half via the int overload
+  v += popcount(u.i[0]); // add the count of the other half
+  return v;
+}
+
+OVERLOADABLE ulong popcount(ulong x) { // popcount overload for ulong: sums the counts of its two uint halves
+  union { uint i[2]; ulong x; } u; // overlay the ulong with two uints so each half can be read separately
+  u.x = x;
+  uint v = popcount(u.i[1]); // count one half via the uint overload
+  v += popcount(u.i[0]); // add the count of the other half
+  return v;
+}
 
 // sat
 #define SDEF(TYPE)        \
diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.h b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
index 2869bb6..0b3dea4 100644
--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
@@ -45,6 +45,15 @@ OVERLOADABLE uint clz(uint x);
 OVERLOADABLE long clz(long x);
 OVERLOADABLE ulong clz(ulong x);
 
+OVERLOADABLE char popcount(char x);
+OVERLOADABLE uchar popcount(uchar x);
+OVERLOADABLE short popcount(short x);
+OVERLOADABLE ushort popcount(ushort x);
+OVERLOADABLE int popcount(int x);
+OVERLOADABLE uint popcount(uint x);
+OVERLOADABLE long popcount(long x);
+OVERLOADABLE ulong popcount(ulong x);
+
 OVERLOADABLE char mul_hi(char x, char y);
 OVERLOADABLE uchar mul_hi(uchar x, uchar y);
 OVERLOADABLE short mul_hi(short x, short y);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 39b441f..39e22d7 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2494,6 +2494,7 @@ namespace gbe
         regTranslator.newScalarProxy(ir::ocl::workdim, dst); break;
       case GEN_OCL_FBH:
       case GEN_OCL_FBL:
+      case GEN_OCL_CBIT:
       case GEN_OCL_COS:
       case GEN_OCL_SIN:
       case GEN_OCL_SQR:
@@ -2779,6 +2780,7 @@ namespace gbe
           }
           case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break;
           case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break;
+          case GEN_OCL_CBIT: this->emitUnaryCallInst(I,CS,ir::OP_CBIT); break;
           case GEN_OCL_ABS:
           {
             const ir::Register src = this->getRegister(*AI);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index f508bcc..7434c78 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -151,6 +151,7 @@ DECL_LLVM_GEN_FUNCTION(I64RHADD, _Z15__gen_ocl_rhaddmm)
 DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless)
 DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii)
 DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell)
+DECL_LLVM_GEN_FUNCTION(CBIT, __gen_ocl_cbit)
 
 // saturate convert
 DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8,  _Z16convert_char_sath)
-- 
1.7.9.5



More information about the Beignet mailing list