[Beignet] [PATCH 1/2] add opencl-1.2 builtin function popcount.
xionghu.luo at intel.com
xionghu.luo at intel.com
Thu Oct 9 20:05:04 PDT 2014
From: Luo <xionghu.luo at intel.com>
The popcount function returns the number of non-zero (set) bits in its input.
Use the GEN instruction cbit (Count Bits Set) to implement it.
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
backend/src/backend/gen/gen_mesa_disasm.c | 1 +
backend/src/backend/gen_context.cpp | 1 +
backend/src/backend/gen_defs.hpp | 1 +
backend/src/backend/gen_encoder.cpp | 1 +
backend/src/backend/gen_encoder.hpp | 1 +
backend/src/backend/gen_insn_selection.cpp | 4 +++-
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/ir/instruction.cpp | 1 +
backend/src/ir/instruction.hpp | 2 ++
backend/src/ir/instruction.hxx | 1 +
backend/src/libocl/script/ocl_integer.def | 3 +--
backend/src/libocl/tmpl/ocl_integer.tmpl.cl | 30 +++++++++++++++++++++++++++
backend/src/libocl/tmpl/ocl_integer.tmpl.h | 9 ++++++++
backend/src/llvm/llvm_gen_backend.cpp | 2 ++
backend/src/llvm/llvm_gen_ocl_function.hxx | 1 +
15 files changed, 56 insertions(+), 3 deletions(-)
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 266b501..330dffb 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -66,6 +66,7 @@ static const struct {
[GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
[GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
[GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 },
[GEN_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
[GEN_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 8844233..c37d2ee 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -205,6 +205,7 @@ namespace gbe
case SEL_OP_READ_ARF: p->MOV(dst, src); break;
case SEL_OP_FBH: p->FBH(dst, src); break;
case SEL_OP_FBL: p->FBL(dst, src); break;
+ case SEL_OP_CBIT: p->CBIT(dst, src); break;
case SEL_OP_NOT: p->NOT(dst, src); break;
case SEL_OP_RNDD: p->RNDD(dst, src); break;
case SEL_OP_RNDU: p->RNDU(dst, src); break;
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 19aad95..3faacde 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -159,6 +159,7 @@ enum opcode {
GEN_OPCODE_LZD = 74,
GEN_OPCODE_FBH = 75,
GEN_OPCODE_FBL = 76,
+ GEN_OPCODE_CBIT = 77,
GEN_OPCODE_ADDC = 78,
GEN_OPCODE_SUBB = 79,
GEN_OPCODE_SAD2 = 80,
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 295e11d..bd6204a 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -901,6 +901,7 @@ namespace gbe
ALU1(RNDU)
ALU1(FBH)
ALU1(FBL)
+ ALU1(CBIT)
ALU1(F16TO32)
ALU1(F32TO16)
ALU2(SEL)
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 2c999ce..3f486d7 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -101,6 +101,7 @@ namespace gbe
ALU1(MOV)
ALU1(FBH)
ALU1(FBL)
+ ALU1(CBIT)
ALU2(SUBB)
ALU2(UPSAMPLE_SHORT)
ALU2(UPSAMPLE_INT)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e3ee35d..e05a0a6 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -493,6 +493,7 @@ namespace gbe
ALU2WithTemp(MUL_HI)
ALU1(FBH)
ALU1(FBL)
+ ALU1(CBIT)
ALU2WithTemp(HADD)
ALU2WithTemp(RHADD)
ALU2(UPSAMPLE_SHORT)
@@ -1861,7 +1862,7 @@ namespace gbe
static ir::Type getType(const ir::Opcode opcode, const ir::Type insnType) {
if (insnType == ir::TYPE_S64 || insnType == ir::TYPE_U64 || insnType == ir::TYPE_S8 || insnType == ir::TYPE_U8)
return insnType;
- if (opcode == ir::OP_FBH || opcode == ir::OP_FBL)
+ if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == ir::OP_CBIT)
return ir::TYPE_U32;
if (insnType == ir::TYPE_S16 || insnType == ir::TYPE_U16)
return insnType;
@@ -1915,6 +1916,7 @@ namespace gbe
case ir::OP_RNDZ: sel.RNDZ(dst, src); break;
case ir::OP_FBH: sel.FBH(dst, src); break;
case ir::OP_FBL: sel.FBL(dst, src); break;
+ case ir::OP_CBIT: sel.CBIT(dst, src); break;
case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 7511b84..d80dc58 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -68,6 +68,7 @@ DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
DECL_SELECTION_IR(I64_MUL_HI, I64MULHIInstruction)
DECL_SELECTION_IR(FBH, UnaryInstruction)
DECL_SELECTION_IR(FBL, UnaryInstruction)
+DECL_SELECTION_IR(CBIT, UnaryInstruction)
DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
DECL_SELECTION_IR(I64HADD, I64HADDInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index e4e30ed..6c37f29 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1594,6 +1594,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
DECL_EMIT_FUNCTION(MOV)
DECL_EMIT_FUNCTION(FBH)
DECL_EMIT_FUNCTION(FBL)
+ DECL_EMIT_FUNCTION(CBIT)
DECL_EMIT_FUNCTION(COS)
DECL_EMIT_FUNCTION(SIN)
DECL_EMIT_FUNCTION(LOG)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 6807615..1c31171 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -585,6 +585,8 @@ namespace ir {
Instruction FBH(Type type, Register dst, Register src);
/*! fbl.type dst src */
Instruction FBL(Type type, Register dst, Register src);
+ /*! cbit.type dst src */
+ Instruction CBIT(Type type, Register dst, Register src);
/*! hadd.type dst src */
Instruction HADD(Type type, Register dst, Register src0, Register src1);
/*! rhadd.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 5fed286..9a89069 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -86,6 +86,7 @@ DECL_INSN(MUL_HI, BinaryInstruction)
DECL_INSN(I64_MUL_HI, BinaryInstruction)
DECL_INSN(FBH, UnaryInstruction)
DECL_INSN(FBL, UnaryInstruction)
+DECL_INSN(CBIT, UnaryInstruction)
DECL_INSN(HADD, BinaryInstruction)
DECL_INSN(RHADD, BinaryInstruction)
DECL_INSN(I64HADD, BinaryInstruction)
diff --git a/backend/src/libocl/script/ocl_integer.def b/backend/src/libocl/script/ocl_integer.def
index ec9177a..c35c242 100644
--- a/backend/src/libocl/script/ocl_integer.def
+++ b/backend/src/libocl/script/ocl_integer.def
@@ -23,8 +23,7 @@ uintn upsample (ushortn hi, ushortn lo)
longn upsample (intn hi, uintn lo)
ulongn upsample (uintn hi, uintn lo)
-# XXX not implemented
-#gentype popcount (gentype x)
+gentype popcount (gentype x)
##fast_integer
gentype mad24 (gentype x, gentype y, gentype z)
diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
index 9230604..375a40f 100644
--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
@@ -19,6 +19,7 @@
PURE CONST uint __gen_ocl_fbh(uint);
PURE CONST uint __gen_ocl_fbl(uint);
+PURE CONST uint __gen_ocl_cbit(uint);
OVERLOADABLE char clz(char x) {
if (x < 0)
@@ -86,6 +87,35 @@ OVERLOADABLE ulong clz(ulong x) {
return v;
}
+OVERLOADABLE char popcount(char x) {
+ return x == 0 ? 0 : x < 0?__gen_ocl_cbit(x) - 24 : __gen_ocl_cbit(x);
+}
+OVERLOADABLE short popcount(short x) {
+ return x == 0 ? 0 : x < 0?__gen_ocl_cbit(x) - 16 : __gen_ocl_cbit(x);
+}
+#define SDEF(TYPE) \
+OVERLOADABLE TYPE popcount(TYPE x){ return x == 0? 0:__gen_ocl_cbit(x);}
+SDEF(uchar);
+SDEF(ushort);
+SDEF(int);
+SDEF(uint);
+#undef SDEF
+
+OVERLOADABLE long popcount(long x) {
+ union { int i[2]; long x; } u;
+ u.x = x;
+ uint v = popcount(u.i[1]);
+ v += popcount(u.i[0]);
+ return v;
+}
+
+OVERLOADABLE ulong popcount(ulong x) {
+ union { uint i[2]; ulong x; } u;
+ u.x = x;
+ uint v = popcount(u.i[1]);
+ v += popcount(u.i[0]);
+ return v;
+}
// sat
#define SDEF(TYPE) \
diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.h b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
index 2869bb6..0b3dea4 100644
--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
@@ -45,6 +45,15 @@ OVERLOADABLE uint clz(uint x);
OVERLOADABLE long clz(long x);
OVERLOADABLE ulong clz(ulong x);
+OVERLOADABLE char popcount(char x);
+OVERLOADABLE uchar popcount(uchar x);
+OVERLOADABLE short popcount(short x);
+OVERLOADABLE ushort popcount(ushort x);
+OVERLOADABLE int popcount(int x);
+OVERLOADABLE uint popcount(uint x);
+OVERLOADABLE long popcount(long x);
+OVERLOADABLE ulong popcount(ulong x);
+
OVERLOADABLE char mul_hi(char x, char y);
OVERLOADABLE uchar mul_hi(uchar x, uchar y);
OVERLOADABLE short mul_hi(short x, short y);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 39b441f..39e22d7 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2494,6 +2494,7 @@ namespace gbe
regTranslator.newScalarProxy(ir::ocl::workdim, dst); break;
case GEN_OCL_FBH:
case GEN_OCL_FBL:
+ case GEN_OCL_CBIT:
case GEN_OCL_COS:
case GEN_OCL_SIN:
case GEN_OCL_SQR:
@@ -2779,6 +2780,7 @@ namespace gbe
}
case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break;
case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break;
+ case GEN_OCL_CBIT: this->emitUnaryCallInst(I,CS,ir::OP_CBIT); break;
case GEN_OCL_ABS:
{
const ir::Register src = this->getRegister(*AI);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index f508bcc..7434c78 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -151,6 +151,7 @@ DECL_LLVM_GEN_FUNCTION(I64RHADD, _Z15__gen_ocl_rhaddmm)
DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless)
DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii)
DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell)
+DECL_LLVM_GEN_FUNCTION(CBIT, __gen_ocl_cbit)
// saturate convert
DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8, _Z16convert_char_sath)
--
1.7.9.5
More information about the Beignet
mailing list