[Beignet] [PATCH 1/2] support __gen_ocl_simd_any and __gen_ocl_simd_all
Guo Yejun
yejun.guo at intel.com
Thu Apr 17 22:42:16 PDT 2014
short __gen_ocl_simd_any(short x):
if x is non-zero in any of the active threads of the same SIMD,
the return value is non-zero for all of these threads; otherwise, zero is returned.
short __gen_ocl_simd_all(short x):
only if x is non-zero in all of the active threads of the same SIMD
is the return value non-zero for all of these threads; otherwise, zero is returned.
For example:
to check whether a specific value exists in a global buffer, one SIMD can
do the search in parallel, and the whole SIMD can stop the task
as soon as the value is found. The key kernel code looks like:
for(; ; ) {
...
if (__gen_ocl_simd_any(...))
break; // the whole SIMD stops searching
}
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 63 ++++++++++++++++++++++++++++++
backend/src/ir/instruction.hpp | 4 ++
backend/src/ir/instruction.hxx | 2 +
backend/src/llvm/llvm_gen_backend.cpp | 16 ++++++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 4 ++
backend/src/ocl_stdlib.tmpl.h | 8 ++++
6 files changed, 97 insertions(+)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 72a8549..e7c84d0 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1730,6 +1730,69 @@ namespace gbe
case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break;
case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break;
case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break;
+ case ir::OP_SIMD_ANY:
+ {
+ const GenRegister constZero = GenRegister::immuw(0);;
+ const GenRegister regOne = GenRegister::uw1grf(ir::ocl::one);
+ const GenRegister flag01 = GenRegister::flag(0, 1);
+
+ sel.push();
+ int simdWidth = sel.curr.execWidth;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.MOV(flag01, constZero);
+
+ sel.curr.execWidth = simdWidth;
+ sel.curr.noMask = 0;
+
+ sel.curr.physicalFlag = 1;
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.CMP(GEN_CONDITIONAL_NEQ, src, constZero);
+
+ if (sel.curr.execWidth == 16)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
+ else if (sel.curr.execWidth == 8)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
+ else
+ NOT_IMPLEMENTED;
+ sel.SEL(dst, regOne, constZero);
+ sel.pop();
+ }
+ break;
+ case ir::OP_SIMD_ALL:
+ {
+ const GenRegister constZero = GenRegister::immuw(0);
+ const GenRegister regOne = GenRegister::uw1grf(ir::ocl::one);
+ const GenRegister flag01 = GenRegister::flag(0, 1);
+
+ sel.push();
+ int simdWidth = sel.curr.execWidth;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.MOV(flag01, regOne);
+
+ sel.curr.execWidth = simdWidth;
+ sel.curr.noMask = 0;
+
+ sel.curr.physicalFlag = 1;
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.CMP(GEN_CONDITIONAL_NEQ, src, constZero);
+
+ if (sel.curr.execWidth == 16)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
+ else if (sel.curr.execWidth == 8)
+ sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
+ else
+ NOT_IMPLEMENTED;
+ sel.SEL(dst, regOne, constZero);
+ sel.pop();
+ }
+ break;
+
default: NOT_SUPPORTED;
}
return true;
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 457b5b4..582e22d 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -567,6 +567,10 @@ namespace ir {
Instruction RCP(Type type, Register dst, Register src);
/*! abs.type dst src */
Instruction ABS(Type type, Register dst, Register src);
+ /*! simd_all.type dst src */
+ Instruction SIMD_ALL(Type type, Register dst, Register src);
+ /*! simd_any.type dst src */
+ Instruction SIMD_ANY(Type type, Register dst, Register src);
/*! log.type dst src */
Instruction LOG(Type type, Register dst, Register src);
/*! exp.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index bebceff..587517b 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -38,6 +38,8 @@ DECL_INSN(RNDD, UnaryInstruction)
DECL_INSN(RNDE, UnaryInstruction)
DECL_INSN(RNDU, UnaryInstruction)
DECL_INSN(RNDZ, UnaryInstruction)
+DECL_INSN(SIMD_ANY, UnaryInstruction)
+DECL_INSN(SIMD_ALL, UnaryInstruction)
DECL_INSN(POW, BinaryInstruction)
DECL_INSN(MUL, BinaryInstruction)
DECL_INSN(ADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index b46e991..6c2b45d 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2282,6 +2282,8 @@ namespace gbe
case GEN_OCL_SAT_CONV_F32_TO_U32:
case GEN_OCL_CONV_F16_TO_F32:
case GEN_OCL_CONV_F32_TO_F16:
+ case GEN_OCL_SIMD_ANY:
+ case GEN_OCL_SIMD_ALL:
this->newRegister(&I);
break;
default:
@@ -2422,6 +2424,20 @@ namespace gbe
ctx.ALU1(ir::OP_ABS, ir::TYPE_S32, dst, src);
break;
}
+ case GEN_OCL_SIMD_ALL:
+ {
+ const ir::Register src = this->getRegister(*AI);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.ALU1(ir::OP_SIMD_ALL, ir::TYPE_S16, dst, src);
+ break;
+ }
+ case GEN_OCL_SIMD_ANY:
+ {
+ const ir::Register src = this->getRegister(*AI);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src);
+ break;
+ }
case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 7058a60..4236298 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -175,3 +175,7 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf)
DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32)
DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
+
+// SIMD level function for internal usage
+DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
+DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 22e3aec..cd8b918 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -638,6 +638,14 @@ INLINE_OVERLOADABLE ulong abs_diff (ulong x, ulong y) {
return y > x ? (y - x) : (x - y);
}
+
+/////////////////////////////////////////////////////////////////////////////
+// SIMD level function
+/////////////////////////////////////////////////////////////////////////////
+short __gen_ocl_simd_any(short);
+short __gen_ocl_simd_all(short);
+
+
/////////////////////////////////////////////////////////////////////////////
// Work Items functions (see 6.11.1 of OCL 1.1 spec)
/////////////////////////////////////////////////////////////////////////////
--
1.8.3.2
More information about the Beignet
mailing list