[Beignet] [PATCH] SKL: Fix opencv perf hang.
Yang Rong
rong.r.yang at intel.com
Tue Feb 10 01:01:23 PST 2015
SKL has 32 barriers per subslice, and R2.0 adds a "Barrier ID MSB" bit for it.
So we need to AND with 0x8f000000 to get the barrier id.
Fix opencv hang:
opencv_perf_imgproc/OCL_ImgSize_TmplSize_Method_MatType_MatchTemplate
opencv_perf_imgproc/OCL_MomentsFixture_Moments
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/gen9_context.cpp | 26 ++++++++++++++++++++++++++
backend/src/backend/gen9_context.hpp | 1 +
backend/src/backend/gen_context.hpp | 2 +-
3 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/backend/src/backend/gen9_context.cpp b/backend/src/backend/gen9_context.cpp
index b47ae2a..326f5a1 100644
--- a/backend/src/backend/gen9_context.cpp
+++ b/backend/src/backend/gen9_context.cpp
@@ -28,4 +28,30 @@ namespace gbe
void Gen9Context::newSelection(void) {
this->sel = GBE_NEW(Selection9, *this);
}
+
+ void Gen9Context::emitBarrierInstruction(const SelectionInstruction &insn) {
+ const GenRegister src = ra->genReg(insn.src(0));
+ const GenRegister fenceDst = ra->genReg(insn.dst(0));
+ uint32_t barrierType = insn.extra.barrierType;
+ const GenRegister barrierId = ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid));
+
+ if (barrierType == ir::syncGlobalBarrier) {
+ p->FENCE(fenceDst);
+ p->MOV(fenceDst, fenceDst);
+ }
+ p->push();
+ // Only payload.2 is used and all the other regions are ignored, so
+ // SIMD8 mode is safe here.
+ p->curr.execWidth = 8;
+ p->curr.physicalFlag = 0;
+ p->curr.noMask = 1;
+ // Mask the barrier id out of r0; SKL adds a "Barrier ID MSB" bit, hence the 0x8f000000 mask.
+ p->AND(src, barrierId, GenRegister::immud(0x8f000000));
+ // A barrier is OK to start the thread synchronization *and* SLM fence
+ p->BARRIER(src);
+ p->curr.execWidth = 1;
+ // Now we wait for the other threads to reach the barrier
+ p->WAIT();
+ p->pop();
+ }
}
diff --git a/backend/src/backend/gen9_context.hpp b/backend/src/backend/gen9_context.hpp
index 4123414..8acad8c 100644
--- a/backend/src/backend/gen9_context.hpp
+++ b/backend/src/backend/gen9_context.hpp
@@ -36,6 +36,7 @@ namespace gbe
Gen9Context(const ir::Unit &unit, const std::string &name, uint32_t deviceID, bool relaxMath = false)
: Gen8Context(unit, name, deviceID, relaxMath) {
};
+ virtual void emitBarrierInstruction(const SelectionInstruction &insn);
protected:
virtual GenEncoder* generateEncoder(void) {
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index c68e6cf..f64b916 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -145,7 +145,7 @@ namespace gbe
void emitEotInstruction(const SelectionInstruction &insn);
void emitNoOpInstruction(const SelectionInstruction &insn);
void emitWaitInstruction(const SelectionInstruction &insn);
- void emitBarrierInstruction(const SelectionInstruction &insn);
+ virtual void emitBarrierInstruction(const SelectionInstruction &insn);
void emitFenceInstruction(const SelectionInstruction &insn);
void emitMathInstruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
--
1.8.3.2
More information about the Beignet
mailing list