[Beignet] [PATCH 1/2] GBE: Clear flag register to fix a GPU hang.
Ruiling Song
ruiling.song at intel.com
Thu Aug 22 01:52:04 PDT 2013
When the group size is not aligned to simdWidth, the any8h/any16h predication
modes compute the predicate mask from all flag register bits, including the
bits mapped to non-active lanes. Since the flag register is not cleared by
default, any8h/any16h predication on a jmpi instruction may take the wrong
branch and possibly spin in an infinite loop. So, we clear the flag registers
to 0 so that any8h/any16h predication works correctly.
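For illustration only (this sketch is not part of the patch): the predication
modes reduce the whole 16-bit flag sub-register, no matter which lanes are
active, roughly like the hypothetical helpers below. Stale 1-bits left in the
inactive-lane positions make an ANY-predicated jmpi take the branch even when
every active lane compared false, which is the hang described above; zeroed
inactive-lane bits are also why the ALL variants remain questionable for
unaligned group sizes (see the XXX TODO further down in the diff).

  #include <cstdint>

  // Hypothetical helpers, only to illustrate how the predicate is reduced.
  // ANY16H is taken if any of the 16 flag bits is set; ALL16H only if all
  // 16 bits are set, regardless of which lanes are actually enabled.
  static bool any16h(uint16_t flag) { return flag != 0; }
  static bool all16h(uint16_t flag) { return flag == 0xffff; }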
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen_context.cpp | 13 +++++++++++++
backend/src/backend/gen_context.hpp | 1 +
backend/src/backend/gen_insn_selection.cpp | 3 +++
3 files changed, 17 insertions(+)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 6eeab51..a029719 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -88,6 +88,18 @@ namespace gbe
}
}
+ void GenContext::clearFlagRegister(void) {
+ // When the group size is not aligned to simdWidth, the flag registers need to
+ // be cleared so that predication (any8h/any16h) works correctly.
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ p->curr.execWidth = 1;
+ p->MOV(GenRegister::retype(GenRegister::flag(0,0), GEN_TYPE_UD), GenRegister::immud(0x0));
+ p->MOV(GenRegister::retype(GenRegister::flag(1,0), GEN_TYPE_UD), GenRegister::immud(0x0));
+ p->pop();
+ }
+
void GenContext::emitStackPointer(void) {
using namespace ir;
@@ -1091,6 +1103,7 @@ namespace gbe
schedulePostRegAllocation(*this, *this->sel);
if (OCL_OUTPUT_REG_ALLOC)
ra->outputAllocation();
+ this->clearFlagRegister();
this->emitStackPointer();
this->emitInstructionStream();
this->patchBranches();
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 8b481d0..f66ec95 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -61,6 +61,7 @@ namespace gbe
INLINE const ir::Function &getFunction(void) const { return fn; }
/*! Simd width chosen for the current function */
INLINE uint32_t getSimdWidth(void) const { return simdWidth; }
+ void clearFlagRegister(void);
/*! Emit the per-lane stack pointer computation */
void emitStackPointer(void);
/*! Emit the instructions */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 55db48e..bca08ba 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2621,6 +2621,9 @@ namespace gbe
sel.CMP(GEN_CONDITIONAL_G, ip, GenRegister::immuw(nextLabel));
// Branch to the jump target
+ // XXX TODO: For a group size not aligned to simdWidth, ALL8H/ALL16H may not
+ // work correctly, as the flag register bits mapped to non-active lanes tend
+ // to be zero.
if (simdWidth == 8)
sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
else if (simdWidth == 16)
--
1.7.9.5