[Beignet] [PATCH 1/2] GBE: Clear Flag register to fix a gpu hang.
Zhigang Gong
zhigang.gong at linux.intel.com
Wed Aug 28 23:30:34 PDT 2013
LGTM, will push it later. Thanks for the patch.
On Thu, Aug 22, 2013 at 04:52:04PM +0800, Ruiling Song wrote:
> When the group size is not aligned to simdWidth, any8/16h predication
> computes pmask using the flag register bits mapped to non-active lanes
> as well. Because the flag register is not cleared by default, any8/16h used
> for a jmpi instruction may cause a wrong jump and possibly an infinite loop.
>
> So we clear the flag register to 0 to make any8/16h predication work correctly.
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 13 +++++++++++++
> backend/src/backend/gen_context.hpp | 1 +
> backend/src/backend/gen_insn_selection.cpp | 3 +++
> 3 files changed, 17 insertions(+)
>
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 6eeab51..a029719 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -88,6 +88,18 @@ namespace gbe
> }
> }
>
> + void GenContext::clearFlagRegister(void) {
> + // When the group size is not aligned to simdWidth, the flag registers
> + // must be cleared so that predication (any8/16h) works correctly.
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->curr.execWidth = 1;
> + p->MOV(GenRegister::retype(GenRegister::flag(0,0), GEN_TYPE_UD), GenRegister::immud(0x0));
> + p->MOV(GenRegister::retype(GenRegister::flag(1,0), GEN_TYPE_UD), GenRegister::immud(0x0));
> + p->pop();
> + }
> +
> void GenContext::emitStackPointer(void) {
> using namespace ir;
>
> @@ -1091,6 +1103,7 @@ namespace gbe
> schedulePostRegAllocation(*this, *this->sel);
> if (OCL_OUTPUT_REG_ALLOC)
> ra->outputAllocation();
> + this->clearFlagRegister();
> this->emitStackPointer();
> this->emitInstructionStream();
> this->patchBranches();
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 8b481d0..f66ec95 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -61,6 +61,7 @@ namespace gbe
> INLINE const ir::Function &getFunction(void) const { return fn; }
> /*! Simd width chosen for the current function */
> INLINE uint32_t getSimdWidth(void) const { return simdWidth; }
> + void clearFlagRegister(void);
> /*! Emit the per-lane stack pointer computation */
> void emitStackPointer(void);
> /*! Emit the instructions */
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 55db48e..bca08ba 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -2621,6 +2621,9 @@ namespace gbe
> sel.CMP(GEN_CONDITIONAL_G, ip, GenRegister::immuw(nextLabel));
>
> // Branch to the jump target
> + // XXX TODO: For a group size not aligned to simdWidth, ALL8/16h may not
> + // work correctly, as the flag register bits mapped to non-active lanes
> + // tend to be zero.
> if (simdWidth == 8)
> sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
> else if (simdWidth == 16)
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list