On Sep 16, 2016 3:04 PM, "Francisco Jerez" <currojerez@riseup.net> wrote:
>
> Not intended for upstream. Should cause a GPU hang if some thread is
> executed with a non-contiguous dispatch mask breaking assumptions of
> brw_stage_has_packed_dispatch(). Doesn't cause any CTS, DEQP or
> Piglit regressions, while replacing brw_stage_has_packed_dispatch()
> with a dummy implementation that unconditionally returns true on top
> of this patch causes multiple GPU hangs.
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 17 +++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 21 +++++++++++++++++++++
>  2 files changed, 38 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 042203d..b3eec49 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -33,6 +33,23 @@ using namespace brw::surface_access;
>  void
>  fs_visitor::emit_nir_code()
>  {
> +   if (brw_stage_has_packed_dispatch(stage, prog_data)) {

Mind adding "0 &&" and merging this patch so we remain aware of the issue, keep it building, and can easily test future hardware?

> +      const fs_builder ubld = bld.exec_all().group(1, 0);
> +      const fs_reg tmp = component(bld.vgrf(BRW_REGISTER_TYPE_UD), 0);
> +      const fs_reg mask = (stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() :
> +                           brw_dmask_reg());
> +
> +      ubld.ADD(tmp, mask, brw_imm_ud(1));
> +      ubld.AND(tmp, mask, tmp);
> +
> +      /* This will loop forever if the dispatch mask doesn't have the expected
> +       * form '2^n-1', in which case tmp will be non-zero.
> +       */
> +      bld.emit(BRW_OPCODE_DO);
> +      bld.CMP(bld.null_reg_ud(), tmp, brw_imm_ud(0), BRW_CONDITIONAL_NZ);
> +      set_predicate(BRW_PREDICATE_NORMAL, bld.emit(BRW_OPCODE_WHILE));
> +   }
> +
>     /* emit the arrays used for inputs and outputs - load/store intrinsics will
>      * be converted to reads/writes of these arrays
>      */
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index ba3bbdf..9f7a1f0 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -35,6 +35,27 @@ namespace brw {
>  void
>  vec4_visitor::emit_nir_code()
>  {
> +   if (brw_stage_has_packed_dispatch(stage, &prog_data->base)) {
> +      const dst_reg tmp = writemask(dst_reg(this, glsl_type::uint_type),
> +                                    WRITEMASK_X);
> +      const src_reg mask =
> +         brw_swizzle(retype(stride(brw_vec4_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, 0),
> +                                   0, 4, 1),
> +                            BRW_REGISTER_TYPE_UD),
> +                     BRW_SWIZZLE_ZZZZ);

Can we just do vec4_reg(brw_vmask_reg)?

> +
> +      emit(ADD(tmp, mask, brw_imm_ud(1)));
> +      emit(AND(tmp, mask, src_reg(tmp)));
> +
> +      /* This will loop forever if the dispatch mask doesn't have the expected
> +       * form '2^n-1', in which case tmp will be non-zero.
> +       */
> +      emit(BRW_OPCODE_DO);
> +      emit(CMP(dst_null_ud(), src_reg(tmp), brw_imm_ud(0),
> +               BRW_CONDITIONAL_NZ));
> +      emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;
> +   }
> +
>     if (nir->num_uniforms > 0)
>        nir_setup_uniforms();
>
> --
> 2.9.0
>