[Mesa-dev] [PATCH 34/53] r600/eg: workaround bug with tess shader and dynamic GPRs.
Marek Olšák
maraeo at gmail.com
Tue Dec 1 02:46:24 PST 2015
Did the alternative workaround (reserving one SIMD as PS only) not work?
Marek
On Mon, Nov 30, 2015 at 7:20 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> When using tessellation on eg/ni chipsets, we must disable
> dynamic GPRs to work around a hw bug where the GPU hangs
> when too many things get queued.
>
> This implements something like the r600 code to emit
> the transition between static and dynamic GPRs, and to
> statically allocate GPRs when tessellation is enabled.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/gallium/drivers/r600/evergreen_compute.c | 6 +-
> src/gallium/drivers/r600/evergreen_state.c | 222 ++++++++++++++++++++-------
> src/gallium/drivers/r600/r600_hw_context.c | 2 +-
> src/gallium/drivers/r600/r600_pipe.h | 8 +-
> src/gallium/drivers/r600/r600_state_common.c | 7 +
> 5 files changed, 185 insertions(+), 60 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
> index 010d109..c07cee1 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -432,6 +432,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
> */
> r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);
>
> + /* emit config state */
> + if (ctx->b.chip_class == EVERGREEN)
> + r600_emit_atom(ctx, &ctx->config_state.atom);
> +
> ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
> r600_flush_emit(ctx);
>
> @@ -791,7 +795,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
>
> /* Config Registers */
> if (ctx->b.chip_class < CAYMAN)
> - evergreen_init_common_regs(cb, ctx->b.chip_class, ctx->b.family,
> + evergreen_init_common_regs(ctx, cb, ctx->b.chip_class, ctx->b.family,
> ctx->screen->b.info.drm_minor);
> else
> cayman_init_common_regs(cb, ctx->b.chip_class, ctx->b.family,
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index edc6f28..b3109c7 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -869,6 +869,33 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
> tex->width0, tex->height0, 0);
> }
>
> +static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
> +{
> + struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
> + struct r600_config_state *a = (struct r600_config_state*)atom;
> +
> + radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
> + if (a->dyn_gpr_enabled) {
> + radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs));
> + radeon_emit(cs, 0);
> + radeon_emit(cs, 0);
> + } else {
> + radeon_emit(cs, a->sq_gpr_resource_mgmt_1);
> + radeon_emit(cs, a->sq_gpr_resource_mgmt_2);
> + radeon_emit(cs, a->sq_gpr_resource_mgmt_3);
> + }
> + radeon_set_config_reg(cs, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (a->dyn_gpr_enabled << 8));
> + if (a->dyn_gpr_enabled) {
> + radeon_set_context_reg(cs, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
> + S_028838_PS_GPRS(0x1e) |
> + S_028838_VS_GPRS(0x1e) |
> + S_028838_GS_GPRS(0x1e) |
> + S_028838_ES_GPRS(0x1e) |
> + S_028838_HS_GPRS(0x1e) |
> + S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
> + }
> +}
> +
> static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
> {
> struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
> @@ -2553,10 +2580,10 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
> eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF);
> }
>
> -void evergreen_init_common_regs(struct r600_command_buffer *cb,
> - enum chip_class ctx_chip_class,
> - enum radeon_family ctx_family,
> - int ctx_drm_minor)
> +void evergreen_init_common_regs(struct r600_context *rctx, struct r600_command_buffer *cb,
> + enum chip_class ctx_chip_class,
> + enum radeon_family ctx_family,
> + int ctx_drm_minor)
> {
> int ps_prio;
> int vs_prio;
> @@ -2567,31 +2594,23 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
> int cs_prio;
> int ls_prio;
>
> - int num_ps_gprs;
> - int num_vs_gprs;
> - int num_gs_gprs;
> - int num_es_gprs;
> - int num_hs_gprs;
> - int num_ls_gprs;
> - int num_temp_gprs;
> -
> unsigned tmp;
>
> ps_prio = 0;
> vs_prio = 1;
> gs_prio = 2;
> es_prio = 3;
> - hs_prio = 0;
> - ls_prio = 0;
> + hs_prio = 3;
> + ls_prio = 3;
> cs_prio = 0;
>
> - num_ps_gprs = 93;
> - num_vs_gprs = 46;
> - num_temp_gprs = 4;
> - num_gs_gprs = 31;
> - num_es_gprs = 31;
> - num_hs_gprs = 23;
> - num_ls_gprs = 23;
> + rctx->default_gprs[R600_HW_STAGE_PS] = 93;
> + rctx->default_gprs[R600_HW_STAGE_VS] = 46;
> + rctx->r6xx_num_clause_temp_gprs = 4;
> + rctx->default_gprs[R600_HW_STAGE_GS] = 31;
> + rctx->default_gprs[R600_HW_STAGE_ES] = 31;
> + rctx->default_gprs[EG_HW_STAGE_HS] = 23;
> + rctx->default_gprs[EG_HW_STAGE_LS] = 23;
>
> tmp = 0;
> switch (ctx_family) {
> @@ -2614,40 +2633,12 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
> tmp |= S_008C00_GS_PRIO(gs_prio);
> tmp |= S_008C00_ES_PRIO(es_prio);
>
> - /* enable dynamic GPR resource management */
> - if (ctx_drm_minor >= 7) {
> - r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
> - r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> - /* always set temp clauses */
> - r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> - r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> - r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> - r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> - r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> - r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
> - S_028838_PS_GPRS(0x1e) |
> - S_028838_VS_GPRS(0x1e) |
> - S_028838_GS_GPRS(0x1e) |
> - S_028838_ES_GPRS(0x1e) |
> - S_028838_HS_GPRS(0x1e) |
> - S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
> - } else {
> - r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
> - r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> -
> - tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
> - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
> - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
> - r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> -
> - tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
> - tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
> - r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
> + r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 1);
> + r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
>
> - tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
> - tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
> - r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
> - }
> + r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> + r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> + r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
>
> /* The cs checker requires this register to be set. */
> r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
> @@ -2694,7 +2685,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
> r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
> r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
>
> - evergreen_init_common_regs(cb, rctx->b.chip_class,
> + evergreen_init_common_regs(rctx, cb, rctx->b.chip_class,
> rctx->b.family, rctx->screen->b.info.drm_minor);
>
> family = rctx->b.family;
> @@ -3685,7 +3676,11 @@ void evergreen_init_state_functions(struct r600_context *rctx)
> * or piglit regression).
> * !!!
> */
> -
> + if (rctx->b.chip_class == EVERGREEN) {
> + r600_init_atom(rctx, &rctx->config_state.atom, id++, evergreen_emit_config_state, 11);
> + if (rctx->screen->b.info.drm_minor >= 7)
> + rctx->config_state.dyn_gpr_enabled = true;
> + }
> r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0);
> /* shader const */
> r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0);
> @@ -3891,3 +3886,118 @@ void evergreen_set_lds_alloc(struct r600_context *rctx,
> {
> radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc);
> }
> +
> +/* on evergreen if you are running tessellation you need to disable dynamic
> + GPRs to workaround a hardware bug.*/
> +bool evergreen_adjust_gprs(struct r600_context *rctx)
> +{
> + unsigned num_gprs[EG_NUM_HW_STAGES];
> + unsigned def_gprs[EG_NUM_HW_STAGES];
> + unsigned cur_gprs[EG_NUM_HW_STAGES];
> + unsigned new_gprs[EG_NUM_HW_STAGES];
> + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
> + unsigned max_gprs;
> + unsigned i;
> + unsigned total_gprs;
> + unsigned tmp[3];
> + bool rework = false, set_default = false, set_dirty = false;
> + max_gprs = 0;
> + for (i = 0; i < EG_NUM_HW_STAGES; i++) {
> + def_gprs[i] = rctx->default_gprs[i];
> + max_gprs += def_gprs[i];
> + }
> + max_gprs += def_num_clause_temp_gprs * 2;
> +
> + /* if we have no TESS and dyn gpr is enabled then do nothing. */
> + if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader || rctx->screen->b.info.drm_minor < 7) {
> + if (rctx->config_state.dyn_gpr_enabled)
> + return true;
> +
> + /* transition back to dyn gpr enabled state */
> + rctx->config_state.dyn_gpr_enabled = true;
> + r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
> + rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
> + return true;
> + }
> +
> +
> + /* gather required shader gprs */
> + for (i = 0; i < EG_NUM_HW_STAGES; i++)
> + num_gprs[i] = rctx->hw_shader_stages[i].shader->shader.bc.ngpr;
> +
> + cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> + cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> + cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
> + cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
> + cur_gprs[EG_HW_STAGE_LS] = G_008C0C_NUM_LS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);
> + cur_gprs[EG_HW_STAGE_HS] = G_008C0C_NUM_HS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);
> +
> + total_gprs = 0;
> + for (i = 0; i < EG_NUM_HW_STAGES; i++) {
> + new_gprs[i] = num_gprs[i];
> + total_gprs += num_gprs[i];
> + }
> +
> + if (total_gprs > (max_gprs - (2 * def_num_clause_temp_gprs)))
> + return false;
> +
> + for (i = 0; i < EG_NUM_HW_STAGES; i++) {
> + if (new_gprs[i] > cur_gprs[i]) {
> + rework = true;
> + break;
> + }
> + }
> +
> + if (rctx->config_state.dyn_gpr_enabled) {
> + set_dirty = true;
> + rctx->config_state.dyn_gpr_enabled = false;
> + }
> +
> + if (rework) {
> + set_default = true;
> + for (i = 0; i < EG_NUM_HW_STAGES; i++) {
> + if (new_gprs[i] > def_gprs[i])
> + set_default = false;
> + }
> +
> + if (set_default) {
> + for (i = 0; i < EG_NUM_HW_STAGES; i++) {
> + new_gprs[i] = def_gprs[i];
> + }
> + } else {
> + unsigned ps_value = max_gprs;
> +
> + ps_value -= (def_num_clause_temp_gprs * 2);
> + for (i = R600_HW_STAGE_VS; i < EG_NUM_HW_STAGES; i++)
> + ps_value -= new_gprs[i];
> +
> + new_gprs[R600_HW_STAGE_PS] = ps_value;
> + }
> +
> + tmp[0] = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) |
> + S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) |
> + S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
> +
> + tmp[1] = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) |
> + S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]);
> +
> + tmp[2] = S_008C0C_NUM_HS_GPRS(new_gprs[EG_HW_STAGE_HS]) |
> + S_008C0C_NUM_LS_GPRS(new_gprs[EG_HW_STAGE_LS]);
> +
> + if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp[0] ||
> + rctx->config_state.sq_gpr_resource_mgmt_2 != tmp[1] ||
> + rctx->config_state.sq_gpr_resource_mgmt_3 != tmp[2]) {
> + rctx->config_state.sq_gpr_resource_mgmt_1 = tmp[0];
> + rctx->config_state.sq_gpr_resource_mgmt_2 = tmp[1];
> + rctx->config_state.sq_gpr_resource_mgmt_3 = tmp[2];
> + set_dirty = true;
> + }
> + }
> +
> +
> + if (set_dirty) {
> + r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
> + rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
> + }
> + return true;
> +}
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index b7845b5..90b99e8 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -310,7 +310,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
> ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
> ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8;
> r600_mark_atom_dirty(ctx, &ctx->viewport.atom);
> - if (ctx->b.chip_class < EVERGREEN) {
> + if (ctx->b.chip_class <= EVERGREEN) {
> r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
> }
> r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 78f3a59..04248b4 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -206,6 +206,8 @@ struct r600_config_state {
> struct r600_atom atom;
> unsigned sq_gpr_resource_mgmt_1;
> unsigned sq_gpr_resource_mgmt_2;
> + unsigned sq_gpr_resource_mgmt_3;
> + bool dyn_gpr_enabled;
> };
>
> struct r600_stencil_ref
> @@ -441,6 +443,7 @@ struct r600_context {
> boolean has_vertex_cache;
> boolean keep_tiling_flags;
> unsigned default_gprs[EG_NUM_HW_STAGES];
> + unsigned current_gprs[EG_NUM_HW_STAGES];
> unsigned r6xx_num_clause_temp_gprs;
>
> /* Miscellaneous state objects. */
> @@ -603,7 +606,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
> const struct pipe_sampler_view *state,
> unsigned width0, unsigned height0,
> unsigned force_level);
> -void evergreen_init_common_regs(struct r600_command_buffer *cb,
> +void evergreen_init_common_regs(struct r600_context *ctx,
> + struct r600_command_buffer *cb,
> enum chip_class ctx_chip_class,
> enum radeon_family ctx_family,
> int ctx_drm_minor);
> @@ -634,7 +638,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
> void evergreen_init_color_surface_rat(struct r600_context *rctx,
> struct r600_surface *surf);
> void evergreen_update_db_shader_control(struct r600_context * rctx);
> -
> +bool evergreen_adjust_gprs(struct r600_context *rctx);
> /* r600_blit.c */
> void r600_init_blit_functions(struct r600_context *rctx);
> void r600_decompress_depth_textures(struct r600_context *rctx,
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index ab3313f..351aca9 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -1624,6 +1624,13 @@ static bool r600_update_derived_state(struct r600_context *rctx)
> }
> }
>
> + if (rctx->b.chip_class == EVERGREEN) {
> + if (!evergreen_adjust_gprs(rctx)) {
> + /* discard rendering */
> + return false;
> + }
> + }
> +
> blend_disable = (rctx->dual_src_blend &&
> rctx->ps_shader->current->nr_ps_color_outputs < 2);
>
> --
> 2.5.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list