[Mesa-dev] [PATCH 34/53] r600/eg: workaround bug with tess shader and dynamic GPRs.

Marek Olšák maraeo at gmail.com
Tue Dec 1 02:46:24 PST 2015


Did the alternative workaround (reserving one SIMD as PS only) not work?

Marek

On Mon, Nov 30, 2015 at 7:20 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> When using tessellation on eg/ni chipsets, we must disable
> dynamic GPRs to work around a hw bug where the GPU hangs
> when too many things get queued.
>
> This implements something like the r600 code to emit
> the transition between static and dynamic GPRs, and to
> statically allocate GPRs when tessellation is enabled.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/drivers/r600/evergreen_compute.c |   6 +-
>  src/gallium/drivers/r600/evergreen_state.c   | 222 ++++++++++++++++++++-------
>  src/gallium/drivers/r600/r600_hw_context.c   |   2 +-
>  src/gallium/drivers/r600/r600_pipe.h         |   8 +-
>  src/gallium/drivers/r600/r600_state_common.c |   7 +
>  5 files changed, 185 insertions(+), 60 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
> index 010d109..c07cee1 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -432,6 +432,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
>          */
>         r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);
>
> +       /* emit config state */
> +       if (ctx->b.chip_class == EVERGREEN)
> +               r600_emit_atom(ctx, &ctx->config_state.atom);
> +
>         ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
>         r600_flush_emit(ctx);
>
> @@ -791,7 +795,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
>
>         /* Config Registers */
>         if (ctx->b.chip_class < CAYMAN)
> -               evergreen_init_common_regs(cb, ctx->b.chip_class, ctx->b.family,
> +               evergreen_init_common_regs(ctx, cb, ctx->b.chip_class, ctx->b.family,
>                                            ctx->screen->b.info.drm_minor);
>         else
>                 cayman_init_common_regs(cb, ctx->b.chip_class, ctx->b.family,
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index edc6f28..b3109c7 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -869,6 +869,33 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
>                                                     tex->width0, tex->height0, 0);
>  }
>
> +static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
> +{
> +       struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
> +       struct r600_config_state *a = (struct r600_config_state*)atom;
> +
> +       radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
> +       if (a->dyn_gpr_enabled) {
> +               radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs));
> +               radeon_emit(cs, 0);
> +               radeon_emit(cs, 0);
> +       } else {
> +               radeon_emit(cs, a->sq_gpr_resource_mgmt_1);
> +               radeon_emit(cs, a->sq_gpr_resource_mgmt_2);
> +               radeon_emit(cs, a->sq_gpr_resource_mgmt_3);
> +       }
> +       radeon_set_config_reg(cs, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (a->dyn_gpr_enabled << 8));
> +       if (a->dyn_gpr_enabled) {
> +               radeon_set_context_reg(cs, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
> +                                      S_028838_PS_GPRS(0x1e) |
> +                                      S_028838_VS_GPRS(0x1e) |
> +                                      S_028838_GS_GPRS(0x1e) |
> +                                      S_028838_ES_GPRS(0x1e) |
> +                                      S_028838_HS_GPRS(0x1e) |
> +                                      S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
> +       }
> +}
> +
>  static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
>  {
>         struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
> @@ -2553,10 +2580,10 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
>         eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF);
>  }
>
> -void evergreen_init_common_regs(struct r600_command_buffer *cb,
> -       enum chip_class ctx_chip_class,
> -       enum radeon_family ctx_family,
> -       int ctx_drm_minor)
> +void evergreen_init_common_regs(struct r600_context *rctx, struct r600_command_buffer *cb,
> +                               enum chip_class ctx_chip_class,
> +                               enum radeon_family ctx_family,
> +                               int ctx_drm_minor)
>  {
>         int ps_prio;
>         int vs_prio;
> @@ -2567,31 +2594,23 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
>         int cs_prio;
>         int ls_prio;
>
> -       int num_ps_gprs;
> -       int num_vs_gprs;
> -       int num_gs_gprs;
> -       int num_es_gprs;
> -       int num_hs_gprs;
> -       int num_ls_gprs;
> -       int num_temp_gprs;
> -
>         unsigned tmp;
>
>         ps_prio = 0;
>         vs_prio = 1;
>         gs_prio = 2;
>         es_prio = 3;
> -       hs_prio = 0;
> -       ls_prio = 0;
> +       hs_prio = 3;
> +       ls_prio = 3;
>         cs_prio = 0;
>
> -       num_ps_gprs = 93;
> -       num_vs_gprs = 46;
> -       num_temp_gprs = 4;
> -       num_gs_gprs = 31;
> -       num_es_gprs = 31;
> -       num_hs_gprs = 23;
> -       num_ls_gprs = 23;
> +       rctx->default_gprs[R600_HW_STAGE_PS] = 93;
> +       rctx->default_gprs[R600_HW_STAGE_VS] = 46;
> +       rctx->r6xx_num_clause_temp_gprs = 4;
> +       rctx->default_gprs[R600_HW_STAGE_GS] = 31;
> +       rctx->default_gprs[R600_HW_STAGE_ES] = 31;
> +       rctx->default_gprs[EG_HW_STAGE_HS] = 23;
> +       rctx->default_gprs[EG_HW_STAGE_LS] = 23;
>
>         tmp = 0;
>         switch (ctx_family) {
> @@ -2614,40 +2633,12 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
>         tmp |= S_008C00_GS_PRIO(gs_prio);
>         tmp |= S_008C00_ES_PRIO(es_prio);
>
> -       /* enable dynamic GPR resource management */
> -       if (ctx_drm_minor >= 7) {
> -               r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
> -               r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> -               /* always set temp clauses */
> -               r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> -               r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> -               r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> -               r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
> -               r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
> -               r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
> -                                       S_028838_PS_GPRS(0x1e) |
> -                                       S_028838_VS_GPRS(0x1e) |
> -                                       S_028838_GS_GPRS(0x1e) |
> -                                       S_028838_ES_GPRS(0x1e) |
> -                                       S_028838_HS_GPRS(0x1e) |
> -                                       S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
> -       } else {
> -               r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
> -               r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
> -
> -               tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
> -               tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
> -               tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
> -               r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
> -
> -               tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
> -               tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
> -               r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
> +       r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 1);
> +       r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
>
> -               tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
> -               tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
> -               r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
> -       }
> +       r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
> +       r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
> +       r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
>
>         /* The cs checker requires this register to be set. */
>         r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
> @@ -2694,7 +2685,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
>         r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
>         r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
>
> -       evergreen_init_common_regs(cb, rctx->b.chip_class,
> +       evergreen_init_common_regs(rctx, cb, rctx->b.chip_class,
>                                    rctx->b.family, rctx->screen->b.info.drm_minor);
>
>         family = rctx->b.family;
> @@ -3685,7 +3676,11 @@ void evergreen_init_state_functions(struct r600_context *rctx)
>          * or piglit regression).
>          * !!!
>          */
> -
> +       if (rctx->b.chip_class == EVERGREEN) {
> +               r600_init_atom(rctx, &rctx->config_state.atom, id++, evergreen_emit_config_state, 11);
> +               if (rctx->screen->b.info.drm_minor >= 7)
> +                       rctx->config_state.dyn_gpr_enabled = true;
> +       }
>         r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0);
>         /* shader const */
>         r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0);
> @@ -3891,3 +3886,118 @@ void evergreen_set_lds_alloc(struct r600_context *rctx,
>  {
>         radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc);
>  }
> +
> +/* on evergreen if you are running tessellation you need to disable dynamic
> +   GPRs to workaround a hardware bug.*/
> +bool evergreen_adjust_gprs(struct r600_context *rctx)
> +{
> +       unsigned num_gprs[EG_NUM_HW_STAGES];
> +       unsigned def_gprs[EG_NUM_HW_STAGES];
> +       unsigned cur_gprs[EG_NUM_HW_STAGES];
> +       unsigned new_gprs[EG_NUM_HW_STAGES];
> +       unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
> +       unsigned max_gprs;
> +       unsigned i;
> +       unsigned total_gprs;
> +       unsigned tmp[3];
> +       bool rework = false, set_default = false, set_dirty = false;
> +       max_gprs = 0;
> +       for (i = 0; i < EG_NUM_HW_STAGES; i++) {
> +               def_gprs[i] = rctx->default_gprs[i];
> +               max_gprs += def_gprs[i];
> +       }
> +       max_gprs += def_num_clause_temp_gprs * 2;
> +
> +       /* if we have no TESS and dyn gpr is enabled then do nothing. */
> +       if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader || rctx->screen->b.info.drm_minor < 7) {
> +               if (rctx->config_state.dyn_gpr_enabled)
> +                       return true;
> +
> +               /* transition back to dyn gpr enabled state */
> +               rctx->config_state.dyn_gpr_enabled = true;
> +               r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
> +               rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
> +               return true;
> +       }
> +
> +
> +       /* gather required shader gprs */
> +       for (i = 0; i < EG_NUM_HW_STAGES; i++)
> +               num_gprs[i] = rctx->hw_shader_stages[i].shader->shader.bc.ngpr;
> +
> +       cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +       cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +       cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
> +       cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
> +       cur_gprs[EG_HW_STAGE_LS] = G_008C0C_NUM_LS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);
> +       cur_gprs[EG_HW_STAGE_HS] = G_008C0C_NUM_HS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);
> +
> +       total_gprs = 0;
> +       for (i = 0; i < EG_NUM_HW_STAGES; i++)  {
> +               new_gprs[i] = num_gprs[i];
> +               total_gprs += num_gprs[i];
> +       }
> +
> +       if (total_gprs > (max_gprs - (2 * def_num_clause_temp_gprs)))
> +               return false;
> +
> +       for (i = 0; i < EG_NUM_HW_STAGES; i++) {
> +               if (new_gprs[i] > cur_gprs[i]) {
> +                       rework = true;
> +                       break;
> +               }
> +       }
> +
> +       if (rctx->config_state.dyn_gpr_enabled) {
> +               set_dirty = true;
> +               rctx->config_state.dyn_gpr_enabled = false;
> +       }
> +
> +       if (rework) {
> +               set_default = true;
> +               for (i = 0; i < EG_NUM_HW_STAGES; i++) {
> +                       if (new_gprs[i] > def_gprs[i])
> +                               set_default = false;
> +               }
> +
> +               if (set_default) {
> +                       for (i = 0; i < EG_NUM_HW_STAGES; i++) {
> +                               new_gprs[i] = def_gprs[i];
> +                       }
> +               } else {
> +                       unsigned ps_value = max_gprs;
> +
> +                       ps_value -= (def_num_clause_temp_gprs * 2);
> +                       for (i = R600_HW_STAGE_VS; i < EG_NUM_HW_STAGES; i++)
> +                               ps_value -= new_gprs[i];
> +
> +                       new_gprs[R600_HW_STAGE_PS] = ps_value;
> +               }
> +
> +               tmp[0] = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) |
> +                       S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) |
> +                       S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
> +
> +               tmp[1] = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) |
> +                       S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]);
> +
> +               tmp[2] = S_008C0C_NUM_HS_GPRS(new_gprs[EG_HW_STAGE_HS]) |
> +                       S_008C0C_NUM_LS_GPRS(new_gprs[EG_HW_STAGE_LS]);
> +
> +               if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp[0] ||
> +                   rctx->config_state.sq_gpr_resource_mgmt_2 != tmp[1] ||
> +                   rctx->config_state.sq_gpr_resource_mgmt_3 != tmp[2]) {
> +                       rctx->config_state.sq_gpr_resource_mgmt_1 = tmp[0];
> +                       rctx->config_state.sq_gpr_resource_mgmt_2 = tmp[1];
> +                       rctx->config_state.sq_gpr_resource_mgmt_3 = tmp[2];
> +                       set_dirty = true;
> +               }
> +       }
> +
> +
> +       if (set_dirty) {
> +               r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
> +               rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
> +       }
> +       return true;
> +}
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index b7845b5..90b99e8 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -310,7 +310,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
>         ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
>         ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8;
>         r600_mark_atom_dirty(ctx, &ctx->viewport.atom);
> -       if (ctx->b.chip_class < EVERGREEN) {
> +       if (ctx->b.chip_class <= EVERGREEN) {
>                 r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
>         }
>         r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 78f3a59..04248b4 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -206,6 +206,8 @@ struct r600_config_state {
>         struct r600_atom atom;
>         unsigned sq_gpr_resource_mgmt_1;
>         unsigned sq_gpr_resource_mgmt_2;
> +       unsigned sq_gpr_resource_mgmt_3;
> +       bool dyn_gpr_enabled;
>  };
>
>  struct r600_stencil_ref
> @@ -441,6 +443,7 @@ struct r600_context {
>         boolean                         has_vertex_cache;
>         boolean                         keep_tiling_flags;
>         unsigned                        default_gprs[EG_NUM_HW_STAGES];
> +       unsigned                        current_gprs[EG_NUM_HW_STAGES];
>         unsigned                        r6xx_num_clause_temp_gprs;
>
>         /* Miscellaneous state objects. */
> @@ -603,7 +606,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
>                                      const struct pipe_sampler_view *state,
>                                      unsigned width0, unsigned height0,
>                                      unsigned force_level);
> -void evergreen_init_common_regs(struct r600_command_buffer *cb,
> +void evergreen_init_common_regs(struct r600_context *ctx,
> +                               struct r600_command_buffer *cb,
>                                 enum chip_class ctx_chip_class,
>                                 enum radeon_family ctx_family,
>                                 int ctx_drm_minor);
> @@ -634,7 +638,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
>  void evergreen_init_color_surface_rat(struct r600_context *rctx,
>                                         struct r600_surface *surf);
>  void evergreen_update_db_shader_control(struct r600_context * rctx);
> -
> +bool evergreen_adjust_gprs(struct r600_context *rctx);
>  /* r600_blit.c */
>  void r600_init_blit_functions(struct r600_context *rctx);
>  void r600_decompress_depth_textures(struct r600_context *rctx,
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index ab3313f..351aca9 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -1624,6 +1624,13 @@ static bool r600_update_derived_state(struct r600_context *rctx)
>                 }
>         }
>
> +       if (rctx->b.chip_class == EVERGREEN) {
> +               if (!evergreen_adjust_gprs(rctx)) {
> +                       /* discard rendering */
> +                       return false;
> +               }
> +       }
> +
>         blend_disable = (rctx->dual_src_blend &&
>                         rctx->ps_shader->current->nr_ps_color_outputs < 2);
>
> --
> 2.5.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list