[Mesa-dev] [PATCH 6/7] radeonsi: let internal compute dispatches tune WAVES_PER_SH

Marek Olšák maraeo at gmail.com
Tue Aug 21 17:50:34 UTC 2018


On Tue, Aug 21, 2018 at 3:36 AM Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
>
>
>
> On 8/21/18 9:36 AM, Samuel Pitoiset wrote:
> > Why don't you set cs_max_waves_per_sh? Did you miss something?
>
> Never mind, it's used in the next patch.

Yes, this patch just adds the state. The state is always applied, so
it doesn't need any dirty flag.

Marek

>
> >
> > On 8/21/18 7:50 AM, Marek Olšák wrote:
> >> From: Marek Olšák <marek.olsak at amd.com>
> >>
> >> ---
> >>   src/gallium/drivers/radeonsi/si_compute.c | 8 ++++++++
> >>   src/gallium/drivers/radeonsi/si_pipe.h    | 1 +
> >>   2 files changed, 9 insertions(+)
> >>
> >> diff --git a/src/gallium/drivers/radeonsi/si_compute.c
> >> b/src/gallium/drivers/radeonsi/si_compute.c
> >> index c5d3d5fcf02..e0c6902fec4 100644
> >> --- a/src/gallium/drivers/radeonsi/si_compute.c
> >> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> >> @@ -777,20 +777,28 @@ static void si_emit_dispatch_packets(struct
> >> si_context *sctx,
> >>       if (sctx->chip_class >= CIK) {
> >>           unsigned num_cu_per_se = sscreen->info.num_good_compute_units /
> >>                        sscreen->info.max_se;
> >>           /* Force even distribution on all SIMDs in CU if the workgroup
> >>            * size is 64. This has shown some good improvements if # of
> >> CUs
> >>            * per SE is not a multiple of 4.
> >>            */
> >>           if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
> >>               compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
> >> +
> >> +        compute_resource_limits |=
> >> S_00B854_WAVES_PER_SH(sctx->cs_max_waves_per_sh);
> >> +    } else {
> >> +        /* SI */
> >> +        if (sctx->cs_max_waves_per_sh) {
> >> +            unsigned limit_div16 =
> >> DIV_ROUND_UP(sctx->cs_max_waves_per_sh, 16);
> >> +            compute_resource_limits |=
> >> S_00B854_WAVES_PER_SH_SI(limit_div16);
> >> +        }
> >>       }
> >>       radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
> >>                 compute_resource_limits);
> >>       radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
> >>       radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
> >>       radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
> >>       radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
> >> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> >> b/src/gallium/drivers/radeonsi/si_pipe.h
> >> index 100d0166f62..fe06064b388 100644
> >> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> >> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> >> @@ -818,20 +818,21 @@ struct si_context {
> >>       struct si_shader_ctx_state    ps_shader;
> >>       struct si_shader_ctx_state    gs_shader;
> >>       struct si_shader_ctx_state    vs_shader;
> >>       struct si_shader_ctx_state    tcs_shader;
> >>       struct si_shader_ctx_state    tes_shader;
> >>       struct si_cs_shader_state    cs_shader_state;
> >>       /* shader information */
> >>       struct si_vertex_elements    *vertex_elements;
> >>       unsigned            sprite_coord_enable;
> >> +    unsigned            cs_max_waves_per_sh;
> >>       bool                flatshade;
> >>       bool                do_update_shaders;
> >>       /* vertex buffer descriptors */
> >>       uint32_t *vb_descriptors_gpu_list;
> >>       struct r600_resource *vb_descriptors_buffer;
> >>       unsigned vb_descriptors_offset;
> >>       /* shader descriptors */
> >>       struct si_descriptors        descriptors[SI_NUM_DESCS];
> >>


More information about the mesa-dev mailing list