[Mesa-dev] [PATCH] radv: implement SI/CIK compute shader regalloc hang workaround.

Wed Jul 26 07:27:28 UTC 2017

Per the comment in the patch, shouldn't the correct fix for radv be to limit
the workgroup size to 256 threads on SI/BONAIRE/KABINI?

On Wed, Jul 26, 2017 at 4:56 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This ports the regalloc hang workaround from radeonsi. I'm not 100%
> sure whether this is only needed on the GFX queue, as the workaround
> references async compute not requiring it.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 10 ++++++++++
>  src/amd/vulkan/radv_pipeline.c   |  8 ++++++++
>  src/amd/vulkan/radv_private.h    |  3 +++
>  3 files changed, 21 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index 4b08781..4415e36 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -2290,6 +2290,16 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
>         radeon_emit(cmd_buffer->cs,
>                     S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]));
>
> +       /* HW bug workaround when CS threadgroups > 256 threads and async
> +        * compute isn't used, i.e. only one compute job can run at a time.
> +        * If async compute is possible, the threadgroup size must be limited
> +        * to 256 threads on all queues to avoid the bug.
> +        * Only SI and certain CIK chips are affected.
> +        */
> +       if (pipeline->compute.regalloc_hang) {
> +               cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
> +                       RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
> +       }
>         assert(cmd_buffer->cs->cdw <= cdw_max);
>  }
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 496c06a..fcfe7dc 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -2376,6 +2376,14 @@ static VkResult radv_compute_pipeline_create(
>                 return result;
>         }
>
> +       if ((device->physical_device->rad_info.chip_class == SI ||
> +            device->physical_device->rad_info.family == CHIP_BONAIRE ||
> +            device->physical_device->rad_info.family == CHIP_KABINI) &&
> +           (pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
> +            pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
> +            pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2] > 256))
> +               pipeline->compute.regalloc_hang = true;
> +
>         *pPipeline = radv_pipeline_to_handle(pipeline);
>
>         if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 8cd5ec0..4eac84c 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1094,6 +1094,9 @@ struct radv_pipeline {
>                         struct radv_prim_vertex_count prim_vertex_count;
>                         bool can_use_guardband;
>                 } graphics;
> +               struct {
> +                       bool regalloc_hang;
> +               } compute;
>         };
>
>         unsigned max_waves;
> --
> 2.9.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev