[PATCH] drm/amdgpu: set completion status as preempted for the resubmission

Alex Deucher alexdeucher at gmail.com
Wed Jul 26 13:03:35 UTC 2023


On Wed, Jul 26, 2023 at 4:24 AM <jiadong.zhu at amd.com> wrote:
>
> From: Jiadong Zhu <Jiadong.Zhu at amd.com>
>
> The driver's CSA buffer is shared by all the ibs. When a high priority ib
> is submitted after the preempted ib, CP overwrites ib_completion_status
> in the CSA buffer with the completed status. When the preempted ib is then
> resubmitted, CP reads the completed status and clears some locals stored
> for ib resume, which causes a gpu hang in some cases.
>
> Always set the status as preempted for resubmitted ibs instead of reading
> everything from the CSA buffer.
>
> Signed-off-by: Jiadong Zhu <Jiadong.Zhu at amd.com>

Acked-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h | 9 +++++++++
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c        | 4 +++-
>  2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> index b22d4fb2a847..d3186b570b82 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -56,6 +56,15 @@ enum amdgpu_ring_mux_offset_type {
>         AMDGPU_MUX_OFFSET_TYPE_CE,
>  };
>
> +enum ib_complete_status {
> +       /* IB not started/reset value, default value. */
> +       IB_COMPLETION_STATUS_DEFAULT = 0,
> +       /* IB preempted, started but not completed. */
> +       IB_COMPLETION_STATUS_PREEMPTED = 1,
> +       /* IB completed. */
> +       IB_COMPLETION_STATUS_COMPLETED = 2,
> +};
> +
>  struct amdgpu_ring_mux {
>         struct amdgpu_ring      *real_ring;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index fc179e5f8dc1..272f206042bf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5169,7 +5169,6 @@ static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
>                                      unsigned offset)
>  {
>         u32 control = ring->ring[offset];
> -
>         control |= INDIRECT_BUFFER_PRE_RESUME(1);
>         ring->ring[offset] = control;
>  }
> @@ -5226,6 +5225,9 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
>                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
>         }
>
> +       ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
> +               IB_COMPLETION_STATUS_PREEMPTED;
> +
>         if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
>                 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
>         } else {
> --
> 2.25.1
>


More information about the amd-gfx mailing list