[PATCH] drm/amdgpu: set completion status as preempted for the resubmission
Alex Deucher
alexdeucher at gmail.com
Wed Jul 26 13:03:35 UTC 2023
On Wed, Jul 26, 2023 at 4:24 AM <jiadong.zhu at amd.com> wrote:
>
> From: Jiadong Zhu <Jiadong.Zhu at amd.com>
>
> The driver's CSA buffer is shared by all IBs. When a high-priority IB is
> submitted after a preempted IB, the CP overwrites ib_completion_status in
> the CSA buffer with "completed". When the preempted IB is then resubmitted,
> the CP reads the completed status and clears some of the locals stored for
> IB resume, which causes a GPU hang in some cases.
>
> Always set the status to preempted for a resubmitted IB instead of reading
> everything from the CSA buffer.
>
> Signed-off-by: Jiadong Zhu <Jiadong.Zhu at amd.com>
Acked-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h | 9 +++++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 +++-
> 2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> index b22d4fb2a847..d3186b570b82 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
> @@ -56,6 +56,15 @@ enum amdgpu_ring_mux_offset_type {
> AMDGPU_MUX_OFFSET_TYPE_CE,
> };
>
> +enum ib_complete_status {
> + /* IB not started/reset value, default value. */
> + IB_COMPLETION_STATUS_DEFAULT = 0,
> + /* IB preempted, started but not completed. */
> + IB_COMPLETION_STATUS_PREEMPTED = 1,
> + /* IB completed. */
> + IB_COMPLETION_STATUS_COMPLETED = 2,
> +};
> +
> struct amdgpu_ring_mux {
> struct amdgpu_ring *real_ring;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index fc179e5f8dc1..272f206042bf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5169,7 +5169,6 @@ static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
> unsigned offset)
> {
> u32 control = ring->ring[offset];
> -
> control |= INDIRECT_BUFFER_PRE_RESUME(1);
> ring->ring[offset] = control;
> }
> @@ -5226,6 +5225,9 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
> de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
> }
>
> + ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
> + IB_COMPLETION_STATUS_PREEMPTED;
> +
> if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
> memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
> } else {
> --
> 2.25.1
>
More information about the amd-gfx mailing list