[PATCH 01/28] drm/amdgpu/sdma: consolidate engine reset handling
Rodrigo Siqueira
siqueira at igalia.com
Sun Jul 6 14:34:16 UTC 2025
On 07/01, Alex Deucher wrote:
> Move the force completion handling into the common
> engine reset function. No need to duplicate it for
> every IP version.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 5 ++++-
> drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 17 +----------------
> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 6 ++----
> drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 6 ++----
> 4 files changed, 9 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 89a849640ab91..91e8f45fe886e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -573,9 +573,12 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
> * to be submitted to the queues after the reset is complete.
> */
> if (!ret) {
> + amdgpu_fence_driver_force_completion(gfx_ring);
> drm_sched_wqueue_start(&gfx_ring->sched);
> - if (adev->sdma.has_page_queue)
> + if (adev->sdma.has_page_queue) {
> + amdgpu_fence_driver_force_completion(page_ring);
> drm_sched_wqueue_start(&page_ring->sched);
> + }
> }
> mutex_unlock(&sdma_instance->engine_reset_mutex);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index d3072bca43e3f..572d105420ec3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -1714,7 +1714,7 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
> static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
> {
> struct amdgpu_device *adev = ring->adev;
> - u32 inst_mask, tmp_mask;
> + u32 inst_mask;
> int i, r;
>
> inst_mask = 1 << ring->me;
> @@ -1733,21 +1733,6 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
> }
>
> r = sdma_v4_4_2_inst_start(adev, inst_mask, true);
Now that you have deleted the below code, I think you can remove the
variable 'r' and use 'return sdma_v4_4_2_inst_start'.
> - if (r)
> - return r;
> -
> - tmp_mask = inst_mask;
> - for_each_inst(i, tmp_mask) {
> - ring = &adev->sdma.instance[i].ring;
> -
> - amdgpu_fence_driver_force_completion(ring);
> -
> - if (adev->sdma.has_page_queue) {
> - struct amdgpu_ring *page = &adev->sdma.instance[i].page;
> -
> - amdgpu_fence_driver_force_completion(page);
I guess I'm missing something, but this part is slightly different from
amdgpu_sdma_reset_engine since here
amdgpu_fence_driver_force_completion() can be called twice in some
cases.
Thanks
> - }
> - }
>
> return r;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 4d72b085b3dd7..ed1706da7deec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1618,10 +1618,8 @@ static int sdma_v6_0_restore_queue(struct amdgpu_ring *ring)
>
> r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true);
> amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
> - if (r)
> - return r;
> - amdgpu_fence_driver_force_completion(ring);
> - return 0;
> +
> + return r;
> }
>
> static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 42a25150f83ac..b87a4b44fa939 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -1534,10 +1534,8 @@ static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring)
> r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true);
>
> amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
> - if (r)
> - return r;
> - amdgpu_fence_driver_force_completion(ring);
> - return 0;
> +
> + return r;
> }
>
> static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
> --
> 2.50.0
>
--
Rodrigo Siqueira
More information about the amd-gfx
mailing list