[PATCH 01/33] drm/amdgpu: clean up sdma reset functions
Lazar, Lijo
lijo.lazar at amd.com
Mon Jul 14 14:00:32 UTC 2025
Since the series has supported_reset across different IP blocks, isn't
it better to move this to amdgpu_ip_block? Or, if this needs to be
specific to different types of rings within an IP block, keep a
supported_reset flag per ring to do something like
amdgpu_ring_is_reset_supported(ring, reset_type) and call
amdgpu_ring_reset()?
Thanks,
Lijo
On 7/12/2025 4:09 AM, Alex Deucher wrote:
> Make them consistent and drop unneeded extra variables.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++++---
> drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 17 +++++++++++++----
> drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 20 ++++++++------------
> drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 20 ++++++++------------
> 4 files changed, 40 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 5a1098bdd8256..999705e7b2641 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1428,7 +1428,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
> case IP_VERSION(5, 0, 0):
> case IP_VERSION(5, 0, 2):
> case IP_VERSION(5, 0, 5):
> - if (adev->sdma.instance[0].fw_version >= 35)
> + if ((adev->sdma.instance[0].fw_version >= 35) &&
> + !amdgpu_sriov_vf(adev))
> adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> default:
> @@ -1544,11 +1545,18 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
> struct amdgpu_fence *timedout_fence)
> {
> struct amdgpu_device *adev = ring->adev;
> - u32 inst_id = ring->me;
> int r;
>
> + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> + return -EOPNOTSUPP;
> +
> + if (ring->me >= adev->sdma.num_instances) {
> + dev_err(adev->dev, "sdma instance not found\n");
> + return -EINVAL;
> + }
> +
> amdgpu_amdkfd_suspend(adev, true);
> - r = amdgpu_sdma_reset_engine(adev, inst_id, false);
> + r = amdgpu_sdma_reset_engine(adev, ring->me, false);
> amdgpu_amdkfd_resume(adev, true);
>
> return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 6843c2c3d71f5..e542195972dd4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -1347,11 +1347,13 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
> case IP_VERSION(5, 2, 2):
> case IP_VERSION(5, 2, 3):
> case IP_VERSION(5, 2, 4):
> - if (adev->sdma.instance[0].fw_version >= 76)
> + if ((adev->sdma.instance[0].fw_version >= 76) &&
> + !amdgpu_sriov_vf(adev))
> adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> case IP_VERSION(5, 2, 5):
> - if (adev->sdma.instance[0].fw_version >= 34)
> + if ((adev->sdma.instance[0].fw_version >= 34) &&
> + !amdgpu_sriov_vf(adev))
> adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> default:
> @@ -1457,11 +1459,18 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
> struct amdgpu_fence *timedout_fence)
> {
> struct amdgpu_device *adev = ring->adev;
> - u32 inst_id = ring->me;
> int r;
>
> + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> + return -EOPNOTSUPP;
> +
> + if (ring->me >= adev->sdma.num_instances) {
> + dev_err(adev->dev, "sdma instance not found\n");
> + return -EINVAL;
> + }
> +
> amdgpu_amdkfd_suspend(adev, true);
> - r = amdgpu_sdma_reset_engine(adev, inst_id, false);
> + r = amdgpu_sdma_reset_engine(adev, ring->me, false);
> amdgpu_amdkfd_resume(adev, true);
>
> return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index d2effa5318176..c08e9a6cf6827 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -1355,7 +1355,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
> case IP_VERSION(6, 0, 0):
> case IP_VERSION(6, 0, 2):
> case IP_VERSION(6, 0, 3):
> - if (adev->sdma.instance[0].fw_version >= 21)
> + if ((adev->sdma.instance[0].fw_version >= 21) &&
> + !amdgpu_sriov_vf(adev))
> adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> default:
> @@ -1575,18 +1576,13 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
> struct amdgpu_fence *timedout_fence)
> {
> struct amdgpu_device *adev = ring->adev;
> - int i, r;
> -
> - if (amdgpu_sriov_vf(adev))
> - return -EINVAL;
> + int r;
>
> - for (i = 0; i < adev->sdma.num_instances; i++) {
> - if (ring == &adev->sdma.instance[i].ring)
> - break;
> - }
> + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> + return -EOPNOTSUPP;
>
> - if (i == adev->sdma.num_instances) {
> - DRM_ERROR("sdma instance not found\n");
> + if (ring->me >= adev->sdma.num_instances) {
> + dev_err(adev->dev, "sdma instance not found\n");
> return -EINVAL;
> }
>
> @@ -1596,7 +1592,7 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
> if (r)
> return r;
>
> - r = sdma_v6_0_gfx_resume_instance(adev, i, true);
> + r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true);
> if (r)
> return r;
> amdgpu_fence_driver_force_completion(ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> index 99a080bad2a3d..ba1f3e3b6eb61 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> @@ -807,18 +807,13 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
> struct amdgpu_fence *timedout_fence)
> {
> struct amdgpu_device *adev = ring->adev;
> - int i, r;
> -
> - if (amdgpu_sriov_vf(adev))
> - return -EINVAL;
> + int r;
>
> - for (i = 0; i < adev->sdma.num_instances; i++) {
> - if (ring == &adev->sdma.instance[i].ring)
> - break;
> - }
> + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> + return -EOPNOTSUPP;
>
> - if (i == adev->sdma.num_instances) {
> - DRM_ERROR("sdma instance not found\n");
> + if (ring->me >= adev->sdma.num_instances) {
> + dev_err(adev->dev, "sdma instance not found\n");
> return -EINVAL;
> }
>
> @@ -828,7 +823,7 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
> if (r)
> return r;
>
> - r = sdma_v7_0_gfx_resume_instance(adev, i, true);
> + r = sdma_v7_0_gfx_resume_instance(adev, ring->me, true);
> if (r)
> return r;
> amdgpu_fence_driver_force_completion(ring);
> @@ -1346,7 +1341,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
>
> adev->sdma.supported_reset =
> amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
> - adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> + if (!amdgpu_sriov_vf(adev))
> + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
>
> r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> if (r)
More information about the amd-gfx
mailing list