[PATCH 01/33] drm/amdgpu: clean up sdma reset functions
Alex Deucher
alexdeucher at gmail.com
Tue Jul 15 13:33:10 UTC 2025
On Mon, Jul 14, 2025 at 10:00 AM Lazar, Lijo <lijo.lazar at amd.com> wrote:
>
> Since the series has supported_reset across different ip blocks, isn't
> it better to move this to amdgpu_ip_block? Or, if this needs to be
> specific to the different types of rings within an IP block, keep a
> supported_reset flag per ring and do something like -
>
> amdgpu_ring_is_reset_supported(ring, reset_type) and call
> amdgpu_ring_reset()?
Yeah, I was thinking about that as a further cleanup once this lands.
Alex
>
> Thanks,
> Lijo
>
>
> On 7/12/2025 4:09 AM, Alex Deucher wrote:
> > Make them consistent and drop unneeded extra variables.
> >
> > Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++++---
> > drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 17 +++++++++++++----
> > drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 20 ++++++++------------
> > drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 20 ++++++++------------
> > 4 files changed, 40 insertions(+), 31 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > index 5a1098bdd8256..999705e7b2641 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > @@ -1428,7 +1428,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
> > case IP_VERSION(5, 0, 0):
> > case IP_VERSION(5, 0, 2):
> > case IP_VERSION(5, 0, 5):
> > - if (adev->sdma.instance[0].fw_version >= 35)
> > + if ((adev->sdma.instance[0].fw_version >= 35) &&
> > + !amdgpu_sriov_vf(adev))
> > adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> > break;
> > default:
> > @@ -1544,11 +1545,18 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
> > struct amdgpu_fence *timedout_fence)
> > {
> > struct amdgpu_device *adev = ring->adev;
> > - u32 inst_id = ring->me;
> > int r;
> >
> > + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> > + return -EOPNOTSUPP;
> > +
> > + if (ring->me >= adev->sdma.num_instances) {
> > + dev_err(adev->dev, "sdma instance not found\n");
> > + return -EINVAL;
> > + }
> > +
> > amdgpu_amdkfd_suspend(adev, true);
> > - r = amdgpu_sdma_reset_engine(adev, inst_id, false);
> > + r = amdgpu_sdma_reset_engine(adev, ring->me, false);
> > amdgpu_amdkfd_resume(adev, true);
> >
> > return r;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > index 6843c2c3d71f5..e542195972dd4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > @@ -1347,11 +1347,13 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
> > case IP_VERSION(5, 2, 2):
> > case IP_VERSION(5, 2, 3):
> > case IP_VERSION(5, 2, 4):
> > - if (adev->sdma.instance[0].fw_version >= 76)
> > + if ((adev->sdma.instance[0].fw_version >= 76) &&
> > + !amdgpu_sriov_vf(adev))
> > adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> > break;
> > case IP_VERSION(5, 2, 5):
> > - if (adev->sdma.instance[0].fw_version >= 34)
> > + if ((adev->sdma.instance[0].fw_version >= 34) &&
> > + !amdgpu_sriov_vf(adev))
> > adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> > break;
> > default:
> > @@ -1457,11 +1459,18 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
> > struct amdgpu_fence *timedout_fence)
> > {
> > struct amdgpu_device *adev = ring->adev;
> > - u32 inst_id = ring->me;
> > int r;
> >
> > + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> > + return -EOPNOTSUPP;
> > +
> > + if (ring->me >= adev->sdma.num_instances) {
> > + dev_err(adev->dev, "sdma instance not found\n");
> > + return -EINVAL;
> > + }
> > +
> > amdgpu_amdkfd_suspend(adev, true);
> > - r = amdgpu_sdma_reset_engine(adev, inst_id, false);
> > + r = amdgpu_sdma_reset_engine(adev, ring->me, false);
> > amdgpu_amdkfd_resume(adev, true);
> >
> > return r;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> > index d2effa5318176..c08e9a6cf6827 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> > @@ -1355,7 +1355,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
> > case IP_VERSION(6, 0, 0):
> > case IP_VERSION(6, 0, 2):
> > case IP_VERSION(6, 0, 3):
> > - if (adev->sdma.instance[0].fw_version >= 21)
> > + if ((adev->sdma.instance[0].fw_version >= 21) &&
> > + !amdgpu_sriov_vf(adev))
> > adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> > break;
> > default:
> > @@ -1575,18 +1576,13 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
> > struct amdgpu_fence *timedout_fence)
> > {
> > struct amdgpu_device *adev = ring->adev;
> > - int i, r;
> > -
> > - if (amdgpu_sriov_vf(adev))
> > - return -EINVAL;
> > + int r;
> >
> > - for (i = 0; i < adev->sdma.num_instances; i++) {
> > - if (ring == &adev->sdma.instance[i].ring)
> > - break;
> > - }
> > + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> > + return -EOPNOTSUPP;
> >
> > - if (i == adev->sdma.num_instances) {
> > - DRM_ERROR("sdma instance not found\n");
> > + if (ring->me >= adev->sdma.num_instances) {
> > + dev_err(adev->dev, "sdma instance not found\n");
> > return -EINVAL;
> > }
> >
> > @@ -1596,7 +1592,7 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
> > if (r)
> > return r;
> >
> > - r = sdma_v6_0_gfx_resume_instance(adev, i, true);
> > + r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true);
> > if (r)
> > return r;
> > amdgpu_fence_driver_force_completion(ring);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> > index 99a080bad2a3d..ba1f3e3b6eb61 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> > @@ -807,18 +807,13 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
> > struct amdgpu_fence *timedout_fence)
> > {
> > struct amdgpu_device *adev = ring->adev;
> > - int i, r;
> > -
> > - if (amdgpu_sriov_vf(adev))
> > - return -EINVAL;
> > + int r;
> >
> > - for (i = 0; i < adev->sdma.num_instances; i++) {
> > - if (ring == &adev->sdma.instance[i].ring)
> > - break;
> > - }
> > + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> > + return -EOPNOTSUPP;
> >
> > - if (i == adev->sdma.num_instances) {
> > - DRM_ERROR("sdma instance not found\n");
> > + if (ring->me >= adev->sdma.num_instances) {
> > + dev_err(adev->dev, "sdma instance not found\n");
> > return -EINVAL;
> > }
> >
> > @@ -828,7 +823,7 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
> > if (r)
> > return r;
> >
> > - r = sdma_v7_0_gfx_resume_instance(adev, i, true);
> > + r = sdma_v7_0_gfx_resume_instance(adev, ring->me, true);
> > if (r)
> > return r;
> > amdgpu_fence_driver_force_completion(ring);
> > @@ -1346,7 +1341,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
> >
> > adev->sdma.supported_reset =
> > amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
> > - adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> > + if (!amdgpu_sriov_vf(adev))
> > + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> >
> > r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> > if (r)
>
More information about the amd-gfx
mailing list