[PATCH 01/33] drm/amdgpu: clean up sdma reset functions

Alex Deucher alexdeucher at gmail.com
Tue Jul 15 13:33:10 UTC 2025


On Mon, Jul 14, 2025 at 10:00 AM Lazar, Lijo <lijo.lazar at amd.com> wrote:
>
> Since the series has supported_reset across different IP blocks, isn't
> it better to move this to amdgpu_ip_block? Or, if this needs to be
> specific to different types of rings within an IP block, keep a
> supported_reset flag per ring and do something like -
>
> amdgpu_ring_is_reset_supported(ring, reset_type) and call
> amdgpu_ring_reset()?

Yeah, I was thinking about that as a further cleanup once this lands.

Alex

>
> Thanks,
> Lijo
>
>
> On 7/12/2025 4:09 AM, Alex Deucher wrote:
> > Make them consistent and drop unneeded extra variables.
> >
> > Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++++---
> >  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 17 +++++++++++++----
> >  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 20 ++++++++------------
> >  drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 20 ++++++++------------
> >  4 files changed, 40 insertions(+), 31 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > index 5a1098bdd8256..999705e7b2641 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > @@ -1428,7 +1428,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
> >       case IP_VERSION(5, 0, 0):
> >       case IP_VERSION(5, 0, 2):
> >       case IP_VERSION(5, 0, 5):
> > -             if (adev->sdma.instance[0].fw_version >= 35)
> > +             if ((adev->sdma.instance[0].fw_version >= 35) &&
> > +                 !amdgpu_sriov_vf(adev))
> >                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> >               break;
> >       default:
> > @@ -1544,11 +1545,18 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
> >                                struct amdgpu_fence *timedout_fence)
> >  {
> >       struct amdgpu_device *adev = ring->adev;
> > -     u32 inst_id = ring->me;
> >       int r;
> >
> > +     if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> > +             return -EOPNOTSUPP;
> > +
> > +     if (ring->me >= adev->sdma.num_instances) {
> > +             dev_err(adev->dev, "sdma instance not found\n");
> > +             return -EINVAL;
> > +     }
> > +
> >       amdgpu_amdkfd_suspend(adev, true);
> > -     r = amdgpu_sdma_reset_engine(adev, inst_id, false);
> > +     r = amdgpu_sdma_reset_engine(adev, ring->me, false);
> >       amdgpu_amdkfd_resume(adev, true);
> >
> >       return r;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > index 6843c2c3d71f5..e542195972dd4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > @@ -1347,11 +1347,13 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
> >       case IP_VERSION(5, 2, 2):
> >       case IP_VERSION(5, 2, 3):
> >       case IP_VERSION(5, 2, 4):
> > -             if (adev->sdma.instance[0].fw_version >= 76)
> > +             if ((adev->sdma.instance[0].fw_version >= 76) &&
> > +                 !amdgpu_sriov_vf(adev))
> >                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> >               break;
> >       case IP_VERSION(5, 2, 5):
> > -             if (adev->sdma.instance[0].fw_version >= 34)
> > +             if ((adev->sdma.instance[0].fw_version >= 34) &&
> > +                 !amdgpu_sriov_vf(adev))
> >                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> >               break;
> >       default:
> > @@ -1457,11 +1459,18 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
> >                                struct amdgpu_fence *timedout_fence)
> >  {
> >       struct amdgpu_device *adev = ring->adev;
> > -     u32 inst_id = ring->me;
> >       int r;
> >
> > +     if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> > +             return -EOPNOTSUPP;
> > +
> > +     if (ring->me >= adev->sdma.num_instances) {
> > +             dev_err(adev->dev, "sdma instance not found\n");
> > +             return -EINVAL;
> > +     }
> > +
> >       amdgpu_amdkfd_suspend(adev, true);
> > -     r = amdgpu_sdma_reset_engine(adev, inst_id, false);
> > +     r = amdgpu_sdma_reset_engine(adev, ring->me, false);
> >       amdgpu_amdkfd_resume(adev, true);
> >
> >       return r;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> > index d2effa5318176..c08e9a6cf6827 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> > @@ -1355,7 +1355,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
> >       case IP_VERSION(6, 0, 0):
> >       case IP_VERSION(6, 0, 2):
> >       case IP_VERSION(6, 0, 3):
> > -             if (adev->sdma.instance[0].fw_version >= 21)
> > +             if ((adev->sdma.instance[0].fw_version >= 21) &&
> > +                 !amdgpu_sriov_vf(adev))
> >                       adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> >               break;
> >       default:
> > @@ -1575,18 +1576,13 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
> >                                struct amdgpu_fence *timedout_fence)
> >  {
> >       struct amdgpu_device *adev = ring->adev;
> > -     int i, r;
> > -
> > -     if (amdgpu_sriov_vf(adev))
> > -             return -EINVAL;
> > +     int r;
> >
> > -     for (i = 0; i < adev->sdma.num_instances; i++) {
> > -             if (ring == &adev->sdma.instance[i].ring)
> > -                     break;
> > -     }
> > +     if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> > +             return -EOPNOTSUPP;
> >
> > -     if (i == adev->sdma.num_instances) {
> > -             DRM_ERROR("sdma instance not found\n");
> > +     if (ring->me >= adev->sdma.num_instances) {
> > +             dev_err(adev->dev, "sdma instance not found\n");
> >               return -EINVAL;
> >       }
> >
> > @@ -1596,7 +1592,7 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
> >       if (r)
> >               return r;
> >
> > -     r = sdma_v6_0_gfx_resume_instance(adev, i, true);
> > +     r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true);
> >       if (r)
> >               return r;
> >       amdgpu_fence_driver_force_completion(ring);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> > index 99a080bad2a3d..ba1f3e3b6eb61 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> > @@ -807,18 +807,13 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
> >                                struct amdgpu_fence *timedout_fence)
> >  {
> >       struct amdgpu_device *adev = ring->adev;
> > -     int i, r;
> > -
> > -     if (amdgpu_sriov_vf(adev))
> > -             return -EINVAL;
> > +     int r;
> >
> > -     for (i = 0; i < adev->sdma.num_instances; i++) {
> > -             if (ring == &adev->sdma.instance[i].ring)
> > -                     break;
> > -     }
> > +     if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> > +             return -EOPNOTSUPP;
> >
> > -     if (i == adev->sdma.num_instances) {
> > -             DRM_ERROR("sdma instance not found\n");
> > +     if (ring->me >= adev->sdma.num_instances) {
> > +             dev_err(adev->dev, "sdma instance not found\n");
> >               return -EINVAL;
> >       }
> >
> > @@ -828,7 +823,7 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
> >       if (r)
> >               return r;
> >
> > -     r = sdma_v7_0_gfx_resume_instance(adev, i, true);
> > +     r = sdma_v7_0_gfx_resume_instance(adev, ring->me, true);
> >       if (r)
> >               return r;
> >       amdgpu_fence_driver_force_completion(ring);
> > @@ -1346,7 +1341,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
> >
> >       adev->sdma.supported_reset =
> >               amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
> > -     adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> > +     if (!amdgpu_sriov_vf(adev))
> > +             adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> >
> >       r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> >       if (r)
>


More information about the amd-gfx mailing list