[PATCH 01/33] drm/amdgpu: clean up sdma reset functions

Lazar, Lijo lijo.lazar at amd.com
Mon Jul 14 14:00:32 UTC 2025


Since the series has supported_reset across different IP blocks, isn't
it better to move this to amdgpu_ip_block? Or, if this needs to be
specific to different types of rings within an IP block, keep a
supported_reset flag per ring to do something like -

amdgpu_ring_is_reset_supported(ring, reset_type) and call
amdgpu_ring_reset()?

Thanks,
Lijo


On 7/12/2025 4:09 AM, Alex Deucher wrote:
> Make them consistent and drop unneeded extra variables.
> 
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++++---
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 17 +++++++++++++----
>  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 20 ++++++++------------
>  drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 20 ++++++++------------
>  4 files changed, 40 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 5a1098bdd8256..999705e7b2641 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1428,7 +1428,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
>  	case IP_VERSION(5, 0, 0):
>  	case IP_VERSION(5, 0, 2):
>  	case IP_VERSION(5, 0, 5):
> -		if (adev->sdma.instance[0].fw_version >= 35)
> +		if ((adev->sdma.instance[0].fw_version >= 35) &&
> +		    !amdgpu_sriov_vf(adev))
>  			adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
>  		break;
>  	default:
> @@ -1544,11 +1545,18 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
>  				 struct amdgpu_fence *timedout_fence)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> -	u32 inst_id = ring->me;
>  	int r;
>  
> +	if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> +		return -EOPNOTSUPP;
> +
> +	if (ring->me >= adev->sdma.num_instances) {
> +		dev_err(adev->dev, "sdma instance not found\n");
> +		return -EINVAL;
> +	}
> +
>  	amdgpu_amdkfd_suspend(adev, true);
> -	r = amdgpu_sdma_reset_engine(adev, inst_id, false);
> +	r = amdgpu_sdma_reset_engine(adev, ring->me, false);
>  	amdgpu_amdkfd_resume(adev, true);
>  
>  	return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 6843c2c3d71f5..e542195972dd4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -1347,11 +1347,13 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
>  	case IP_VERSION(5, 2, 2):
>  	case IP_VERSION(5, 2, 3):
>  	case IP_VERSION(5, 2, 4):
> -		if (adev->sdma.instance[0].fw_version >= 76)
> +		if ((adev->sdma.instance[0].fw_version >= 76) &&
> +		    !amdgpu_sriov_vf(adev))
>  			adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
>  		break;
>  	case IP_VERSION(5, 2, 5):
> -		if (adev->sdma.instance[0].fw_version >= 34)
> +		if ((adev->sdma.instance[0].fw_version >= 34) &&
> +		    !amdgpu_sriov_vf(adev))
>  			adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
>  		break;
>  	default:
> @@ -1457,11 +1459,18 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
>  				 struct amdgpu_fence *timedout_fence)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> -	u32 inst_id = ring->me;
>  	int r;
>  
> +	if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> +		return -EOPNOTSUPP;
> +
> +	if (ring->me >= adev->sdma.num_instances) {
> +		dev_err(adev->dev, "sdma instance not found\n");
> +		return -EINVAL;
> +	}
> +
>  	amdgpu_amdkfd_suspend(adev, true);
> -	r = amdgpu_sdma_reset_engine(adev, inst_id, false);
> +	r = amdgpu_sdma_reset_engine(adev, ring->me, false);
>  	amdgpu_amdkfd_resume(adev, true);
>  
>  	return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index d2effa5318176..c08e9a6cf6827 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -1355,7 +1355,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
>  	case IP_VERSION(6, 0, 0):
>  	case IP_VERSION(6, 0, 2):
>  	case IP_VERSION(6, 0, 3):
> -		if (adev->sdma.instance[0].fw_version >= 21)
> +		if ((adev->sdma.instance[0].fw_version >= 21) &&
> +		    !amdgpu_sriov_vf(adev))
>  			adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
>  		break;
>  	default:
> @@ -1575,18 +1576,13 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
>  				 struct amdgpu_fence *timedout_fence)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> -	int i, r;
> -
> -	if (amdgpu_sriov_vf(adev))
> -		return -EINVAL;
> +	int r;
>  
> -	for (i = 0; i < adev->sdma.num_instances; i++) {
> -		if (ring == &adev->sdma.instance[i].ring)
> -			break;
> -	}
> +	if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> +		return -EOPNOTSUPP;
>  
> -	if (i == adev->sdma.num_instances) {
> -		DRM_ERROR("sdma instance not found\n");
> +	if (ring->me >= adev->sdma.num_instances) {
> +		dev_err(adev->dev, "sdma instance not found\n");
>  		return -EINVAL;
>  	}
>  
> @@ -1596,7 +1592,7 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
>  	if (r)
>  		return r;
>  
> -	r = sdma_v6_0_gfx_resume_instance(adev, i, true);
> +	r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true);
>  	if (r)
>  		return r;
>  	amdgpu_fence_driver_force_completion(ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> index 99a080bad2a3d..ba1f3e3b6eb61 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> @@ -807,18 +807,13 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
>  				 struct amdgpu_fence *timedout_fence)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> -	int i, r;
> -
> -	if (amdgpu_sriov_vf(adev))
> -		return -EINVAL;
> +	int r;
>  
> -	for (i = 0; i < adev->sdma.num_instances; i++) {
> -		if (ring == &adev->sdma.instance[i].ring)
> -			break;
> -	}
> +	if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
> +		return -EOPNOTSUPP;
>  
> -	if (i == adev->sdma.num_instances) {
> -		DRM_ERROR("sdma instance not found\n");
> +	if (ring->me >= adev->sdma.num_instances) {
> +		dev_err(adev->dev, "sdma instance not found\n");
>  		return -EINVAL;
>  	}
>  
> @@ -828,7 +823,7 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
>  	if (r)
>  		return r;
>  
> -	r = sdma_v7_0_gfx_resume_instance(adev, i, true);
> +	r = sdma_v7_0_gfx_resume_instance(adev, ring->me, true);
>  	if (r)
>  		return r;
>  	amdgpu_fence_driver_force_completion(ring);
> @@ -1346,7 +1341,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
>  
>  	adev->sdma.supported_reset =
>  		amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
> -	adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
> +	if (!amdgpu_sriov_vf(adev))
> +		adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
>  
>  	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
>  	if (r)



More information about the amd-gfx mailing list