[PATCH] drm/amdgpu: fix sriov reinit late orders

Lazar, Lijo lijo.lazar at amd.com
Tue Nov 26 12:03:43 UTC 2024



On 11/26/2024 4:45 PM, Yiqing Yao wrote:
> Use the found block to call the correct init/resume function on that block.
> Set status.hw for both resume and init.
> 
> Print the re-init result again, now using dev_info.
> Use amdgpu_device_ip_get_ip_block to get the target block instead of
> looping over all IP blocks.
> 
> Fixes: 17eb6e7137a7 ("drm/amdgpu: validate resume before function call")
> Signed-off-by: Yiqing Yao <YiQing.Yao at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 +++++++++-------------
>  1 file changed, 15 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 79c573de1f2d..2e30539b589c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3669,7 +3669,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
>  				continue;
>  
>  			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
> -			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
> +			dev_info(adev->dev, "RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
>  			if (r)
>  				return r;
>  			block->status.hw = true;
> @@ -3681,7 +3681,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
>  
>  static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
>  {
> -	int i, r;
> +	struct amdgpu_ip_block *block;
> +	int i, r = 0;
>  
>  	static enum amd_ip_block_type ip_order[] = {
>  		AMD_IP_BLOCK_TYPE_SMC,
> @@ -3696,34 +3697,26 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
>  	};
>  
>  	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
> -		int j;
> -		struct amdgpu_ip_block *block;
> -
> -		for (j = 0; j < adev->num_ip_blocks; j++) {
> -			block = &adev->ip_blocks[j];
> +		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
>  
> -			if (block->version->type != ip_order[i] ||
> -				!block->status.valid ||
> -				block->status.hw)
> -				continue;
> +		if(!block)
> +			continue;
>  
> +		if (block->status.valid && !block->status.hw) {
>  			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
> -				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
> -				if (r)
> -					return r;
> +				r = amdgpu_ip_block_resume(block);
>  			} else {
> -				r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
> -				if (r) {
> -					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
> -						  adev->ip_blocks[i].version->funcs->name, r);
> -					return r;
> -				}
> -				block->status.hw = true;
> +				r = block->version->funcs->hw_init(block);
>  			}
> +			dev_info(adev->dev, "RE-INIT-late: %s %s\n", block->version->funcs->name,
> +				 r?"failed":"succeeded")

This message will be printed after every reset; it is better to keep the
print only for the failure case, as before.

Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>

Thanks,
Lijo

> +			if (r)
> +				break;
> +			block->status.hw = true;
>  		}
>  	}
>  
> -	return 0;
> +	return r;
>  }
>  
>  /**



More information about the amd-gfx mailing list