[PATCH] drm/amdgpu: revise retry init to fully cleanup driver

Christian König ckoenig.leichtzumerken at gmail.com
Wed Nov 8 09:42:54 UTC 2017


Am 08.11.2017 um 04:29 schrieb Pixel Ding:
> Retry at drm_dev_register instead of amdgpu_device_init.
>
> Signed-off-by: Pixel Ding <Pixel.Ding at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 11 +++++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 15 ++-------------
>   3 files changed, 13 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bf2b008..4ef2b1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2390,6 +2390,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   		    amdgpu_virt_mmio_blocked(adev) &&
>   		    !amdgpu_virt_wait_reset(adev)) {
>   			dev_err(adev->dev, "VF exclusive mode timeout\n");
> +			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +			adev->virt.ops = NULL;

Why is that necessary here? Maybe put this cleanup into some
SR-IOV-specific fini function instead?

Apart from that, the patch looks good to me and is Acked-by: Christian König
<christian.koenig at amd.com>.

Regards,
Christian.

>   			r = -EAGAIN;
>   			goto failed;
>   		}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6b11a75..eaccd4b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -565,12 +565,13 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
>   	return 0;
>   }
>   
> +
>   static int amdgpu_pci_probe(struct pci_dev *pdev,
>   			    const struct pci_device_id *ent)
>   {
>   	struct drm_device *dev;
>   	unsigned long flags = ent->driver_data;
> -	int ret;
> +	int ret, retry = 0;
>   
>   	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
>   		DRM_INFO("This hardware requires experimental hardware support.\n"
> @@ -603,8 +604,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>   
>   	pci_set_drvdata(pdev, dev);
>   
> +retry_init:
>   	ret = drm_dev_register(dev, ent->driver_data);
> -	if (ret)
> +	if (ret == -EAGAIN && ++retry <= 3) {
> +		DRM_INFO("retry init %d\n", retry);
> +		/* Don't request EX mode too frequently which is attacking */
> +		msleep(5000);
> +		goto retry_init;
> +	} else if (ret)
>   		goto err_pci;
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 1d56b5b..65360cd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -84,7 +84,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
>   int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>   {
>   	struct amdgpu_device *adev;
> -	int r, acpi_status, retry = 0;
> +	int r, acpi_status;
>   
>   #ifdef CONFIG_DRM_AMDGPU_SI
>   	if (!amdgpu_si_support) {
> @@ -120,7 +120,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>   		}
>   	}
>   #endif
> -retry_init:
>   
>   	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
>   	if (adev == NULL) {
> @@ -143,17 +142,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>   	 * VRAM allocation
>   	 */
>   	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
> -	if (r == -EAGAIN && ++retry <= 3) {
> -		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> -		adev->virt.ops = NULL;
> -		amdgpu_device_fini(adev);
> -		kfree(adev);
> -		dev->dev_private = NULL;
> -		/* Don't request EX mode too frequently which is attacking */
> -		msleep(5000);
> -		dev_err(&dev->pdev->dev, "retry init %d\n", retry);
> -		goto retry_init;
> -	} else if (r) {
> +	if (r) {
>   		dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
>   		goto out;
>   	}




More information about the amd-gfx mailing list