[PATCH] drm/amdgpu: revise retry init to fully cleanup driver
Christian König
ckoenig.leichtzumerken at gmail.com
Wed Nov 8 09:42:54 UTC 2017
Am 08.11.2017 um 04:29 schrieb Pixel Ding:
> Retry at drm_dev_register instead of amdgpu_device_init.
>
> Signed-off-by: Pixel Ding <Pixel.Ding at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 15 ++-------------
> 3 files changed, 13 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bf2b008..4ef2b1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2390,6 +2390,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> amdgpu_virt_mmio_blocked(adev) &&
> !amdgpu_virt_wait_reset(adev)) {
> dev_err(adev->dev, "VF exclusive mode timeout\n");
> + adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> + adev->virt.ops = NULL;
Why is that necessary? Maybe put this into some SRIOV-specific fini
function?
Apart from that, the patch looks good to me and is Acked-by: Christian König
<christian.koenig at amd.com>.
Regards,
Christian.
> r = -EAGAIN;
> goto failed;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6b11a75..eaccd4b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -565,12 +565,13 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
> return 0;
> }
>
> +
> static int amdgpu_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *ent)
> {
> struct drm_device *dev;
> unsigned long flags = ent->driver_data;
> - int ret;
> + int ret, retry = 0;
>
> if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
> DRM_INFO("This hardware requires experimental hardware support.\n"
> @@ -603,8 +604,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>
> pci_set_drvdata(pdev, dev);
>
> +retry_init:
> ret = drm_dev_register(dev, ent->driver_data);
> - if (ret)
> + if (ret == -EAGAIN && ++retry <= 3) {
> + DRM_INFO("retry init %d\n", retry);
> + /* Don't request EX mode too frequently which is attacking */
> + msleep(5000);
> + goto retry_init;
> + } else if (ret)
> goto err_pci;
>
> return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 1d56b5b..65360cd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -84,7 +84,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
> int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
> {
> struct amdgpu_device *adev;
> - int r, acpi_status, retry = 0;
> + int r, acpi_status;
>
> #ifdef CONFIG_DRM_AMDGPU_SI
> if (!amdgpu_si_support) {
> @@ -120,7 +120,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
> }
> }
> #endif
> -retry_init:
>
> adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
> if (adev == NULL) {
> @@ -143,17 +142,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
> * VRAM allocation
> */
> r = amdgpu_device_init(adev, dev, dev->pdev, flags);
> - if (r == -EAGAIN && ++retry <= 3) {
> - adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> - adev->virt.ops = NULL;
> - amdgpu_device_fini(adev);
> - kfree(adev);
> - dev->dev_private = NULL;
> - /* Don't request EX mode too frequently which is attacking */
> - msleep(5000);
> - dev_err(&dev->pdev->dev, "retry init %d\n", retry);
> - goto retry_init;
> - } else if (r) {
> + if (r) {
> dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
> goto out;
> }
More information about the amd-gfx
mailing list