[PATCH 1/1] drm/amdgpu: Fix handling of KFD initialization failures

Russell, Kent Kent.Russell at amd.com
Fri Sep 18 18:17:32 UTC 2020


[AMD Public Use]

Sorry, I read this yesterday and forgot to reply.

Reviewed-by: Kent Russell <kent.russell at amd.com>

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Felix Kuehling
> Sent: Thursday, September 17, 2020 12:16 AM
> To: amd-gfx at lists.freedesktop.org
> Subject: [PATCH 1/1] drm/amdgpu: Fix handling of KFD initialization failures
> 
> Remember KFD module initialization status in a global variable. Skip KFD
> device probing when the module was not initialized. Other amdgpu_amdkfd
> calls are then protected by the adev->kfd.dev check.
> 
> Also print a clear error message when KFD disables itself. Amdgpu
> continues its initialization even when KFD failed.
> 
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 11 ++++++++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  1 +
>  drivers/gpu/drm/amd/amdkfd/kfd_module.c    |  1 +
>  3 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index d58148f455dd..7f14461f7f40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -36,6 +36,8 @@
>   */
>  uint64_t amdgpu_amdkfd_total_mem_size;
> 
> +bool kfd_initialized;
> +
>  int amdgpu_amdkfd_init(void)
>  {
>  	struct sysinfo si;
> @@ -51,19 +53,26 @@ int amdgpu_amdkfd_init(void)
>  #else
>  	ret = -ENOENT;
>  #endif
> +	kfd_initialized = !ret;
> 
>  	return ret;
>  }
> 
>  void amdgpu_amdkfd_fini(void)
>  {
> -	kgd2kfd_exit();
> +	if (kfd_initialized) {
> +		kgd2kfd_exit();
> +		kfd_initialized = false;
> +	}
>  }
> 
>  void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
>  {
>  	bool vf = amdgpu_sriov_vf(adev);
> 
> +	if (!kfd_initialized)
> +		return;
> +
>  	adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
>  				      adev->pdev, adev->asic_type, vf);
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b7b16adb0615..297484ca7d19 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -97,6 +97,7 @@ void kfd_chardev_exit(void)
>  	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
>  	class_destroy(kfd_class);
>  	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
> +	kfd_device = NULL;
>  }
> 
>  struct device *kfd_chardev(void)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> index f4b7f7e6c40e..e5aa51f44dd5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> @@ -70,6 +70,7 @@ static int kfd_init(void)
>  err_topology:
>  	kfd_chardev_exit();
>  err_ioctl:
> +	pr_err("KFD is disabled due to module intialization failure\n");
>  	return err;
>  }
> 
> --
> 2.17.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Ckent.russell%40amd.com%7Cf9442a9688294ecc817808d85ac07cb4%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637359130061923813&sdata=ysgKdz8%2BuZY93Um5fFAXR5hopzDg0MhFbasV29mJO%2F0%3D&reserved=0


More information about the amd-gfx mailing list