[PATCH 2/2] drm/amdkfd: Add PCIe Hotplug Support for AMDKFD

Fri Apr 8 15:28:07 UTC 2022

On 2022-04-08 04:45, Shuotao Xu wrote:
> Adding PCIe Hotplug Support for AMDKFD: the support of hot-plug of GPU
> devices can open doors for many advanced applications in data center
> in the next few years, such as for GPU resource
> disaggregation. Current AMDKFD does not support hotplug out b/o the
> following reasons:
> 
> 1. During PCIe removal, decrement KFD lock which was incremented at
>     the beginning of hw fini; otherwise kfd_open later is going to
>     fail.

I assumed you read my comment last time, still you do same approach.
More in details bellow

> 
> 2. Remove redudant p2p/io links in sysfs when device is hotplugged
>     out.
> 
> 3. New kfd node_id is not properly assigned after a new device is
>     added after a gpu is hotplugged out in a system. libhsakmt will
>     find this anomaly, (i.e. node_from != <dev node id> in iolinks),
>     when taking a topology_snapshot, thus returns fault to the rocm
>     stack.
> 
> -- This patch fixes issue 1; another patch by Mukul fixes issues 2&3.
> -- Tested on a 4-GPU MI100 gpu nodes with kernel 5.13.0-kfd; kernel
>     5.16.0-kfd is unstable out of box for MI100.
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  5 +++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  7 +++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 13 +++++++++++++
>   4 files changed, 26 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index c18c4be1e4ac..d50011bdb5c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -213,6 +213,11 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
>   	return r;
>   }
>   
> +int amdgpu_amdkfd_resume_processes(void)
> +{
> +	return kgd2kfd_resume_processes();
> +}
> +
>   int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
>   {
>   	int r = 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index f8b9f27adcf5..803306e011c3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -140,6 +140,7 @@ void amdgpu_amdkfd_fini(void);
>   void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
>   int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev);
>   int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
> +int amdgpu_amdkfd_resume_processes(void);
>   void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>   			const void *ih_ring_entry);
>   void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
> @@ -347,6 +348,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd);
>   void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
>   int kgd2kfd_resume_iommu(struct kfd_dev *kfd);
>   int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
> +int kgd2kfd_resume_processes(void);
>   int kgd2kfd_pre_reset(struct kfd_dev *kfd);
>   int kgd2kfd_post_reset(struct kfd_dev *kfd);
>   void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
> @@ -393,6 +395,11 @@ static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>   	return 0;
>   }
>   
> +static inline int kgd2kfd_resume_processes(void)
> +{
> +	return 0;
> +}
> +
>   static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
>   {
>   	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index fa4a9f13c922..5827b65b7489 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4004,6 +4004,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
>   	if (drm_dev_is_unplugged(adev_to_drm(adev)))
>   		amdgpu_device_unmap_mmio(adev);
>   
> +	amdgpu_amdkfd_resume_processes();
>   }
>   
>   void amdgpu_device_fini_sw(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 62aa6c9d5123..ef05aae9255e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -714,6 +714,19 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
>   	return ret;
>   }
>   
> +/* for non-runtime resume only */
> +int kgd2kfd_resume_processes(void)
> +{
> +	int count;
> +
> +	count = atomic_dec_return(&kfd_locked);
> +	WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
> +	if (count == 0)
> +		return kfd_resume_all_processes();
> +
> +	return 0;
> +}


It doesn't make sense to me to just increment kfd_locked in
kgd2kfd_suspend to only decrement it again a few functions down the
road.

I suggest this instead - you only incrmemnt if not during PCI remove

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 1c2cf3a33c1f..7754f77248a4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -952,11 +952,12 @@ bool kfd_is_locked(void)

  void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
  {
+
         if (!kfd->init_complete)
                 return;

         /* for runtime suspend, skip locking kfd */
-       if (!run_pm) {
+       if (!run_pm && !drm_dev_is_unplugged(kfd->ddev)) {
                 /* For first KFD device suspend all the KFD processes */
                 if (atomic_inc_return(&kfd_locked) == 1)
                         kfd_suspend_all_processes();


Andrey



> +
>   int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
>   {
>   	int err = 0;