[PATCH 1/1] drm/amdgpu: move the drm client creation behind drm device registration

Lazar, Lijo lijo.lazar at amd.com
Thu Jan 25 07:04:00 UTC 2024



On 1/25/2024 11:48 AM, Le Ma wrote:
> This patch eliminates the interrupt warning below:
> 
>   "[drm] Fence fallback timer expired on ring sdma0.0".
> 
> An early VM page-table clearing job is sent to SDMA before interrupts are
> enabled. This was introduced by the patch below:
> 
>   - drm/amdkfd: Export DMABufs from KFD using GEM handles
> 
> Relocating the drm client creation to after drm_dev_register() also looks
> like a more appropriate flow.
> 
> Change-Id: I0fece177b78345187068f92a823d96b3b7581140
> Signed-off-by: Le Ma <le.ma at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 +------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 11 +++++++++++
>  3 files changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index add315644773..69eb0f5574d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -139,14 +139,13 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
>  	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
>  }
>  
> -static const struct drm_client_funcs kfd_client_funcs = {
> +const struct drm_client_funcs kfd_client_funcs = {
>  	.unregister	= drm_client_release,
>  };
>  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>  {
>  	int i;
>  	int last_valid_bit;
> -	int ret;
>  
>  	amdgpu_amdkfd_gpuvm_init_mem_limits();
>  
> @@ -165,12 +164,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>  			.enable_mes = adev->enable_mes,
>  		};
>  
> -		ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
> -		if (ret) {
> -			dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
> -			return;
> -		}
> -
>  		/* this is going to have a few of the MSBs set that we need to
>  		 * clear
>  		 */
> @@ -209,10 +202,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>  
>  		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
>  							&gpu_resources);
> -		if (adev->kfd.init_complete)
> -			drm_client_register(&adev->kfd.client);
> -		else
> -			drm_client_release(&adev->kfd.client);
>  
>  		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 00eed8c10cd4..b2c6f2b3c0fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -40,6 +40,8 @@
>  
>  extern uint64_t amdgpu_amdkfd_total_mem_size;
>  
> +extern const struct drm_client_funcs kfd_client_funcs;
> +
>  enum TLB_FLUSH_TYPE {
>  	TLB_FLUSH_LEGACY = 0,
>  	TLB_FLUSH_LIGHTWEIGHT,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 0d0aa4b798ac..d0b98343481d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2293,6 +2293,17 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>  			drm_fbdev_generic_setup(adev_to_drm(adev), 32);
>  	}
>  
> +	if (adev->kfd.init_complete) {
> +		ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
> +				      &kfd_client_funcs);
> +		if (ret) {
> +			dev_err(adev->dev, "Failed to init DRM client: %d\n",
> +				ret);
> +			goto err_pci;
> +		}
> +		drm_client_register(&adev->kfd.client);
> +	}
> +

Maybe it would be better to wrap this in amdgpu_amdkfd_drm_client_init() or similar.

Thanks,
Lijo

>  	ret = amdgpu_debugfs_init(adev);
>  	if (ret)
>  		DRM_ERROR("Creating debugfs files failed (%d).\n", ret);


More information about the amd-gfx mailing list