[PATCH v2] drm/amdgpu: move the drm client creation behind drm device registration

Felix Kuehling felix.kuehling at amd.com
Thu Jan 25 18:54:40 UTC 2024


On 2024-01-25 08:15, Lazar, Lijo wrote:
> On 1/25/2024 1:37 PM, Le Ma wrote:
>> This patch is to eliminate interrupt warning below:
>>
>>    "[drm] Fence fallback timer expired on ring sdma0.0".
>>
>> An early vm pt clearing job is sent to SDMA ahead of interrupt enabled,
>> introduced by patch below:
>>
>>    - drm/amdkfd: Export DMABufs from KFD using GEM handles
>>
> The drm client creation logic in this patch won't work well for dynamic
> partition switch cases. Better add a ''Fixes' tag also.

I agree. It would also be problematic with GPU resets. amdgpu_pci_probe 
seems to be the right place for this to ensure it only gets called once. 
The fix looks reasonable to me.

Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>


>
> This looks fine to me, needs to be checked by Felix anyway.
>
> Thanks,
> Lijo
>
>> And re-locating the drm client creation following after drm_dev_register
>> looks like a more proper flow.
>>
>> v2: wrap the drm client creation
>>
>> Signed-off-by: Le Ma <le.ma at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++--------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 +++
>>   3 files changed, 27 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> index 77e263660288..41db030ddc4e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> @@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
>>   static const struct drm_client_funcs kfd_client_funcs = {
>>   	.unregister	= drm_client_release,
>>   };
>> +
>> +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
>> +{
>> +	int ret;
>> +
>> +	if (!adev->kfd.init_complete)
>> +		return 0;
>> +
>> +	ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
>> +			      &kfd_client_funcs);
>> +	if (ret) {
>> +		dev_err(adev->dev, "Failed to init DRM client: %d\n",
>> +			ret);
>> +		return ret;
>> +	}
>> +
>> +	drm_client_register(&adev->kfd.client);
>> +
>> +	return 0;
>> +}
>> +
>>   void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>>   {
>>   	int i;
>>   	int last_valid_bit;
>> -	int ret;
>>   
>>   	amdgpu_amdkfd_gpuvm_init_mem_limits();
>>   
>> @@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>>   			.enable_mes = adev->enable_mes,
>>   		};
>>   
>> -		ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
>> -		if (ret) {
>> -			dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
>> -			return;
>> -		}
>> -
>>   		/* this is going to have a few of the MSBs set that we need to
>>   		 * clear
>>   		 */
>> @@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>>   
>>   		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
>>   							&gpu_resources);
>> -		if (adev->kfd.init_complete)
>> -			drm_client_register(&adev->kfd.client);
>> -		else
>> -			drm_client_release(&adev->kfd.client);
>>   
>>   		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 584a0cea5572..da175c384adf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
>>   struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
>>   				struct mm_struct *mm,
>>   				struct svm_range_bo *svm_bo);
>> +
>> +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
>>   #if defined(CONFIG_DEBUG_FS)
>>   int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 475bd59c9ac2..91d5d9435067 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -2255,6 +2255,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>>   	if (ret)
>>   		goto err_pci;
>>   
>> +	ret = amdgpu_amdkfd_drm_client_create(adev);
>> +	if (ret)
>> +		goto err_pci;
>> +
>>   	/*
>>   	 * 1. don't init fbdev on hw without DCE
>>   	 * 2. don't init fbdev if there are no connectors


More information about the amd-gfx mailing list