[PATCH] drm/amdgpu: Move SR-IOV check into amdgpu_gfx_sysfs_compute_init

Lazar, Lijo lijo.lazar at amd.com
Fri Jun 7 07:03:29 UTC 2024



On 6/7/2024 12:31 PM, SRINIVASAN SHANMUGAM wrote:
> 
> On 6/6/2024 10:58 PM, Lazar, Lijo wrote:
>> On 6/6/2024 5:35 PM, Srinivasan Shanmugam wrote:
>>> Previously, this check was performed in the gfx_v9_4_3_sw_init function,
>>> and the amdgpu_gfx_sysfs_compute_init function was only called if the
>>> GPU was not a VF in SR-IOV mode. This is because the sysfs entries
>>> created by amdgpu_gfx_sysfs_compute_init are specific to compute
>>> partitions, which are only applicable on GFX9 and not on a VF in SR-IOV
>>> mode.
>>>
>>> By moving the check into amdgpu_gfx_sysfs_compute_init, we make this
>>> function responsible for deciding whether or not to create the compute
>>> partition sysfs entries.
>>>
>>> This change improves the code organization and maintainability. If in
>>> the future the  conditions for creating the compute partition sysfs
>>> entries change, we  would only need to update the
>>> amdgpu_gfx_sysfs_compute_init function.
>> This is not correct. If this has to be true, this will reside somewhere
>> in amdgpu_gfx and you would also need IP version inside this one. If for
>> a new IP version say gfx v9.4.5 this needs to be created for VF also,
> 
> In this case, how about below?
> 
> int amdgpu_gfx_sysfs_compute_init(struct amdgpu_device *adev, bool
> check_sriov)  
> {  
>     int r; 
>  
>     if (!check_sriov || !amdgpu_sriov_vf(adev)) { 
>         r = device_create_file(adev->dev,
> &dev_attr_current_compute_partition); 
>         if (r) 
>             return r; 
>  
>         r = device_create_file(adev->dev,
> &dev_attr_available_compute_partition); 
>         if (r) 
>             return r; 
>     } 
>  
>     return 0; 
>> 
> In gfx_v9_4_3_sw_init you would call amdgpu_gfx_sysfs_compute_init(adev,
> true),
> 
> to perform the check, and in gfx_v9_4_5_sw_init you would call
> amdgpu_gfx_sysfs_compute_init(adev, false) to skip the check.
> 
> This way, we can control the behavior of the function without needing to
> put condition in IP code version.?
> 
> But would like have Christian's view also, onto this "a new IP version
> say gfx v9.4.5 this needs to be created for VF also,
> 

Drop the patch. As you see, the patch is just adding more complexity
with more variables rather than simplifying anything.

Thanks,
Lijo

> "
> 
>> then this check here won't work. This is the specific reason why we put
>> the conditions inside IP code.
>>
>> Thanks,
>> Lijo
>>
>>> Cc: Alex Deucher <alexander.deucher at amd.com>
>>> Cc: Christian König <christian.koenig at amd.com>
>>> Suggested-by: Christian König <christian.koenig at amd.com>
>>> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 24 +++++++++++++++---------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  4 ++--
>>>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 11 +++++------
>>>  3 files changed, 22 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> index 19b1817b55d7..72477a5aedca 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> @@ -1376,21 +1376,27 @@ static DEVICE_ATTR(current_compute_partition, 0644,
>>>  static DEVICE_ATTR(available_compute_partition, 0444,
>>>  		   amdgpu_gfx_get_available_compute_partition, NULL);
>>>  
>>> -int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
>>> +int amdgpu_gfx_sysfs_compute_init(struct amdgpu_device *adev)
>>>  {
>>>  	int r;
>>>  
>>> -	r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
>>> -	if (r)
>>> -		return r;
>>> +	if (!amdgpu_sriov_vf(adev)) {
>>> +		r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
>>> +		if (r)
>>> +			return r;
>>>  
>>> -	r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
>>> +		r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
>>> +		if (r)
>>> +			return r;
>>> +	}
>>>  
>>> -	return r;
>>> +	return 0;
>>>  }
>>>  
>>> -void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
>>> +void amdgpu_gfx_sysfs_compute_fini(struct amdgpu_device *adev)
>>>  {
>>> -	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
>>> -	device_remove_file(adev->dev, &dev_attr_available_compute_partition);
>>> +	if (!amdgpu_sriov_vf(adev)) {
>>> +		device_remove_file(adev->dev, &dev_attr_current_compute_partition);
>>> +		device_remove_file(adev->dev, &dev_attr_available_compute_partition);
>>> +	}
>>>  }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> index 6b0416777c5b..b65c459b3aa9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> @@ -533,8 +533,8 @@ int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
>>>  						struct amdgpu_iv_entry *entry);
>>>  
>>>  bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
>>> -int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
>>> -void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
>>> +int amdgpu_gfx_sysfs_compute_init(struct amdgpu_device *adev);
>>> +void amdgpu_gfx_sysfs_compute_fini(struct amdgpu_device *adev);
>>>  void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
>>>  		void *ras_error_status,
>>>  		void (*func)(struct amdgpu_device *adev, void *ras_error_status,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
>>> index aecc2bcea145..07ce614ef282 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
>>> @@ -939,11 +939,11 @@ static int gfx_v9_4_3_sw_init(void *handle)
>>>  	if (r)
>>>  		return r;
>>>  
>>> +	r = amdgpu_gfx_sysfs_compute_init(adev);
>>> +	if (r)
>>> +		return r;
>>>  
>>> -	if (!amdgpu_sriov_vf(adev))
>>> -		r = amdgpu_gfx_sysfs_init(adev);
>>> -
>>> -	return r;
>>> +	return 0;
>>>  }
>>>  
>>>  static int gfx_v9_4_3_sw_fini(void *handle)
>>> @@ -964,8 +964,7 @@ static int gfx_v9_4_3_sw_fini(void *handle)
>>>  	gfx_v9_4_3_mec_fini(adev);
>>>  	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
>>>  	gfx_v9_4_3_free_microcode(adev);
>>> -	if (!amdgpu_sriov_vf(adev))
>>> -		amdgpu_gfx_sysfs_fini(adev);
>>> +	amdgpu_gfx_sysfs_compute_fini(adev);
>>>  
>>>  	return 0;
>>>  }


More information about the amd-gfx mailing list