[PATCH v2 3/6] drm/amdxcp: introduce new API amdgpu_xcp_drm_dev_free()

Gerry Liu gerry at linux.alibaba.com
Tue Jan 7 02:00:41 UTC 2025



> 2025年1月6日 14:51,Lazar, Lijo <lijo.lazar at amd.com> 写道:
> 
> 
> 
> On 1/5/2025 8:15 AM, Jiang Liu wrote:
>> Introduce new interface amdgpu_xcp_drm_dev_free() to free a specific
>> drm_device crreated by amdgpu_xcp_drm_dev_alloc(), which will be used
>> to do error recovery.
>> 
>> Signed-off-by: Jiang Liu <gerry at linux.alibaba.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c     | 11 +++-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h     |  1 +
>> drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c | 70 +++++++++++++++++----
>> drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h |  1 +
>> 4 files changed, 70 insertions(+), 13 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
>> index e209b5e101df..401fbaa0b6b8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
>> @@ -359,6 +359,8 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
>> 		ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
>> 		if (ret)
>> 			return ret;
>> +
>> +		adev->xcp_mgr->xcp[i].registered = true;
>> 	}
>> 
>> 	return 0;
>> @@ -376,12 +378,19 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
>> 		if (!adev->xcp_mgr->xcp[i].ddev)
>> 			break;
>> 
>> +		// Restore and free the original drm_device.
>> 		p_ddev = adev->xcp_mgr->xcp[i].ddev;
>> -		drm_dev_unplug(p_ddev);
>> +		if (adev->xcp_mgr->xcp[i].registered) {
>> +			drm_dev_unplug(p_ddev);
>> +			adev->xcp_mgr->xcp[i].registered = false;
>> +		}
>> 		p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
>> 		p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
>> 		p_ddev->driver =  adev->xcp_mgr->xcp[i].driver;
>> 		p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
>> +		amdgpu_xcp_drm_dev_free(p_ddev);
>> +
>> +		adev->xcp_mgr->xcp[i].ddev = NULL;
>> 	}
>> }
>> 
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
>> index b63f53242c57..cd06a4a232be 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
>> @@ -101,6 +101,7 @@ struct amdgpu_xcp {
>> 	uint8_t id;
>> 	uint8_t mem_id;
>> 	bool valid;
>> +	bool registered;
>> 	atomic_t	ref_cnt;
>> 	struct drm_device *ddev;
>> 	struct drm_device *rdev;
>> diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
>> index faed84172dd4..9058d71b4756 100644
>> --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
>> +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
>> @@ -45,18 +45,26 @@ static const struct drm_driver amdgpu_xcp_driver = {
>> 
>> static int8_t pdev_num;
>> static struct xcp_device *xcp_dev[MAX_XCP_PLATFORM_DEVICE];
>> +static struct mutex xcp_mutex;
> 
> I think this needs to be static DEFINE_MUTEX(xcp_mutex).
> 
>> 
>> int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev)
>> {
>> 	struct platform_device *pdev;
>> 	struct xcp_device *pxcp_dev;
>> 	char dev_name[20];
>> -	int ret;
>> +	int ret, index;
>> 
>> +	mutex_lock(&xcp_mutex);
>> +	ret = -ENODEV;
> 
> Preference would be do this inside the below if() to associate the error
> with the condition.
> 
>> 	if (pdev_num >= MAX_XCP_PLATFORM_DEVICE)
>> -		return -ENODEV;
>> +		goto out_unlock;
>> 
>> -	snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", pdev_num);
>> +	for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) {
>> +		if (!xcp_dev[index])
>> +			break;
>> +	}
>> +
>> +	snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", index);
>> 	pdev = platform_device_register_simple(dev_name, -1, NULL, 0);
>> 	if (IS_ERR(pdev))
>> 		return PTR_ERR(pdev);
> 
> Seems mutex is left locked here.
Will fixed in next version:)

> 
>> @@ -72,10 +80,11 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev)
>> 		goto out_devres;
>> 	}
>> 
>> -	xcp_dev[pdev_num] = pxcp_dev;
>> -	xcp_dev[pdev_num]->pdev = pdev;
>> +	xcp_dev[index] = pxcp_dev;
>> +	xcp_dev[index]->pdev = pdev;
>> 	*ddev = &pxcp_dev->drm;
>> 	pdev_num++;
>> +	mutex_unlock(&xcp_mutex);
>> 
>> 	return 0;
>> 
>> @@ -83,21 +92,58 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev)
>> 	devres_release_group(&pdev->dev, NULL);
>> out_unregister:
>> 	platform_device_unregister(pdev);
>> +out_unlock:
>> +	mutex_unlock(&xcp_mutex);
>> 
>> 	return ret;
>> }
>> EXPORT_SYMBOL(amdgpu_xcp_drm_dev_alloc);
>> 
>> -void amdgpu_xcp_drv_release(void)
>> +static void amdgpu_xcp_drm_dev_destroy(int index)
>> +{
>> +	struct platform_device *pdev;
>> +
>> +	pdev = xcp_dev[index]->pdev;
>> +	devres_release_group(&pdev->dev, NULL);
>> +	platform_device_unregister(pdev);
>> +	xcp_dev[index] = NULL;
>> +	pdev_num--;
>> +}
>> +
>> +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev)
>> {
>> -	for (--pdev_num; pdev_num >= 0; --pdev_num) {
>> -		struct platform_device *pdev = xcp_dev[pdev_num]->pdev;
>> +	int index;
>> +	struct xcp_device *pxcp_dev;
>> +
>> +	if (ddev == NULL)
>> +		return;
>> 
>> -		devres_release_group(&pdev->dev, NULL);
>> -		platform_device_unregister(pdev);
>> -		xcp_dev[pdev_num] = NULL;
>> +	pxcp_dev = container_of(ddev, struct xcp_device, drm);
>> +
>> +	mutex_lock(&xcp_mutex);
>> +	for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) {
>> +		if (xcp_dev[index] == pxcp_dev) {
>> +			amdgpu_xcp_drm_dev_destroy(index);
>> +			break;
>> +		}
>> +	}
>> +	mutex_unlock(&xcp_mutex);
>> +}
>> +EXPORT_SYMBOL(amdgpu_xcp_drm_dev_free);
>> +
>> +void amdgpu_xcp_drv_release(void)
>> +{
>> +	int index;
>> +
>> +	mutex_lock(&xcp_mutex);
>> +	for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) {
>> +		if (xcp_dev[index]) {
>> +			WARN_ON(xcp_dev[index]);
> 
> Why is this WARN check needed? There is already a if() check for valid
> index.
> 
> Also, would suggest to separate out amdgpu_xcp.c from xcp_drv.c. xcp_drv
> introducing a new interface may be kept in a separate patch.
With new implementation, all xcp devices should have already be removed when amdgpu_xcp_drv_release() gets called,
So hope to verify whether it works as expected.

Thanks!
> 
> Thanks,
> Lijo
> 
>> +			amdgpu_xcp_drm_dev_destroy(index);
>> +		}
>> 	}
>> -	pdev_num = 0;
>> +	WARN_ON(pdev_num != 0);
>> +	mutex_unlock(&xcp_mutex);
>> }
>> EXPORT_SYMBOL(amdgpu_xcp_drv_release);
>> 
>> diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
>> index c1c4b679bf95..580a1602c8e3 100644
>> --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
>> +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
>> @@ -25,5 +25,6 @@
>> #define _AMDGPU_XCP_DRV_H_
>> 
>> int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev);
>> +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev);
>> void amdgpu_xcp_drv_release(void);
>> #endif /* _AMDGPU_XCP_DRV_H_ */

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20250107/785ab526/attachment-0001.htm>


More information about the amd-gfx mailing list