[PATCH v2 3/6] drm/amdxcp: introduce new API amdgpu_xcp_drm_dev_free()
Gerry Liu
gerry at linux.alibaba.com
Tue Jan 7 02:00:41 UTC 2025
> 2025年1月6日 14:51,Lazar, Lijo <lijo.lazar at amd.com> 写道:
>
>
>
> On 1/5/2025 8:15 AM, Jiang Liu wrote:
>> Introduce new interface amdgpu_xcp_drm_dev_free() to free a specific
>> drm_device crreated by amdgpu_xcp_drm_dev_alloc(), which will be used
>> to do error recovery.
>>
>> Signed-off-by: Jiang Liu <gerry at linux.alibaba.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 11 +++-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 1 +
>> drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c | 70 +++++++++++++++++----
>> drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h | 1 +
>> 4 files changed, 70 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
>> index e209b5e101df..401fbaa0b6b8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
>> @@ -359,6 +359,8 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
>> ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
>> if (ret)
>> return ret;
>> +
>> + adev->xcp_mgr->xcp[i].registered = true;
>> }
>>
>> return 0;
>> @@ -376,12 +378,19 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
>> if (!adev->xcp_mgr->xcp[i].ddev)
>> break;
>>
>> + // Restore and free the original drm_device.
>> p_ddev = adev->xcp_mgr->xcp[i].ddev;
>> - drm_dev_unplug(p_ddev);
>> + if (adev->xcp_mgr->xcp[i].registered) {
>> + drm_dev_unplug(p_ddev);
>> + adev->xcp_mgr->xcp[i].registered = false;
>> + }
>> p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
>> p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
>> p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
>> p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
>> + amdgpu_xcp_drm_dev_free(p_ddev);
>> +
>> + adev->xcp_mgr->xcp[i].ddev = NULL;
>> }
>> }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
>> index b63f53242c57..cd06a4a232be 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
>> @@ -101,6 +101,7 @@ struct amdgpu_xcp {
>> uint8_t id;
>> uint8_t mem_id;
>> bool valid;
>> + bool registered;
>> atomic_t ref_cnt;
>> struct drm_device *ddev;
>> struct drm_device *rdev;
>> diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
>> index faed84172dd4..9058d71b4756 100644
>> --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
>> +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
>> @@ -45,18 +45,26 @@ static const struct drm_driver amdgpu_xcp_driver = {
>>
>> static int8_t pdev_num;
>> static struct xcp_device *xcp_dev[MAX_XCP_PLATFORM_DEVICE];
>> +static struct mutex xcp_mutex;
>
> I think this needs to be static DEFINE_MUTEX(xcp_mutex).
>
>>
>> int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev)
>> {
>> struct platform_device *pdev;
>> struct xcp_device *pxcp_dev;
>> char dev_name[20];
>> - int ret;
>> + int ret, index;
>>
>> + mutex_lock(&xcp_mutex);
>> + ret = -ENODEV;
>
> Preference would be do this inside the below if() to associate the error
> with the condition.
>
>> if (pdev_num >= MAX_XCP_PLATFORM_DEVICE)
>> - return -ENODEV;
>> + goto out_unlock;
>>
>> - snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", pdev_num);
>> + for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) {
>> + if (!xcp_dev[index])
>> + break;
>> + }
>> +
>> + snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", index);
>> pdev = platform_device_register_simple(dev_name, -1, NULL, 0);
>> if (IS_ERR(pdev))
>> return PTR_ERR(pdev);
>
> Seems mutex is left locked here.
Will fixed in next version:)
>
>> @@ -72,10 +80,11 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev)
>> goto out_devres;
>> }
>>
>> - xcp_dev[pdev_num] = pxcp_dev;
>> - xcp_dev[pdev_num]->pdev = pdev;
>> + xcp_dev[index] = pxcp_dev;
>> + xcp_dev[index]->pdev = pdev;
>> *ddev = &pxcp_dev->drm;
>> pdev_num++;
>> + mutex_unlock(&xcp_mutex);
>>
>> return 0;
>>
>> @@ -83,21 +92,58 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev)
>> devres_release_group(&pdev->dev, NULL);
>> out_unregister:
>> platform_device_unregister(pdev);
>> +out_unlock:
>> + mutex_unlock(&xcp_mutex);
>>
>> return ret;
>> }
>> EXPORT_SYMBOL(amdgpu_xcp_drm_dev_alloc);
>>
>> -void amdgpu_xcp_drv_release(void)
>> +static void amdgpu_xcp_drm_dev_destroy(int index)
>> +{
>> + struct platform_device *pdev;
>> +
>> + pdev = xcp_dev[index]->pdev;
>> + devres_release_group(&pdev->dev, NULL);
>> + platform_device_unregister(pdev);
>> + xcp_dev[index] = NULL;
>> + pdev_num--;
>> +}
>> +
>> +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev)
>> {
>> - for (--pdev_num; pdev_num >= 0; --pdev_num) {
>> - struct platform_device *pdev = xcp_dev[pdev_num]->pdev;
>> + int index;
>> + struct xcp_device *pxcp_dev;
>> +
>> + if (ddev == NULL)
>> + return;
>>
>> - devres_release_group(&pdev->dev, NULL);
>> - platform_device_unregister(pdev);
>> - xcp_dev[pdev_num] = NULL;
>> + pxcp_dev = container_of(ddev, struct xcp_device, drm);
>> +
>> + mutex_lock(&xcp_mutex);
>> + for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) {
>> + if (xcp_dev[index] == pxcp_dev) {
>> + amdgpu_xcp_drm_dev_destroy(index);
>> + break;
>> + }
>> + }
>> + mutex_unlock(&xcp_mutex);
>> +}
>> +EXPORT_SYMBOL(amdgpu_xcp_drm_dev_free);
>> +
>> +void amdgpu_xcp_drv_release(void)
>> +{
>> + int index;
>> +
>> + mutex_lock(&xcp_mutex);
>> + for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) {
>> + if (xcp_dev[index]) {
>> + WARN_ON(xcp_dev[index]);
>
> Why is this WARN check needed? There is already a if() check for valid
> index.
>
> Also, would suggest to separate out amdgpu_xcp.c from xcp_drv.c. xcp_drv
> introducing a new interface may be kept in a separate patch.
With new implementation, all xcp devices should have already be removed when amdgpu_xcp_drv_release() gets called,
So hope to verify whether it works as expected.
Thanks!
>
> Thanks,
> Lijo
>
>> + amdgpu_xcp_drm_dev_destroy(index);
>> + }
>> }
>> - pdev_num = 0;
>> + WARN_ON(pdev_num != 0);
>> + mutex_unlock(&xcp_mutex);
>> }
>> EXPORT_SYMBOL(amdgpu_xcp_drv_release);
>>
>> diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
>> index c1c4b679bf95..580a1602c8e3 100644
>> --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
>> +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
>> @@ -25,5 +25,6 @@
>> #define _AMDGPU_XCP_DRV_H_
>>
>> int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev);
>> +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev);
>> void amdgpu_xcp_drv_release(void);
>> #endif /* _AMDGPU_XCP_DRV_H_ */
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20250107/785ab526/attachment-0001.htm>
More information about the amd-gfx
mailing list