[PATCH 3/3] drm/amdgpu: Fix sysfs warning when hotplugging
Zhang, Jesse(Jie)
Jesse.Zhang at amd.com
Tue Nov 19 01:19:22 UTC 2024
[AMD Official Use Only - AMD Internal Distribution Only]
-----Original Message-----
From: Koenig, Christian <Christian.Koenig at amd.com>
Sent: Monday, November 18, 2024 5:58 PM
To: Zhang, Jesse(Jie) <Jesse.Zhang at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Prosyak, Vitaly <Vitaly.Prosyak at amd.com>; Huang, Tim <Tim.Huang at amd.com>; Dong, Andy <Andy.Dong at amd.com>
Subject: Re: [PATCH 3/3] drm/amdgpu: Fix sysfs warning when hotplugging
Am 18.11.24 um 05:31 schrieb Jesse.zhang at amd.com:
> Replace the check drm_dev_enter with sysfs directory entry.
> Because the dev->unplugged flag will also be set to true, Only
> uninstall the driver by amdgpu_exit, not actually unplug the device.
Clearly a NAK to this one. This looks strongly like you are just working around the issue that the functions are called twice.
What exactly is going on here?
This warning occurs when running hotplug tests in IGT.
When a device is unplugged, the PCI bus removes the device.
Then uninstall the amdgpu driver, and many similar warnings will be reported.
There is V2 about the is patch and update the details in comments.
Thanks
Jesse
Regards,
Christian.
>
> Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
> Reported-by: Andy Dong <andy.dong at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c | 3 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 4 ++--
> 7 files changed, 25 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 3c89c74d67e0..e54f42e3797e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -1778,9 +1778,11 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device
> *adev)
>
> void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
> {
> - amdgpu_gfx_sysfs_xcp_fini(adev);
> - amdgpu_gfx_sysfs_isolation_shader_fini(adev);
> - amdgpu_gfx_sysfs_reset_mask_fini(adev);
> + if (adev->dev->kobj.sd) {
> + amdgpu_gfx_sysfs_xcp_fini(adev);
> + amdgpu_gfx_sysfs_isolation_shader_fini(adev);
> + amdgpu_gfx_sysfs_reset_mask_fini(adev);
> + }
> }
>
> int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
> index 43ea76ebbad8..9a1a317d4fd9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
> @@ -447,6 +447,8 @@ int amdgpu_jpeg_sysfs_reset_mask_init(struct
> amdgpu_device *adev)
>
> void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev)
> {
> - if (adev->jpeg.num_jpeg_inst)
> - device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
> + if (adev->dev->kobj.sd) {
> + if (adev->jpeg.num_jpeg_inst)
> + device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
> + }
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> index e8adfd0a570a..34b5e22b44e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> @@ -137,7 +137,8 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
> if (ret)
> return;
>
> - device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
> + if (adev->dev->kobj.sd)
> + device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
>
> ttm_resource_manager_cleanup(man);
> ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 8c89b69edc20..113f0d242618 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -451,6 +451,8 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
> if (!amdgpu_gpu_recovery)
> return;
>
> - if (adev->sdma.num_instances)
> - device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
> + if (adev->dev->kobj.sd) {
> + if (adev->sdma.num_instances)
> + device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
> + }
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index 60e19052a1e2..ed9c795e7b35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -1310,6 +1310,8 @@ int amdgpu_vcn_sysfs_reset_mask_init(struct
> amdgpu_device *adev)
>
> void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev)
> {
> - if (adev->vcn.num_vcn_inst)
> - device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
> + if (adev->dev->kobj.sd) {
> + if (adev->vcn.num_vcn_inst)
> + device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
> + }
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> index 02bda187f982..dc96e81235df 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> @@ -904,8 +904,10 @@ int amdgpu_vpe_sysfs_reset_mask_init(struct
> amdgpu_device *adev)
>
> void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev)
> {
> - if (adev->vpe.num_instances)
> - device_remove_file(adev->dev, &dev_attr_vpe_reset_mask);
> + if (adev->dev->kobj.sd) {
> + if (adev->vpe.num_instances)
> + device_remove_file(adev->dev, &dev_attr_vpe_reset_mask);
> + }
> }
>
> static const struct amdgpu_ring_funcs vpe_ring_funcs = { diff --git
> a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> index 483a441b46aa..621aeca53880 100644
> --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> @@ -254,8 +254,8 @@ static void df_v3_6_sw_init(struct amdgpu_device
> *adev)
>
> static void df_v3_6_sw_fini(struct amdgpu_device *adev)
> {
> -
> - device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
> + if (adev->dev->kobj.sd)
> + device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
>
> }
>
More information about the amd-gfx
mailing list