[PATCH V2] drm/amdgpu: fix warning when removing sysfs

Jesse.zhang@amd.com jesse.zhang at amd.com
Fri Nov 8 10:05:12 UTC 2024


Fix the similar warning:

[  155.585721] kernfs: can not remove 'enforce_isolation', no directory
[  155.592201] WARNING: CPU: 3 PID: 6960 at fs/kernfs/dir.c:1683 kernfs_remove_by_name_ns+0xb9/0xc0
[  155.601145] Modules linked in: xt_MASQUERADE xt_comment nft_compat veth bridge stp llc overlay nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink qrtr intel_rapl_msr amd_atl intel_rapl_common amd64_edac edac_mce_amd amdgpu kvm_amd kvm ipmi_ssif amdxcp rapl drm_exec gpu_sched drm_buddy i2c_algo_bit drm_suballoc_helper drm_ttm_helper ttm pcspkr drm_display_helper acpi_cpufreq drm_kms_helper video wmi k10temp i2c_piix4 acpi_ipmi ipmi_si drm zram ip_tables loop squashfs dm_multipath crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 sha256_ssse3 sha1_ssse3 sp5100_tco ixgbe rfkill ccp dca sunrpc be2iscsi bnx2i cnic uio cxgb4i cxgb4 tls cxgb3i cxgb3 mdio libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ipmi_devintf ipmi_msghandler fuse
[  155.685224] systemd-journald[1354]: Compressed data object 957 -> 524 using ZSTD
[  155.685687] CPU: 3 PID: 6960 Comm: amd_pci_unplug Not tainted 6.10.0-1148853.1.zuul.164395107d6642bdb451071313e9378d #1
[  155.704149] Hardware name: TYAN B8021G88V2HR-2T/S8021GM2NR-2T, BIOS V1.03.B10 04/01/2019
[  155.712383] RIP: 0010:kernfs_remove_by_name_ns+0xb9/0xc0
[  155.717805] Code: a0 00 48 89 ef e8 37 96 c7 ff 5b b8 fe ff ff ff 5d 41 5c 41 5d e9 f7 96 a0 00 0f 0b eb ab 48 c7 c7 48 ba 7e 8f e8 f7 66 bf ff <0f> 0b eb dc 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
[  155.736766] RSP: 0018:ffffb1685d7a3e20 EFLAGS: 00010296
[  155.742108] RAX: 0000000000000038 RBX: ffff929e94c80000 RCX: 0000000000000000
[  155.749363] RDX: ffff928e1efaf200 RSI: ffff928e1efa18c0 RDI: ffff928e1efa18c0
[  155.756612] RBP: 0000000000000008 R08: 0000000000000000 R09: 0000000000000003
[  155.763855] R10: ffffb1685d7a3cd8 R11: ffffffff8fb3e1c8 R12: ffffffffc1ef5341
[  155.771104] R13: ffff929e94cc5530 R14: 0000000000000000 R15: 0000000000000000
[  155.778357] FS:  00007fd9dd8d9c40(0000) GS:ffff928e1ef80000(0000) knlGS:0000000000000000
[  155.786594] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  155.792450] CR2: 0000561245ceee38 CR3: 0000000113018000 CR4: 00000000003506f0
[  155.799702] Call Trace:
[  155.802254]  <TASK>
[  155.804460]  ? __warn+0x80/0x120
[  155.807798]  ? kernfs_remove_by_name_ns+0xb9/0xc0
[  155.812617]  ? report_bug+0x164/0x190
[  155.816393]  ? handle_bug+0x3c/0x80
[  155.819994]  ? exc_invalid_op+0x17/0x70
[  155.823939]  ? asm_exc_invalid_op+0x1a/0x20
[  155.828235]  ? kernfs_remove_by_name_ns+0xb9/0xc0
[  155.833058]  amdgpu_gfx_sysfs_fini+0x59/0xd0 [amdgpu]
[  155.838637]  gfx_v9_0_sw_fini+0x123/0x1c0 [amdgpu]
[  155.843887]  amdgpu_device_fini_sw+0xbc/0x3e0 [amdgpu]
[  155.849432]  amdgpu_driver_release_kms+0x16/0x30 [amdgpu]
[  155.855235]  drm_dev_put.part.0+0x3c/0x60 [drm]
[  155.859914]  drm_release+0x8b/0xc0 [drm]
[  155.863978]  __fput+0xf1/0x2c0
[  155.867141]  __x64_sys_close+0x3c/0x80
[  155.870998]  do_syscall_64+0x64/0x170

Check if the sysfs directory entry exists before deleting the sysfs file.

Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
Suggested-by: Lijo Lazar <lijo.lazar at amd.com>
Reviewed-by: Tim Huang <tim.huang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c         | 12 +++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c        | 10 ++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c |  8 ++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c        | 10 ++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c         |  9 +++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c         |  9 +++++++--
 drivers/gpu/drm/amd/amdgpu/df_v3_6.c            |  8 ++++++--
 7 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index f7bf5e43f16e..f552d7dfae96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -25,6 +25,7 @@
 
 #include <linux/firmware.h>
 #include <linux/pm_runtime.h>
+#include <drm/drm_drv.h>
 
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
@@ -1773,9 +1774,14 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
 
 void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
 {
-	amdgpu_gfx_sysfs_xcp_fini(adev);
-	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
-	amdgpu_gfx_sysfs_reset_mask_fini(adev);
+	int idx;
+
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+		amdgpu_gfx_sysfs_xcp_fini(adev);
+		amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+		amdgpu_gfx_sysfs_reset_mask_fini(adev);
+		drm_dev_exit(idx);
+	}
 }
 
 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 642b8c848141..2a8d8e5cc7eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -24,6 +24,7 @@
  *
  */
 
+#include <drm/drm_drv.h>
 #include "amdgpu.h"
 #include "amdgpu_jpeg.h"
 #include "amdgpu_pm.h"
@@ -447,6 +448,11 @@ int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev)
 
 void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev)
 {
-	if (adev->jpeg.num_jpeg_inst)
-		device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
+	int idx;
+
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+		if (adev->jpeg.num_jpeg_inst)
+			device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
+		drm_dev_exit(idx);
+	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
index e8adfd0a570a..33a714ddfbbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -23,6 +23,7 @@
  * Authors: Christian König, Felix Kuehling
  */
 
+#include <drm/drm_drv.h>
 #include "amdgpu.h"
 
 /**
@@ -129,7 +130,7 @@ int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
 void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
 {
 	struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
-	int ret;
+	int idx, ret;
 
 	ttm_resource_manager_set_used(man, false);
 
@@ -137,7 +138,10 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
 	if (ret)
 		return;
 
-	device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+	if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
+		device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+		drm_dev_exit(idx);
+	}
 
 	ttm_resource_manager_cleanup(man);
 	ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 8c89b69edc20..24e9daacaabb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_drv.h>
 #include <linux/firmware.h>
 #include "amdgpu.h"
 #include "amdgpu_sdma.h"
@@ -448,9 +449,14 @@ int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev)
 
 void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
 {
+	int idx;
+
 	if (!amdgpu_gpu_recovery)
 		return;
 
-	if (adev->sdma.num_instances)
-		device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+		if (adev->sdma.num_instances)
+			device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
+		drm_dev_exit(idx);
+	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 67264f0be491..1481c36ac663 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1320,6 +1320,11 @@ int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev)
 
 void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev)
 {
-	if (adev->vcn.num_vcn_inst)
-		device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
+	int idx;
+
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+		if (adev->vcn.num_vcn_inst)
+			device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
+		drm_dev_exit(idx);
+	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 02bda187f982..b5f5a1a81c29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -904,8 +904,13 @@ int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev)
 
 void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev)
 {
-	if (adev->vpe.num_instances)
-		device_remove_file(adev->dev, &dev_attr_vpe_reset_mask);
+	int idx;
+
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+		if (adev->vpe.num_instances)
+			device_remove_file(adev->dev, &dev_attr_vpe_reset_mask);
+		drm_dev_exit(idx);
+	}
 }
 
 static const struct amdgpu_ring_funcs vpe_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 483a441b46aa..54c05af2eed2 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -20,6 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+#include <drm/drm_drv.h>
 #include "amdgpu.h"
 #include "df_v3_6.h"
 
@@ -254,9 +255,12 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev)
 
 static void df_v3_6_sw_fini(struct amdgpu_device *adev)
 {
+	int idx;
 
-	device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
-
+	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+		device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+		drm_dev_exit(idx);
+	}
 }
 
 static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
-- 
2.25.1



More information about the amd-gfx mailing list