[PATCH] drm/xe: remove sysfs entries on driver unbind

Andrzej Hajda andrzej.hajda at intel.com
Tue Apr 30 15:51:11 UTC 2024


After unbinding driver from the device, driver is responsible for
leaving the device in it's initial state, to be ready to bind to another
driver or to rebind. Particularly device sysfs entries added by the driver
should disappear. Removing them together with drm device is too late,
as it's life-time can be prolonged by userspace keeping open drm
descriptor. Removing them at driver's unbind seems the proper solution.

Signed-off-by: Andrzej Hajda <andrzej.hajda at intel.com>
---
Hi all,

xe driver seems to be not protected against hot-rebind scenario - driver
re-bind with opened drm descriptor.
This patch solves one of the issues - removal of sysfs entires, but
apparently there are more:
[  845.740016] ---[ end trace 0000000000000000 ]---
[  845.771285] xe 0000:00:02.0: [drm] *ERROR* power well DC_off state mismatch (refcount 1/enabled 0)
[  845.771309] xe 0000:00:02.0: [drm] *ERROR* power well PW_2 state mismatch (refcount 1/enabled 0)
[  845.771324] xe 0000:00:02.0: [drm] *ERROR* power well PW_A state mismatch (refcount 1/enabled 0)
[  845.771338] xe 0000:00:02.0: [drm] *ERROR* power well PW_B state mismatch (refcount 1/enabled 0)
[  845.771351] xe 0000:00:02.0: [drm] *ERROR* power well PW_C state mismatch (refcount 1/enabled 0)
[  845.771365] xe 0000:00:02.0: [drm] *ERROR* power well PW_D state mismatch (refcount 1/enabled 0)
[  845.774682] ------------[ cut here ]------------
[  845.774697] xe 0000:00:02.0: Missing outer runtime PM protection
[  845.774743] WARNING: CPU: 16 PID: 2228 at drivers/gpu/drm/xe/xe_pm.c:550 xe_pm_runtime_get_noresume+0x141/0x1d0 [xe]
...
[  845.777795] Call Trace:
[  845.777801]  <TASK>
[  845.777809]  ? show_regs+0x71/0x90
[  845.777824]  ? __warn+0xce/0x300
[  845.777835]  ? xe_pm_runtime_get_noresume+0x141/0x1d0 [xe]
[  845.778002]  ? report_bug+0x2ad/0x300
[  845.778015]  ? handle_bug+0x46/0x90
[  845.778027]  ? exc_invalid_op+0x19/0x50
[  845.778037]  ? asm_exc_invalid_op+0x1b/0x20
[  845.778052]  ? xe_pm_runtime_get_noresume+0x141/0x1d0 [xe]
[  845.778215]  xe_ggtt_remove_node+0x69/0x1c0 [xe]
[  845.780754]  xe_ggtt_remove_bo+0x10a/0x650 [xe]
[  845.780909]  ? __pfx_xe_ggtt_remove_bo+0x10/0x10 [xe]
[  845.781064]  ? _raw_write_unlock+0x23/0x50
[  845.781076]  ? drm_vma_offset_remove+0x72/0x90 [drm]
[  845.781190]  xe_ttm_bo_destroy+0x351/0x770 [xe]
[  845.781341]  ? ww_mutex_unlock+0x1a6/0x280
[  845.781354]  ttm_bo_release+0x41d/0xae0 [ttm]
[  845.781380]  ? __pfx_ttm_bo_release+0x10/0x10 [ttm]
[  845.781402]  ? __pfx___mutex_unlock_slowpath+0x10/0x10
[  845.781413]  ? __kasan_check_read+0x11/0x20
[  845.781425]  ? do_raw_spin_unlock+0x5c/0x210
[  845.781437]  ? _raw_spin_unlock+0x23/0x50
[  845.781448]  ttm_bo_put+0x50/0x80 [ttm]
[  845.781469]  xe_gem_object_free+0x88/0x190 [xe]
[  845.781610]  drm_gem_object_free+0x59/0x90 [drm]
[  845.781697]  __xe_bo_unpin_map_no_vm+0xcf/0x110 [xe]
[  845.781839]  drm_managed_release+0x1a3/0x4e0 [drm]
[  845.781925]  ? drm_client_dev_restore+0x1f4/0x280 [drm]
[  845.782001]  drm_minor_release+0xd1/0x140 [drm]
[  845.782079]  drm_release_noglobal+0xad/0x100 [drm]

I guess after device remove call driver should not touch the hardware
at all, it could only maintain objects allocated by userspace in kind
of zombie mode - no interactions with hw, if userspace request hw access
return error.
Question to maintainers of xe/drm. How should it be implemented in xe
driver?
Or maybe drm provides already well tested way of handling such cases?

Signed-off-by: Andrzej Hajda <andrzej.hajda at intel.com>
---
 drivers/gpu/drm/xe/xe_device_sysfs.c          |  4 ++--
 drivers/gpu/drm/xe/xe_gt_ccs_mode.c           |  4 ++--
 drivers/gpu/drm/xe/xe_gt_freq.c               |  4 ++--
 drivers/gpu/drm/xe/xe_gt_idle.c               |  4 ++--
 drivers/gpu/drm/xe/xe_gt_sysfs.c              |  4 ++--
 drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c     |  4 ++--
 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 12 ++++++------
 drivers/gpu/drm/xe/xe_tile_sysfs.c            |  4 ++--
 drivers/gpu/drm/xe/xe_vram_freq.c             |  4 ++--
 9 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
index 21677b8cd977..8d096db750fa 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -69,7 +69,7 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_RW(vram_d3cold_threshold);
 
-static void xe_device_sysfs_fini(struct drm_device *drm, void *arg)
+static void xe_device_sysfs_fini(void *arg)
 {
 	struct xe_device *xe = arg;
 
@@ -85,5 +85,5 @@ int xe_device_sysfs_init(struct xe_device *xe)
 	if (ret)
 		return ret;
 
-	return drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe);
+	return devm_add_action_or_reset(xe->drm.dev, xe_device_sysfs_fini, xe);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index 396aeb5b9924..890da8870b0d 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -150,7 +150,7 @@ static const struct attribute *gt_ccs_mode_attrs[] = {
 	NULL,
 };
 
-static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg)
+static void xe_gt_ccs_mode_sysfs_fini(void *arg)
 {
 	struct xe_gt *gt = arg;
 
@@ -182,5 +182,5 @@ int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
 	if (err)
 		return err;
 
-	return drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
+	return devm_add_action_or_reset(xe->drm.dev, xe_gt_ccs_mode_sysfs_fini, gt);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 855de40e40ea..ed7949df5bf5 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -209,7 +209,7 @@ static const struct attribute *freq_attrs[] = {
 	NULL
 };
 
-static void freq_fini(struct drm_device *drm, void *arg)
+static void freq_fini(void *arg)
 {
 	struct kobject *kobj = arg;
 
@@ -237,7 +237,7 @@ int xe_gt_freq_init(struct xe_gt *gt)
 	if (!gt->freq)
 		return -ENOMEM;
 
-	err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq);
+	err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 8fc0f3f6ecc5..9e907033f32b 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -144,7 +144,7 @@ static const struct attribute *gt_idle_attrs[] = {
 	NULL,
 };
 
-static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
+static void gt_idle_sysfs_fini(void *arg)
 {
 	struct kobject *kobj = arg;
 
@@ -181,7 +181,7 @@ int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
 		return err;
 	}
 
-	return drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
+	return devm_add_action_or_reset(xe->drm.dev, gt_idle_sysfs_fini, kobj);
 }
 
 void xe_gt_idle_enable_c6(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
index 1e5971072bc8..ec2b8246204b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -22,7 +22,7 @@ static const struct kobj_type xe_gt_sysfs_kobj_type = {
 	.sysfs_ops = &kobj_sysfs_ops,
 };
 
-static void gt_sysfs_fini(struct drm_device *drm, void *arg)
+static void gt_sysfs_fini(void *arg)
 {
 	struct xe_gt *gt = arg;
 
@@ -51,5 +51,5 @@ int xe_gt_sysfs_init(struct xe_gt *gt)
 
 	gt->sysfs = &kg->base;
 
-	return drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
+	return devm_add_action_or_reset(xe->drm.dev, gt_sysfs_fini, gt);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
index fbe21a8599ca..c9e04151286d 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
@@ -229,7 +229,7 @@ static const struct attribute_group throttle_group_attrs = {
 	.attrs = throttle_attrs,
 };
 
-static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg)
+static void gt_throttle_sysfs_fini(void *arg)
 {
 	struct xe_gt *gt = arg;
 
@@ -245,5 +245,5 @@ int xe_gt_throttle_sysfs_init(struct xe_gt *gt)
 	if (err)
 		return err;
 
-	return drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
+	return devm_add_action_or_reset(xe->drm.dev, gt_throttle_sysfs_fini, gt);
 }
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index 844ec68cbbb8..258078a6b461 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -492,7 +492,7 @@ static const struct attribute * const files[] = {
 	NULL
 };
 
-static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg)
+static void kobj_xe_hw_engine_class_fini(void *arg)
 {
 	struct kobject *kobj = arg;
 
@@ -517,7 +517,7 @@ kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, const char
 	}
 	keclass->xe = xe;
 
-	err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini,
+	err = devm_add_action_or_reset(xe->drm.dev, kobj_xe_hw_engine_class_fini,
 				       &keclass->base);
 	if (err)
 		return NULL;
@@ -525,7 +525,7 @@ kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, const char
 	return keclass;
 }
 
-static void hw_engine_class_defaults_fini(struct drm_device *drm, void *arg)
+static void hw_engine_class_defaults_fini(void *arg)
 {
 	struct kobject *kobj = arg;
 
@@ -552,7 +552,7 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
 	if (err)
 		goto err_object;
 
-	return drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini, kobj);
+	return devm_add_action_or_reset(xe->drm.dev, hw_engine_class_defaults_fini, kobj);
 
 err_object:
 	kobject_put(kobj);
@@ -611,7 +611,7 @@ static const struct kobj_type xe_hw_engine_sysfs_kobj_type = {
 	.sysfs_ops = &xe_hw_engine_class_sysfs_ops,
 };
 
-static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg)
+static void hw_engine_class_sysfs_fini(void *arg)
 {
 	struct kobject *kobj = arg;
 
@@ -698,7 +698,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
 			goto err_object;
 	}
 
-	return drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, kobj);
+	return devm_add_action_or_reset(xe->drm.dev, hw_engine_class_sysfs_fini, kobj);
 
 err_object:
 	kobject_put(kobj);
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
index 64661403afcd..12b2c3e5c421 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -22,7 +22,7 @@ static const struct kobj_type xe_tile_sysfs_kobj_type = {
 	.sysfs_ops = &kobj_sysfs_ops,
 };
 
-static void tile_sysfs_fini(struct drm_device *drm, void *arg)
+static void tile_sysfs_fini(void *arg)
 {
 	struct xe_tile *tile = arg;
 
@@ -55,5 +55,5 @@ int xe_tile_sysfs_init(struct xe_tile *tile)
 	if (err)
 		return err;
 
-	return drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
+	return devm_add_action_or_reset(dev, tile_sysfs_fini, tile);
 }
diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c
index 3e21ddc6e60c..99ff95e408e0 100644
--- a/drivers/gpu/drm/xe/xe_vram_freq.c
+++ b/drivers/gpu/drm/xe/xe_vram_freq.c
@@ -87,7 +87,7 @@ static const struct attribute_group freq_group_attrs = {
 	.attrs = freq_attrs,
 };
 
-static void vram_freq_sysfs_fini(struct drm_device *drm, void *arg)
+static void vram_freq_sysfs_fini(void *arg)
 {
 	struct kobject *kobj = arg;
 
@@ -122,5 +122,5 @@ int xe_vram_freq_sysfs_init(struct xe_tile *tile)
 		return err;
 	}
 
-	return drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj);
+	return devm_add_action_or_reset(xe->drm.dev, vram_freq_sysfs_fini, kobj);
 }

---
base-commit: 4caf410766add8cf376a3afc910b17dd0961dd75
change-id: 20240430-hotrebind_xu-897cc3413a7f

Best regards,
-- 
Andrzej Hajda <andrzej.hajda at intel.com>



More information about the Intel-xe mailing list