[PATCH v3 5/7] drm/xe/gt: Abort driver load for sysfs creation failure

Lucas De Marchi lucas.demarchi at intel.com
Fri Apr 12 13:30:13 UTC 2024


On Fri, Apr 12, 2024 at 01:32:43PM +0530, Himal Prasad Ghimiray wrote:
>Instead of allowing the driver to load with incomplete sysfs entries in
>case of sysfs creation failure, we should terminate the driver loading.
>This change ensures that the status of all gt associated sysfs entries
>creation is relayed to xe_gt_init, leading to a driver load abort if any
>sysfs creation failures occur.
>
>Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>---
> drivers/gpu/drm/xe/xe_gt.c                    | 16 ++++++----
> drivers/gpu/drm/xe/xe_gt_freq.c               | 29 ++++++++++---------
> drivers/gpu/drm/xe/xe_gt_freq.h               |  2 +-
> drivers/gpu/drm/xe/xe_gt_idle.c               | 17 ++++-------
> drivers/gpu/drm/xe/xe_gt_idle.h               |  2 +-
> drivers/gpu/drm/xe/xe_gt_sysfs.c              | 15 ++++------
> drivers/gpu/drm/xe/xe_gt_sysfs.h              |  2 +-
> drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c     | 11 +++----
> drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h     |  2 +-
> drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c |  6 ++--
> 10 files changed, 47 insertions(+), 55 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>index dca0e9fb3315..089e4513c7e3 100644
>--- a/drivers/gpu/drm/xe/xe_gt.c
>+++ b/drivers/gpu/drm/xe/xe_gt.c
>@@ -360,7 +360,9 @@ static int gt_fw_domain_init(struct xe_gt *gt)
> 			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
> 	}
>
>-	xe_gt_idle_sysfs_init(&gt->gtidle);
>+	err = xe_gt_idle_sysfs_init(&gt->gtidle);
>+	if (err)
>+		goto err_gt_sysfs_create;

See the comment on the previous patch about just re-using the current label.

>
> 	/* Enable per hw engine IRQs */
> 	xe_irq_enable_hwe(gt);
>@@ -374,9 +376,7 @@ static int gt_fw_domain_init(struct xe_gt *gt)
>
> 	err = xe_hw_engine_class_sysfs_init(gt);
> 	if (err)
>-		drm_warn(&gt_to_xe(gt)->drm,
>-			 "failed to register engines sysfs directory, err: %d\n",
>-			 err);
>+		goto err_gt_sysfs_create;
>
> 	/* Initialize CCS mode sysfs after early initialization of HW engines */
> 	err = xe_gt_ccs_mode_sysfs_init(gt);
>@@ -549,13 +549,17 @@ int xe_gt_init(struct xe_gt *gt)
>
> 	xe_mocs_init_early(gt);
>
>-	xe_gt_sysfs_init(gt);
>+	err = xe_gt_sysfs_init(gt);
>+	if (err)
>+		return err;
>
> 	err = gt_fw_domain_init(gt);
> 	if (err)
> 		return err;
>
>-	xe_gt_freq_init(gt);
>+	err = xe_gt_freq_init(gt);
>+	if (err)
>+		return err;
>
> 	xe_force_wake_init_engines(gt, gt_to_fw(gt));
>
>diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
>index 32b9a743629c..a0c57332d362 100644
>--- a/drivers/gpu/drm/xe/xe_gt_freq.c
>+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
>@@ -222,33 +222,34 @@ static void freq_fini(struct drm_device *drm, void *arg)
>  * @gt: Xe GT object
>  *
>  * It needs to be initialized after GT Sysfs and GuC PC components are ready.
>+ *
>+ * Returns: Returns error value for failure and 0 for success.
>  */
>-void xe_gt_freq_init(struct xe_gt *gt)
>+int xe_gt_freq_init(struct xe_gt *gt)
> {
> 	struct xe_device *xe = gt_to_xe(gt);
> 	int err;
>
> 	if (xe->info.skip_guc_pc)
>-		return;
>+		return 0;
>
> 	gt->freq = kobject_create_and_add("freq0", gt->sysfs);
> 	if (!gt->freq) {
>-		drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
>-			 kobject_name(gt->sysfs));
>-		return;
>+		drm_err(&xe->drm, "failed to add freq0 directory to %s\n",
>+			kobject_name(gt->sysfs));

lib/kobject.c:kobject_create_and_add()

         retval = kobject_add(kobj, parent, "%s", name);                      
         if (retval) {                                                        
                 pr_warn("%s: kobject_add error: %d\n", __func__, retval);    

So kobject_create_and_add() already warns when kobject_add() fails, and the
other case, which returns without a log message, is a memory allocation
failure. It's kernel policy not to log on that. So just remove the message
here rather than s/warn/err/.

>+		return -ENOMEM;
> 	}
>
> 	err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq);
>-	if (err) {
>-		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
>-			 __func__, err);
>-		return;
>-	}
>+	if (err)
>+		return err;
>
> 	err = sysfs_create_files(gt->freq, freq_attrs);
>-	if (err)
>-		drm_warn(&xe->drm,  "failed to add freq attrs to %s, err: %d\n",
>-			 kobject_name(gt->freq), err);
>+	if (err) {
>+		drm_err(&xe->drm,  "failed to add freq attrs to %s, err: %d\n",
>+			kobject_name(gt->freq), err);
>+		return err;

same as above:

fs/sysfs/file.c:sysfs_create_file_ns()

         if (WARN_ON(!kobj || !kobj->sd || !attr))
                 return -EINVAL;

>+	}
>
>-	xe_gt_throttle_sysfs_init(gt);
>+	return xe_gt_throttle_sysfs_init(gt);
> }
>diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h
>index f3fe3c90491a..b7fddbe7b9b6 100644
>--- a/drivers/gpu/drm/xe/xe_gt_freq.h
>+++ b/drivers/gpu/drm/xe/xe_gt_freq.h
>@@ -8,6 +8,6 @@
>
> struct xe_gt;
>
>-void xe_gt_freq_init(struct xe_gt *gt);
>+int xe_gt_freq_init(struct xe_gt *gt);
>
> #endif
>diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
>index bc1426f8d731..581c8ce75858 100644
>--- a/drivers/gpu/drm/xe/xe_gt_idle.c
>+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
>@@ -152,7 +152,7 @@ static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
> 	kobject_put(kobj);
> }
>
>-void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
>+int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
> {
> 	struct xe_gt *gt = gtidle_to_gt(gtidle);
> 	struct xe_device *xe = gt_to_xe(gt);
>@@ -160,10 +160,8 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
> 	int err;
>
> 	kobj = kobject_create_and_add("gtidle", gt->sysfs);
>-	if (!kobj) {
>-		drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM);
>-		return;
>-	}
>+	if (!kobj)
>+		return -ENOMEM;
>
> 	if (xe_gt_is_media_type(gt)) {
> 		snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id);
>@@ -180,14 +178,11 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
> 	err = sysfs_create_files(kobj, gt_idle_attrs);
> 	if (err) {
> 		kobject_put(kobj);
>-		drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err);
>-		return;
>+		drm_err(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err);

ditto

>+		return err;
> 	}
>
>-	err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
>-	if (err)
>-		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
>-			 __func__, err);
>+	return drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
> }
>
> void xe_gt_idle_enable_c6(struct xe_gt *gt)
>diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
>index 69280fd16b03..75bd99659b1b 100644
>--- a/drivers/gpu/drm/xe/xe_gt_idle.h
>+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
>@@ -10,7 +10,7 @@
>
> struct xe_gt;
>
>-void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
>+int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
> void xe_gt_idle_enable_c6(struct xe_gt *gt);
> void xe_gt_idle_disable_c6(struct xe_gt *gt);
>
>diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
>index c69d2e8a0fe1..21ed87715da7 100644
>--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
>+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
>@@ -29,7 +29,7 @@ static void gt_sysfs_fini(struct drm_device *drm, void *arg)
> 	kobject_put(gt->sysfs);
> }
>
>-void xe_gt_sysfs_init(struct xe_gt *gt)
>+int xe_gt_sysfs_init(struct xe_gt *gt)
> {
> 	struct xe_tile *tile = gt_to_tile(gt);
> 	struct xe_device *xe = gt_to_xe(gt);
>@@ -38,24 +38,19 @@ void xe_gt_sysfs_init(struct xe_gt *gt)
>
> 	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
> 	if (!kg)
>-		return;
>+		return -ENOMEM;
>
> 	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
> 	kg->gt = gt;
>
> 	err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id);
> 	if (err) {
>-		drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err);
>+		drm_err(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err);

Looking at lib/kobject.c, we can apply the same reasoning as above: the
failure is already reported there, and the stack is even dumped in this case.

> 		kobject_put(&kg->base);
>-		return;
>+		return err;
> 	}
>
> 	gt->sysfs = &kg->base;
>
>-	err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
>-	if (err) {
>-		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
>-			 __func__, err);
>-		return;
>-	}
>+	return drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
> }
>diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
>index e3ec278ca0be..ecbfcc5c7d42 100644
>--- a/drivers/gpu/drm/xe/xe_gt_sysfs.h
>+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
>@@ -8,7 +8,7 @@
>
> #include "xe_gt_sysfs_types.h"
>
>-void xe_gt_sysfs_init(struct xe_gt *gt);
>+int xe_gt_sysfs_init(struct xe_gt *gt);
>
> static inline struct xe_gt *
> kobj_to_gt(struct kobject *kobj)
>diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
>index 9c33045ff1ef..93d08d682dd8 100644
>--- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
>+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
>@@ -236,19 +236,16 @@ static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg)
> 	sysfs_remove_group(gt->freq, &throttle_group_attrs);
> }
>
>-void xe_gt_throttle_sysfs_init(struct xe_gt *gt)
>+int xe_gt_throttle_sysfs_init(struct xe_gt *gt)
> {
> 	struct xe_device *xe = gt_to_xe(gt);
> 	int err;
>
> 	err = sysfs_create_group(gt->freq, &throttle_group_attrs);
> 	if (err) {
>-		drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err);
>-		return;
>+		drm_err(&xe->drm, "failed to register throttle sysfs, err: %d\n", err);

ditto

Lucas De Marchi

>+		return err;
> 	}
>
>-	err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
>-	if (err)
>-		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
>-			 __func__, err);
>+	return drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
> }
>diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
>index 3ecfd4beffe1..6c61e6f228a8 100644
>--- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
>+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
>@@ -10,7 +10,7 @@
>
> struct xe_gt;
>
>-void xe_gt_throttle_sysfs_init(struct xe_gt *gt);
>+int xe_gt_throttle_sysfs_init(struct xe_gt *gt);
>
> #endif /* _XE_GT_THROTTLE_SYSFS_H_ */
>
>diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
>index daab970f8be8..62ce4fd1775d 100644
>--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
>+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
>@@ -691,9 +691,9 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
> 		keclass->eclass = hwe->eclass;
> 		err = xe_add_hw_engine_class_defaults(xe, &keclass->base);
> 		if (err) {
>-			drm_warn(&xe->drm,
>-				 "Add .defaults to engines failed!, err: %d\n",
>-				 err);
>+			drm_err(&xe->drm,
>+				"Add .defaults to engines failed!, err: %d\n",
>+				err);
> 			goto err_object;
> 		}
>
>-- 
>2.25.1
>


More information about the Intel-xe mailing list