[PATCH v3 5/7] drm/xe/gt: Abort driver load for sysfs creation failure

Ghimiray, Himal Prasad himal.prasad.ghimiray at intel.com
Fri Apr 12 15:38:50 UTC 2024


On 12-04-2024 19:00, Lucas De Marchi wrote:
> On Fri, Apr 12, 2024 at 01:32:43PM +0530, Himal Prasad Ghimiray wrote:
>> Instead of allowing the driver to load with incomplete sysfs entries in
>> case of sysfs creation failure, we should terminate the driver loading.
>> This change ensures that the status of all gt associated sysfs entries
>> creation is relayed to xe_gt_init, leading to a driver load abort if any
>> sysfs creation failures occur.
>>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_gt.c                    | 16 ++++++----
>> drivers/gpu/drm/xe/xe_gt_freq.c               | 29 ++++++++++---------
>> drivers/gpu/drm/xe/xe_gt_freq.h               |  2 +-
>> drivers/gpu/drm/xe/xe_gt_idle.c               | 17 ++++-------
>> drivers/gpu/drm/xe/xe_gt_idle.h               |  2 +-
>> drivers/gpu/drm/xe/xe_gt_sysfs.c              | 15 ++++------
>> drivers/gpu/drm/xe/xe_gt_sysfs.h              |  2 +-
>> drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c     | 11 +++----
>> drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h     |  2 +-
>> drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c |  6 ++--
>> 10 files changed, 47 insertions(+), 55 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index dca0e9fb3315..089e4513c7e3 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -360,7 +360,9 @@ static int gt_fw_domain_init(struct xe_gt *gt)
>>             xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
>>     }
>>
>> -    xe_gt_idle_sysfs_init(&gt->gtidle);
>> +    err = xe_gt_idle_sysfs_init(&gt->gtidle);
>> +    if (err)
>> +        goto err_gt_sysfs_create;
>
> see comment on previous patch about just re-using the current label.
>
>>
>>     /* Enable per hw engine IRQs */
>>     xe_irq_enable_hwe(gt);
>> @@ -374,9 +376,7 @@ static int gt_fw_domain_init(struct xe_gt *gt)
>>
>>     err = xe_hw_engine_class_sysfs_init(gt);
>>     if (err)
>> -        drm_warn(&gt_to_xe(gt)->drm,
>> -             "failed to register engines sysfs directory, err: %d\n",
>> -             err);
>> +        goto err_gt_sysfs_create;
>>
>>     /* Initialize CCS mode sysfs after early initialization of HW 
>> engines */
>>     err = xe_gt_ccs_mode_sysfs_init(gt);
>> @@ -549,13 +549,17 @@ int xe_gt_init(struct xe_gt *gt)
>>
>>     xe_mocs_init_early(gt);
>>
>> -    xe_gt_sysfs_init(gt);
>> +    err = xe_gt_sysfs_init(gt);
>> +    if (err)
>> +        return err;
>>
>>     err = gt_fw_domain_init(gt);
>>     if (err)
>>         return err;
>>
>> -    xe_gt_freq_init(gt);
>> +    err = xe_gt_freq_init(gt);
>> +    if (err)
>> +        return err;
>>
>>     xe_force_wake_init_engines(gt, gt_to_fw(gt));
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c 
>> b/drivers/gpu/drm/xe/xe_gt_freq.c
>> index 32b9a743629c..a0c57332d362 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_freq.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_freq.c
>> @@ -222,33 +222,34 @@ static void freq_fini(struct drm_device *drm, 
>> void *arg)
>>  * @gt: Xe GT object
>>  *
>>  * It needs to be initialized after GT Sysfs and GuC PC components 
>> are ready.
>> + *
>> + * Returns: Returns error value for failure and 0 for success.
>>  */
>> -void xe_gt_freq_init(struct xe_gt *gt)
>> +int xe_gt_freq_init(struct xe_gt *gt)
>> {
>>     struct xe_device *xe = gt_to_xe(gt);
>>     int err;
>>
>>     if (xe->info.skip_guc_pc)
>> -        return;
>> +        return 0;
>>
>>     gt->freq = kobject_create_and_add("freq0", gt->sysfs);
>>     if (!gt->freq) {
>> -        drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
>> -             kobject_name(gt->sysfs));
>> -        return;
>> +        drm_err(&xe->drm, "failed to add freq0 directory to %s\n",
>> +            kobject_name(gt->sysfs));
>
> lib/kobject.c:kobject_create_and_add()
>
>         retval = kobject_add(kobj, parent, "%s", name);
>         if (retval) {
>                 pr_warn("%s: kobject_add error: %d\n", __func__, retval);
> and the other case, which returns without a log message, is a memory
> allocation failure. It's kernel policy not to log on that. So just remove
> the message here rather than s/warn/err/.
>
>> +        return -ENOMEM;
>>     }
>>
>>     err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq);
>> -    if (err) {
>> -        drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, 
>> err: %d\n",
>> -             __func__, err);
>> -        return;
>> -    }
>> +    if (err)
>> +        return err;
>>
>>     err = sysfs_create_files(gt->freq, freq_attrs);
>> -    if (err)
>> -        drm_warn(&xe->drm,  "failed to add freq attrs to %s, err: 
>> %d\n",
>> -             kobject_name(gt->freq), err);
>> +    if (err) {
>> +        drm_err(&xe->drm,  "failed to add freq attrs to %s, err: %d\n",
>> +            kobject_name(gt->freq), err);
>> +        return err;
>
> same as above:
>
> fs/sysfs/file.c:sysfs_create_file_ns()
>
>         if (WARN_ON(!kobj || !kobj->sd || !attr))
>                 return -EINVAL;
>
>> +    }
>>
>> -    xe_gt_throttle_sysfs_init(gt);
>> +    return xe_gt_throttle_sysfs_init(gt);
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h 
>> b/drivers/gpu/drm/xe/xe_gt_freq.h
>> index f3fe3c90491a..b7fddbe7b9b6 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_freq.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_freq.h
>> @@ -8,6 +8,6 @@
>>
>> struct xe_gt;
>>
>> -void xe_gt_freq_init(struct xe_gt *gt);
>> +int xe_gt_freq_init(struct xe_gt *gt);
>>
>> #endif
>> diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c 
>> b/drivers/gpu/drm/xe/xe_gt_idle.c
>> index bc1426f8d731..581c8ce75858 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_idle.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_idle.c
>> @@ -152,7 +152,7 @@ static void gt_idle_sysfs_fini(struct drm_device 
>> *drm, void *arg)
>>     kobject_put(kobj);
>> }
>>
>> -void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
>> +int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
>> {
>>     struct xe_gt *gt = gtidle_to_gt(gtidle);
>>     struct xe_device *xe = gt_to_xe(gt);
>> @@ -160,10 +160,8 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle 
>> *gtidle)
>>     int err;
>>
>>     kobj = kobject_create_and_add("gtidle", gt->sysfs);
>> -    if (!kobj) {
>> -        drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM);
>> -        return;
>> -    }
>> +    if (!kobj)
>> +        return -ENOMEM;
>>
>>     if (xe_gt_is_media_type(gt)) {
>>         snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", 
>> gt->info.id);
>> @@ -180,14 +178,11 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle 
>> *gtidle)
>>     err = sysfs_create_files(kobj, gt_idle_attrs);
>>     if (err) {
>>         kobject_put(kobj);
>> -        drm_warn(&xe->drm, "failed to register gtidle sysfs, err: 
>> %d\n", err);
>> -        return;
>> +        drm_err(&xe->drm, "failed to register gtidle sysfs, err: 
>> %d\n", err);
>
> ditto
>
>> +        return err;
>>     }
>>
>> -    err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
>> -    if (err)
>> -        drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, 
>> err: %d\n",
>> -             __func__, err);
>> +    return drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, 
>> kobj);
>> }
>>
>> void xe_gt_idle_enable_c6(struct xe_gt *gt)
>> diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h 
>> b/drivers/gpu/drm/xe/xe_gt_idle.h
>> index 69280fd16b03..75bd99659b1b 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_idle.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_idle.h
>> @@ -10,7 +10,7 @@
>>
>> struct xe_gt;
>>
>> -void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
>> +int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
>> void xe_gt_idle_enable_c6(struct xe_gt *gt);
>> void xe_gt_idle_disable_c6(struct xe_gt *gt);
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c 
>> b/drivers/gpu/drm/xe/xe_gt_sysfs.c
>> index c69d2e8a0fe1..21ed87715da7 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
>> @@ -29,7 +29,7 @@ static void gt_sysfs_fini(struct drm_device *drm, 
>> void *arg)
>>     kobject_put(gt->sysfs);
>> }
>>
>> -void xe_gt_sysfs_init(struct xe_gt *gt)
>> +int xe_gt_sysfs_init(struct xe_gt *gt)
>> {
>>     struct xe_tile *tile = gt_to_tile(gt);
>>     struct xe_device *xe = gt_to_xe(gt);
>> @@ -38,24 +38,19 @@ void xe_gt_sysfs_init(struct xe_gt *gt)
>>
>>     kg = kzalloc(sizeof(*kg), GFP_KERNEL);
>>     if (!kg)
>> -        return;
>> +        return -ENOMEM;
>>
>>     kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
>>     kg->gt = gt;
>>
>>     err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id);
>>     if (err) {
>> -        drm_warn(&xe->drm, "failed to add GT sysfs directory, err: 
>> %d\n", err);
>> +        drm_err(&xe->drm, "failed to add GT sysfs directory, err: 
>> %d\n", err);
>
> looking at lib/kobject.c, we can apply the same reasoning as above.
> We even dump the stack in this one.
>
>>         kobject_put(&kg->base);
>> -        return;
>> +        return err;
>>     }
>>
>>     gt->sysfs = &kg->base;
>>
>> -    err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
>> -    if (err) {
>> -        drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, 
>> err: %d\n",
>> -             __func__, err);
>> -        return;
>> -    }
>> +    return drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h 
>> b/drivers/gpu/drm/xe/xe_gt_sysfs.h
>> index e3ec278ca0be..ecbfcc5c7d42 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sysfs.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
>> @@ -8,7 +8,7 @@
>>
>> #include "xe_gt_sysfs_types.h"
>>
>> -void xe_gt_sysfs_init(struct xe_gt *gt);
>> +int xe_gt_sysfs_init(struct xe_gt *gt);
>>
>> static inline struct xe_gt *
>> kobj_to_gt(struct kobject *kobj)
>> diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c 
>> b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
>> index 9c33045ff1ef..93d08d682dd8 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
>> @@ -236,19 +236,16 @@ static void gt_throttle_sysfs_fini(struct 
>> drm_device *drm, void *arg)
>>     sysfs_remove_group(gt->freq, &throttle_group_attrs);
>> }
>>
>> -void xe_gt_throttle_sysfs_init(struct xe_gt *gt)
>> +int xe_gt_throttle_sysfs_init(struct xe_gt *gt)
>> {
>>     struct xe_device *xe = gt_to_xe(gt);
>>     int err;
>>
>>     err = sysfs_create_group(gt->freq, &throttle_group_attrs);
>>     if (err) {
>> -        drm_warn(&xe->drm, "failed to register throttle sysfs, err: 
>> %d\n", err);
>> -        return;
>> +        drm_err(&xe->drm, "failed to register throttle sysfs, err: 
>> %d\n", err);
>
> ditto
Noted. Will fix in next version
>
> Lucas De Marchi
>
>> +        return err;
>>     }
>>
>> -    err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, 
>> gt);
>> -    if (err)
>> -        drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, 
>> err: %d\n",
>> -             __func__, err);
>> +    return drmm_add_action_or_reset(&xe->drm, 
>> gt_throttle_sysfs_fini, gt);
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h 
>> b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
>> index 3ecfd4beffe1..6c61e6f228a8 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
>> @@ -10,7 +10,7 @@
>>
>> struct xe_gt;
>>
>> -void xe_gt_throttle_sysfs_init(struct xe_gt *gt);
>> +int xe_gt_throttle_sysfs_init(struct xe_gt *gt);
>>
>> #endif /* _XE_GT_THROTTLE_SYSFS_H_ */
>>
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c 
>> b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
>> index daab970f8be8..62ce4fd1775d 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
>> @@ -691,9 +691,9 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
>>         keclass->eclass = hwe->eclass;
>>         err = xe_add_hw_engine_class_defaults(xe, &keclass->base);
>>         if (err) {
>> -            drm_warn(&xe->drm,
>> -                 "Add .defaults to engines failed!, err: %d\n",
>> -                 err);
>> +            drm_err(&xe->drm,
>> +                "Add .defaults to engines failed!, err: %d\n",
>> +                err);
>>             goto err_object;
>>         }
>>
>> -- 
>> 2.25.1
>>


More information about the Intel-xe mailing list