[PATCH v3 13/13] drm/xe/hwmon: Stop ignoring errors on probe
Lucas De Marchi
lucas.demarchi at intel.com
Mon Feb 10 15:15:36 UTC 2025
On Mon, Feb 10, 2025 at 11:01:15AM +0530, Nilawar, Badal wrote:
>Hi Lucas,
>
>On 08-02-2025 03:49, Lucas De Marchi wrote:
>>Not registering hwmon because it's not available (SRIOV_VF and DGFX) is
>>different from failing the initialization. Handle the errors
>>appropriately.
>>
>>Cc: Badal Nilawar <badal.nilawar at intel.com>
>>Cc: Karthik Poosa <karthik.poosa at intel.com>
>>Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>>---
>> drivers/gpu/drm/xe/xe_device.c | 4 +++-
>> drivers/gpu/drm/xe/xe_hwmon.c | 31 ++++++++++++++++---------------
>> drivers/gpu/drm/xe/xe_hwmon.h | 4 ++--
>> 3 files changed, 21 insertions(+), 18 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>>index 002e066f5f288..2e6f42d1b1d01 100644
>>--- a/drivers/gpu/drm/xe/xe_device.c
>>+++ b/drivers/gpu/drm/xe/xe_device.c
>>@@ -889,7 +889,9 @@ int xe_device_probe(struct xe_device *xe)
>> xe_debugfs_register(xe);
>>- xe_hwmon_register(xe);
>>+ err = xe_hwmon_register(xe);
>>+ if (err)
>>+ goto err_unregister_display;
>> for_each_gt(gt, xe, id)
>> xe_gt_sanitize_freq(gt);
>>diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
>>index 7f327e3342123..48d80ffdf7bb9 100644
>>--- a/drivers/gpu/drm/xe/xe_hwmon.c
>>+++ b/drivers/gpu/drm/xe/xe_hwmon.c
>>@@ -839,10 +839,9 @@ static const struct hwmon_chip_info hwmon_chip_info = {
>> };
>> static void
>>-xe_hwmon_get_preregistration_info(struct xe_device *xe)
>>+xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
>> {
>>- struct xe_mmio *mmio = xe_root_tile_mmio(xe);
>>- struct xe_hwmon *hwmon = xe->hwmon;
>>+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
>> long energy;
>> u64 val_sku_unit = 0;
>> int channel;
>>@@ -876,33 +875,34 @@ static void xe_hwmon_mutex_destroy(void *arg)
>> mutex_destroy(&hwmon->hwmon_lock);
>> }
>>-void xe_hwmon_register(struct xe_device *xe)
>>+int xe_hwmon_register(struct xe_device *xe)
>> {
>> struct device *dev = xe->drm.dev;
>> struct xe_hwmon *hwmon;
>>+ int ret;
>> /* hwmon is available only for dGfx */
>> if (!IS_DGFX(xe))
>>- return;
>>+ return 0;
>> /* hwmon is not available on VFs */
>> if (IS_SRIOV_VF(xe))
>>- return;
>>+ return 0;
>> hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
>> if (!hwmon)
>>- return;
>>-
>>- xe->hwmon = hwmon;
>>+ return -ENOMEM;
>> mutex_init(&hwmon->hwmon_lock);
>>- if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon))
>>- return;
>>+ ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon);
>>+ if (ret)
>>+ return ret;
>> /* There's only one instance of hwmon per device */
>> hwmon->xe = xe;
>>+ xe->hwmon = hwmon;
>>- xe_hwmon_get_preregistration_info(xe);
>>+ xe_hwmon_get_preregistration_info(hwmon);
>> drm_dbg(&xe->drm, "Register xe hwmon interface\n");
>>@@ -910,11 +910,12 @@ void xe_hwmon_register(struct xe_device *xe)
>> hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon,
>> &hwmon_chip_info,
>> hwmon_groups);
>>-
>> if (IS_ERR(hwmon->hwmon_dev)) {
>>- drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev);
>>+ drm_err(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev);
>> xe->hwmon = NULL;
>>- return;
>>+ return PTR_ERR(hwmon->hwmon_dev);
>
>Hwmon is not part of drm subsystem so we didn't tie hwmon registration
>failure with xe probe.
We don't want a half-initialized driver. We never did. The problem is
that we added integration with other subsystems and brought this
initialization-style from i915. We are just hiding bugs.
Lucas De Marchi
>
>Regards,
>Badal
>
>> }
>>+
>>+ return 0;
>> }
>>diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h
>>index c42a1de2cd7a2..d02c1bfe8c0a0 100644
>>--- a/drivers/gpu/drm/xe/xe_hwmon.h
>>+++ b/drivers/gpu/drm/xe/xe_hwmon.h
>>@@ -11,9 +11,9 @@
>> struct xe_device;
>> #if IS_REACHABLE(CONFIG_HWMON)
>>-void xe_hwmon_register(struct xe_device *xe);
>>+int xe_hwmon_register(struct xe_device *xe);
>> #else
>>-static inline void xe_hwmon_register(struct xe_device *xe) { };
>>+static inline int xe_hwmon_register(struct xe_device *xe) { return 0; };
>> #endif
>> #endif /* _XE_HWMON_H_ */
More information about the Intel-xe
mailing list