[PATCH v3 13/13] drm/xe/hwmon: Stop ignoring errors on probe

Lucas De Marchi lucas.demarchi at intel.com
Mon Feb 10 15:15:36 UTC 2025


On Mon, Feb 10, 2025 at 11:01:15AM +0530, Nilawar, Badal wrote:
>Hi Lucas,
>
>On 08-02-2025 03:49, Lucas De Marchi wrote:
>>Not registering hwmon because it's not available (SRIOV_VF and DGFX) is
>>different from failing the initialization. Handle the errors
>>appropriately.
>>
>>Cc: Badal Nilawar <badal.nilawar at intel.com>
>>Cc: Karthik Poosa <karthik.poosa at intel.com>
>>Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>>---
>>  drivers/gpu/drm/xe/xe_device.c |  4 +++-
>>  drivers/gpu/drm/xe/xe_hwmon.c  | 31 ++++++++++++++++---------------
>>  drivers/gpu/drm/xe/xe_hwmon.h  |  4 ++--
>>  3 files changed, 21 insertions(+), 18 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>>index 002e066f5f288..2e6f42d1b1d01 100644
>>--- a/drivers/gpu/drm/xe/xe_device.c
>>+++ b/drivers/gpu/drm/xe/xe_device.c
>>@@ -889,7 +889,9 @@ int xe_device_probe(struct xe_device *xe)
>>  	xe_debugfs_register(xe);
>>-	xe_hwmon_register(xe);
>>+	err = xe_hwmon_register(xe);
>>+	if (err)
>>+		goto err_unregister_display;
>>  	for_each_gt(gt, xe, id)
>>  		xe_gt_sanitize_freq(gt);
>>diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
>>index 7f327e3342123..48d80ffdf7bb9 100644
>>--- a/drivers/gpu/drm/xe/xe_hwmon.c
>>+++ b/drivers/gpu/drm/xe/xe_hwmon.c
>>@@ -839,10 +839,9 @@ static const struct hwmon_chip_info hwmon_chip_info = {
>>  };
>>  static void
>>-xe_hwmon_get_preregistration_info(struct xe_device *xe)
>>+xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
>>  {
>>-	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
>>-	struct xe_hwmon *hwmon = xe->hwmon;
>>+	struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
>>  	long energy;
>>  	u64 val_sku_unit = 0;
>>  	int channel;
>>@@ -876,33 +875,34 @@ static void xe_hwmon_mutex_destroy(void *arg)
>>  	mutex_destroy(&hwmon->hwmon_lock);
>>  }
>>-void xe_hwmon_register(struct xe_device *xe)
>>+int xe_hwmon_register(struct xe_device *xe)
>>  {
>>  	struct device *dev = xe->drm.dev;
>>  	struct xe_hwmon *hwmon;
>>+	int ret;
>>  	/* hwmon is available only for dGfx */
>>  	if (!IS_DGFX(xe))
>>-		return;
>>+		return 0;
>>  	/* hwmon is not available on VFs */
>>  	if (IS_SRIOV_VF(xe))
>>-		return;
>>+		return 0;
>>  	hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
>>  	if (!hwmon)
>>-		return;
>>-
>>-	xe->hwmon = hwmon;
>>+		return -ENOMEM;
>>  	mutex_init(&hwmon->hwmon_lock);
>>-	if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon))
>>-		return;
>>+	ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon);
>>+	if (ret)
>>+		return ret;
>>  	/* There's only one instance of hwmon per device */
>>  	hwmon->xe = xe;
>>+	xe->hwmon = hwmon;
>>-	xe_hwmon_get_preregistration_info(xe);
>>+	xe_hwmon_get_preregistration_info(hwmon);
>>  	drm_dbg(&xe->drm, "Register xe hwmon interface\n");
>>@@ -910,11 +910,12 @@ void xe_hwmon_register(struct xe_device *xe)
>>  	hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon,
>>  								&hwmon_chip_info,
>>  								hwmon_groups);
>>-
>>  	if (IS_ERR(hwmon->hwmon_dev)) {
>>-		drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev);
>>+		drm_err(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev);
>>  		xe->hwmon = NULL;
>>-		return;
>>+		return PTR_ERR(hwmon->hwmon_dev);
>
>Hwmon is not part of drm subsystem so we didn't tie hwmon registration 
>failure with xe probe.

We don't want a half-initialized driver. We never did. The problem is
that we added integration with other subsystems and brought this
initialization-style from i915. We are just hiding bugs.

Lucas De Marchi


>
>Regards,
>Badal
>
>>  	}
>>+
>>+	return 0;
>>  }
>>diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h
>>index c42a1de2cd7a2..d02c1bfe8c0a0 100644
>>--- a/drivers/gpu/drm/xe/xe_hwmon.h
>>+++ b/drivers/gpu/drm/xe/xe_hwmon.h
>>@@ -11,9 +11,9 @@
>>  struct xe_device;
>>  #if IS_REACHABLE(CONFIG_HWMON)
>>-void xe_hwmon_register(struct xe_device *xe);
>>+int xe_hwmon_register(struct xe_device *xe);
>>  #else
>>-static inline void xe_hwmon_register(struct xe_device *xe) { };
>>+static inline int xe_hwmon_register(struct xe_device *xe) { return 0; };
>>  #endif
>>  #endif /* _XE_HWMON_H_ */


More information about the Intel-xe mailing list