[PATCH v3] drm/xe/hwmon: Update xe hwmon with couple of fixes

Lucas De Marchi lucas.demarchi at intel.com
Tue Apr 2 19:18:21 UTC 2024


On Tue, Apr 02, 2024 at 11:39:56PM +0530, Karthik Poosa wrote:
>Address potential overflows in result of multiplication of two lower
>precision (u32) operands before widening it to higher precision (u64).
>
>Initialize variables which were being used uninitialized.
>
>v2:
> - Rebase.
> - In xe_hwmon_process_reg, set argument 'value' to 0 on failure.
>   With this change caller function need not initialize value to 0.
>
>v3:
> - Update commit msg as per review comments (Rodrigo, Lucas).
> - Update xe_hwmon_get_reg. Return bool from xe_hwmon_get_reg
>   and output xe_reg. This is to have abstracted usage of xe_reg. (Lucas)
>
>Fixes: 4446fcf220ce ("drm/xe/hwmon: Expose power1_max_interval")
>Signed-off-by: Karthik Poosa <karthik.poosa at intel.com>
>Reviewed-by: Anshuman Gupta <anshuman.gupta at intel.com>
>---
> drivers/gpu/drm/xe/xe_hwmon.c | 60 ++++++++++++++++++++---------------
> 1 file changed, 35 insertions(+), 25 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
>index 7e8caac838e0..8c805ace592c 100644
>--- a/drivers/gpu/drm/xe/xe_hwmon.c
>+++ b/drivers/gpu/drm/xe/xe_hwmon.c
>@@ -79,46 +79,48 @@ struct xe_hwmon {
> 	struct xe_hwmon_energy_info ei[CHANNEL_MAX];
> };
>
>-static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, int channel)
>+static bool xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
>+			     int channel, struct xe_reg *reg)

patch 1 could add xe_reg_is_valid(), just like we have for i915. This
will create the missing abstraction

patch 2 could change this function, but rather than returning bool,
return struct xe_reg (yes, by value). The caller will then do a
if (!xe_reg_is_valid(reg)) return. This will unbreak the abstraction.


> {
> 	struct xe_device *xe = gt_to_xe(hwmon->gt);
>-	struct xe_reg reg = XE_REG(0);
>+
>+	*reg = XE_REG(0);
>
> 	switch (hwmon_reg) {
> 	case REG_PKG_RAPL_LIMIT:
> 		if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
>-			reg = PVC_GT0_PACKAGE_RAPL_LIMIT;
>+			*reg = PVC_GT0_PACKAGE_RAPL_LIMIT;

with the return by value above you can update all these with
things like

	if (...)
		return PVC_GT0_PACKAGE_RAPL_LIMIT;
	if (...)
		return PCU_CR_PACKAGE_RAPL_LIMIT;


> 		else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
>-			reg = PCU_CR_PACKAGE_RAPL_LIMIT;
>+			*reg = PCU_CR_PACKAGE_RAPL_LIMIT;
> 		break;
> 	case REG_PKG_POWER_SKU:
> 		if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
>-			reg = PVC_GT0_PACKAGE_POWER_SKU;
>+			*reg = PVC_GT0_PACKAGE_POWER_SKU;
> 		else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
>-			reg = PCU_CR_PACKAGE_POWER_SKU;
>+			*reg = PCU_CR_PACKAGE_POWER_SKU;
> 		break;
> 	case REG_PKG_POWER_SKU_UNIT:
> 		if (xe->info.platform == XE_PVC)
>-			reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
>+			*reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
> 		else if (xe->info.platform == XE_DG2)
>-			reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
>+			*reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
> 		break;
> 	case REG_GT_PERF_STATUS:
> 		if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG)
>-			reg = GT_PERF_STATUS;
>+			*reg = GT_PERF_STATUS;
> 		break;
> 	case REG_PKG_ENERGY_STATUS:
> 		if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
>-			reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
>+			*reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
> 		else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
>-			reg = PCU_CR_PACKAGE_ENERGY_STATUS;
>+			*reg = PCU_CR_PACKAGE_ENERGY_STATUS;
> 		break;
> 	default:
> 		drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
>-		break;
>+		return false;
> 	}
>
>-	return reg.raw;
>+	return true;
> }
>
> static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
>@@ -127,10 +129,10 @@ static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon
> {
> 	struct xe_reg reg;
>
>-	reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg, channel);
>-
>-	if (!reg.raw)
>+	if (!xe_hwmon_get_reg(hwmon, hwmon_reg, channel, &reg)) {
>+		*value = 0;

setting *value to 0 would be patch 3, with the commit message
explaining why. Although the code seems weird returning void and setting
the *value. Why not just return u64 with the value?

The caller is passed an uninitialized value and hoping it's set by the
callee rather than doing value = xe_hwmon_process_reg() and making sure
the callee could possibly forget to set.

> 		return;
>+	}
>
> 	switch (operation) {
> 	case REG_READ32:
>@@ -143,6 +145,7 @@ static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon
> 		*value = xe_mmio_read64_2x32(hwmon->gt, reg);
> 		break;
> 	default:
>+		*value = 0;
> 		drm_warn(&gt_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg operation: %d\n",
> 			 operation);
> 		break;
>@@ -298,7 +301,7 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
> 	 * As y can be < 2, we compute tau4 = (4 | x) << y
> 	 * and then add 2 when doing the final right shift to account for units
> 	 */
>-	tau4 = ((1 << x_w) | x) << y;
>+	tau4 = (u64)((1 << x_w) | x) << y;
>
> 	/* val in hwmon interface units (millisec) */
> 	out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
>@@ -339,7 +342,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
> 	r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
> 	x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
> 	y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
>-	tau4 = ((1 << x_w) | x) << y;
>+	tau4 = (u64)((1 << x_w) | x) << y;

as said previously, these 2 hunks are another issue and should be in
another patch.... make it patch 4



Lucas De Marchi

> 	max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
>
> 	if (val > max_win)
>@@ -397,10 +400,11 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
> 	struct device *dev = kobj_to_dev(kobj);
> 	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
> 	int ret = 0;
>+	struct xe_reg reg;
>
> 	xe_pm_runtime_get(gt_to_xe(hwmon->gt));
>
>-	ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index) ? attr->mode : 0;
>+	ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index, &reg) ? attr->mode : 0;
>
> 	xe_pm_runtime_put(gt_to_xe(hwmon->gt));
>
>@@ -493,19 +497,20 @@ static umode_t
> xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
> {
> 	u32 uval;
>+	struct xe_reg reg;
>
> 	switch (attr) {
> 	case hwmon_power_max:
>-		return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel) ? 0664 : 0;
>+		return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel, &reg) ? 0664 : 0;
> 	case hwmon_power_rated_max:
>-		return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel) ? 0444 : 0;
>+		return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel, &reg) ? 0444 : 0;
> 	case hwmon_power_crit:
> 		if (channel == CHANNEL_PKG)
> 			return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
> 				!(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
> 		break;
> 	case hwmon_power_label:
>-		return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, channel) ? 0444 : 0;
>+		return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, channel, &reg) ? 0444 : 0;
> 	default:
> 		return 0;
> 	}
>@@ -585,10 +590,12 @@ xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val)
> static umode_t
> xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
> {
>+	struct xe_reg reg;
>+
> 	switch (attr) {
> 	case hwmon_in_input:
> 	case hwmon_in_label:
>-		return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel) ? 0444 : 0;
>+		return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel, &reg) ? 0444 : 0;
> 	default:
> 		return 0;
> 	}
>@@ -609,10 +616,12 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
> static umode_t
> xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
> {
>+	struct xe_reg reg;
>+
> 	switch (attr) {
> 	case hwmon_energy_input:
> 	case hwmon_energy_label:
>-		return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, channel) ? 0444 : 0;
>+		return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, channel, &reg) ? 0444 : 0;
> 	default:
> 		return 0;
> 	}
>@@ -758,12 +767,13 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
> 	long energy;
> 	u64 val_sku_unit = 0;
> 	int channel;
>+	struct xe_reg reg;
>
> 	/*
> 	 * The contents of register PKG_POWER_SKU_UNIT do not change,
> 	 * so read it once and store the shift values.
> 	 */
>-	if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0)) {
>+	if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0, &reg)) {
> 		xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
> 				     REG_READ32, &val_sku_unit, 0, 0, 0);
> 		hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
>-- 
>2.25.1
>


More information about the Intel-xe mailing list