[PATCH v3] drm/xe/hwmon: Update xe hwmon with couple of fixes

Poosa, Karthik karthik.poosa at intel.com
Wed Apr 3 12:52:24 UTC 2024


On 03-04-2024 00:48, Lucas De Marchi wrote:
> On Tue, Apr 02, 2024 at 11:39:56PM +0530, Karthik Poosa wrote:
>> Address potential overflows in result of multiplication of two lower
>> precision (u32) operands before widening it to higher precision (u64).
>>
>> Initialize variables which were being used uninitialized.
>>
>> v2:
>> - Rebase.
>> - In xe_hwmon_process_reg, set argument 'value' to 0 on failure.
>>   With this change caller function need not initialize value to 0.
>>
>> v3:
>> - Update commit msg as per review comments (Rodrigo, Lucas).
>> - Update xe_hwmon_get_reg. Return bool from xe_hwmon_get_reg
>>   and output xe_reg. This is to have abstracted usage of xe_reg. (Lucas)
>>
>> Fixes: 4446fcf220ce ("drm/xe/hwmon: Expose power1_max_interval")
>> Signed-off-by: Karthik Poosa <karthik.poosa at intel.com>
>> Reviewed-by: Anshuman Gupta <anshuman.gupta at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_hwmon.c | 60 ++++++++++++++++++++---------------
>> 1 file changed, 35 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_hwmon.c 
>> b/drivers/gpu/drm/xe/xe_hwmon.c
>> index 7e8caac838e0..8c805ace592c 100644
>> --- a/drivers/gpu/drm/xe/xe_hwmon.c
>> +++ b/drivers/gpu/drm/xe/xe_hwmon.c
>> @@ -79,46 +79,48 @@ struct xe_hwmon {
>>     struct xe_hwmon_energy_info ei[CHANNEL_MAX];
>> };
>>
>> -static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum 
>> xe_hwmon_reg hwmon_reg, int channel)
>> +static bool xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum 
>> xe_hwmon_reg hwmon_reg,
>> +                 int channel, struct xe_reg *reg)
>
> patch 1 could add xe_reg_is_valid(), just like we have for i915. This
> will create the missing abstraction
>
Should this be added in xe_regs.h or xe_hwmon.c ?
> patch 2 could change this function, but rather than returning bool,
> return struct xe_reg (yes, by value). The caller will then do a
> if (!xe_reg_is_valid(reg)) return. This will unbreak the abstraction.
>
There would be a call to xe_reg_is_valid() after every xe_hwmon_get_reg.

Can't this be as part of xe_hwmon_get_reg itself ?

>
>> {
>>     struct xe_device *xe = gt_to_xe(hwmon->gt);
>> -    struct xe_reg reg = XE_REG(0);
>> +
>> +    *reg = XE_REG(0);
>>
>>     switch (hwmon_reg) {
>>     case REG_PKG_RAPL_LIMIT:
>>         if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
>> -            reg = PVC_GT0_PACKAGE_RAPL_LIMIT;
>> +            *reg = PVC_GT0_PACKAGE_RAPL_LIMIT;
>
> with the return by value above you can update all these with
> things like
>
>     if (...)
>         return PVC_GT0_PACKAGE_RAPL_LIMIT;
>     if (...)
>         return PCU_CR_PACKAGE_RAPL_LIMIT;
>
>
>>         else if ((xe->info.platform == XE_DG2) && (channel == 
>> CHANNEL_PKG))
>> -            reg = PCU_CR_PACKAGE_RAPL_LIMIT;
>> +            *reg = PCU_CR_PACKAGE_RAPL_LIMIT;
>>         break;
>>     case REG_PKG_POWER_SKU:
>>         if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
>> -            reg = PVC_GT0_PACKAGE_POWER_SKU;
>> +            *reg = PVC_GT0_PACKAGE_POWER_SKU;
>>         else if ((xe->info.platform == XE_DG2) && (channel == 
>> CHANNEL_PKG))
>> -            reg = PCU_CR_PACKAGE_POWER_SKU;
>> +            *reg = PCU_CR_PACKAGE_POWER_SKU;
>>         break;
>>     case REG_PKG_POWER_SKU_UNIT:
>>         if (xe->info.platform == XE_PVC)
>> -            reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
>> +            *reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
>>         else if (xe->info.platform == XE_DG2)
>> -            reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
>> +            *reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
>>         break;
>>     case REG_GT_PERF_STATUS:
>>         if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG)
>> -            reg = GT_PERF_STATUS;
>> +            *reg = GT_PERF_STATUS;
>>         break;
>>     case REG_PKG_ENERGY_STATUS:
>>         if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
>> -            reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
>> +            *reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
>>         else if ((xe->info.platform == XE_DG2) && (channel == 
>> CHANNEL_PKG))
>> -            reg = PCU_CR_PACKAGE_ENERGY_STATUS;
>> +            *reg = PCU_CR_PACKAGE_ENERGY_STATUS;
>>         break;
>>     default:
>>         drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
>> -        break;
>> +        return false;
>>     }
>>
>> -    return reg.raw;
>> +    return true;
>> }
>>
>> static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum 
>> xe_hwmon_reg hwmon_reg,
>> @@ -127,10 +129,10 @@ static void xe_hwmon_process_reg(struct 
>> xe_hwmon *hwmon, enum xe_hwmon_reg hwmon
>> {
>>     struct xe_reg reg;
>>
>> -    reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg, channel);
>> -
>> -    if (!reg.raw)
>> +    if (!xe_hwmon_get_reg(hwmon, hwmon_reg, channel, &reg)) {
>> +        *value = 0;
>
> setting *value to 0 would be patch 3, with the commit message
> explaining why. Although the code seems weird returning void and setting
> the *value. Why not just return u64 with the value?
>
> The caller is passed an uninitialized value and hoping it's set by the
> callee rather than doing value = xe_hwmon_process_reg() and making sure
> the callee could possibly forget to set.
>
>>         return;
>> +    }
>>
>>     switch (operation) {
>>     case REG_READ32:
>> @@ -143,6 +145,7 @@ static void xe_hwmon_process_reg(struct xe_hwmon 
>> *hwmon, enum xe_hwmon_reg hwmon
>>         *value = xe_mmio_read64_2x32(hwmon->gt, reg);
>>         break;
>>     default:
>> +        *value = 0;
>>         drm_warn(&gt_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg 
>> operation: %d\n",
>>              operation);
>>         break;
>> @@ -298,7 +301,7 @@ xe_hwmon_power_max_interval_show(struct device 
>> *dev, struct device_attribute *at
>>      * As y can be < 2, we compute tau4 = (4 | x) << y
>>      * and then add 2 when doing the final right shift to account for 
>> units
>>      */
>> -    tau4 = ((1 << x_w) | x) << y;
>> +    tau4 = (u64)((1 << x_w) | x) << y;
>>
>>     /* val in hwmon interface units (millisec) */
>>     out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
>> @@ -339,7 +342,7 @@ xe_hwmon_power_max_interval_store(struct device 
>> *dev, struct device_attribute *a
>>     r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
>>     x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
>>     y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
>> -    tau4 = ((1 << x_w) | x) << y;
>> +    tau4 = (u64)((1 << x_w) | x) << y;
>
> as said previously, these 2 hunks are another issue and should be in
> another patch.... make it patch 4
>
>
>
> Lucas De Marchi
>
>>     max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + 
>> x_w);
>>
>>     if (val > max_win)
>> @@ -397,10 +400,11 @@ static umode_t 
>> xe_hwmon_attributes_visible(struct kobject *kobj,
>>     struct device *dev = kobj_to_dev(kobj);
>>     struct xe_hwmon *hwmon = dev_get_drvdata(dev);
>>     int ret = 0;
>> +    struct xe_reg reg;
>>
>>     xe_pm_runtime_get(gt_to_xe(hwmon->gt));
>>
>> -    ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index) ? 
>> attr->mode : 0;
>> +    ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index, &reg) ? 
>> attr->mode : 0;
>>
>>     xe_pm_runtime_put(gt_to_xe(hwmon->gt));
>>
>> @@ -493,19 +497,20 @@ static umode_t
>> xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
>> {
>>     u32 uval;
>> +    struct xe_reg reg;
>>
>>     switch (attr) {
>>     case hwmon_power_max:
>> -        return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel) 
>> ? 0664 : 0;
>> +        return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel, 
>> &reg) ? 0664 : 0;
>>     case hwmon_power_rated_max:
>> -        return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel) ? 
>> 0444 : 0;
>> +        return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel, 
>> &reg) ? 0444 : 0;
>>     case hwmon_power_crit:
>>         if (channel == CHANNEL_PKG)
>>             return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
>>                 !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
>>         break;
>>     case hwmon_power_label:
>> -        return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 
>> channel) ? 0444 : 0;
>> +        return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 
>> channel, &reg) ? 0444 : 0;
>>     default:
>>         return 0;
>>     }
>> @@ -585,10 +590,12 @@ xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 
>> attr, int channel, long val)
>> static umode_t
>> xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
>> {
>> +    struct xe_reg reg;
>> +
>>     switch (attr) {
>>     case hwmon_in_input:
>>     case hwmon_in_label:
>> -        return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel) 
>> ? 0444 : 0;
>> +        return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel, 
>> &reg) ? 0444 : 0;
>>     default:
>>         return 0;
>>     }
>> @@ -609,10 +616,12 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 
>> attr, int channel, long *val)
>> static umode_t
>> xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int 
>> channel)
>> {
>> +    struct xe_reg reg;
>> +
>>     switch (attr) {
>>     case hwmon_energy_input:
>>     case hwmon_energy_label:
>> -        return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, 
>> channel) ? 0444 : 0;
>> +        return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, 
>> channel, &reg) ? 0444 : 0;
>>     default:
>>         return 0;
>>     }
>> @@ -758,12 +767,13 @@ xe_hwmon_get_preregistration_info(struct 
>> xe_device *xe)
>>     long energy;
>>     u64 val_sku_unit = 0;
>>     int channel;
>> +    struct xe_reg reg;
>>
>>     /*
>>      * The contents of register PKG_POWER_SKU_UNIT do not change,
>>      * so read it once and store the shift values.
>>      */
>> -    if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0)) {
>> +    if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0, &reg)) {
>>         xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
>>                      REG_READ32, &val_sku_unit, 0, 0, 0);
>>         hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, 
>> val_sku_unit);
>> -- 
>> 2.25.1
>>


More information about the Intel-xe mailing list