[Intel-xe] [PATCH v3 2/6] drm/xe/hwmon: Expose power attributes
Nilawar, Badal
badal.nilawar at intel.com
Fri Aug 4 14:21:33 UTC 2023
On 03-08-2023 04:53, Andi Shyti wrote:
> Hi Badal,
>
> On Wed, Aug 02, 2023 at 07:22:37PM +0530, Badal Nilawar wrote:
>> Expose power_max (pl1) and power_rated_max (tdp) attributes.
>
> can you please write a few words more here to explain the
> interface being exposed and what these powers are?
>
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright © 2023 Intel Corporation
>> + */
>> +
>> +#ifndef _XE_MCHBAR_REGS_H__
>> +#define _XE_MCHBAR_REGS_H_
>
> there is an extra '_' in the ifndef
Sure I will fix this.
>
>> +
>
> [...]
>
>> #include <linux/hwmon.h>
>>
>> #include <drm/drm_managed.h>
>> +#include "regs/xe_mchbar_regs.h"
>> #include "regs/xe_gt_regs.h"
>> #include "xe_device.h"
>> #include "xe_hwmon.h"
>> +#include "xe_mmio.h"
>> +#include "xe_gt.h"
>
> can we keep these in alphabetical order?
Sure
>
>> +enum hwmon_reg_name {
>> + REG_PKG_RAPL_LIMIT,
>> + REG_PKG_POWER_SKU,
>> + REG_PKG_POWER_SKU_UNIT,
>> +};
>
> Are these names or id's? With name I understand string/
Can't say ids. I will remove _name suffix to avoid confusion.
>
>> +enum hwmon_reg_operation {
>> + REG_READ,
>> + REG_WRITE,
>> + REG_RMW,
>> +};
>
> I'm not checking on the prefixes here... I let someone more
> experienced than me comment if there is anything wrong.
>
>> +/*
>> + * SF_* - scale factors for particular quantities according to hwmon spec.
>> + * - power - microwatts
>> + */
>
> this comment looks a bit off to me, what does
> " - power - microwatts" stand for?
unit of power is microwatts as per hwmon spec.
>
>> +#define SF_POWER 1000000
>>
>> struct xe_hwmon_data {
>> struct device *hwmon_dev;
>> @@ -18,13 +39,268 @@ struct xe_hwmon_data {
>>
>> struct xe_hwmon {
>> struct xe_hwmon_data ddat;
>> - struct mutex hwmon_lock;
>> + struct mutex hwmon_lock; /* rmw operations*/
>
> please put this change in the previous patch.
Sure
>
>> + bool reset_in_progress;
>> + wait_queue_head_t waitq;
>> + int scl_shift_power;
>> };
>>
>> +#define ddat_to_xe_hwmon(ddat) ({ container_of(ddat, struct xe_hwmon, ddat); })
>
> Any particular reason for the ({ ... }) ?
>
>> +static int process_hwmon_reg(struct xe_hwmon_data *ddat, enum hwmon_reg_name reg_name,
>> + enum hwmon_reg_operation operation, u32 *value,
>> + u32 clr, u32 set)
>> +{
>> + struct xe_reg reg;
>> + int ret = 0;
>> +
>> + reg.raw = hwmon_get_reg(ddat, reg_name);
>> +
>> + if (!reg.raw)
>> + return -EOPNOTSUPP;
>> +
>> + switch (operation) {
>> + case REG_READ:
>> + *value = xe_mmio_read32(ddat->gt, reg);
>> + break;
>> + case REG_WRITE:
>> + xe_mmio_write32(ddat->gt, reg, *value);
>> + break;
>> + case REG_RMW:
>> + *value = xe_mmio_rmw32(ddat->gt, reg, clr, set);
>> + break;
>> + default:
>> + XE_MISSING_CASE(operation);
>> + ret = -EOPNOTSUPP;
>
> you could just return 0 or return -EOPNOTSUPP everywhere and save
> "ret" and a return (maybe not needed).
>
> Just a personal preference, feel free to ignore and do as you like
> it.
Sure I will fix this in next rev.
>
>> + break;
>> + }
>> +
>> + return ret;
>> +}
>
> [...]
>
>> +static int hwmon_power_max_read(struct xe_hwmon_data *ddat, long *value)
>> +{
>> + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat);
>> + u32 reg_val;
>> + u64 r, min, max;
>> +
>> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, &reg_val, 0, 0);
>> + /* Check if PL1 limit is disabled */
>> + if (!(reg_val & PKG_PWR_LIM_1_EN)) {
>> + *value = PL1_DISABLE;
>> + return 0;
>> + }
>> +
>> + reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
>> + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
>> +
>> + process_hwmon_reg_read64(ddat, REG_PKG_POWER_SKU, &r);
>> + min = REG_FIELD_GET(PKG_MIN_PWR, r);
>> + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
>> + max = REG_FIELD_GET(PKG_MAX_PWR, r);
>> + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
>> +
>> + if (min && max)
>> + *value = clamp_t(u64, *value, min, max);
>> +
>> + return 0;
>
> you are returning '0' in any case, can we make this void?
Top layer function expects return so added return here.
>
>> +}
>> +
>> +static inline bool check_reset_in_progress(struct xe_hwmon *hwmon)
>> +{
>> + mutex_lock(&hwmon->hwmon_lock);
>> + if (!hwmon->reset_in_progress)
>> + return true;
>> + mutex_unlock(&hwmon->hwmon_lock);
>> + return false;
>
> This is a bit scary (apart from the indentation) and without a
> strong explanation I can't let this go.
>
> I'm pretty sure that we don't need this... can you explain?
When a GuC load is not in progress (!reset_in_progress), the mutex must not
be unlocked here; it gets unlocked once the rmw operations are over.
The other way would be to take mutex_lock after the !reset_in_progress check,
but that will add a race.
wait_event(hwmon->waitq, reset_in_progress);
At this point there is a possibility that reset_in_progress gets set, so
this becomes racy.
mutex_lock(&hwmon->hwmon_lock);
Any better idea to implement this?
>
>> +}
>> +
>> +static int hwmon_power_max_write(struct xe_hwmon_data *ddat, long value)
>> +{
>> + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat);
>> + DEFINE_WAIT(wait);
>> + int ret = 0;
>> + u32 nval;
>> +
>> + /* hwmon->hwmon_lock remain held till rmw operation is over */
>> + wait_event(hwmon->waitq, check_reset_in_progress(hwmon));
>> +
>> + /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
>> + if (value == PL1_DISABLE) {
>> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval,
>> + PKG_PWR_LIM_1_EN, 0);
>> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, &nval,
>> + PKG_PWR_LIM_1_EN, 0);
>> +
>> + if (nval & PKG_PWR_LIM_1_EN)
>> + ret = -ENODEV;
>> + goto unlock;
>> + }
>> +
>> + /* Computation in 64-bits to avoid overflow. Round to nearest. */
>> + nval = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
>> + nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval);
>> +
>> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval,
>> + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
>> +unlock:
>> + mutex_unlock(&hwmon->hwmon_lock);
>
> Where is this lock taken? Are you relying on the fact that this
> lock might not be taken? In any case it is not allowed to unlock
> a mutex without previously locking it.
Lock is taken in check_reset_in_progress();
>
> It's very error prone when you lock in a function and unlock in
> another function and in the rare cases when this is done it has
> to be written in the function name.
Sure I will add comment here.
>
>> + return 0;
>> +}
>> +
>> +static int hwmon_power_rated_max_read(struct xe_hwmon_data *ddat, long *value)
>> +{
>> + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat);
>> + u32 reg_val;
>> +
>> + process_hwmon_reg(ddat, REG_PKG_POWER_SKU, REG_READ, &reg_val, 0, 0);
>> + reg_val = REG_FIELD_GET(PKG_PKG_TDP, reg_val);
>> + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
>> +
>> + return 0;
>
> Can this function be void?
Top level function expect return.
>
>> +}
>
> [...]
>
>> +void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old)
>> +{
>> + struct xe_hwmon *hwmon = xe->hwmon;
>> + struct xe_hwmon_data *ddat = &hwmon->ddat;
>> + u32 r;
>> +
>> + if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT)))
>> + return;
>> +
>> + xe_device_assert_mem_access(gt_to_xe(ddat->gt));
>> +
>> + mutex_lock(&hwmon->hwmon_lock);
>> +
>> + hwmon->reset_in_progress = true;
>> +
>> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r,
>> + PKG_PWR_LIM_1_EN, 0);
>> + *old = !!(r & PKG_PWR_LIM_1_EN);
>
> do we need to place under lock these last two lines?
Yes, want to guard this rmw operation.
>
>> + mutex_unlock(&hwmon->hwmon_lock);
>> +}
>> +
>> +void xe_hwmon_power_max_restore(struct xe_device *xe, bool old)
>> +{
>> + struct xe_hwmon *hwmon = xe->hwmon;
>> + struct xe_hwmon_data *ddat = &hwmon->ddat;
>> + u32 r;
>> +
>> + if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT)))
>> + return;
>> +
>> + xe_device_assert_mem_access(gt_to_xe(ddat->gt));
>> +
>> + mutex_lock(&hwmon->hwmon_lock);
>> +
>> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r,
>> + PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
>> +
>> + hwmon->reset_in_progress = false;
>> + wake_up_all(&hwmon->waitq);
>
> does the wake up need to be under lock?
wake up can be added after unlock.
>
> Now... does it ever happen that "check_reset_in_progress()"
> returns false and therefore unlocks the mutex?
Didn't get this? check_reset_in_progress() will keep waiting for mutex
till it is released by this function.
>
>> +
>> + mutex_unlock(&hwmon->hwmon_lock);
>> +}
>
> [...]
>
>> void xe_hwmon_register(struct xe_device *xe)
>> @@ -128,13 +425,16 @@ void xe_hwmon_register(struct xe_device *xe)
>>
>> hwmon_get_preregistration_info(xe);
>>
>> + init_waitqueue_head(&hwmon->waitq);
>> +
>> drm_dbg(&xe->drm, "Register xe hwmon interface\n");
>>
>> - /* hwmon_dev points to device hwmon<i> */
>> + /* hwmon_dev points to device hwmon<i> */
>
> Please this change needs to go in the previous patch.
> What is <i>?
>
>> hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
>> ddat,
>> &hwmon_chip_info,
>> NULL);
>> +
>
> This change in the previous patch.
>
>> if (IS_ERR(hwmon_dev)) {
>> drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev));
>> xe->hwmon = NULL;
>> diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h
>> index a078eeb0a68b..a5dc693569c5 100644
>> --- a/drivers/gpu/drm/xe/xe_hwmon.h
>> +++ b/drivers/gpu/drm/xe/xe_hwmon.h
>> @@ -14,9 +14,13 @@ struct xe_device;
>> #if IS_REACHABLE(CONFIG_HWMON)
>> void xe_hwmon_register(struct xe_device *xe);
>> void xe_hwmon_unregister(struct xe_device *xe);
>> +void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old);
>> +void xe_hwmon_power_max_restore(struct xe_device *xe, bool old);
>> #else
>> static inline void xe_hwmon_register(struct xe_device *xe) { };
>> static inline void xe_hwmon_unregister(struct xe_device *xe) { };
>> +static inline void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old) { };
>> +static inline void xe_hwmon_power_max_restore(struct xe_device *xe, bool old) { };
>> #endif
>>
>> #endif /* __XE_HWMON_H__ */
>> diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
>> index daf56c846d03..030296f8f863 100644
>> --- a/drivers/gpu/drm/xe/xe_macros.h
>> +++ b/drivers/gpu/drm/xe/xe_macros.h
>> @@ -15,4 +15,7 @@
>> "Ioctl argument check failed at %s:%d: %s", \
>> __FILE__, __LINE__, #cond), 1))
>>
>> +#define XE_MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
>> + __stringify(x), (long)(x))
>> +
>
> Should this have its own patch?
Sure, I will create separate patch for this.
>
> Andi
>
>> #endif
More information about the Intel-xe
mailing list