[PATCH v10 3/5] drm/xe/hwmon: Add support to manage PL2 though mailbox

Rodrigo Vivi rodrigo.vivi at intel.com
Fri May 23 17:16:46 UTC 2025


On Fri, May 23, 2025 at 08:30:12PM +0530, Karthik Poosa wrote:
> Add support to manage power limit PL2 (burst limit) through
> pcode mailbox commands.
> 
> v2:
>  - Update power1_cap definition in hwmon documentation. (Badal)
>  - Clamp PL2 power limit to BIOS default value.
> 
> v3:
>  - Activate the power label when either the PL1 or PL2 power
>    limit is enabled.
> 
> v4:
>  - Update description of pl2_on_boot variable to fix kernel-doc
>    error.
> 
> v5:
>  - Remove unnecessary drm_warn.

Please let's not merge this series and calm down here and stop trying
to rush things in.

If drm_warn is not necessary, please have it in separate commits explaining why.

Think about the use case. When the user are reading 0 because the read failed,
don't we want a splat?
Why it would only appear when debug is enabled?
In this case is zero a meaningful value?

Well, I'm not telling that one way or another is right here, but
I'm seeing that this series is trying hard to bypass CI and rush
things in. We need to calm down here please.

Think about the cases were the reads are failing. Is it really
only when it is enabled? Then shouldn't we check for the enabling
before attempt to read? no clean way to inform the user of errors?

>  - Rectify powerX_label permission to read-only on platforms
>    without mailbox power limits support.
>  - Expose powerX_cap entries only on platforms with mailbox
>    support.
> 
> Signed-off-by: Karthik Poosa <karthik.poosa at intel.com>
> Reviewed-by: Badal Nilawar <badal.nilawar at intel.com>
> ---
>  .../ABI/testing/sysfs-driver-intel-xe-hwmon   | 30 ++++++
>  drivers/gpu/drm/xe/xe_hwmon.c                 | 97 +++++++++++++------
>  2 files changed, 96 insertions(+), 31 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
> index 5a91dcccd3ac..dffd6443664a 100644
> --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
> +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
> @@ -148,3 +148,33 @@ Contact:	intel-xe at lists.freedesktop.org
>  Description:	RO. Fan 3 speed in RPM.
>  
>  		Only supported for particular Intel Xe graphics platforms.
> +
> +What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_cap
> +Date:		May 2025
> +KernelVersion:	6.15
> +Contact:	intel-xe at lists.freedesktop.org
> +Description:	RW. Card burst (PL2) power limit in microwatts.
> +
> +		The power controller will throttle the operating frequency
> +		if the power averaged over a window (typically milli seconds)
> +		exceeds this limit. A read value of 0 means that the PL2
> +		power limit is disabled, writing 0 disables the	limit.
> +		PL2 is greater than PL1 and its time window is lesser
> +		compared to PL1.
> +
> +		Only supported for particular Intel Xe graphics platforms.
> +
> +What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_cap
> +Date:		May 2025
> +KernelVersion:	6.15
> +Contact:	intel-xe at lists.freedesktop.org
> +Description:	RW. Package burst (PL2) power limit in microwatts.
> +
> +		The power controller will throttle the operating frequency
> +		if the power averaged over a window (typically milli seconds)
> +		exceeds this limit. A read value of 0 means that the PL2
> +		power limit is disabled, writing 0 disables the	limit.
> +		PL2 is greater than PL1 and its time window is lesser
> +		compared to PL1.
> +
> +		Only supported for particular Intel Xe graphics platforms.
> diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
> index 7cfb233ac9a2..caf6a43ac92b 100644
> --- a/drivers/gpu/drm/xe/xe_hwmon.c
> +++ b/drivers/gpu/drm/xe/xe_hwmon.c
> @@ -51,6 +51,14 @@ enum xe_fan_channel {
>  	FAN_MAX,
>  };
>  
> +/* Attribute index for powerX_xxx_interval sysfs entries */
> +enum sensor_attr_power {
> +	SENSOR_INDEX_PSYS_PL1,
> +	SENSOR_INDEX_PKG_PL1,
> +	SENSOR_INDEX_PSYS_PL2,
> +	SENSOR_INDEX_PKG_PL2,
> +};
> +
>  /*
>   * For platforms that support mailbox commands for power limits, REG_PKG_POWER_SKU_UNIT is
>   * not supported and below are SKU units to be used.
> @@ -72,8 +80,9 @@ enum xe_fan_channel {
>   * PL*_HWMON_ATTR - mapping of hardware power limits to corresponding hwmon power attribute.
>   */
>  #define PL1_HWMON_ATTR	hwmon_power_max
> +#define PL2_HWMON_ATTR	hwmon_power_cap
>  
> -#define PWR_ATTR_TO_STR(attr)	(((attr) == hwmon_power_max) ? "PL1" : "Invalid")
> +#define PWR_ATTR_TO_STR(attr)	(((attr) == hwmon_power_max) ? "PL1" : "PL2")
>  
>  /*
>   * Timeout for power limit write mailbox command.
> @@ -124,6 +133,9 @@ struct xe_hwmon {
>  	bool boot_power_limit_read;
>  	/** @pl1_on_boot: power limit PL1 on boot */
>  	u32 pl1_on_boot[CHANNEL_MAX];
> +	/** @pl2_on_boot: power limit PL2 on boot */
> +	u32 pl2_on_boot[CHANNEL_MAX];
> +
>  };
>  
>  static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel,
> @@ -151,8 +163,10 @@ static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 att
>  	/* return the value only if limit is enabled */
>  	if (attr == PL1_HWMON_ATTR)
>  		*uval = (val0 & PWR_LIM_EN) ? val0 : 0;
> +	else if (attr == PL2_HWMON_ATTR)
> +		*uval = (val1 & PWR_LIM_EN) ? val1 : 0;
>  	else if (attr == hwmon_power_label)
> -		*uval = (val0 & PWR_LIM_EN) ? 1 : 0;
> +		*uval = (val0 & PWR_LIM_EN) ? 1 : (val1 & PWR_LIM_EN) ? 1 : 0;
>  	else
>  		*uval = 0;
>  
> @@ -180,6 +194,8 @@ static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 at
>  
>  	if (attr == PL1_HWMON_ATTR)
>  		val0 = uval;
> +	else if (attr == PL2_HWMON_ATTR)
> +		val1 = uval;
>  	else
>  		return -EIO;
>  
> @@ -273,7 +289,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
>   */
>  static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value)
>  {
> -	u64 reg_val, min, max;
> +	u64 reg_val = 0, min, max;
>  	struct xe_device *xe = hwmon->xe;
>  	struct xe_reg rapl_limit, pkg_power_sku;
>  	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
> @@ -301,8 +317,8 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe
>  	/* Check if PL limits are disabled. */
>  	if (!(reg_val & PWR_LIM_EN)) {
>  		*value = PL_DISABLE;
> -		drm_warn(&hwmon->xe->drm, "%s disabled for channel %d !, val 0x%016llx\n",
> -			 PWR_ATTR_TO_STR(attr), channel, reg_val);
> +		drm_dbg(&hwmon->xe->drm, "%s disabled for channel %d\n",  PWR_ATTR_TO_STR(attr),
> +			channel);
>  		goto unlock;
>  	}
>  
> @@ -327,7 +343,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
>  {
>  	struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
>  	int ret = 0;
> -	u32 reg_val;
> +	u32 reg_val, max;
>  	struct xe_reg rapl_limit;
>  
>  	mutex_lock(&hwmon->hwmon_lock);
> @@ -355,20 +371,24 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
>  
>  	/* Computation in 64-bits to avoid overflow. Round to nearest. */
>  	reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
> -	reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val);
>  
>  	/*
>  	 * Clamp power limit to card-firmware default as maximum, as an additional protection to
>  	 * pcode clamp.
>  	 */
>  	if (hwmon->xe->info.has_mbx_power_limits) {
> -		if (reg_val > REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel])) {
> -			reg_val = REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel]);
> +		max = (attr == PL1_HWMON_ATTR) ?
> +		       hwmon->pl1_on_boot[channel] : hwmon->pl2_on_boot[channel];
> +		max = REG_FIELD_PREP(PWR_LIM_VAL, max);
> +		if (reg_val > max) {
> +			reg_val = max;
>  			drm_dbg(&hwmon->xe->drm, "Clamping power limit to firmware default 0x%x\n",
>  				reg_val);
>  		}
>  	}
>  
> +	reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val);
> +
>  	if (hwmon->xe->info.has_mbx_power_limits)
>  		ret = xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, reg_val);
>  	else
> @@ -452,8 +472,9 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
>  	struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
>  	u32 x, y, x_w = 2; /* 2 bits */
>  	u64 r, tau4, out;
> -	int channel = to_sensor_dev_attr(attr)->index;
> +	int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD;
>  	u32 power_attr = PL1_HWMON_ATTR;
> +
>  	int ret = 0;
>  
>  	xe_pm_runtime_get(hwmon->xe);
> @@ -506,9 +527,9 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
>  	u32 x, y, rxy, x_w = 2; /* 2 bits */
>  	u64 tau4, r, max_win;
>  	unsigned long val;
> -	int ret;
> -	int channel = to_sensor_dev_attr(attr)->index;
> +	int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD;
>  	u32 power_attr = PL1_HWMON_ATTR;
> +	int ret;
>  
>  	ret = kstrtoul(buf, 0, &val);
>  	if (ret)
> @@ -535,10 +556,8 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
>  	tau4 = (u64)((1 << x_w) | x) << y;
>  	max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
>  
> -	if (val > max_win) {
> -		drm_warn(&hwmon->xe->drm, "power_interval invalid val 0x%lx\n", val);
> +	if (val > max_win)
>  		return -EINVAL;
> -	}
>  
>  	/* val in hw units */
>  	val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME) + 1;
> @@ -582,11 +601,11 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
>  /* PSYS PL1 */
>  static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
>  			  xe_hwmon_power_max_interval_show,
> -			  xe_hwmon_power_max_interval_store, CHANNEL_CARD);
> -
> +			  xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL1);
> +/* PKG PL1 */
>  static SENSOR_DEVICE_ATTR(power2_max_interval, 0664,
>  			  xe_hwmon_power_max_interval_show,
> -			  xe_hwmon_power_max_interval_store, CHANNEL_PKG);
> +			  xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL1);
>  
>  static struct attribute *hwmon_attributes[] = {
>  	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
> @@ -600,7 +619,7 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
>  	struct device *dev = kobj_to_dev(kobj);
>  	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
>  	int ret = 0;
> -	int channel = index ? CHANNEL_PKG : CHANNEL_CARD;
> +	int channel = (index % 2) ? CHANNEL_PKG : CHANNEL_CARD;
>  	u32 power_attr = PL1_HWMON_ATTR;
>  	u32 uval;
>  
> @@ -609,7 +628,7 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
>  	if (hwmon->xe->info.has_mbx_power_limits) {
>  		xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval);
>  		ret = (uval & PWR_LIM_EN) ? attr->mode : 0;
> -	} else {
> +	} else if (power_attr != PL2_HWMON_ATTR) {
>  		ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
>  						       channel)) ? attr->mode : 0;
>  	}
> @@ -632,8 +651,9 @@ static const struct attribute_group *hwmon_groups[] = {
>  static const struct hwmon_channel_info * const hwmon_info[] = {
>  	HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL,
>  			   HWMON_T_INPUT | HWMON_T_LABEL),
> -	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT,
> -			   HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL),
> +	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT |
> +			   HWMON_P_CAP,
> +			   HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CAP),
>  	HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL),
>  	HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL),
>  	HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL),
> @@ -754,17 +774,22 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
>  static umode_t
>  xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
>  {
> -	u32 uval;
> +	u32 uval = 0;
> +	struct xe_reg rapl_limit;
> +	struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
>  
>  	switch (attr) {
>  	case hwmon_power_max:
> +	case hwmon_power_cap:
> +	case hwmon_power_label:
>  		if (hwmon->xe->info.has_mbx_power_limits) {
>  			xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval);
> -			return (uval) ? 0664 : 0;
> -		} else {
> -			return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
> -				       channel)) ? 0664 : 0;
> +		} else if (attr != PL2_HWMON_ATTR) {
> +			rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
> +			if (xe_reg_is_valid(rapl_limit))
> +				uval = xe_mmio_read32(mmio, rapl_limit);
>  		}
> +		return (uval) ? (attr == hwmon_power_label) ? 0444 : 0664 : 0;
>  	case hwmon_power_rated_max:
>  		if (hwmon->xe->info.has_mbx_power_limits)
>  			return 0;
> @@ -772,11 +797,9 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
>  			return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU,
>  					       channel)) ? 0444 : 0;
>  	case hwmon_power_crit:
> -	case hwmon_power_label:
>  		if (channel == CHANNEL_CARD) {
>  			xe_hwmon_pcode_read_i1(hwmon, &uval);
> -			return (uval & POWER_SETUP_I1_WATTS) ? (attr == hwmon_power_label) ?
> -				0444 : 0644 : 0;
> +			return (uval & POWER_SETUP_I1_WATTS) ? 0644 : 0;
>  		}
>  		break;
>  	default:
> @@ -790,6 +813,7 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
>  {
>  	switch (attr) {
>  	case hwmon_power_max:
> +	case hwmon_power_cap:
>  		xe_hwmon_power_max_read(hwmon, attr, channel, val);
>  		return 0;
>  	case hwmon_power_rated_max:
> @@ -806,6 +830,7 @@ static int
>  xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val)
>  {
>  	switch (attr) {
> +	case hwmon_power_cap:
>  	case hwmon_power_max:
>  		return xe_hwmon_power_max_write(hwmon, attr, channel, val);
>  	case hwmon_power_crit:
> @@ -1132,7 +1157,11 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
>  		if (xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_CARD,
>  						    &hwmon->pl1_on_boot[CHANNEL_CARD]) |
>  		    xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG,
> -						    &hwmon->pl1_on_boot[CHANNEL_PKG])) {
> +						    &hwmon->pl1_on_boot[CHANNEL_PKG]) |
> +		    xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_CARD,
> +						    &hwmon->pl2_on_boot[CHANNEL_CARD]) |
> +		    xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG,
> +						    &hwmon->pl2_on_boot[CHANNEL_PKG])) {
>  			drm_warn(&hwmon->xe->drm,
>  				 "Failed to read power limits, check card firmware !\n");
>  		} else {
> @@ -1144,6 +1173,12 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
>  			xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR,
>  							 CHANNEL_PKG,
>  							 hwmon->pl1_on_boot[CHANNEL_PKG]);
> +			xe_hwmon_pcode_write_power_limit(hwmon, PL2_HWMON_ATTR,
> +							 CHANNEL_CARD,
> +							 hwmon->pl2_on_boot[CHANNEL_CARD]);
> +			xe_hwmon_pcode_write_power_limit(hwmon, PL2_HWMON_ATTR,
> +							 CHANNEL_PKG,
> +							 hwmon->pl2_on_boot[CHANNEL_PKG]);
>  			hwmon->scl_shift_power = PWR_UNIT;
>  			hwmon->scl_shift_energy = ENERGY_UNIT;
>  			hwmon->scl_shift_time = TIME_UNIT;
> -- 
> 2.25.1
> 


More information about the Intel-xe mailing list