[PATCH v3 5/5] drm/xe/hwmon: Read energy status from PMT

Nilawar, Badal badal.nilawar at intel.com
Tue May 6 07:26:35 UTC 2025


On 01-05-2025 02:06, Karthik Poosa wrote:
> Read card and package energy status using pmt apis instead
> of xe_mmio for supported platforms.
> Enable Battlemage to read energy from PMT.
>
> Signed-off-by: Karthik Poosa <karthik.poosa at intel.com>
> ---
>   drivers/gpu/drm/xe/regs/xe_pcode_regs.h |  3 +-
>   drivers/gpu/drm/xe/regs/xe_pmt.h        |  2 ++
>   drivers/gpu/drm/xe/xe_device_types.h    |  2 ++
>   drivers/gpu/drm/xe/xe_hwmon.c           | 46 +++++++++++++++++++------
>   drivers/gpu/drm/xe/xe_pci.c             |  3 ++
>   drivers/gpu/drm/xe/xe_vsec.c            |  4 +--
>   drivers/gpu/drm/xe/xe_vsec.h            |  4 +++
>   7 files changed, 49 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
> index c7d5d782e3f9..487a01455f2a 100644
> --- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
> @@ -20,14 +20,13 @@
>   
>   #define BMG_PACKAGE_POWER_SKU			XE_REG(0x138098)
>   #define BMG_PACKAGE_POWER_SKU_UNIT		XE_REG(0x1380dc)
> -#define BMG_PACKAGE_ENERGY_STATUS		XE_REG(0x138120)
>   #define BMG_FAN_1_SPEED				XE_REG(0x138140)
>   #define BMG_FAN_2_SPEED				XE_REG(0x138170)
>   #define BMG_FAN_3_SPEED				XE_REG(0x1381a0)
>   #define BMG_VRAM_TEMPERATURE			XE_REG(0x1382c0)
>   #define BMG_PACKAGE_TEMPERATURE			XE_REG(0x138434)
>   #define BMG_PACKAGE_RAPL_LIMIT			XE_REG(0x138440)
> -#define BMG_PLATFORM_ENERGY_STATUS		XE_REG(0x138458)
>   #define BMG_PLATFORM_POWER_LIMIT		XE_REG(0x138460)
> +#define BMG_ENERGY_STATUS_PMT_OFFSET		(0x30)
>   
>   #endif /* _XE_PCODE_REGS_H_ */
> diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h
> index f45abcd96ba8..ad91f6fef6bb 100644
> --- a/drivers/gpu/drm/xe/regs/xe_pmt.h
> +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h
> @@ -10,6 +10,8 @@
>   #define BMG_PMT_BASE_OFFSET		0xDB000
>   #define BMG_DISCOVERY_OFFSET		(SOC_BASE + BMG_PMT_BASE_OFFSET)
>   
> +#define PUNIT_TELEMETRY_GUID		XE_REG(BMG_DISCOVERY_OFFSET + 0x4)
> +
>   #define BMG_TELEMETRY_BASE_OFFSET	0xE0000
>   #define BMG_TELEMETRY_OFFSET		(SOC_BASE + BMG_TELEMETRY_BASE_OFFSET)
>   
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 2c8321eb41b9..cfac809dd7aa 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -330,6 +330,8 @@ struct xe_device {
>   		 * pcode mailbox commands.
>   		 */
>   		u8 has_mbx_power_limits:1;
> +		/** @info.has_pxp: Device has energy status in PMT */
> +		u8 has_pmt_energy:1;
This flag is not used anywhere; better to drop it.
>   		/** @info.has_pxp: Device has PXP support */
>   		u8 has_pxp:1;
>   		/** @info.has_range_tlb_invalidation: Has range based TLB invalidations */
> diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
> index ada8c15829a3..349eb27d977b 100644
> --- a/drivers/gpu/drm/xe/xe_hwmon.c
> +++ b/drivers/gpu/drm/xe/xe_hwmon.c
> @@ -20,6 +20,8 @@
>   #include "xe_pcode_api.h"
>   #include "xe_sriov.h"
>   #include "xe_pm.h"
> +#include "xe_vsec.h"
> +#include "regs/xe_pmt.h"
>   
>   enum xe_hwmon_reg {
>   	REG_TEMP,
> @@ -263,12 +265,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
>   			return GT_PERF_STATUS;
>   		break;
>   	case REG_PKG_ENERGY_STATUS:
> -		if (xe->info.platform == XE_BATTLEMAGE) {
> -			if (channel == CHANNEL_PKG)
> -				return BMG_PACKAGE_ENERGY_STATUS;
> -			else
> -				return BMG_PLATFORM_ENERGY_STATUS;
> -		} else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) {
> +		if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) {
>   			return PVC_GT0_PLATFORM_ENERGY_STATUS;
>   		} else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) {
>   			return PCU_CR_PACKAGE_ENERGY_STATUS;
> @@ -449,9 +446,29 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy)
>   	struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
>   	struct xe_hwmon_energy_info *ei = &hwmon->ei[channel];
>   	u64 reg_val;
> +	int ret = 0;
>   
> -	reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
> -							channel));
> +	/* Energy is supported only for card and pkg */
> +	if (channel > CHANNEL_PKG) {
> +		*energy = 0;
> +		return;
> +	}
> +
> +	if (hwmon->xe->info.platform == XE_BATTLEMAGE) {
> +		ret = xe_pmt_telem_read(to_pci_dev(hwmon->xe->drm.dev),
> +					xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID),
> +					&reg_val, BMG_ENERGY_STATUS_PMT_OFFSET,	sizeof(reg_val));
> +		drm_dbg(&hwmon->xe->drm, "energy from pmt, ch %d read from mbx 0x%016llx, ret %d\n",
> +			channel, reg_val, ret);
> +
> +		if (channel == CHANNEL_PKG)
> +			reg_val &= 0xFFFFFFFF;
> +		else
> +			reg_val = reg_val >> 32;

Could you please add a GENMASK for this?

Regards,
Badal

> +	} else {
> +		reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
> +								channel));
> +	}
>   
>   	if (reg_val >= ei->reg_val_prev)
>   		ei->accum_energy += reg_val - ei->reg_val_prev;
> @@ -927,11 +944,18 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
>   static umode_t
>   xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
>   {
> +	long energy = 0;
> +
>   	switch (attr) {
>   	case hwmon_energy_input:
>   	case hwmon_energy_label:
> -		return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
> -				       channel)) ? 0444 : 0;
> +		if (hwmon->xe->info.platform == XE_BATTLEMAGE) {
> +			xe_hwmon_energy_get(hwmon, channel, &energy);
> +			return energy ? 0444 : 0;
> +		} else {
> +			return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
> +					       channel)) ? 0444 : 0;
> +		}
>   	default:
>   		return 0;
>   	}
> @@ -1278,4 +1302,4 @@ int xe_hwmon_register(struct xe_device *xe)
>   
>   	return 0;
>   }
> -
> +MODULE_IMPORT_NS("INTEL_PMT_TELEMETRY");
> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> index 95a2a458e8f7..f49452e4ef08 100644
> --- a/drivers/gpu/drm/xe/xe_pci.c
> +++ b/drivers/gpu/drm/xe/xe_pci.c
> @@ -67,6 +67,7 @@ struct xe_device_desc {
>   	u8 has_heci_cscfi:1;
>   	u8 has_llc:1;
>   	u8 has_mbx_power_limits:1;
> +	u8 has_pmt_energy:1;
>   	u8 has_pxp:1;
>   	u8 has_sriov:1;
>   	u8 needs_scratch:1;
> @@ -345,6 +346,7 @@ static const struct xe_device_desc bmg_desc = {
>   	.has_display = true,
>   	.has_fan_control = true,
>   	.has_mbx_power_limits = true,
> +	.has_pmt_energy = true,
>   	.has_heci_cscfi = 1,
>   	.needs_scratch = true,
>   };
> @@ -588,6 +590,7 @@ static int xe_info_init_early(struct xe_device *xe,
>   	xe->info.is_dgfx = desc->is_dgfx;
>   	xe->info.has_fan_control = desc->has_fan_control;
>   	xe->info.has_mbx_power_limits = desc->has_mbx_power_limits;
> +	xe->info.has_pmt_energy = desc->has_pmt_energy;
>   	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
>   	xe->info.has_heci_cscfi = desc->has_heci_cscfi;
>   	xe->info.has_llc = desc->has_llc;
> diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c
> index b378848d3b7b..3e573b0b7ebd 100644
> --- a/drivers/gpu/drm/xe/xe_vsec.c
> +++ b/drivers/gpu/drm/xe/xe_vsec.c
> @@ -149,8 +149,8 @@ static int xe_guid_decode(u32 guid, int *index, u32 *offset)
>   	return 0;
>   }
>   
> -static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset,
> -			     u32 count)
> +int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset,
> +		      u32 count)
>   {
>   	struct xe_device *xe = pdev_to_xe_device(pdev);
>   	void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET;
> diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h
> index 5777c53faec2..6d0db46d4700 100644
> --- a/drivers/gpu/drm/xe/xe_vsec.h
> +++ b/drivers/gpu/drm/xe/xe_vsec.h
> @@ -4,8 +4,12 @@
>   #ifndef _XE_VSEC_H_
>   #define _XE_VSEC_H_
>   
> +#include "linux/types.h"
> +#include "linux/pci.h"
> +
>   struct xe_device;
>   
>   void xe_vsec_init(struct xe_device *xe);
> +int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, u32 count);
>   
>   #endif


More information about the Intel-xe mailing list