[PATCH v3 11/11] platform/x86/intel/pmt: support BMG crashlog

Ruhl, Michael J michael.j.ruhl at intel.com
Mon Jun 9 14:45:54 UTC 2025



>-----Original Message-----
>From: David E. Box <david.e.box at linux.intel.com>
>Sent: Friday, June 6, 2025 7:21 PM
>To: Ruhl, Michael J <michael.j.ruhl at intel.com>; platform-driver-
>x86 at vger.kernel.org; intel-xe at lists.freedesktop.org; hdegoede at redhat.com;
>ilpo.jarvinen at linux.intel.com; De Marchi, Lucas <lucas.demarchi at intel.com>;
>Vivi, Rodrigo <rodrigo.vivi at intel.com>; thomas.hellstrom at linux.intel.com;
>airlied at gmail.com; simona at ffwll.ch
>Subject: Re: [PATCH v3 11/11] platform/x86/intel/pmt: support BMG crashlog
>
>On Thu, 2025-06-05 at 14:44 -0400, Michael J. Ruhl wrote:
>> The Battlemage GPU has the type 1 version 2 crashlog feature.
>>
>> Update the crashlog driver to support this crashlog version.
>>
>> Signed-off-by: Michael J. Ruhl <michael.j.ruhl at intel.com>
>> ---
>>  drivers/platform/x86/intel/pmt/crashlog.c | 283 ++++++++++++++++++++-
>-
>>  1 file changed, 264 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/platform/x86/intel/pmt/crashlog.c
>> b/drivers/platform/x86/intel/pmt/crashlog.c
>> index fe6563721886..0fb60036a9bb 100644
>> --- a/drivers/platform/x86/intel/pmt/crashlog.c
>> +++ b/drivers/platform/x86/intel/pmt/crashlog.c
>> @@ -51,20 +51,53 @@
>>  #define TYPE1_VER0_COMPLETE		BIT(31)
>>  #define TYPE1_VER0_TRIGGER_MASK		GENMASK(31, 28)
>>
>> +/*
>> + * Type 1 Version 2
>> + * status and control are two different registers
>> + */
>> +#define TYPE1_VER2_STATUS_OFFSET	0x00
>> +#define TYPE1_VER2_CONTROL_OFFSET	0x14
>> +
>> +/* status register */
>> +#define TYPE1_VER2_CLEAR_SUPPORT	BIT(20)
>> +#define TYPE1_VER2_REARMED		BIT(25)
>> +#define TYPE1_VER2_ERROR		BIT(26)
>> +#define TYPE1_VER2_CONSUMED		BIT(27)
>> +#define TYPE1_VER2_DISABLED		BIT(28)
>> +#define TYPE1_VER2_CLEARED		BIT(29)
>> +#define TYPE1_VER2_IN_PROGRESS		BIT(30)
>> +#define TYPE1_VER2_COMPLETE		BIT(31)
>> +
>> +/* control register */
>> +#define TYPE1_VER2_CONSUME		BIT(25)
>> +#define TYPE1_VER2_REARM		BIT(28)
>> +#define TYPE1_VER2_EXECUTE		BIT(29)
>> +#define TYPE1_VER2_CLEAR		BIT(30)
>> +#define TYPE1_VER2_DISABLE		BIT(31)
>> +#define TYPE1_VER2_TRIGGER_MASK
>	(TYPE1_VER2_CONSUME |
>> TYPE1_VER2_EXECUTE | \
>> +					 TYPE1_VER2_CLEAR |
>> TYPE1_VER2_DISABLE)
>> +
>>  /* After offset, order alphabetically, not bit ordered */
>>  struct crashlog_status {
>>  	u32 offset;
>> -	u32 clear;
>> +	u32 clear_supported;
>> +	u32 cleared;
>>  	u32 complete;
>> -	u32 disable;
>> +	u32 consumed;
>> +	u32 disabled;
>> +	u32 error;
>> +	u32 in_progress;
>> +	u32 rearmed;
>>  };
>
>Since this struct was just introduced, better to use the intended names in the
>original patch to avoid unnecessary churn.
>
>>
>>  struct crashlog_control {
>>  	u32 offset;
>>  	u32 trigger_mask;
>>  	u32 clear;
>> +	u32 consume;
>>  	u32 disable;
>>  	u32 manual;
>> +	u32 rearm;
>>  };
>>
>>  struct crashlog_info {
>> @@ -74,9 +107,9 @@ struct crashlog_info {
>>
>>  const struct crashlog_info crashlog_type1_ver0 = {
>>  	.status.offset = TYPE1_VER0_STATUS_OFFSET,
>> -	.status.clear = TYPE1_VER0_CLEAR,
>> +	.status.cleared = TYPE1_VER0_CLEAR,
>>  	.status.complete = TYPE1_VER0_COMPLETE,
>> -	.status.disable = TYPE1_VER0_DISABLE,
>> +	.status.disabled = TYPE1_VER0_DISABLE,
>>
>>  	.control.offset = TYPE1_VER0_CONTROL_OFFSET,
>>  	.control.trigger_mask = TYPE1_VER0_TRIGGER_MASK,
>> @@ -85,6 +118,26 @@ const struct crashlog_info crashlog_type1_ver0 = {
>>  	.control.manual = TYPE1_VER0_EXECUTE,
>>  };
>>
>> +const struct crashlog_info crashlog_type1_ver2 = {
>> +	.status.offset = TYPE1_VER2_STATUS_OFFSET,
>> +	.status.clear_supported = TYPE1_VER2_CLEAR_SUPPORT,
>> +	.status.cleared = TYPE1_VER2_CLEARED,
>> +	.status.complete = TYPE1_VER2_COMPLETE,
>> +	.status.consumed = TYPE1_VER2_CONSUMED,
>> +	.status.disabled = TYPE1_VER2_DISABLED,
>> +	.status.error = TYPE1_VER2_ERROR,
>> +	.status.in_progress = TYPE1_VER2_IN_PROGRESS,
>> +	.status.rearmed = TYPE1_VER2_REARMED,
>> +
>> +	.control.offset = TYPE1_VER2_CONTROL_OFFSET,
>> +	.control.trigger_mask = TYPE1_VER2_TRIGGER_MASK,
>> +	.control.clear = TYPE1_VER2_CLEAR,
>> +	.control.consume = TYPE1_VER2_CONSUME,
>> +	.control.disable = TYPE1_VER2_DISABLE,
>> +	.control.manual = TYPE1_VER2_EXECUTE,
>> +	.control.rearm = TYPE1_VER2_REARM,
>> +};
>> +
>>  struct crashlog_entry {
>>  	/* entry must be first member of struct */
>>  	struct intel_pmt_entry		entry;
>> @@ -99,30 +152,35 @@ struct pmt_crashlog_priv {
>>
>>  /*
>>   * This is the generic access to a PMT struct. So the use of
>> - * struct crashlog_entry
>> - * doesn't "make sense" here.
>> + *   struct crashlog_entry
>> + * doesn't "make sense" here, i.e. use:
>> + *   struct intel_pmt_entry
>>   */
>
>I don't think this comment is needed. Still, this could have been done in the
>patch that added it.

I will remove.

>> -static bool pmt_crashlog_supported(struct intel_pmt_entry *entry)
>> +static bool pmt_crashlog_supported(struct intel_pmt_entry *entry, u32
>> *crash_type, u32 *version)
>>  {
>>  	u32 discovery_header = readl(entry->disc_table + CONTROL_OFFSET);
>> -	u32 crash_type, version;
>>
>> -	crash_type = GET_TYPE(discovery_header);
>> -	version = GET_VERSION(discovery_header);
>> +	*crash_type = GET_TYPE(discovery_header);
>> +	*version = GET_VERSION(discovery_header);
>>
>>  	/*
>> -	 * Currently we only recognize OOBMSM version 0 devices.
>> -	 * We can ignore all other crashlog devices in the system.
>> +	 * Currently we only recognize OOBMSM (type 1) and version 0 or 2
>> +	 * devices.
>> +	 *
>> +	 * Ignore all other crashlog devices in the system.
>>  	 */
>> -	return crash_type == CRASH_TYPE_OOBMSM && version == 0;
>> +	if (*crash_type == CRASH_TYPE_OOBMSM && (*version == 0 ||
>*version ==
>> 2))
>> +		return true;
>> +
>> +	return false;
>>  }
>>
>>  /*
>>   * I/O
>>   */
>>
>> -#define SET     true
>> -#define CLEAR   false
>> +#define SET	true
>> +#define CLEAR	false
>
>Ditto

Not sure why this happened...I will get this cleaned up.

>>
>>  static void read_modify_write(struct crashlog_entry *crashlog, u32 bit, bool
>> set)
>>  {
>> @@ -157,7 +215,7 @@ static bool pmt_crashlog_complete(struct
>crashlog_entry
>> *crashlog)
>>  static bool pmt_crashlog_disabled(struct crashlog_entry *crashlog)
>>  {
>>  	/* return current value of the crashlog disabled flag */
>> -	return read_check(crashlog, crashlog->info->status.disable);
>> +	return read_check(crashlog, crashlog->info->status.disabled);
>>  }
>>
>>  static void pmt_crashlog_set_disable(struct crashlog_entry *crashlog, bool
>> disable)
>> @@ -175,9 +233,119 @@ static void pmt_crashlog_set_execute(struct
>> crashlog_entry *crashlog)
>>  	read_modify_write(crashlog, crashlog->info->control.manual, SET);
>>  }
>>
>> +/* version 2 support */
>> +static bool pmt_crashlog_cleared(struct crashlog_entry *crashlog)
>> +{
>> +	/* return current value of the crashlog cleared flag */
>> +	return read_check(crashlog, crashlog->info->status.cleared);
>> +}
>> +
>> +static bool pmt_crashlog_consumed(struct crashlog_entry *crashlog)
>> +{
>> +	/* return current value of the crashlog consumedflag */
>> +	return read_check(crashlog, crashlog->info->status.consumed);
>> +}
>> +
>> +static void pmt_crashlog_set_consumed(struct crashlog_entry *crashlog)
>> +{
>> +	read_modify_write(crashlog, crashlog->info->control.consume, SET);
>> +}
>> +
>> +static bool pmt_crashlog_error(struct crashlog_entry *crashlog)
>> +{
>> +	/* return current value of the crashlog error flag */
>> +	return read_check(crashlog, crashlog->info->status.error);
>> +}
>> +
>> +static bool pmt_crashlog_rearm(struct crashlog_entry *crashlog)
>> +{
>> +	/* return current value of the crashlog reamed flag */
>> +	return read_check(crashlog, crashlog->info->status.rearmed);
>> +}
>> +
>> +static void pmt_crashlog_set_rearm(struct crashlog_entry *crashlog)
>> +{
>> +	read_modify_write(crashlog, crashlog->info->control.rearm, SET);
>> +}
>> +
>>  /*
>>   * sysfs
>>   */
>> +static ssize_t
>> +clear_show(struct device *dev, struct device_attribute *attr, char *buf)
>> +{
>> +	struct crashlog_entry *crashlog = dev_get_drvdata(dev);
>> +	bool cleared = pmt_crashlog_cleared(crashlog);
>> +
>> +	return sysfs_emit(buf, "%d\n", cleared);
>> +}
>> +
>> +static ssize_t
>> +clear_store(struct device *dev, struct device_attribute *attr,
>> +	    const char *buf, size_t count)
>> +{
>> +	struct crashlog_entry *crashlog;
>> +	bool clear;
>> +	int result;
>> +
>> +	crashlog = dev_get_drvdata(dev);
>> +
>> +	result = kstrtobool(buf, &clear);
>> +	if (result)
>> +		return result;
>> +
>> +	/* set bit only */
>> +	if (!clear)
>> +		return -EINVAL;
>> +
>> +	guard(mutex)(&crashlog->control_mutex);
>> +
>> +	pmt_crashlog_set_clear(crashlog);
>> +
>> +	return count;
>> +}
>> +static DEVICE_ATTR_RW(clear);
>> +
>> +static ssize_t
>> +consumed_show(struct device *dev, struct device_attribute *attr, char *buf)
>> +{
>> +	struct crashlog_entry *crashlog = dev_get_drvdata(dev);
>> +	bool consumed = pmt_crashlog_consumed(crashlog);
>> +
>> +	return sysfs_emit(buf, "%d\n", consumed);
>> +}
>> +
>> +static ssize_t consumed_store(struct device *dev, struct device_attribute
>> *attr,
>> +			      const char *buf, size_t count)
>> +{
>> +	struct crashlog_entry *crashlog;
>> +	bool consumed;
>> +	int result;
>> +
>> +	crashlog = dev_get_drvdata(dev);
>> +
>> +	result = kstrtobool(buf, &consumed);
>> +	if (result)
>> +		return result;
>> +
>> +	/* set bit only */
>> +	if (!consumed)
>> +		return -EINVAL;
>> +
>> +	guard(mutex)(&crashlog->control_mutex);
>> +
>> +	if (pmt_crashlog_disabled(crashlog))
>> +		return -EBUSY;
>> +
>> +	if (!pmt_crashlog_complete(crashlog))
>> +		return -EEXIST;
>> +
>> +	pmt_crashlog_set_consumed(crashlog);
>> +
>> +	return count;
>> +}
>> +static DEVICE_ATTR_RW(consumed);
>> +
>>  static ssize_t
>>  enable_show(struct device *dev, struct device_attribute *attr, char *buf)
>>  {
>> @@ -209,6 +377,50 @@ enable_store(struct device *dev, struct
>device_attribute
>> *attr,
>>  }
>>  static DEVICE_ATTR_RW(enable);
>>
>> +static ssize_t
>> +error_show(struct device *dev, struct device_attribute *attr, char *buf)
>> +{
>> +	struct crashlog_entry *crashlog = dev_get_drvdata(dev);
>> +	bool error = pmt_crashlog_error(crashlog);
>> +
>> +	return sysfs_emit(buf, "%d\n", error);
>> +}
>> +static DEVICE_ATTR_RO(error);
>> +
>> +static ssize_t
>> +rearm_show(struct device *dev, struct device_attribute *attr, char *buf)
>> +{
>> +	struct crashlog_entry *crashlog = dev_get_drvdata(dev);
>> +	int rearmed = pmt_crashlog_rearm(crashlog);
>> +
>> +	return sysfs_emit(buf, "%d\n", rearmed);
>> +}
>> +
>> +static ssize_t rearm_store(struct device *dev, struct device_attribute *attr,
>> +			   const char *buf, size_t count)
>> +{
>> +	struct crashlog_entry *crashlog;
>> +	bool rearm;
>> +	int result;
>> +
>> +	crashlog = dev_get_drvdata(dev);
>> +
>> +	result = kstrtobool(buf, &rearm);
>> +	if (result)
>> +		return result;
>> +
>> +	/* set only */
>> +	if (!rearm)
>> +		return -EINVAL;
>> +
>> +	guard(mutex)(&crashlog->control_mutex);
>> +
>> +	pmt_crashlog_set_rearm(crashlog);
>> +
>> +	return count;
>> +}
>> +static DEVICE_ATTR_RW(rearm);
>> +
>>  static ssize_t
>>  trigger_show(struct device *dev, struct device_attribute *attr, char *buf)
>>  {
>> @@ -263,24 +475,57 @@ static struct attribute *pmt_crashlog_attrs[] = {
>>  	NULL
>>  };
>>
>
>For what follows ...
>
>>
>> +static struct attribute *pmt_crashlog_ver2_attrs[] = {
>> +	&dev_attr_clear.attr,
>> +	&dev_attr_consumed.attr,
>> +	&dev_attr_enable.attr,
>> +	&dev_attr_error.attr,
>> +	&dev_attr_rearm.attr,
>> +	&dev_attr_trigger.attr,
>> +	NULL
>> +};
>> +
>>  static const struct attribute_group pmt_crashlog_group = {
>>  	.attrs	= pmt_crashlog_attrs,
>>  };
>>
>> +static const struct attribute_group pmt_crashlog_ver2_group = {
>> +	.attrs = pmt_crashlog_ver2_attrs,
>> +};
>
>... I would have the group and attributes both reflect type and version in the
>name. Update pmt_crashlog_group as well.

Makes sense.  I will update.

Thank you for your comments!

Mike

>David
>
>> +
>> +static const struct crashlog_info *select_crashlog_info(u32 type, u32
>> version)
>> +{
>> +	if (version == 0)
>> +		return &crashlog_type1_ver0;
>> +
>> +	return &crashlog_type1_ver2;
>> +}
>> +
>> +static const struct attribute_group *select_sysfs_grp(u32 type, u32 version)
>> +{
>> +	if (version == 0)
>> +		return &pmt_crashlog_group;
>> +
>> +	return &pmt_crashlog_ver2_group;
>> +}
>> +
>>  static int pmt_crashlog_header_decode(struct intel_pmt_entry *entry,
>>  				      struct device *dev)
>>  {
>>  	void __iomem *disc_table = entry->disc_table;
>>  	struct intel_pmt_header *header = &entry->header;
>>  	struct crashlog_entry *crashlog;
>> +	u32 version;
>> +	u32 type;
>>
>> -	if (!pmt_crashlog_supported(entry))
>> +	if (!pmt_crashlog_supported(entry, &type, &version))
>>  		return 1;
>>
>>  	/* initialize the crashlog struct */
>>  	crashlog = container_of(entry, struct crashlog_entry, entry);
>>  	mutex_init(&crashlog->control_mutex);
>> -	crashlog->info = &crashlog_type1_ver0;
>> +
>> +	crashlog->info = select_crashlog_info(type, version);
>>
>>  	header->access_type = GET_ACCESS(readl(disc_table));
>>  	header->guid = readl(disc_table + GUID_OFFSET);
>> @@ -289,7 +534,7 @@ static int pmt_crashlog_header_decode(struct
>> intel_pmt_entry *entry,
>>  	/* Size is measured in DWORDS, but accessor returns bytes */
>>  	header->size = GET_SIZE(readl(disc_table + SIZE_OFFSET));
>>
>> -	entry->attr_grp = &pmt_crashlog_group;
>> +	entry->attr_grp = select_sysfs_grp(type, version);
>>
>>  	return 0;
>>  }



More information about the Intel-xe mailing list