[PATCH 3/3] drm/xe/guc: Cache DSS info when creating capture register list

John Harrison john.c.harrison at intel.com
Thu Apr 17 20:52:06 UTC 2025


On 4/17/2025 1:31 PM, Matt Roper wrote:
> On Thu, Apr 17, 2025 at 12:52:14PM -0700, John.C.Harrison at Intel.com wrote:
>> From: John Harrison <John.C.Harrison at Intel.com>
>>
>> Calculating the DSS id (index of a steered register) currently
>> requires reading state from the hwconfig table and that currently
>> requires dynamically allocating memory. The GuC based register capture
>> (for dev core dumps) includes this index as part of the register name
>> in the dump. However, it was calculating said index at the time of the
>> dump for every dump. That is wasteful. It also breaks anyone trying to
>> do the dump at a time when memory allocations are not allowed.
>>
>> So rather than calculating on every print, just calculate at start of
>> day when creating the register list in the first place.
>>
>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_guc_capture.c       | 92 +++++++++++------------
>>   drivers/gpu/drm/xe/xe_guc_capture_types.h |  2 +
>>   2 files changed, 46 insertions(+), 48 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
>> index f4b08136e472..139e4e3bcc32 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_capture.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_capture.c
>> @@ -105,49 +105,49 @@ struct __guc_capture_parsed_output {
>>    *     3. Incorrect order will trigger XE_WARN.
>>    */
>>   #define COMMON_XELP_BASE_GLOBAL \
>> -	{ FORCEWAKE_GT,			REG_32BIT,	0,	0,	"FORCEWAKE_GT"}
>> +	{ FORCEWAKE_GT,			REG_32BIT,	0,	0,	0,	"FORCEWAKE_GT"}
>>   
>>   #define COMMON_BASE_ENGINE_INSTANCE \
>> -	{ RING_HWSTAM(0),		REG_32BIT,	0,	0,	"HWSTAM"}, \
>> -	{ RING_HWS_PGA(0),		REG_32BIT,	0,	0,	"RING_HWS_PGA"}, \
>> -	{ RING_HEAD(0),			REG_32BIT,	0,	0,	"RING_HEAD"}, \
>> -	{ RING_TAIL(0),			REG_32BIT,	0,	0,	"RING_TAIL"}, \
>> -	{ RING_CTL(0),			REG_32BIT,	0,	0,	"RING_CTL"}, \
>> -	{ RING_MI_MODE(0),		REG_32BIT,	0,	0,	"RING_MI_MODE"}, \
>> -	{ RING_MODE(0),			REG_32BIT,	0,	0,	"RING_MODE"}, \
>> -	{ RING_ESR(0),			REG_32BIT,	0,	0,	"RING_ESR"}, \
>> -	{ RING_EMR(0),			REG_32BIT,	0,	0,	"RING_EMR"}, \
>> -	{ RING_EIR(0),			REG_32BIT,	0,	0,	"RING_EIR"}, \
>> -	{ RING_IMR(0),			REG_32BIT,	0,	0,	"RING_IMR"}, \
>> -	{ RING_IPEHR(0),		REG_32BIT,	0,	0,	"IPEHR"}, \
>> -	{ RING_INSTDONE(0),		REG_32BIT,	0,	0,	"RING_INSTDONE"}, \
>> -	{ INDIRECT_RING_STATE(0),	REG_32BIT,	0,	0,	"INDIRECT_RING_STATE"}, \
>> -	{ RING_ACTHD(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
>> -	{ RING_ACTHD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"ACTHD"}, \
>> -	{ RING_BBADDR(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
>> -	{ RING_BBADDR_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"RING_BBADDR"}, \
>> -	{ RING_START(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
>> -	{ RING_START_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"RING_START"}, \
>> -	{ RING_DMA_FADD(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
>> -	{ RING_DMA_FADD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"RING_DMA_FADD"}, \
>> -	{ RING_EXECLIST_STATUS_LO(0),	REG_64BIT_LOW_DW, 0,	0,	NULL}, \
>> -	{ RING_EXECLIST_STATUS_HI(0),	REG_64BIT_HI_DW, 0,	0,	"RING_EXECLIST_STATUS"}, \
>> -	{ RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0,	0,	NULL}, \
>> -	{ RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0,	0,	"RING_EXECLIST_SQ_CONTENTS"}
>> +	{ RING_HWSTAM(0),		REG_32BIT,	0,	0,	0,	"HWSTAM"}, \
>> +	{ RING_HWS_PGA(0),		REG_32BIT,	0,	0,	0,	"RING_HWS_PGA"}, \
>> +	{ RING_HEAD(0),			REG_32BIT,	0,	0,	0,	"RING_HEAD"}, \
>> +	{ RING_TAIL(0),			REG_32BIT,	0,	0,	0,	"RING_TAIL"}, \
>> +	{ RING_CTL(0),			REG_32BIT,	0,	0,	0,	"RING_CTL"}, \
>> +	{ RING_MI_MODE(0),		REG_32BIT,	0,	0,	0,	"RING_MI_MODE"}, \
>> +	{ RING_MODE(0),			REG_32BIT,	0,	0,	0,	"RING_MODE"}, \
>> +	{ RING_ESR(0),			REG_32BIT,	0,	0,	0,	"RING_ESR"}, \
>> +	{ RING_EMR(0),			REG_32BIT,	0,	0,	0,	"RING_EMR"}, \
>> +	{ RING_EIR(0),			REG_32BIT,	0,	0,	0,	"RING_EIR"}, \
>> +	{ RING_IMR(0),			REG_32BIT,	0,	0,	0,	"RING_IMR"}, \
>> +	{ RING_IPEHR(0),		REG_32BIT,	0,	0,	0,	"IPEHR"}, \
>> +	{ RING_INSTDONE(0),		REG_32BIT,	0,	0,	0,	"RING_INSTDONE"}, \
>> +	{ INDIRECT_RING_STATE(0),	REG_32BIT,	0,	0,	0,	"INDIRECT_RING_STATE"}, \
>> +	{ RING_ACTHD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
>> +	{ RING_ACTHD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"ACTHD"}, \
>> +	{ RING_BBADDR(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
>> +	{ RING_BBADDR_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_BBADDR"}, \
>> +	{ RING_START(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
>> +	{ RING_START_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_START"}, \
>> +	{ RING_DMA_FADD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
>> +	{ RING_DMA_FADD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_DMA_FADD"}, \
>> +	{ RING_EXECLIST_STATUS_LO(0),	REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
>> +	{ RING_EXECLIST_STATUS_HI(0),	REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_STATUS"}, \
>> +	{ RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
>> +	{ RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_SQ_CONTENTS"}
>>   
>>   #define COMMON_XELP_RC_CLASS \
>> -	{ RCU_MODE,			REG_32BIT,	0,	0,	"RCU_MODE"}
>> +	{ RCU_MODE,			REG_32BIT,	0,	0,	0,	"RCU_MODE"}
>>   
>>   #define COMMON_XELP_RC_CLASS_INSTDONE \
>> -	{ SC_INSTDONE,			REG_32BIT,	0,	0,	"SC_INSTDONE"}, \
>> -	{ SC_INSTDONE_EXTRA,		REG_32BIT,	0,	0,	"SC_INSTDONE_EXTRA"}, \
>> -	{ SC_INSTDONE_EXTRA2,		REG_32BIT,	0,	0,	"SC_INSTDONE_EXTRA2"}
>> +	{ SC_INSTDONE,			REG_32BIT,	0,	0,	0,	"SC_INSTDONE"}, \
>> +	{ SC_INSTDONE_EXTRA,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA"}, \
>> +	{ SC_INSTDONE_EXTRA2,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA2"}
>>   
>>   #define XELP_VEC_CLASS_REGS \
>> -	{ SFC_DONE(0),			0,	0,	0,	"SFC_DONE[0]"}, \
>> -	{ SFC_DONE(1),			0,	0,	0,	"SFC_DONE[1]"}, \
>> -	{ SFC_DONE(2),			0,	0,	0,	"SFC_DONE[2]"}, \
>> -	{ SFC_DONE(3),			0,	0,	0,	"SFC_DONE[3]"}
>> +	{ SFC_DONE(0),			0,	0,	0,	0,	"SFC_DONE[0]"}, \
>> +	{ SFC_DONE(1),			0,	0,	0,	0,	"SFC_DONE[1]"}, \
>> +	{ SFC_DONE(2),			0,	0,	0,	0,	"SFC_DONE[2]"}, \
>> +	{ SFC_DONE(3),			0,	0,	0,	0,	"SFC_DONE[3]"}
>>   
>>   /* XE_LP Global */
>>   static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
>> @@ -350,7 +350,7 @@ static const struct __ext_steer_reg xehpg_extregs[] = {
>>   	{"INSTDONE_GEOM_SVGUNIT",	XEHPG_INSTDONE_GEOM_SVGUNIT}
>>   };
>>   
>> -static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
>> +static void __fill_ext_reg(struct xe_gt *gt, struct __guc_mmio_reg_descr *ext,
>>   			   const struct __ext_steer_reg *extlist,
>>   			   int slice_id, int subslice_id)
>>   {
>> @@ -361,6 +361,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
>>   	ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
>>   	ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
>>   	ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
>> +	ext->dss_id = xe_gt_mcr_steering_info_to_dss_id(gt, slice_id, subslice_id);
> This seems a bit roundabout... for_each_dss_steering() converts the DSS
> ID into group/instance values for steering.  And then we're converting
> those steering values back into the original ID (which we already know)
> here.  Can't we just pass 'iter' from the for_each_dss_steering() loop
> as a parameter and avoid the extra hwconfig lookups?

It's not exactly obvious what the for_each_dss_steering does, especially 
with the iteration just being called 'iter' not 'dss'! But yeah, I guess 
it is doing 'iter' -> 'group/instance' -> 'dss' == 'iter'. I'll re-spin 
with the iterator renamed to dss_id and pass that in directly.

Thanks.

>
>
> Matt
>
>>   	ext->regname = extlist->name;
>>   }
>>   
>> @@ -456,13 +457,13 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
>>   	extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
>>   	for_each_dss_steering(iter, gt, slice, subslice) {
>>   		for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
>> -			__fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
>> +			__fill_ext_reg(gt, extarray, &xe_extregs[i], slice, subslice);
>>   			++extarray;
>>   		}
>>   
>>   		if (has_xehpg_extregs)
>>   			for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
>> -				__fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
>> +				__fill_ext_reg(gt, extarray, &xehpg_extregs[i], slice, subslice);
>>   				++extarray;
>>   			}
>>   	}
>> @@ -1747,17 +1748,12 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_
>>   			 */
>>   			XE_WARN_ON(low32_ready);
>>   
>> -			if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags)) {
>> -				int dss, group, instance;
>> -
>> -				group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
>> -				instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
>> -				dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);
>> -
>> -				drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
>> -			} else {
>> +			if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags))
>> +				drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname,
>> +					   reg_desc->dss_id, value);
>> +			else
>>   				drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
>> -			}
>> +
>>   			break;
>>   		}
>>   	}
>> diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h
>> index ca2d390ccbee..6cb439115597 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h
>> @@ -39,6 +39,8 @@ struct __guc_mmio_reg_descr {
>>   	u32 flags;
>>   	/** @mask: The mask to apply */
>>   	u32 mask;
>> +	/** @dss_id: Cached index for steered registers */
>> +	u32 dss_id;
>>   	/** @regname: Name of the register */
>>   	const char *regname;
>>   };
>> -- 
>> 2.49.0
>>
>>



More information about the Intel-xe mailing list