[PATCH 3/3] drm/xe/guc: Cache DSS info when creating capture register list

Matt Roper matthew.d.roper at intel.com
Thu Apr 17 20:31:49 UTC 2025


On Thu, Apr 17, 2025 at 12:52:14PM -0700, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
> 
> Calculating the DSS id (index of a steered register) currently
> requires reading state from the hwconfig table and that currently
> requires dynamically allocating memory. The GuC based register capture
> (for dev core dumps) includes this index as part of the register name
> in the dump. However, it was calculating said index at the time of the
> dump for every dump. That is wasteful. It also breaks anyone trying to
> do the dump at a time when memory allocations are not allowed.
> 
> So rather than calculating on every print, just calculate at start of
> day when creating the register list in the first place.
> 
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc_capture.c       | 92 +++++++++++------------
>  drivers/gpu/drm/xe/xe_guc_capture_types.h |  2 +
>  2 files changed, 46 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
> index f4b08136e472..139e4e3bcc32 100644
> --- a/drivers/gpu/drm/xe/xe_guc_capture.c
> +++ b/drivers/gpu/drm/xe/xe_guc_capture.c
> @@ -105,49 +105,49 @@ struct __guc_capture_parsed_output {
>   *     3. Incorrect order will trigger XE_WARN.
>   */
>  #define COMMON_XELP_BASE_GLOBAL \
> -	{ FORCEWAKE_GT,			REG_32BIT,	0,	0,	"FORCEWAKE_GT"}
> +	{ FORCEWAKE_GT,			REG_32BIT,	0,	0,	0,	"FORCEWAKE_GT"}
>  
>  #define COMMON_BASE_ENGINE_INSTANCE \
> -	{ RING_HWSTAM(0),		REG_32BIT,	0,	0,	"HWSTAM"}, \
> -	{ RING_HWS_PGA(0),		REG_32BIT,	0,	0,	"RING_HWS_PGA"}, \
> -	{ RING_HEAD(0),			REG_32BIT,	0,	0,	"RING_HEAD"}, \
> -	{ RING_TAIL(0),			REG_32BIT,	0,	0,	"RING_TAIL"}, \
> -	{ RING_CTL(0),			REG_32BIT,	0,	0,	"RING_CTL"}, \
> -	{ RING_MI_MODE(0),		REG_32BIT,	0,	0,	"RING_MI_MODE"}, \
> -	{ RING_MODE(0),			REG_32BIT,	0,	0,	"RING_MODE"}, \
> -	{ RING_ESR(0),			REG_32BIT,	0,	0,	"RING_ESR"}, \
> -	{ RING_EMR(0),			REG_32BIT,	0,	0,	"RING_EMR"}, \
> -	{ RING_EIR(0),			REG_32BIT,	0,	0,	"RING_EIR"}, \
> -	{ RING_IMR(0),			REG_32BIT,	0,	0,	"RING_IMR"}, \
> -	{ RING_IPEHR(0),		REG_32BIT,	0,	0,	"IPEHR"}, \
> -	{ RING_INSTDONE(0),		REG_32BIT,	0,	0,	"RING_INSTDONE"}, \
> -	{ INDIRECT_RING_STATE(0),	REG_32BIT,	0,	0,	"INDIRECT_RING_STATE"}, \
> -	{ RING_ACTHD(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
> -	{ RING_ACTHD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"ACTHD"}, \
> -	{ RING_BBADDR(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
> -	{ RING_BBADDR_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"RING_BBADDR"}, \
> -	{ RING_START(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
> -	{ RING_START_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"RING_START"}, \
> -	{ RING_DMA_FADD(0),		REG_64BIT_LOW_DW, 0,	0,	NULL}, \
> -	{ RING_DMA_FADD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	"RING_DMA_FADD"}, \
> -	{ RING_EXECLIST_STATUS_LO(0),	REG_64BIT_LOW_DW, 0,	0,	NULL}, \
> -	{ RING_EXECLIST_STATUS_HI(0),	REG_64BIT_HI_DW, 0,	0,	"RING_EXECLIST_STATUS"}, \
> -	{ RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0,	0,	NULL}, \
> -	{ RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0,	0,	"RING_EXECLIST_SQ_CONTENTS"}
> +	{ RING_HWSTAM(0),		REG_32BIT,	0,	0,	0,	"HWSTAM"}, \
> +	{ RING_HWS_PGA(0),		REG_32BIT,	0,	0,	0,	"RING_HWS_PGA"}, \
> +	{ RING_HEAD(0),			REG_32BIT,	0,	0,	0,	"RING_HEAD"}, \
> +	{ RING_TAIL(0),			REG_32BIT,	0,	0,	0,	"RING_TAIL"}, \
> +	{ RING_CTL(0),			REG_32BIT,	0,	0,	0,	"RING_CTL"}, \
> +	{ RING_MI_MODE(0),		REG_32BIT,	0,	0,	0,	"RING_MI_MODE"}, \
> +	{ RING_MODE(0),			REG_32BIT,	0,	0,	0,	"RING_MODE"}, \
> +	{ RING_ESR(0),			REG_32BIT,	0,	0,	0,	"RING_ESR"}, \
> +	{ RING_EMR(0),			REG_32BIT,	0,	0,	0,	"RING_EMR"}, \
> +	{ RING_EIR(0),			REG_32BIT,	0,	0,	0,	"RING_EIR"}, \
> +	{ RING_IMR(0),			REG_32BIT,	0,	0,	0,	"RING_IMR"}, \
> +	{ RING_IPEHR(0),		REG_32BIT,	0,	0,	0,	"IPEHR"}, \
> +	{ RING_INSTDONE(0),		REG_32BIT,	0,	0,	0,	"RING_INSTDONE"}, \
> +	{ INDIRECT_RING_STATE(0),	REG_32BIT,	0,	0,	0,	"INDIRECT_RING_STATE"}, \
> +	{ RING_ACTHD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
> +	{ RING_ACTHD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"ACTHD"}, \
> +	{ RING_BBADDR(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
> +	{ RING_BBADDR_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_BBADDR"}, \
> +	{ RING_START(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
> +	{ RING_START_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_START"}, \
> +	{ RING_DMA_FADD(0),		REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
> +	{ RING_DMA_FADD_UDW(0),		REG_64BIT_HI_DW, 0,	0,	0,	"RING_DMA_FADD"}, \
> +	{ RING_EXECLIST_STATUS_LO(0),	REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
> +	{ RING_EXECLIST_STATUS_HI(0),	REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_STATUS"}, \
> +	{ RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0,	0,	0,	NULL}, \
> +	{ RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0,	0,	0,	"RING_EXECLIST_SQ_CONTENTS"}
>  
>  #define COMMON_XELP_RC_CLASS \
> -	{ RCU_MODE,			REG_32BIT,	0,	0,	"RCU_MODE"}
> +	{ RCU_MODE,			REG_32BIT,	0,	0,	0,	"RCU_MODE"}
>  
>  #define COMMON_XELP_RC_CLASS_INSTDONE \
> -	{ SC_INSTDONE,			REG_32BIT,	0,	0,	"SC_INSTDONE"}, \
> -	{ SC_INSTDONE_EXTRA,		REG_32BIT,	0,	0,	"SC_INSTDONE_EXTRA"}, \
> -	{ SC_INSTDONE_EXTRA2,		REG_32BIT,	0,	0,	"SC_INSTDONE_EXTRA2"}
> +	{ SC_INSTDONE,			REG_32BIT,	0,	0,	0,	"SC_INSTDONE"}, \
> +	{ SC_INSTDONE_EXTRA,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA"}, \
> +	{ SC_INSTDONE_EXTRA2,		REG_32BIT,	0,	0,	0,	"SC_INSTDONE_EXTRA2"}
>  
>  #define XELP_VEC_CLASS_REGS \
> -	{ SFC_DONE(0),			0,	0,	0,	"SFC_DONE[0]"}, \
> -	{ SFC_DONE(1),			0,	0,	0,	"SFC_DONE[1]"}, \
> -	{ SFC_DONE(2),			0,	0,	0,	"SFC_DONE[2]"}, \
> -	{ SFC_DONE(3),			0,	0,	0,	"SFC_DONE[3]"}
> +	{ SFC_DONE(0),			0,	0,	0,	0,	"SFC_DONE[0]"}, \
> +	{ SFC_DONE(1),			0,	0,	0,	0,	"SFC_DONE[1]"}, \
> +	{ SFC_DONE(2),			0,	0,	0,	0,	"SFC_DONE[2]"}, \
> +	{ SFC_DONE(3),			0,	0,	0,	0,	"SFC_DONE[3]"}
>  
>  /* XE_LP Global */
>  static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
> @@ -350,7 +350,7 @@ static const struct __ext_steer_reg xehpg_extregs[] = {
>  	{"INSTDONE_GEOM_SVGUNIT",	XEHPG_INSTDONE_GEOM_SVGUNIT}
>  };
>  
> -static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
> +static void __fill_ext_reg(struct xe_gt *gt, struct __guc_mmio_reg_descr *ext,
>  			   const struct __ext_steer_reg *extlist,
>  			   int slice_id, int subslice_id)
>  {
> @@ -361,6 +361,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
>  	ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
>  	ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
>  	ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
> +	ext->dss_id = xe_gt_mcr_steering_info_to_dss_id(gt, slice_id, subslice_id);

This seems a bit roundabout... for_each_dss_steering() converts the DSS
ID into group/instance values for steering.  And then we're converting
those steering values back into the original ID (which we already know)
here.  Can't we just pass 'iter' from the for_each_dss_steering() loop
as a parameter and avoid the extra hwconfig lookups?


Matt

>  	ext->regname = extlist->name;
>  }
>  
> @@ -456,13 +457,13 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
>  	extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
>  	for_each_dss_steering(iter, gt, slice, subslice) {
>  		for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
> -			__fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
> +			__fill_ext_reg(gt, extarray, &xe_extregs[i], slice, subslice);
>  			++extarray;
>  		}
>  
>  		if (has_xehpg_extregs)
>  			for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
> -				__fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
> +				__fill_ext_reg(gt, extarray, &xehpg_extregs[i], slice, subslice);
>  				++extarray;
>  			}
>  	}
> @@ -1747,17 +1748,12 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_
>  			 */
>  			XE_WARN_ON(low32_ready);
>  
> -			if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags)) {
> -				int dss, group, instance;
> -
> -				group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
> -				instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
> -				dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);
> -
> -				drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
> -			} else {
> +			if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags))
> +				drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname,
> +					   reg_desc->dss_id, value);
> +			else
>  				drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
> -			}
> +
>  			break;
>  		}
>  	}
> diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h
> index ca2d390ccbee..6cb439115597 100644
> --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h
> @@ -39,6 +39,8 @@ struct __guc_mmio_reg_descr {
>  	u32 flags;
>  	/** @mask: The mask to apply */
>  	u32 mask;
> +	/** @dss_id: Cached index for steered registers */
> +	u32 dss_id;
>  	/** @regname: Name of the register */
>  	const char *regname;
>  };
> -- 
> 2.49.0
> 
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the Intel-xe mailing list