[PATCH 3/3] drm/xe/guc: Cache DSS info when creating capture register list
Matt Roper
matthew.d.roper at intel.com
Thu Apr 17 20:31:49 UTC 2025
On Thu, Apr 17, 2025 at 12:52:14PM -0700, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> Calculating the DSS id (index of a steered register) currently
> requires reading state from the hwconfig table and that currently
> requires dynamically allocating memory. The GuC based register capture
> (for dev core dumps) includes this index as part of the register name
> in the dump. However, it was calculating said index at the time of the
> dump for every dump. That is wasteful. It also breaks anyone trying to
> do the dump at a time when memory allocations are not allowed.
>
> So rather than calculating on every print, just calculate at start of
> day when creating the register list in the first place.
>
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc_capture.c | 92 +++++++++++------------
> drivers/gpu/drm/xe/xe_guc_capture_types.h | 2 +
> 2 files changed, 46 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
> index f4b08136e472..139e4e3bcc32 100644
> --- a/drivers/gpu/drm/xe/xe_guc_capture.c
> +++ b/drivers/gpu/drm/xe/xe_guc_capture.c
> @@ -105,49 +105,49 @@ struct __guc_capture_parsed_output {
> * 3. Incorrect order will trigger XE_WARN.
> */
> #define COMMON_XELP_BASE_GLOBAL \
> - { FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"}
> + { FORCEWAKE_GT, REG_32BIT, 0, 0, 0, "FORCEWAKE_GT"}
>
> #define COMMON_BASE_ENGINE_INSTANCE \
> - { RING_HWSTAM(0), REG_32BIT, 0, 0, "HWSTAM"}, \
> - { RING_HWS_PGA(0), REG_32BIT, 0, 0, "RING_HWS_PGA"}, \
> - { RING_HEAD(0), REG_32BIT, 0, 0, "RING_HEAD"}, \
> - { RING_TAIL(0), REG_32BIT, 0, 0, "RING_TAIL"}, \
> - { RING_CTL(0), REG_32BIT, 0, 0, "RING_CTL"}, \
> - { RING_MI_MODE(0), REG_32BIT, 0, 0, "RING_MI_MODE"}, \
> - { RING_MODE(0), REG_32BIT, 0, 0, "RING_MODE"}, \
> - { RING_ESR(0), REG_32BIT, 0, 0, "RING_ESR"}, \
> - { RING_EMR(0), REG_32BIT, 0, 0, "RING_EMR"}, \
> - { RING_EIR(0), REG_32BIT, 0, 0, "RING_EIR"}, \
> - { RING_IMR(0), REG_32BIT, 0, 0, "RING_IMR"}, \
> - { RING_IPEHR(0), REG_32BIT, 0, 0, "IPEHR"}, \
> - { RING_INSTDONE(0), REG_32BIT, 0, 0, "RING_INSTDONE"}, \
> - { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, "INDIRECT_RING_STATE"}, \
> - { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
> - { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, "ACTHD"}, \
> - { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
> - { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_BBADDR"}, \
> - { RING_START(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
> - { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_START"}, \
> - { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
> - { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_DMA_FADD"}, \
> - { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
> - { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_STATUS"}, \
> - { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
> - { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_SQ_CONTENTS"}
> + { RING_HWSTAM(0), REG_32BIT, 0, 0, 0, "HWSTAM"}, \
> + { RING_HWS_PGA(0), REG_32BIT, 0, 0, 0, "RING_HWS_PGA"}, \
> + { RING_HEAD(0), REG_32BIT, 0, 0, 0, "RING_HEAD"}, \
> + { RING_TAIL(0), REG_32BIT, 0, 0, 0, "RING_TAIL"}, \
> + { RING_CTL(0), REG_32BIT, 0, 0, 0, "RING_CTL"}, \
> + { RING_MI_MODE(0), REG_32BIT, 0, 0, 0, "RING_MI_MODE"}, \
> + { RING_MODE(0), REG_32BIT, 0, 0, 0, "RING_MODE"}, \
> + { RING_ESR(0), REG_32BIT, 0, 0, 0, "RING_ESR"}, \
> + { RING_EMR(0), REG_32BIT, 0, 0, 0, "RING_EMR"}, \
> + { RING_EIR(0), REG_32BIT, 0, 0, 0, "RING_EIR"}, \
> + { RING_IMR(0), REG_32BIT, 0, 0, 0, "RING_IMR"}, \
> + { RING_IPEHR(0), REG_32BIT, 0, 0, 0, "IPEHR"}, \
> + { RING_INSTDONE(0), REG_32BIT, 0, 0, 0, "RING_INSTDONE"}, \
> + { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, 0, "INDIRECT_RING_STATE"}, \
> + { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
> + { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "ACTHD"}, \
> + { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
> + { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_BBADDR"}, \
> + { RING_START(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
> + { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_START"}, \
> + { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
> + { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_DMA_FADD"}, \
> + { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
> + { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_STATUS"}, \
> + { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
> + { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_SQ_CONTENTS"}
>
> #define COMMON_XELP_RC_CLASS \
> - { RCU_MODE, REG_32BIT, 0, 0, "RCU_MODE"}
> + { RCU_MODE, REG_32BIT, 0, 0, 0, "RCU_MODE"}
>
> #define COMMON_XELP_RC_CLASS_INSTDONE \
> - { SC_INSTDONE, REG_32BIT, 0, 0, "SC_INSTDONE"}, \
> - { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA"}, \
> - { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA2"}
> + { SC_INSTDONE, REG_32BIT, 0, 0, 0, "SC_INSTDONE"}, \
> + { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA"}, \
> + { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA2"}
>
> #define XELP_VEC_CLASS_REGS \
> - { SFC_DONE(0), 0, 0, 0, "SFC_DONE[0]"}, \
> - { SFC_DONE(1), 0, 0, 0, "SFC_DONE[1]"}, \
> - { SFC_DONE(2), 0, 0, 0, "SFC_DONE[2]"}, \
> - { SFC_DONE(3), 0, 0, 0, "SFC_DONE[3]"}
> + { SFC_DONE(0), 0, 0, 0, 0, "SFC_DONE[0]"}, \
> + { SFC_DONE(1), 0, 0, 0, 0, "SFC_DONE[1]"}, \
> + { SFC_DONE(2), 0, 0, 0, 0, "SFC_DONE[2]"}, \
> + { SFC_DONE(3), 0, 0, 0, 0, "SFC_DONE[3]"}
>
> /* XE_LP Global */
> static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
> @@ -350,7 +350,7 @@ static const struct __ext_steer_reg xehpg_extregs[] = {
> {"INSTDONE_GEOM_SVGUNIT", XEHPG_INSTDONE_GEOM_SVGUNIT}
> };
>
> -static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
> +static void __fill_ext_reg(struct xe_gt *gt, struct __guc_mmio_reg_descr *ext,
> const struct __ext_steer_reg *extlist,
> int slice_id, int subslice_id)
> {
> @@ -361,6 +361,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
> ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
> ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
> ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
> + ext->dss_id = xe_gt_mcr_steering_info_to_dss_id(gt, slice_id, subslice_id);
This seems a bit roundabout... for_each_dss_steering() converts the DSS
ID into group/instance values for steering. And then we're converting
those steering values back into the original ID (which we already know)
here. Can't we just pass 'iter' from the for_each_dss_steering() loop
as a parameter and avoid the extra hwconfig lookups?
Matt
> ext->regname = extlist->name;
> }
>
> @@ -456,13 +457,13 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
> extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
> for_each_dss_steering(iter, gt, slice, subslice) {
> for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
> - __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
> + __fill_ext_reg(gt, extarray, &xe_extregs[i], slice, subslice);
> ++extarray;
> }
>
> if (has_xehpg_extregs)
> for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
> - __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
> + __fill_ext_reg(gt, extarray, &xehpg_extregs[i], slice, subslice);
> ++extarray;
> }
> }
> @@ -1747,17 +1748,12 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_
> */
> XE_WARN_ON(low32_ready);
>
> - if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags)) {
> - int dss, group, instance;
> -
> - group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
> - instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
> - dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);
> -
> - drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
> - } else {
> + if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags))
> + drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname,
> + reg_desc->dss_id, value);
> + else
> drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
> - }
> +
> break;
> }
> }
> diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h
> index ca2d390ccbee..6cb439115597 100644
> --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h
> @@ -39,6 +39,8 @@ struct __guc_mmio_reg_descr {
> u32 flags;
> /** @mask: The mask to apply */
> u32 mask;
> + /** @dss_id: Cached index for steered registers */
> + u32 dss_id;
> /** @regname: Name of the register */
> const char *regname;
> };
> --
> 2.49.0
>
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe
mailing list