[PATCH 3/3] drm/xe/guc: Cache DSS info when creating capture register list
John Harrison
john.c.harrison at intel.com
Thu Apr 17 20:52:06 UTC 2025
On 4/17/2025 1:31 PM, Matt Roper wrote:
> On Thu, Apr 17, 2025 at 12:52:14PM -0700, John.C.Harrison at Intel.com wrote:
>> From: John Harrison <John.C.Harrison at Intel.com>
>>
>> Calculating the DSS id (index of a steered register) currently
>> requires reading state from the hwconfig table and that currently
>> requires dynamically allocating memory. The GuC based register capture
>> (for dev core dumps) includes this index as part of the register name
>> in the dump. However, it was calculating said index at the time of the
>> dump for every dump. That is wasteful. It also breaks anyone trying to
>> do the dump at a time when memory allocations are not allowed.
>>
>> So rather than calculating on every print, just calculate at start of
>> day when creating the register list in the first place.
>>
>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_guc_capture.c | 92 +++++++++++------------
>> drivers/gpu/drm/xe/xe_guc_capture_types.h | 2 +
>> 2 files changed, 46 insertions(+), 48 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
>> index f4b08136e472..139e4e3bcc32 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_capture.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_capture.c
>> @@ -105,49 +105,49 @@ struct __guc_capture_parsed_output {
>> * 3. Incorrect order will trigger XE_WARN.
>> */
>> #define COMMON_XELP_BASE_GLOBAL \
>> - { FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"}
>> + { FORCEWAKE_GT, REG_32BIT, 0, 0, 0, "FORCEWAKE_GT"}
>>
>> #define COMMON_BASE_ENGINE_INSTANCE \
>> - { RING_HWSTAM(0), REG_32BIT, 0, 0, "HWSTAM"}, \
>> - { RING_HWS_PGA(0), REG_32BIT, 0, 0, "RING_HWS_PGA"}, \
>> - { RING_HEAD(0), REG_32BIT, 0, 0, "RING_HEAD"}, \
>> - { RING_TAIL(0), REG_32BIT, 0, 0, "RING_TAIL"}, \
>> - { RING_CTL(0), REG_32BIT, 0, 0, "RING_CTL"}, \
>> - { RING_MI_MODE(0), REG_32BIT, 0, 0, "RING_MI_MODE"}, \
>> - { RING_MODE(0), REG_32BIT, 0, 0, "RING_MODE"}, \
>> - { RING_ESR(0), REG_32BIT, 0, 0, "RING_ESR"}, \
>> - { RING_EMR(0), REG_32BIT, 0, 0, "RING_EMR"}, \
>> - { RING_EIR(0), REG_32BIT, 0, 0, "RING_EIR"}, \
>> - { RING_IMR(0), REG_32BIT, 0, 0, "RING_IMR"}, \
>> - { RING_IPEHR(0), REG_32BIT, 0, 0, "IPEHR"}, \
>> - { RING_INSTDONE(0), REG_32BIT, 0, 0, "RING_INSTDONE"}, \
>> - { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, "INDIRECT_RING_STATE"}, \
>> - { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
>> - { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, "ACTHD"}, \
>> - { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
>> - { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_BBADDR"}, \
>> - { RING_START(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
>> - { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_START"}, \
>> - { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
>> - { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, "RING_DMA_FADD"}, \
>> - { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
>> - { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_STATUS"}, \
>> - { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, \
>> - { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, "RING_EXECLIST_SQ_CONTENTS"}
>> + { RING_HWSTAM(0), REG_32BIT, 0, 0, 0, "HWSTAM"}, \
>> + { RING_HWS_PGA(0), REG_32BIT, 0, 0, 0, "RING_HWS_PGA"}, \
>> + { RING_HEAD(0), REG_32BIT, 0, 0, 0, "RING_HEAD"}, \
>> + { RING_TAIL(0), REG_32BIT, 0, 0, 0, "RING_TAIL"}, \
>> + { RING_CTL(0), REG_32BIT, 0, 0, 0, "RING_CTL"}, \
>> + { RING_MI_MODE(0), REG_32BIT, 0, 0, 0, "RING_MI_MODE"}, \
>> + { RING_MODE(0), REG_32BIT, 0, 0, 0, "RING_MODE"}, \
>> + { RING_ESR(0), REG_32BIT, 0, 0, 0, "RING_ESR"}, \
>> + { RING_EMR(0), REG_32BIT, 0, 0, 0, "RING_EMR"}, \
>> + { RING_EIR(0), REG_32BIT, 0, 0, 0, "RING_EIR"}, \
>> + { RING_IMR(0), REG_32BIT, 0, 0, 0, "RING_IMR"}, \
>> + { RING_IPEHR(0), REG_32BIT, 0, 0, 0, "IPEHR"}, \
>> + { RING_INSTDONE(0), REG_32BIT, 0, 0, 0, "RING_INSTDONE"}, \
>> + { INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, 0, "INDIRECT_RING_STATE"}, \
>> + { RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
>> + { RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "ACTHD"}, \
>> + { RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
>> + { RING_BBADDR_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_BBADDR"}, \
>> + { RING_START(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
>> + { RING_START_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_START"}, \
>> + { RING_DMA_FADD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
>> + { RING_DMA_FADD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_DMA_FADD"}, \
>> + { RING_EXECLIST_STATUS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
>> + { RING_EXECLIST_STATUS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_STATUS"}, \
>> + { RING_EXECLIST_SQ_CONTENTS_LO(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
>> + { RING_EXECLIST_SQ_CONTENTS_HI(0), REG_64BIT_HI_DW, 0, 0, 0, "RING_EXECLIST_SQ_CONTENTS"}
>>
>> #define COMMON_XELP_RC_CLASS \
>> - { RCU_MODE, REG_32BIT, 0, 0, "RCU_MODE"}
>> + { RCU_MODE, REG_32BIT, 0, 0, 0, "RCU_MODE"}
>>
>> #define COMMON_XELP_RC_CLASS_INSTDONE \
>> - { SC_INSTDONE, REG_32BIT, 0, 0, "SC_INSTDONE"}, \
>> - { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA"}, \
>> - { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, "SC_INSTDONE_EXTRA2"}
>> + { SC_INSTDONE, REG_32BIT, 0, 0, 0, "SC_INSTDONE"}, \
>> + { SC_INSTDONE_EXTRA, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA"}, \
>> + { SC_INSTDONE_EXTRA2, REG_32BIT, 0, 0, 0, "SC_INSTDONE_EXTRA2"}
>>
>> #define XELP_VEC_CLASS_REGS \
>> - { SFC_DONE(0), 0, 0, 0, "SFC_DONE[0]"}, \
>> - { SFC_DONE(1), 0, 0, 0, "SFC_DONE[1]"}, \
>> - { SFC_DONE(2), 0, 0, 0, "SFC_DONE[2]"}, \
>> - { SFC_DONE(3), 0, 0, 0, "SFC_DONE[3]"}
>> + { SFC_DONE(0), 0, 0, 0, 0, "SFC_DONE[0]"}, \
>> + { SFC_DONE(1), 0, 0, 0, 0, "SFC_DONE[1]"}, \
>> + { SFC_DONE(2), 0, 0, 0, 0, "SFC_DONE[2]"}, \
>> + { SFC_DONE(3), 0, 0, 0, 0, "SFC_DONE[3]"}
>>
>> /* XE_LP Global */
>> static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
>> @@ -350,7 +350,7 @@ static const struct __ext_steer_reg xehpg_extregs[] = {
>> {"INSTDONE_GEOM_SVGUNIT", XEHPG_INSTDONE_GEOM_SVGUNIT}
>> };
>>
>> -static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
>> +static void __fill_ext_reg(struct xe_gt *gt, struct __guc_mmio_reg_descr *ext,
>> const struct __ext_steer_reg *extlist,
>> int slice_id, int subslice_id)
>> {
>> @@ -361,6 +361,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
>> ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1);
>> ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
>> ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
>> + ext->dss_id = xe_gt_mcr_steering_info_to_dss_id(gt, slice_id, subslice_id);
> This seems a bit roundabout... for_each_dss_steering() converts the DSS
> ID into group/instance values for steering. And then we're converting
> those steering values back into the original ID (which we already know)
> here. Can't we just pass 'iter' from the for_each_dss_steering() loop
> as a parameter and avoid the extra hwconfig lookups?
It's not exactly obvious what the for_each_dss_steering does, especially
with the iteration just being called 'iter' not 'dss'! But yeah, I guess
it is doing 'iter' -> 'group/instance' -> 'dss' == 'iter'. I'll re-spin
with the iterator renamed to dss_id and pass that in directly.
Thanks.
>
>
> Matt
>
>> ext->regname = extlist->name;
>> }
>>
>> @@ -456,13 +457,13 @@ static void guc_capture_alloc_steered_lists(struct xe_guc *guc)
>> extarray = (struct __guc_mmio_reg_descr *)extlists[0].list;
>> for_each_dss_steering(iter, gt, slice, subslice) {
>> for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
>> - __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
>> + __fill_ext_reg(gt, extarray, &xe_extregs[i], slice, subslice);
>> ++extarray;
>> }
>>
>> if (has_xehpg_extregs)
>> for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
>> - __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
>> + __fill_ext_reg(gt, extarray, &xehpg_extregs[i], slice, subslice);
>> ++extarray;
>> }
>> }
>> @@ -1747,17 +1748,12 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_
>> */
>> XE_WARN_ON(low32_ready);
>>
>> - if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags)) {
>> - int dss, group, instance;
>> -
>> - group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
>> - instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
>> - dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);
>> -
>> - drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
>> - } else {
>> + if (FIELD_GET(GUC_REGSET_STEERING_NEEDED, reg_desc->flags))
>> + drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname,
>> + reg_desc->dss_id, value);
>> + else
>> drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
>> - }
>> +
>> break;
>> }
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h
>> index ca2d390ccbee..6cb439115597 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h
>> @@ -39,6 +39,8 @@ struct __guc_mmio_reg_descr {
>> u32 flags;
>> /** @mask: The mask to apply */
>> u32 mask;
>> + /** @dss_id: Cached index for steered registers */
>> + u32 dss_id;
>> /** @regname: Name of the register */
>> const char *regname;
>> };
>> --
>> 2.49.0
>>
>>
More information about the Intel-xe
mailing list