[Intel-xe] [PATCH v2 3/3] drm/xe: Add INSTDONE registers to devcoredump

Matt Roper matthew.d.roper at intel.com
Tue Oct 10 17:49:57 UTC 2023


On Tue, Oct 10, 2023 at 08:32:04AM -0700, Souza, Jose wrote:
> On Mon, 2023-10-09 at 16:23 -0700, Matt Roper wrote:
> > On Mon, Oct 09, 2023 at 01:21:49PM -0700, José Roberto de Souza wrote:
> > > This registers contains important information that can help with debug
> > > of GPU hangs.
> > > 
> > > While at it also fixing the double line jump at the end of engine
> > > registers for CCS engines.
> > > 
> > > v2:
> > > - print other INSTDONE registers
> > > 
> > > Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> > > Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/regs/xe_engine_regs.h |   1 +
> > >  drivers/gpu/drm/xe/regs/xe_gt_regs.h     |   9 ++
> > >  drivers/gpu/drm/xe/xe_gt_types.h         |   3 +-
> > >  drivers/gpu/drm/xe/xe_hw_engine.c        | 107 ++++++++++++++++++++++-
> > >  drivers/gpu/drm/xe/xe_hw_engine_types.h  |  10 +++
> > >  5 files changed, 126 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > > index 35dd4837dd75f..32465e384fc24 100644
> > > --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > > +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > > @@ -32,6 +32,7 @@
> > >  #define RING_ACTHD_UDW(base)			XE_REG((base) + 0x5c)
> > >  #define RING_DMA_FADD_UDW(base)			XE_REG((base) + 0x60)
> > >  #define RING_IPEHR(base)			XE_REG((base) + 0x68)
> > > +#define RING_INSTDONE(base)			XE_REG((base) + 0x6c)
> > >  #define RING_ACTHD(base)			XE_REG((base) + 0x74)
> > >  #define RING_DMA_FADD(base)			XE_REG((base) + 0x78)
> > >  #define RING_HWS_PGA(base)			XE_REG((base) + 0x80)
> > > diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > > index cd1821d96a5d8..c6650124ae6a6 100644
> > > --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > > +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > > @@ -105,6 +105,8 @@
> > >  #define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
> > >  #define   FF_MODE2_TDS_TIMER_128		REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
> > >  
> > > +#define XEHPG_INSTDONE_GEOM_SVG			XE_REG_MCR(0x666c)
> > > +
> > >  #define CACHE_MODE_1				XE_REG(0x7004, XE_REG_OPTION_MASKED)
> > >  #define   MSAA_OPTIMIZATION_REDUC_DISABLE	REG_BIT(11)
> > >  
> > > @@ -117,6 +119,10 @@
> > >  
> > >  #define COMMON_SLICE_CHICKEN1			XE_REG(0x7010)
> > >  
> > > +#define SLICE_COMMON_INSTDONE			XE_REG(0x7100)
> > > +#define SLICE_COMMON_INSTDONE_EXTRA		XE_REG(0x7104)
> > > +#define SLICE_COMMON_INSTDONE_EXTRA2		XE_REG(0x7108)
> > > +
> > >  #define COMMON_SLICE_CHICKEN4			XE_REG(0x7300, XE_REG_OPTION_MASKED)
> > >  #define   DISABLE_TDC_LOAD_BALANCING_CALC	REG_BIT(6)
> > >  
> > > @@ -299,6 +305,9 @@
> > >  #define XE_OAG_BLT_BUSY_FREE			XE_REG(0xdbbc)
> > >  #define XE_OAG_RENDER_BUSY_FREE			XE_REG(0xdbdc)
> > >  
> > > +#define SAMPLER_INSTDONE			XE_REG_MCR(0xe160)
> > > +#define ROW_INSTDONE				XE_REG_MCR(0xe164)
> > > +
> > >  #define SAMPLER_MODE				XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
> > >  #define   ENABLE_SMALLPL			REG_BIT(15)
> > >  #define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> > > index d4310be3e1e7c..d566df56fb8ea 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > > @@ -25,9 +25,10 @@ enum xe_gt_type {
> > >  };
> > >  
> > >  #define XE_MAX_DSS_FUSE_REGS	3
> > > +#define XE_MAX_DSS_FUSE_BITS	(32 * XE_MAX_DSS_FUSE_REGS)
> > >  #define XE_MAX_EU_FUSE_REGS	1
> > >  
> > > -typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
> > > +typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
> > >  typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
> > >  
> > >  struct xe_mmio_range {
> > > diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> > > index 37b3ee1268090..6433016fa10f8 100644
> > > --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> > > +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> > > @@ -16,6 +16,7 @@
> > >  #include "xe_execlist.h"
> > >  #include "xe_force_wake.h"
> > >  #include "xe_gt.h"
> > > +#include "xe_gt_mcr.h"
> > >  #include "xe_gt_topology.h"
> > >  #include "xe_hw_fence.h"
> > >  #include "xe_irq.h"
> > > @@ -655,6 +656,45 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
> > >  		xe_hw_fence_irq_run(hwe->fence_irq);
> > >  }
> > >  
> > > +static void
> > > +xe_he_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
> > > +				       struct xe_hw_engine_snapshot *snapshot)
> > > +{
> > > +	struct xe_gt *gt = hwe->gt;
> > > +	struct xe_device *xe = gt_to_xe(gt);
> > > +	unsigned int dss;
> > > +
> > > +	snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
> > > +
> > > +	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
> > > +		return;
> > > +
> > > +	snapshot->reg.instdone.slice_common =
> > > +		xe_mmio_read32(gt, SLICE_COMMON_INSTDONE);
> > > +	snapshot->reg.instdone.slice_common_extra =
> > > +		xe_mmio_read32(gt, SLICE_COMMON_INSTDONE_EXTRA);
> > > +	snapshot->reg.instdone.slice_common_extra2 =
> > > +		xe_mmio_read32(gt, SLICE_COMMON_INSTDONE_EXTRA2);
> > > +
> > > +	for (dss = 0; dss < XE_MAX_DSS_FUSE_BITS; dss++) {
> > 
> > Drive-by comment:  Looping over all DSS is probably going to be needed
> > in other parts of the driver as well in the future (probably perf, eu
> > stall sampling, etc.).  It would be best if we could avoid open-coded
> > loops like this that require the calling code to know about
> > XE_MAX_DSS_FUSE_BITS (which is tied to our internal bitmap
> > representation of fusing).  I think it would be better if xe_gt_mcr.h
> > could provide a general DSS-looping iterator that hides those details
> > and can be more easily re-used elsewhere in the driver, similar to what
> > i915 does with the for_each_ss_steering() iterator.  Then if we change
> > the underlying bitmap representation, we only need to update the one
> > iterator instead of tracking down all the various loops that need to be
> > updated.
> 
> Is this enough?
> 
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
> index c0167d57de42a..16fa19cf5723c 100644
> --- a/drivers/gpu/drm/xe/xe_gt_mcr.h
> +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
> @@ -7,6 +7,7 @@
>  #define _XE_GT_MCR_H_
> 
>  #include "regs/xe_reg_defs.h"
> +#include "xe_gt_types.h"
> 
>  struct drm_printer;
>  struct xe_gt;
> @@ -30,4 +31,12 @@ xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, int *group,
> 
>  void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
> 
> +#define for_each_geometry_dss(gt, dss) \
> +       for (dss = 0; dss < XE_MAX_DSS_FUSE_BITS; dss++) \
> +               if (xe_gt_has_geometry_dss(gt, dss))

Maybe you can also make the iterator fill in the group/instance values
rather than requiring a separate xe_gt_mcr_get_dss_steering call inside
the loop?  E.g., something like

    #define xe_for_each_geometry_dss(gt, dss, grp, inst) \
        for (dss = 0, xe_gt_mcr_get_dss_steering(gt, 0, &grp, &inst); \
             dss < XE_MAX_DSS_FUSE_BITS; \
             dss++, xe_gt_mcr_get_dss_steering(gt, dss, &grp, &inst)) \
               for_each_if(xe_gt_has_geometry_dss(gt, dss))

    ...

    xe_for_each_geometry_dss(gt, dss, group, instance) {
            foo[dss] = xe_gt_mcr_unicast_read(gt, REGISTER, group, instance)


Matt

> +
> +#define for_each_compute_dss(gt, dss) \
> +       for (dss = 0; dss < XE_MAX_DSS_FUSE_BITS; dss++) \
> +               if (xe_gt_has_compute_dss(gt, dss))
> +
>  #endif /* _XE_GT_MCR_H_ */
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 6433016fa10f8..b5bccfd5a09da 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -676,12 +676,9 @@ xe_he_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
>         snapshot->reg.instdone.slice_common_extra2 =
>                 xe_mmio_read32(gt, SLICE_COMMON_INSTDONE_EXTRA2);
> 
> -       for (dss = 0; dss < XE_MAX_DSS_FUSE_BITS; dss++) {
> +       for_each_geometry_dss(gt, dss) {
>                 int group, instance;
> 
> -               if (!xe_gt_has_geometry_dss(gt, dss))
> -                       continue;
> -
>                 xe_gt_mcr_get_dss_steering(gt, dss, &group, &instance);
>                 snapshot->reg.instdone.sampler[dss] =
>                         xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
> @@ -817,10 +814,7 @@ xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, str
>         drm_printf(p, "\tSC_EXTRA2_INSTDONE: 0x%08x\n",
>                    snapshot->reg.instdone.slice_common_extra2);
> 
> -       for (dss = 0; dss < XE_MAX_DSS_FUSE_BITS; dss++) {
> -               if (!xe_gt_has_geometry_dss(gt, dss))
> -                       continue;
> -
> +       for_each_geometry_dss(gt, dss) {
>                 drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
>                            snapshot->reg.instdone.sampler[dss]);
>                 drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
> 
> 
> 
> > 
> > 
> > Matt
> > 
> > > +		int group, instance;
> > > +
> > > +		if (!xe_gt_has_geometry_dss(gt, dss))
> > > +			continue;
> > > +
> > > +		xe_gt_mcr_get_dss_steering(gt, dss, &group, &instance);
> > > +		snapshot->reg.instdone.sampler[dss] =
> > > +			xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
> > > +		snapshot->reg.instdone.row[dss] =
> > > +			xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);
> > > +
> > > +		if (GRAPHICS_VERx100(xe) >= 1255)
> > > +			snapshot->reg.instdone.geom_svg[dss] =
> > > +				xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVG,
> > > +						       group, instance);
> > > +	}
> > > +}
> > > +
> > >  /**
> > >   * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
> > >   * @hwe: Xe HW Engine.
> > > @@ -679,10 +719,30 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
> > >  	if (!snapshot)
> > >  		return NULL;
> > >  
> > > +	/* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
> > > +	 * includes xe_hw_engine_types.h the length of this 3 registers can't be
> > > +	 * set in struct xe_hw_engine_snapshot, so here doing additional
> > > +	 * allocations.
> > > +	 */
> > > +	len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
> > > +	snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
> > > +	snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
> > > +	snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
> > > +	if (!snapshot->reg.instdone.sampler ||
> > > +	    !snapshot->reg.instdone.row ||
> > > +	    !snapshot->reg.instdone.geom_svg) {
> > > +		xe_hw_engine_snapshot_free(snapshot);
> > > +		return NULL;
> > > +	}
> > > +
> > >  	len = strlen(hwe->name) + 1;
> > >  	snapshot->name = kzalloc(len, GFP_ATOMIC);
> > > -	if (snapshot->name)
> > > -		strscpy(snapshot->name, hwe->name, len);
> > > +	if (!snapshot->name) {
> > > +		xe_hw_engine_snapshot_free(snapshot);
> > > +		return NULL;
> > > +	}
> > > +
> > > +	strscpy(snapshot->name, hwe->name, len);
> > >  
> > >  	snapshot->hwe = hwe;
> > >  	snapshot->logical_instance = hwe->logical_instance;
> > > @@ -730,6 +790,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
> > >  	snapshot->reg.ring_dma_fadd =
> > >  		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
> > >  	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
> > > +	xe_he_engine_snapshot_instdone_capture(hwe, snapshot);
> > >  
> > >  	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
> > >  		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
> > > @@ -737,6 +798,40 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
> > >  	return snapshot;
> > >  }
> > >  
> > > +static void
> > > +xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
> > > +{
> > > +	struct xe_gt *gt = snapshot->hwe->gt;
> > > +	struct xe_device *xe = gt_to_xe(gt);
> > > +	unsigned int dss;
> > > +
> > > +	drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);
> > > +
> > > +	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
> > > +		return;
> > > +
> > > +	drm_printf(p, "\tSC_INSTDONE: 0x%08x\n",
> > > +		   snapshot->reg.instdone.slice_common);
> > > +	drm_printf(p, "\tSC_EXTRA_INSTDONE: 0x%08x\n",
> > > +		   snapshot->reg.instdone.slice_common_extra);
> > > +	drm_printf(p, "\tSC_EXTRA2_INSTDONE: 0x%08x\n",
> > > +		   snapshot->reg.instdone.slice_common_extra2);
> > > +
> > > +	for (dss = 0; dss < XE_MAX_DSS_FUSE_BITS; dss++) {
> > > +		if (!xe_gt_has_geometry_dss(gt, dss))
> > > +			continue;
> > > +
> > > +		drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
> > > +			   snapshot->reg.instdone.sampler[dss]);
> > > +		drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
> > > +			   snapshot->reg.instdone.row[dss]);
> > > +
> > > +		if (GRAPHICS_VERx100(xe) >= 1255)
> > > +			drm_printf(p, "\tGEOM_SVGUNIT_INSTDONE[%u]: 0x%08x\n",
> > > +				   dss, snapshot->reg.instdone.geom_svg[dss]);
> > > +	}
> > > +}
> > > +
> > >  /**
> > >   * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
> > >   * @snapshot: Xe HW Engine snapshot object.
> > > @@ -785,10 +880,13 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
> > >  	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
> > >  		   snapshot->reg.ring_dma_fadd_udw,
> > >  		   snapshot->reg.ring_dma_fadd);
> > > -	drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
> > > +	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
> > > +	xe_hw_engine_snapshot_instdone_print(snapshot, p);
> > > +
> > >  	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
> > >  		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
> > >  			   snapshot->reg.rcu_mode);
> > > +	drm_puts(p, "\n");
> > >  }
> > >  
> > >  /**
> > > @@ -803,6 +901,9 @@ void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
> > >  	if (!snapshot)
> > >  		return;
> > >  
> > > +	kfree(snapshot->reg.instdone.sampler);
> > > +	kfree(snapshot->reg.instdone.row);
> > > +	kfree(snapshot->reg.instdone.geom_svg);
> > >  	kfree(snapshot->name);
> > >  	kfree(snapshot);
> > >  }
> > > diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> > > index 71c0a0169e62a..0a10a79962c97 100644
> > > --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> > > @@ -221,6 +221,16 @@ struct xe_hw_engine_snapshot {
> > >  		u32 ipehr;
> > >  		/** @rcu_mode: RCU_MODE */
> > >  		u32 rcu_mode;
> > > +		/** @ring_instdone: RING_INSTDONE */
> > > +		struct {
> > > +			u32 ring;
> > > +			u32 slice_common;
> > > +			u32 slice_common_extra;
> > > +			u32 slice_common_extra2;
> > > +			u32 *sampler;
> > > +			u32 *row;
> > > +			u32 *geom_svg;
> > > +		} instdone;
> > >  	} reg;
> > >  };
> > >  
> > > -- 
> > > 2.42.0
> > > 
> > > 
> > 
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the Intel-xe mailing list