[PATCH 3/3] drm/xe: Add INSTDONE registers to devcoredump

Souza, Jose jose.souza at intel.com
Wed Mar 27 17:51:40 UTC 2024


On Wed, 2024-03-27 at 13:41 -0400, Dong, Zhanjun wrote:
> 
> On 2024-03-25 5:17 p.m., Dong, Zhanjun wrote:
> > See my comments inline below.
> > 
> > Regards,
> > Zhanjun Dong
> > 
> > On 2024-03-25 3:06 p.m., José Roberto de Souza wrote:
> > > This registers contains important information that can help with debug
> > > of GPU hangs.
> > > 
> > > While at it also fixing the double line jump at the end of engine
> > > registers for CCS engines.
> > > 
> > > v2:
> > > - print other INSTDONE registers
> > > 
> > > v3:
> > > - add for_each_geometry/compute_dss()
> > > 
> > > v4:
> > > - print one slice_common_instdone per glice in DG2+
> > > 
> > > Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> > > Cc: Matt Roper <matthew.d.roper at intel.com>
> > > Cc: Zhanjun Dong <zhanjun.dong at intel.com>
> > > Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
> > > ---
> > >   drivers/gpu/drm/xe/regs/xe_engine_regs.h |   1 +
> > >   drivers/gpu/drm/xe/regs/xe_gt_regs.h     |  13 +++
> > >   drivers/gpu/drm/xe/xe_hw_engine.c        | 128 +++++++++++++++++++++++
> > >   drivers/gpu/drm/xe/xe_hw_engine_types.h  |  16 +++
> > >   4 files changed, 158 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h 
> > > b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > > index a08528d9c76b2..98d830013e96b 100644
> > > --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > > +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> > > @@ -65,6 +65,7 @@
> > >   #define RING_ACTHD_UDW(base)            XE_REG((base) + 0x5c)
> > >   #define RING_DMA_FADD_UDW(base)            XE_REG((base) + 0x60)
> > >   #define RING_IPEHR(base)            XE_REG((base) + 0x68)
> > > +#define RING_INSTDONE(base)            XE_REG((base) + 0x6c)
> > >   #define RING_ACTHD(base)            XE_REG((base) + 0x74)
> > >   #define RING_DMA_FADD(base)            XE_REG((base) + 0x78)
> > >   #define RING_HWS_PGA(base)            XE_REG((base) + 0x80)
> > > diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h 
> > > b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > > index 65af9fe95db5b..8bef88d6ab4e2 100644
> > > --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > > +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > > @@ -94,6 +94,8 @@
> > >   #define   FF_MODE2_TDS_TIMER_MASK        REG_GENMASK(23, 16)
> > >   #define   FF_MODE2_TDS_TIMER_128        
> > > REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
> > > +#define DG2_INSTDONE_GEOM_SVGUNIT        XE_REG_MCR(0x666c)
> 
> The same register has a different symbol name prefix in drm-tip?
> #define XEHPG_INSTDONE_GEOM_SVG			MCR_REG(0x666c)
> 
> We need the consistant name prefix unless you have strong reason.

It needs to match bspec name not follow what i915 has.

> 
> > > +
> > >   #define CACHE_MODE_1                XE_REG(0x7004, 
> > > XE_REG_OPTION_MASKED)
> > >   #define   MSAA_OPTIMIZATION_REDUC_DISABLE    REG_BIT(11)
> > > @@ -110,6 +112,14 @@
> > >   #define   FLSH_IGNORES_PSD            REG_BIT(10)
> > >   #define   FD_END_COLLECT            REG_BIT(5)
> > > +#define SC_INSTDONE                XE_REG(0x7100)
> > > +#define SC_INSTDONE_EXTRA            XE_REG(0x7104)
> > > +#define SC_INSTDONE_EXTRA2            XE_REG(0x7108)
> > > +
> > > +#define DG2_SC_INSTDONE                XE_REG_MCR(0x7100)
> > > +#define DG2_SC_INSTDONE_EXTRA            XE_REG_MCR(0x7104)
> > > +#define DG2_SC_INSTDONE_EXTRA2            XE_REG_MCR(0x7108)
> 
> Need to keep the same prefix of XEHPG_ ?

Also works but like I mentioned we don't need to follow what i915 has.

Will take a look at your other comments next week.

> 
> > > +
> > >   #define COMMON_SLICE_CHICKEN4            XE_REG(0x7300, 
> > > XE_REG_OPTION_MASKED)
> > >   #define   DISABLE_TDC_LOAD_BALANCING_CALC    REG_BIT(6)
> > > @@ -326,6 +336,9 @@
> > >   #define HALF_SLICE_CHICKEN5            XE_REG_MCR(0xe188, 
> > > XE_REG_OPTION_MASKED)
> > >   #define   DISABLE_SAMPLE_G_PERFORMANCE        REG_BIT(0)
> > > +#define SAMPLER_INSTDONE            XE_REG_MCR(0xe160)
> > > +#define ROW_INSTDONE                XE_REG_MCR(0xe164)
> > > +
> > >   #define SAMPLER_MODE                XE_REG_MCR(0xe18c, 
> > > XE_REG_OPTION_MASKED)
> > >   #define   ENABLE_SMALLPL            REG_BIT(15)
> > >   #define   SC_DISABLE_POWER_OPTIMIZATION_EBB    REG_BIT(9)
> > > diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c 
> > > b/drivers/gpu/drm/xe/xe_hw_engine.c
> > > index 6e75803c963ae..0720f779fa574 100644
> > > --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> > > +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> > > @@ -18,6 +18,7 @@
> > >   #include "xe_gt.h"
> > >   #include "xe_gt_ccs_mode.h"
> > >   #include "xe_gt_printk.h"
> > > +#include "xe_gt_mcr.h"
> > >   #include "xe_gt_topology.h"
> > >   #include "xe_hw_fence.h"
> > >   #include "xe_irq.h"
> > > @@ -770,6 +771,57 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine 
> > > *hwe, u16 intr_vec)
> > >           xe_hw_fence_irq_run(hwe->fence_irq);
> > >   }
> > > +static bool
> > > +is_slice_common_per_gslice(struct xe_device *xe)
> > > +{
> > > +    return GRAPHICS_VERx100(xe) >= 1255;
> > > +}
> > > +
> > > +static void
> > > +xe_he_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
> > > +                       struct xe_hw_engine_snapshot *snapshot)
> > > +{
> > > +    struct xe_gt *gt = hwe->gt;
> > > +    struct xe_device *xe = gt_to_xe(gt);
> > > +    unsigned int dss;
> > > +    u16 group, instance;
> > > +
> > > +    snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, 
> > > RING_INSTDONE(0));
> > > +
> > > +    if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
> > > +        return;
> > > +
> > > +    if (is_slice_common_per_gslice(xe) == false) {
> > > +        snapshot->reg.instdone.slice_common[0] =
> > > +            xe_mmio_read32(gt, SC_INSTDONE);
> > > +        snapshot->reg.instdone.slice_common_extra[0] =
> > > +            xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
> > > +        snapshot->reg.instdone.slice_common_extra2[0] =
> > > +            xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
> > > +    } else {
> > > +        for_each_geometry_dss(dss, gt, group, instance) {
> > > +            snapshot->reg.instdone.slice_common[dss] =
> > > +                xe_gt_mcr_unicast_read(gt, DG2_SC_INSTDONE, group, 
> > > instance);
> > > +            snapshot->reg.instdone.slice_common_extra[dss] =
> > > +                xe_gt_mcr_unicast_read(gt, DG2_SC_INSTDONE_EXTRA, 
> > > group, instance);
> > > +            snapshot->reg.instdone.slice_common_extra2[dss] =
> > > +                xe_gt_mcr_unicast_read(gt, DG2_SC_INSTDONE_EXTRA2, 
> > > group, instance);
> > > +        }
> > > +    }
> > > +
> > > +    for_each_geometry_dss(dss, gt, group, instance) {
> > > +        snapshot->reg.instdone.sampler[dss] =
> > > +            xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, 
> > > instance);
> > > +        snapshot->reg.instdone.row[dss] =
> > > +            xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);
> > > +
> > > +        if (GRAPHICS_VERx100(xe) >= 1255)
> > > +            snapshot->reg.instdone.geom_svg[dss] =
> > > +                xe_gt_mcr_unicast_read(gt, DG2_INSTDONE_GEOM_SVGUNIT,
> > > +                               group, instance);
> > > +    }
> > > +}
> > > +
> > >   /**
> > >    * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW 
> > > Engine.
> > >    * @hwe: Xe HW Engine.
> > > @@ -784,6 +836,7 @@ struct xe_hw_engine_snapshot *
> > >   xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
> > >   {
> > >       struct xe_hw_engine_snapshot *snapshot;
> > > +    size_t len;
> > >       u64 val;
> > >       if (!xe_hw_engine_is_valid(hwe))
> > > @@ -794,6 +847,28 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine 
> > > *hwe)
> > >       if (!snapshot)
> > >           return NULL;
> > > +    /* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
> > > +     * includes xe_hw_engine_types.h the length of this 3 registers 
> > > can't be
> > > +     * set in struct xe_hw_engine_snapshot, so here doing additional
> > > +     * allocations.
> > > +     */
> > > +    len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
> > > +    snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
> > 
> > Can we move XE_MAX_DSS_FUSE_BITS to a .h like "xe_reg_sr_types.h"? Might 
> > need Matthew to comment on this.
> > 
> > then we can avoid dynamic mem allocation and define it like:
> > 
> > u32 slice_common[XE_MAX_DSS_FUSE_BITS / sizeof(u32)];
> > 
> > Beaware that I guess you define the u32 arrary to hold 
> > XE_MAX_DSS_FUSE_BITS bit info, then it might be:
> > 
> >      XE_MAX_DSS_FUSE_BITS / sizeof(u32)
> > rather than:
> >      XE_MAX_DSS_FUSE_BITS * sizeof(u32)
> > 
> > 
> > > +    snapshot->reg.instdone.slice_common_extra = kzalloc(len, 
> > > GFP_ATOMIC);
> > > +    snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, 
> > > GFP_ATOMIC);
> > > +    snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
> > > +    snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
> > > +    snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
> > > +    if (!snapshot->reg.instdone.slice_common ||
> > > +        !snapshot->reg.instdone.slice_common_extra ||
> > > +        !snapshot->reg.instdone.slice_common_extra2 ||
> > > +        !snapshot->reg.instdone.sampler ||
> > > +        !snapshot->reg.instdone.row ||
> > > +        !snapshot->reg.instdone.geom_svg) {
> > > +        xe_hw_engine_snapshot_free(snapshot);
> > > +        return NULL;
> > > +    }
> > > +
> > >       snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
> > >       snapshot->hwe = hwe;
> > >       snapshot->logical_instance = hwe->logical_instance;
> > > @@ -845,6 +920,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine 
> > > *hwe)
> > >       snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
> > >       snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
> > >       snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
> > > +    xe_he_engine_snapshot_instdone_capture(hwe, snapshot);
> > >       if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
> > >           snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
> > > @@ -852,6 +928,49 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine 
> > > *hwe)
> > >       return snapshot;
> > >   }
> > > +static void
> > > +xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot 
> > > *snapshot, struct drm_printer *p)
> > > +{
> > > +    struct xe_gt *gt = snapshot->hwe->gt;
> > > +    struct xe_device *xe = gt_to_xe(gt);
> > > +    u16 group, instance;
> > > +    unsigned int dss;
> > > +
> > > +    drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", 
> > > snapshot->reg.instdone.ring);
> > > +
> > > +    if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
> > > +        return;
> > > +
> > > +    if (is_slice_common_per_gslice(xe) == false) {
> > > +        drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
> > > +               snapshot->reg.instdone.slice_common[0]);
> > > +        drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
> > > +               snapshot->reg.instdone.slice_common_extra[0]);
> > > +        drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
> > > +               snapshot->reg.instdone.slice_common_extra2[0]);
> > > +    } else {
> > > +        for_each_geometry_dss(dss, gt, group, instance) {
> > > +            drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
> > > +                   snapshot->reg.instdone.slice_common[dss]);
> > > +            drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
> > > +                   snapshot->reg.instdone.slice_common_extra[dss]);
> > > +            drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
> > > +                   snapshot->reg.instdone.slice_common_extra2[dss]);
> > > +        }
> > > +    }
> > > +
> > > +    for_each_geometry_dss(dss, gt, group, instance) {
> > > +        drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
> > > +               snapshot->reg.instdone.sampler[dss]);
> > > +        drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
> > > +               snapshot->reg.instdone.row[dss]);
> > > +
> > > +        if (GRAPHICS_VERx100(xe) >= 1255)
> > > +            drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
> > > +                   dss, snapshot->reg.instdone.geom_svg[dss]);
> > > +    }
> > > +}
> > > +
> > >   /**
> > >    * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine 
> > > snapshot.
> > >    * @snapshot: Xe HW Engine snapshot object.
> > > @@ -891,9 +1010,12 @@ void xe_hw_engine_snapshot_print(struct 
> > > xe_hw_engine_snapshot *snapshot,
> > >       drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
> > >       drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", 
> > > snapshot->reg.ring_dma_fadd);
> > >       drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
> > > +    xe_hw_engine_snapshot_instdone_print(snapshot, p);
> > > +
> > >       if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
> > >           drm_printf(p, "\tRCU_MODE: 0x%08x\n",
> > >                  snapshot->reg.rcu_mode);
> > > +    drm_puts(p, "\n");
> > >   }
> > >   /**
> > > @@ -908,6 +1030,12 @@ void xe_hw_engine_snapshot_free(struct 
> > > xe_hw_engine_snapshot *snapshot)
> > >       if (!snapshot)
> > >           return;
> > > +    kfree(snapshot->reg.instdone.slice_common);
> > > +    kfree(snapshot->reg.instdone.slice_common_extra);
> > > +    kfree(snapshot->reg.instdone.slice_common_extra2);
> > > +    kfree(snapshot->reg.instdone.sampler);
> > > +    kfree(snapshot->reg.instdone.row);
> > > +    kfree(snapshot->reg.instdone.geom_svg);
> > >       kfree(snapshot->name);
> > >       kfree(snapshot);
> > >   }
> > > diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h 
> > > b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> > > index 27deaa31efd31..9f9755e31b9fe 100644
> > > --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> > > @@ -211,6 +211,22 @@ struct xe_hw_engine_snapshot {
> > >           u32 ipehr;
> > >           /** @reg.rcu_mode: RCU_MODE */
> > >           u32 rcu_mode;
> > > +        struct {
> > > +            /** @reg.instdone.ring: RING_INSTDONE */
> > > +            u32 ring;
> > > +            /** @reg.instdone.slice_common: SC_INSTDONE */
> > > +            u32 *slice_common;
> > > +            /** @reg.instdone.slice_common_extra: SC_INSTDONE_EXTRA */
> > > +            u32 *slice_common_extra;
> > > +            /** @reg.instdone.slice_common_extra2: SC_INSTDONE_EXTRA2 */
> > > +            u32 *slice_common_extra2;
> > > +            /** @reg.instdone.sampler: SAMPLER_INSTDONE */
> > > +            u32 *sampler;
> > > +            /** @reg.instdone.row: ROW_INSTDONE */
> > > +            u32 *row;
> > > +            /** @reg.instdone.geom_svg: INSTDONE_GEOM_SVGUNIT */
> > > +            u32 *geom_svg;
> > > +        } instdone;
> > >       } reg;
> > >   };



More information about the Intel-xe mailing list