[Intel-xe] [PATCH v3 3/3] drm/xe: Add INSTDONE registers to devcoredump
Matt Roper
matthew.d.roper at intel.com
Thu Oct 12 17:46:16 UTC 2023
On Tue, Oct 10, 2023 at 11:41:51AM -0700, José Roberto de Souza wrote:
> This registers contains important information that can help with debug
> of GPU hangs.
>
> While at it also fixing the double line jump at the end of engine
> registers for CCS engines.
>
> v2:
> - print other INSTDONE registers
>
> v3:
> - add for_each_geometry/compute_dss()
>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 +
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 9 ++
> drivers/gpu/drm/xe/xe_hw_engine.c | 100 ++++++++++++++++++++++-
> drivers/gpu/drm/xe/xe_hw_engine_types.h | 10 +++
> 4 files changed, 117 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> index 35dd4837dd75f..32465e384fc24 100644
> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> @@ -32,6 +32,7 @@
> #define RING_ACTHD_UDW(base) XE_REG((base) + 0x5c)
> #define RING_DMA_FADD_UDW(base) XE_REG((base) + 0x60)
> #define RING_IPEHR(base) XE_REG((base) + 0x68)
> +#define RING_INSTDONE(base) XE_REG((base) + 0x6c)
> #define RING_ACTHD(base) XE_REG((base) + 0x74)
> #define RING_DMA_FADD(base) XE_REG((base) + 0x78)
> #define RING_HWS_PGA(base) XE_REG((base) + 0x80)
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index cd1821d96a5d8..c6650124ae6a6 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -105,6 +105,8 @@
> #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16)
> #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
>
> +#define XEHPG_INSTDONE_GEOM_SVG XE_REG_MCR(0x666c)
> +
> #define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED)
> #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11)
>
> @@ -117,6 +119,10 @@
>
> #define COMMON_SLICE_CHICKEN1 XE_REG(0x7010)
>
> +#define SLICE_COMMON_INSTDONE XE_REG(0x7100)
> +#define SLICE_COMMON_INSTDONE_EXTRA XE_REG(0x7104)
> +#define SLICE_COMMON_INSTDONE_EXTRA2 XE_REG(0x7108)
These three are MCR registers on Xe_HP and beyond (one instance per
gslice).
Matt
> +
> #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED)
> #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)
>
> @@ -299,6 +305,9 @@
> #define XE_OAG_BLT_BUSY_FREE XE_REG(0xdbbc)
> #define XE_OAG_RENDER_BUSY_FREE XE_REG(0xdbdc)
>
> +#define SAMPLER_INSTDONE XE_REG_MCR(0xe160)
> +#define ROW_INSTDONE XE_REG_MCR(0xe164)
> +
> #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
> #define ENABLE_SMALLPL REG_BIT(15)
> #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 37b3ee1268090..6e144859abb1a 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -16,6 +16,7 @@
> #include "xe_execlist.h"
> #include "xe_force_wake.h"
> #include "xe_gt.h"
> +#include "xe_gt_mcr.h"
> #include "xe_gt_topology.h"
> #include "xe_hw_fence.h"
> #include "xe_irq.h"
> @@ -655,6 +656,40 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
> xe_hw_fence_irq_run(hwe->fence_irq);
> }
>
> +static void
> +xe_he_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
> + struct xe_hw_engine_snapshot *snapshot)
> +{
> + struct xe_gt *gt = hwe->gt;
> + struct xe_device *xe = gt_to_xe(gt);
> + unsigned int dss;
> + int group, instance;
> +
> + snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
> +
> + if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
> + return;
> +
> + snapshot->reg.instdone.slice_common =
> + xe_mmio_read32(gt, SLICE_COMMON_INSTDONE);
> + snapshot->reg.instdone.slice_common_extra =
> + xe_mmio_read32(gt, SLICE_COMMON_INSTDONE_EXTRA);
> + snapshot->reg.instdone.slice_common_extra2 =
> + xe_mmio_read32(gt, SLICE_COMMON_INSTDONE_EXTRA2);
> +
> + for_each_geometry_dss(gt, dss, group, instance) {
> + snapshot->reg.instdone.sampler[dss] =
> + xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
> + snapshot->reg.instdone.row[dss] =
> + xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);
> +
> + if (GRAPHICS_VERx100(xe) >= 1255)
> + snapshot->reg.instdone.geom_svg[dss] =
> + xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVG,
> + group, instance);
> + }
> +}
> +
> /**
> * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
> * @hwe: Xe HW Engine.
> @@ -679,10 +714,30 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
> if (!snapshot)
> return NULL;
>
> + /* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
> + * includes xe_hw_engine_types.h the length of this 3 registers can't be
> + * set in struct xe_hw_engine_snapshot, so here doing additional
> + * allocations.
> + */
> + len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
> + snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
> + snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
> + snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
> + if (!snapshot->reg.instdone.sampler ||
> + !snapshot->reg.instdone.row ||
> + !snapshot->reg.instdone.geom_svg) {
> + xe_hw_engine_snapshot_free(snapshot);
> + return NULL;
> + }
> +
> len = strlen(hwe->name) + 1;
> snapshot->name = kzalloc(len, GFP_ATOMIC);
> - if (snapshot->name)
> - strscpy(snapshot->name, hwe->name, len);
> + if (!snapshot->name) {
> + xe_hw_engine_snapshot_free(snapshot);
> + return NULL;
> + }
> +
> + strscpy(snapshot->name, hwe->name, len);
>
> snapshot->hwe = hwe;
> snapshot->logical_instance = hwe->logical_instance;
> @@ -730,6 +785,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
> snapshot->reg.ring_dma_fadd =
> hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
> snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
> + xe_he_engine_snapshot_instdone_capture(hwe, snapshot);
>
> if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
> snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
> @@ -737,6 +793,38 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
> return snapshot;
> }
>
> +static void
> +xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
> +{
> + struct xe_gt *gt = snapshot->hwe->gt;
> + struct xe_device *xe = gt_to_xe(gt);
> + unsigned int dss;
> + int group, instance;
> +
> + drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);
> +
> + if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
> + return;
> +
> + drm_printf(p, "\tSC_INSTDONE: 0x%08x\n",
> + snapshot->reg.instdone.slice_common);
> + drm_printf(p, "\tSC_EXTRA_INSTDONE: 0x%08x\n",
> + snapshot->reg.instdone.slice_common_extra);
> + drm_printf(p, "\tSC_EXTRA2_INSTDONE: 0x%08x\n",
> + snapshot->reg.instdone.slice_common_extra2);
> +
> + for_each_geometry_dss(gt, dss, group, instance) {
> + drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
> + snapshot->reg.instdone.sampler[dss]);
> + drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
> + snapshot->reg.instdone.row[dss]);
> +
> + if (GRAPHICS_VERx100(xe) >= 1255)
> + drm_printf(p, "\tGEOM_SVGUNIT_INSTDONE[%u]: 0x%08x\n",
> + dss, snapshot->reg.instdone.geom_svg[dss]);
> + }
> +}
> +
> /**
> * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
> * @snapshot: Xe HW Engine snapshot object.
> @@ -785,10 +873,13 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
> drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
> snapshot->reg.ring_dma_fadd_udw,
> snapshot->reg.ring_dma_fadd);
> - drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
> + drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
> + xe_hw_engine_snapshot_instdone_print(snapshot, p);
> +
> if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
> drm_printf(p, "\tRCU_MODE: 0x%08x\n",
> snapshot->reg.rcu_mode);
> + drm_puts(p, "\n");
> }
>
> /**
> @@ -803,6 +894,9 @@ void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
> if (!snapshot)
> return;
>
> + kfree(snapshot->reg.instdone.sampler);
> + kfree(snapshot->reg.instdone.row);
> + kfree(snapshot->reg.instdone.geom_svg);
> kfree(snapshot->name);
> kfree(snapshot);
> }
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> index 71c0a0169e62a..0a10a79962c97 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
> @@ -221,6 +221,16 @@ struct xe_hw_engine_snapshot {
> u32 ipehr;
> /** @rcu_mode: RCU_MODE */
> u32 rcu_mode;
> + /** @ring_instdone: RING_INSTDONE */
> + struct {
> + u32 ring;
> + u32 slice_common;
> + u32 slice_common_extra;
> + u32 slice_common_extra2;
> + u32 *sampler;
> + u32 *row;
> + u32 *geom_svg;
> + } instdone;
> } reg;
> };
>
> --
> 2.42.0
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe
mailing list