[Intel-xe] [PATCH v4 04/11] drm/xe: Convert GuC CT print to snapshot capture and print.
Matthew Brost
matthew.brost at intel.com
Thu May 18 01:50:59 UTC 2023
On Tue, May 16, 2023 at 10:54:09AM -0400, Rodrigo Vivi wrote:
> The goal is to allow for a snapshot capture to be taken at the time
> of the crash, while the print out can happen at a later time through
> the exposed devcoredump virtual device.
>
> v2: Handle memory allocation failures. (Matthew)
> Do not use GFP_ATOMIC on cases like debugfs prints. (Matthew)
> v3: checkpatch fixes
> v4: Do not use atomic in the g2h_worker_func (Matthew)
>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
Reviewed-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc.c | 2 +-
> drivers/gpu/drm/xe/xe_guc_ct.c | 166 +++++++++++++++++++++++----
> drivers/gpu/drm/xe/xe_guc_ct.h | 8 +-
> drivers/gpu/drm/xe/xe_guc_ct_types.h | 26 +++++
> drivers/gpu/drm/xe/xe_guc_submit.c | 2 +-
> 5 files changed, 179 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> index eb4af4c71124..b72407e24d09 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -857,6 +857,6 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
>
> xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
>
> - xe_guc_ct_print(&guc->ct, p);
> + xe_guc_ct_print(&guc->ct, p, false);
> xe_guc_submit_print(guc, p);
> }
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index e16e5fe37ed4..e8c2edb1359d 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -595,7 +595,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
>
> broken:
> drm_err(drm, "No forward process on H2G, reset required");
> - xe_guc_ct_print(ct, &p);
> + xe_guc_ct_print(ct, &p, true);
> ct->ctbs.h2g.info.broken = true;
>
> return -EDEADLK;
> @@ -1088,38 +1088,40 @@ static void g2h_worker_func(struct work_struct *w)
> struct drm_device *drm = &ct_to_xe(ct)->drm;
> struct drm_printer p = drm_info_printer(drm->dev);
>
> - xe_guc_ct_print(ct, &p);
> + xe_guc_ct_print(ct, &p, false);
> kick_reset(ct);
> }
> } while (ret == 1);
> xe_device_mem_access_put(ct_to_xe(ct));
> }
>
> -static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
> - struct drm_printer *p)
> +static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
> + struct guc_ctb_snapshot *snapshot,
> + bool atomic)
> {
> u32 head, tail;
>
> - drm_printf(p, "\tsize: %d\n", ctb->info.size);
> - drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space);
> - drm_printf(p, "\thead: %d\n", ctb->info.head);
> - drm_printf(p, "\ttail: %d\n", ctb->info.tail);
> - drm_printf(p, "\tspace: %d\n", ctb->info.space);
> - drm_printf(p, "\tbroken: %d\n", ctb->info.broken);
> + xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
> + sizeof(struct guc_ct_buffer_desc));
> + memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
>
> - head = desc_read(xe, ctb, head);
> - tail = desc_read(xe, ctb, tail);
> - drm_printf(p, "\thead (memory): %d\n", head);
> - drm_printf(p, "\ttail (memory): %d\n", tail);
> - drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status));
> + snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
> + atomic ? GFP_ATOMIC : GFP_KERNEL);
> +
> + if (!snapshot->cmds) {
> + drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n");
> + return;
> + }
> +
> + head = snapshot->desc.head;
> + tail = snapshot->desc.tail;
>
> if (head != tail) {
> struct iosys_map map =
> IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
>
> while (head != tail) {
> - drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
> - xe_map_rd(xe, &map, 0, u32));
> + snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
> ++head;
> if (head == ctb->info.size) {
> head = 0;
> @@ -1131,20 +1133,140 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
> }
> }
>
> -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
> +static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
> + struct drm_printer *p)
> +{
> + u32 head, tail;
> +
> + drm_printf(p, "\tsize: %d\n", snapshot->info.size);
> + drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
> + drm_printf(p, "\thead: %d\n", snapshot->info.head);
> + drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
> + drm_printf(p, "\tspace: %d\n", snapshot->info.space);
> + drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
> + drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
> + drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
> + drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
> +
> + if (!snapshot->cmds)
> + return;
> +
> + head = snapshot->desc.head;
> + tail = snapshot->desc.tail;
> +
> + while (head != tail) {
> + drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
> + snapshot->cmds[head]);
> + ++head;
> + if (head == snapshot->info.size)
> + head = 0;
> + }
> +}
> +
> +static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
> {
> + kfree(snapshot->cmds);
> +}
> +
> +/**
> + * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
> + * @ct: GuC CT object.
> + * @atomic: Boolean to indicate if this is called from atomic context like
> + * reset or CTB handler or from some regular path like debugfs.
> + *
> + * This can be printed out in a later stage like during dev_coredump
> + * analysis.
> + *
> + * Returns: a GuC CT snapshot object that must be freed by the caller
> + * by using `xe_guc_ct_snapshot_free`.
> + */
> +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
> + bool atomic)
> +{
> + struct xe_device *xe = ct_to_xe(ct);
> + struct xe_guc_ct_snapshot *snapshot;
> +
> + snapshot = kzalloc(sizeof(*snapshot),
> + atomic ? GFP_ATOMIC : GFP_KERNEL);
> +
> + if (!snapshot) {
> + drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
> + return NULL;
> + }
> +
> if (ct->enabled) {
> + snapshot->ct_enabled = true;
> + guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
> + &snapshot->h2g, atomic);
> + guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
> + &snapshot->g2h, atomic);
> + }
> +
> + return snapshot;
> +}
> +
> +/**
> + * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
> + * @snapshot: GuC CT snapshot object.
> + * @p: drm_printer where it will be printed out.
> + *
> + * This function prints out a given GuC CT snapshot object.
> + */
> +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
> + struct drm_printer *p)
> +{
> + if (!snapshot)
> + return;
> +
> + if (snapshot->ct_enabled) {
> drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
> - guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p);
> + guc_ctb_snapshot_print(&snapshot->h2g, p);
>
> drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
> - guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p);
> - drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding);
> + guc_ctb_snapshot_print(&snapshot->g2h, p);
> +
> + drm_printf(p, "\tg2h outstanding: %d\n",
> + snapshot->g2h_outstanding);
> } else {
> drm_puts(p, "\nCT disabled\n");
> }
> }
>
> +/**
> + * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
> + * @snapshot: GuC CT snapshot object.
> + *
> + * This function frees all the memory that was allocated at capture
> + * time.
> + */
> +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
> +{
> + if (!snapshot)
> + return;
> +
> + guc_ctb_snapshot_free(&snapshot->h2g);
> + guc_ctb_snapshot_free(&snapshot->g2h);
> + kfree(snapshot);
> +}
> +
> +/**
> + * xe_guc_ct_print - GuC CT Print.
> + * @ct: GuC CT.
> + * @p: drm_printer where it will be printed out.
> + * @atomic: Boolean to indicate if this is called from atomic context like
> + * reset or CTB handler or from some regular path like debugfs.
> + *
> + * This function quickly captures a snapshot and immediately prints it out.
> + */
> +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
> +{
> + struct xe_guc_ct_snapshot *snapshot;
> +
> + snapshot = xe_guc_ct_snapshot_capture(ct, atomic);
> + xe_guc_ct_snapshot_print(snapshot, p);
> + xe_guc_ct_snapshot_free(snapshot);
> +}
> +
> #ifdef XE_GUC_CT_SELFTEST
> /*
> * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow
> @@ -1166,7 +1288,7 @@ void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p)
> ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1);
> if (ret) {
> drm_printf(p, "Aborted pass %d, ret %d\n", i, ret);
> - xe_guc_ct_print(ct, p);
> + xe_guc_ct_print(ct, p, true);
> break;
> }
> }
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
> index 49fb74f91e4d..3e04ee64652c 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.h
> @@ -13,9 +13,15 @@ struct drm_printer;
> int xe_guc_ct_init(struct xe_guc_ct *ct);
> int xe_guc_ct_enable(struct xe_guc_ct *ct);
> void xe_guc_ct_disable(struct xe_guc_ct *ct);
> -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
> void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
>
> +struct xe_guc_ct_snapshot *
> +xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
> +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
> + struct drm_printer *p);
> +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
> +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
> +
> static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
> {
> wake_up_all(&ct->wq);
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> index 64e3dd14d4b2..93046d95b009 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> @@ -48,6 +48,32 @@ struct guc_ctb {
> struct guc_ctb_info info;
> };
>
> +/**
> + * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot
> + */
> +struct guc_ctb_snapshot {
> + /** @desc: snapshot of the CTB descriptor */
> + struct guc_ct_buffer_desc desc;
> + /** @cmds: snapshot of the CTB commands */
> + u32 *cmds;
> + /** @info: snapshot of the CTB info */
> + struct guc_ctb_info info;
> +};
> +
> +/**
> + * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot
> + */
> +struct xe_guc_ct_snapshot {
> + /** @ct_enabled: CT enabled info at capture time. */
> + bool ct_enabled;
> + /** @g2h_outstanding: G2H outstanding info at the capture time */
> + u32 g2h_outstanding;
> + /** @g2h: G2H CTB snapshot */
> + struct guc_ctb_snapshot g2h;
> + /** @h2g: H2G CTB snapshot */
> + struct guc_ctb_snapshot h2g;
> +};
> +
> /**
> * struct xe_guc_ct - GuC command transport (CT) layer
> *
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 231fb4145297..9e2950f1fd0d 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -764,7 +764,7 @@ static void simple_error_capture(struct xe_engine *e)
> }
>
> xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
> - xe_guc_ct_print(&guc->ct, &p);
> + xe_guc_ct_print(&guc->ct, &p, true);
> guc_engine_print(e, &p);
> for_each_hw_engine(hwe, guc_to_gt(guc), id) {
> if (hwe->class != e->hwe->class ||
> --
> 2.39.2
>
More information about the Intel-xe
mailing list