[Intel-xe] [PATCH v3 10/11] drm/xe: Add HW Engine snapshot to xe_devcoredump.
Matthew Brost
matthew.brost at intel.com
Tue May 9 01:08:42 UTC 2023
On Mon, May 08, 2023 at 05:08:24PM -0400, Rodrigo Vivi wrote:
> Let's continue to add our existent simple logs to devcoredump one
> by one. Any format change should come on follow-up work.
>
> v2: remove unnecessary, and now duplicated, dma_fence annotation. (Matthew)
>
> Cc: Matthew Brost <matthew.brost at intel.com>
Reviewed-by: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> ---
> drivers/gpu/drm/xe/xe_devcoredump.c | 43 +++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_devcoredump_types.h | 4 +++
> 2 files changed, 47 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
> index b9f343c74828..2175f1ef0501 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -9,10 +9,13 @@
> #include <linux/devcoredump.h>
> #include <generated/utsrelease.h>
>
> +#include "xe_device.h"
> #include "xe_engine.h"
> +#include "xe_force_wake.h"
> #include "xe_gt.h"
> #include "xe_guc_ct.h"
> #include "xe_guc_submit.h"
> +#include "xe_hw_engine.h"
>
> /**
> * DOC: Xe device coredump
> @@ -62,6 +65,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> struct drm_printer p;
> struct drm_print_iterator iter;
> struct timespec64 ts;
> + struct xe_engine *e;
> + struct xe_hw_engine *hwe;
> + enum xe_hw_engine_id id;
>
> /* Our device is gone already... */
> if (!data || !coredump_to_xe(coredump))
> @@ -75,6 +81,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> mutex_lock(&coredump->lock);
>
> ss = &coredump->snapshot;
> + e = coredump->faulty_engine;
> p = drm_coredump_printer(&iter);
>
> drm_printf(&p, "**** Xe Device Coredump ****\n");
> @@ -92,6 +99,10 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
> xe_guc_engine_snapshot_print(coredump->snapshot.ge, &p);
>
> + drm_printf(&p, "\n**** HW Engines ****\n");
> + for_each_hw_engine(hwe, e->gt, id)
> + xe_hw_engine_snapshot_print(coredump->snapshot.hwe[id], &p);
> +
> mutex_unlock(&coredump->lock);
>
> return count - iter.remain;
> @@ -100,6 +111,8 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> static void xe_devcoredump_free(void *data)
> {
> struct xe_devcoredump *coredump = data;
> + struct xe_hw_engine *hwe;
> + enum xe_hw_engine_id id;
>
> /* Our device is gone. Nothing to do... */
> if (!data || !coredump_to_xe(coredump))
> @@ -109,6 +122,8 @@ static void xe_devcoredump_free(void *data)
>
> xe_guc_ct_snapshot_free(coredump->snapshot.ct);
> xe_guc_engine_snapshot_free(coredump->snapshot.ge);
> + for_each_hw_engine(hwe, coredump->faulty_engine->gt, id)
> + xe_hw_engine_snapshot_free(coredump->snapshot.hwe[id]);
>
> coredump->faulty_engine = NULL;
> drm_info(&coredump_to_xe(coredump)->drm,
> @@ -122,13 +137,41 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump)
> struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
> struct xe_engine *e = coredump->faulty_engine;
> struct xe_guc *guc = engine_to_guc(e);
> + struct xe_hw_engine *hwe;
> + enum xe_hw_engine_id id;
> + u32 adj_logical_mask = e->logical_mask;
> + u32 width_mask = (0x1 << e->width) - 1;
> + int i;
>
> lockdep_assert_held(&coredump->lock);
> ss->snapshot_time = ktime_get_real();
> ss->boot_time = ktime_get_boottime();
>
> + for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
> + if (adj_logical_mask & BIT(i)) {
> + adj_logical_mask |= width_mask << i;
> + i += e->width;
> + } else {
> + ++i;
> + }
> + }
> +
> + xe_force_wake_get(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
> +
> coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
> coredump->snapshot.ge = xe_guc_engine_snapshot_capture(e, true);
> +
> + for_each_hw_engine(hwe, e->gt, id) {
> + if (hwe->class != e->hwe->class ||
> + !(BIT(hwe->logical_instance) & adj_logical_mask)) {
> + coredump->snapshot.hwe[id] = NULL;
> + continue;
> + }
> + coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe,
> + true);
> + }
> +
> + xe_force_wake_put(gt_to_fw(e->gt), XE_FORCEWAKE_ALL);
> }
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> index e055b266af70..8b17ecf1b6e6 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
> +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> @@ -9,6 +9,8 @@
> #include <linux/ktime.h>
> #include <linux/mutex.h>
>
> +#include "xe_hw_engine_types.h"
> +
> struct xe_device;
>
> /**
> @@ -29,6 +31,8 @@ struct xe_devcoredump_snapshot {
> struct xe_guc_ct_snapshot *ct;
> /** @ge: Guc Engine snapshot */
> struct xe_guc_submit_engine_snapshot *ge;
> + /** @hwe: HW Engine snapshot array */
> + struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
> };
>
> /**
> --
> 2.39.2
>
More information about the Intel-xe
mailing list