[PATCH v4 3/3] drm/xe: Faster devcoredump
Cavitt, Jonathan
jonathan.cavitt at intel.com
Wed Jul 31 22:31:01 UTC 2024
-----Original Message-----
From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Matthew Brost
Sent: Wednesday, July 31, 2024 2:32 PM
To: intel-xe at lists.freedesktop.org; dri-devel at lists.freedesktop.org
Cc: maarten.lankhorst at linux.intel.com; Vivi, Rodrigo <rodrigo.vivi at intel.com>
Subject: [PATCH v4 3/3] drm/xe: Faster devcoredump
>
> The current algorithm for reading out the devcoredump is O(N*N), where
> N is the size of the coredump, due to the use of drm_coredump_printer
> in xe_devcoredump_read. Switch to an O(N) algorithm which prints the
> devcoredump into a readable format in the snapshot work and update
> xe_devcoredump_read to memcpy directly from that readable format.
>
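A toy cost model for anyone following along (userspace C, made-up
sizes, not driver code -- the memcpy into tmp stands in for the
coredump printer re-rendering the snapshot on every read() call):

#include <string.h>

#define N      (1 << 20)   /* dump size, arbitrary */
#define CHUNK  4096        /* read() granularity, arbitrary */

static char dump[N], tmp[N], out[N];

/* Old path: each read() re-renders the snapshot from the start just to
 * reach 'offset', so one call costs O(offset + count) and reading the
 * whole dump in CHUNK-sized pieces costs O(N*N/CHUNK) ~ O(N*N). */
static size_t read_old(size_t offset, size_t count)
{
	memcpy(tmp, dump, offset + count);      /* the re-render */
	memcpy(out + offset, tmp + offset, count);
	return count;
}

/* New path: the dump was rendered once in the snapshot worker; each
 * read() is a single memcpy, so the whole loop is O(N). */
static size_t read_new(size_t offset, size_t count)
{
	memcpy(out + offset, dump + offset, count);
	return count;
}

int main(void)
{
	size_t off;

	for (off = 0; off < N; off += CHUNK)
		read_old(off, CHUNK);   /* ~N*N/CHUNK bytes moved */
	for (off = 0; off < N; off += CHUNK)
		read_new(off, CHUNK);   /* ~N bytes moved */
	return 0;
}
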
> v2:
> - Fix double free on devcoredump removal (Testing)
> - Set read_data_size after snap work flush
> - Adjust remaining in iterator upon realloc (Testing)
> - Set read_data upon realloc (Testing)
> v3:
> - Kernel doc
>
> Reported-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2408
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_devcoredump.c | 112 ++++++++++++++++------
> drivers/gpu/drm/xe/xe_devcoredump_types.h | 4 +
> 2 files changed, 85 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
> index d8d8ca2c19d3..b802a35c22f2 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -66,22 +66,9 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
> return &q->gt->uc.guc;
> }
>
> -static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
> -{
> - struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
> -
> - /* keep going if fw fails as we still want to save the memory and SW data */
> - if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
> - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
> - xe_vm_snapshot_capture_delayed(ss->vm);
> - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
> - xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
> -}
> -
> -static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> - size_t count, void *data, size_t datalen)
> +static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
> + struct xe_devcoredump *coredump)
> {
> - struct xe_devcoredump *coredump = data;
> struct xe_device *xe;
> struct xe_devcoredump_snapshot *ss;
> struct drm_printer p;
> @@ -89,18 +76,12 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> struct timespec64 ts;
> int i;
>
> - if (!coredump)
> - return -ENODEV;
> -
> xe = coredump_to_xe(coredump);
> ss = &coredump->snapshot;
>
> - /* Ensure delayed work is captured before continuing */
> - flush_work(&ss->work);
> -
> iter.data = buffer;
> iter.offset = 0;
> - iter.start = offset;
> + iter.start = 0;
> iter.remain = count;
>
> p = drm_coredump_printer(&iter);
> @@ -131,13 +112,86 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> drm_printf(&p, "\n**** VM state ****\n");
> xe_vm_snapshot_print(coredump->snapshot.vm, &p);
>
> - return count - iter.remain;
> + return iter.offset;
> +}
> +
> +static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
> +{
> + int i;
> +
> + xe_guc_ct_snapshot_free(ss->ct);
> + ss->ct = NULL;
> +
> + xe_guc_exec_queue_snapshot_free(ss->ge);
> + ss->ge = NULL;
> +
> + xe_sched_job_snapshot_free(ss->job);
> + ss->job = NULL;
> +
> + for (i = 0; i < XE_NUM_HW_ENGINES; i++)
> + if (ss->hwe[i]) {
> + xe_hw_engine_snapshot_free(ss->hwe[i]);
> + ss->hwe[i] = NULL;
> + }
> +
> + xe_vm_snapshot_free(ss->vm);
> + ss->vm = NULL;
> +}
> +
> +static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
> +{
> + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
> + struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
> +
> + /* keep going if fw fails as we still want to save the memory and SW data */
> + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
> + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
> + xe_vm_snapshot_capture_delayed(ss->vm);
> + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
> + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
> +
> + /* Calculate devcoredump size */
> + ss->read_data_size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
> +
> + ss->read_data = kvmalloc(ss->read_data_size, GFP_USER);
> + if (!ss->read_data)
> + return;
> +
> + __xe_devcoredump_read(ss->read_data, ss->read_data_size, coredump);
> + xe_devcoredump_snapshot_free(ss);
> +}
> +
> +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> + size_t count, void *data, size_t datalen)
> +{
> + struct xe_devcoredump *coredump = data;
> + struct xe_devcoredump_snapshot *ss;
> + ssize_t byte_copied;
> +
> + if (!coredump)
> + return -ENODEV;
> +
> + ss = &coredump->snapshot;
> +
> + /* Ensure delayed work is captured before continuing */
> + flush_work(&ss->work);
> +
> + if (!ss->read_data)
> + return -ENODEV;
> +
> + if (offset >= ss->read_data_size)
> + return 0;
> +
> + byte_copied = count < ss->read_data_size - offset ? count :
> + ss->read_data_size - offset;
> + memcpy(buffer, ss->read_data + offset, byte_copied);
> +
> + return byte_copied;
> }
>
> static void xe_devcoredump_free(void *data)
> {
> struct xe_devcoredump *coredump = data;
> - int i;
>
> /* Our device is gone. Nothing to do... */
> if (!data || !coredump_to_xe(coredump))
> @@ -145,13 +199,8 @@ static void xe_devcoredump_free(void *data)
>
> cancel_work_sync(&coredump->snapshot.work);
>
> - xe_guc_ct_snapshot_free(coredump->snapshot.ct);
> - xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
> - xe_sched_job_snapshot_free(coredump->snapshot.job);
> - for (i = 0; i < XE_NUM_HW_ENGINES; i++)
> - if (coredump->snapshot.hwe[i])
> - xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
> - xe_vm_snapshot_free(coredump->snapshot.vm);
> + xe_devcoredump_snapshot_free(&coredump->snapshot);
> + kvfree(coredump->snapshot.read_data);
>
> /* To prevent stale data on next snapshot, clear everything */
> memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
> @@ -260,4 +309,5 @@ int xe_devcoredump_init(struct xe_device *xe)
> {
> return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm);
> }
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> index 923cdf72a816..0298037edae4 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
> +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> @@ -46,6 +46,10 @@ struct xe_devcoredump_snapshot {
> struct xe_sched_job_snapshot *job;
> /** @vm: Snapshot of VM state */
> struct xe_vm_snapshot *vm;
> + /** @read_data_size: size of read data */
> + ssize_t read_data_size;
> + /** @read_data: Read data */
> + void *read_data;
Perhaps the data size and read data could be bundled together in the same
struct?
struct {
ssize_t size;
void *data;
} read_data;
It's not a big deal if this stays untouched; just something to consider.
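If you do pick that up, I'd expect the call sites to end up looking
something like this (hypothetical sketch against this patch, field
names up to you):

	ss->read_data.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
	ss->read_data.data = kvmalloc(ss->read_data.size, GFP_USER);
	if (!ss->read_data.data)
		return;
	...
	memcpy(buffer, ss->read_data.data + offset, byte_copied);
	kvfree(coredump->snapshot.read_data.data);
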
Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
-Jonathan Cavitt
> };
>
> /**
> --
> 2.34.1
>
>