[PATCH v4 3/3] drm/xe: Faster devcoredump
Matthew Brost
matthew.brost at intel.com
Wed Jul 31 21:32:21 UTC 2024
The current algorithm to read out devcoredump is O(N*N) where N is the
size of coredump due to usage of the drm_coredump_printer in
xe_devcoredump_read. Switch to a O(N) algorithm which prints the
devcoredump into a readable format in snapshot work and update
xe_devcoredump_read to memcpy from the readable format directly.
v2:
- Fix double free on devcoredump removal (Testing)
- Set read_data_size after snap work flush
- Adjust remaining in iterator upon realloc (Testing)
- Set read_data upon realloc (Testing)
v3:
- Kernel doc
Reported-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2408
Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
drivers/gpu/drm/xe/xe_devcoredump.c | 112 ++++++++++++++++------
drivers/gpu/drm/xe/xe_devcoredump_types.h | 4 +
2 files changed, 85 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index d8d8ca2c19d3..b802a35c22f2 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -66,22 +66,9 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
return &q->gt->uc.guc;
}
-static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
-{
- struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
-
- /* keep going if fw fails as we still want to save the memory and SW data */
- if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
- xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
- xe_vm_snapshot_capture_delayed(ss->vm);
- xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
- xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
-}
-
-static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
- size_t count, void *data, size_t datalen)
+static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
+ struct xe_devcoredump *coredump)
{
- struct xe_devcoredump *coredump = data;
struct xe_device *xe;
struct xe_devcoredump_snapshot *ss;
struct drm_printer p;
@@ -89,18 +76,12 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
struct timespec64 ts;
int i;
- if (!coredump)
- return -ENODEV;
-
xe = coredump_to_xe(coredump);
ss = &coredump->snapshot;
- /* Ensure delayed work is captured before continuing */
- flush_work(&ss->work);
-
iter.data = buffer;
iter.offset = 0;
- iter.start = offset;
+ iter.start = 0;
iter.remain = count;
p = drm_coredump_printer(&iter);
@@ -131,13 +112,86 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
drm_printf(&p, "\n**** VM state ****\n");
xe_vm_snapshot_print(coredump->snapshot.vm, &p);
- return count - iter.remain;
+ return iter.offset;
+}
+
+static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
+{
+ int i;
+
+ xe_guc_ct_snapshot_free(ss->ct);
+ ss->ct = NULL;
+
+ xe_guc_exec_queue_snapshot_free(ss->ge);
+ ss->ge = NULL;
+
+ xe_sched_job_snapshot_free(ss->job);
+ ss->job = NULL;
+
+ for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+ if (ss->hwe[i]) {
+ xe_hw_engine_snapshot_free(ss->hwe[i]);
+ ss->hwe[i] = NULL;
+ }
+
+ xe_vm_snapshot_free(ss->vm);
+ ss->vm = NULL;
+}
+
+static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
+{
+ struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
+ struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
+
+ /* keep going if fw fails as we still want to save the memory and SW data */
+ if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
+ xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
+ xe_vm_snapshot_capture_delayed(ss->vm);
+ xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
+ xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+
+ /* Calculate devcoredump size */
+ ss->read_data_size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
+
+ ss->read_data = kvmalloc(ss->read_data_size, GFP_USER);
+ if (!ss->read_data)
+ return;
+
+ __xe_devcoredump_read(ss->read_data, ss->read_data_size, coredump);
+ xe_devcoredump_snapshot_free(ss);
+}
+
+static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
+ size_t count, void *data, size_t datalen)
+{
+ struct xe_devcoredump *coredump = data;
+ struct xe_devcoredump_snapshot *ss;
+ ssize_t byte_copied;
+
+ if (!coredump)
+ return -ENODEV;
+
+ ss = &coredump->snapshot;
+
+ /* Ensure delayed work is captured before continuing */
+ flush_work(&ss->work);
+
+ if (!ss->read_data)
+ return -ENODEV;
+
+ if (offset >= ss->read_data_size)
+ return 0;
+
+ byte_copied = count < ss->read_data_size - offset ? count :
+ ss->read_data_size - offset;
+ memcpy(buffer, ss->read_data + offset, byte_copied);
+
+ return byte_copied;
}
static void xe_devcoredump_free(void *data)
{
struct xe_devcoredump *coredump = data;
- int i;
/* Our device is gone. Nothing to do... */
if (!data || !coredump_to_xe(coredump))
@@ -145,13 +199,8 @@ static void xe_devcoredump_free(void *data)
cancel_work_sync(&coredump->snapshot.work);
- xe_guc_ct_snapshot_free(coredump->snapshot.ct);
- xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
- xe_sched_job_snapshot_free(coredump->snapshot.job);
- for (i = 0; i < XE_NUM_HW_ENGINES; i++)
- if (coredump->snapshot.hwe[i])
- xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
- xe_vm_snapshot_free(coredump->snapshot.vm);
+ xe_devcoredump_snapshot_free(&coredump->snapshot);
+ kvfree(coredump->snapshot.read_data);
/* To prevent stale data on next snapshot, clear everything */
memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
@@ -260,4 +309,5 @@ int xe_devcoredump_init(struct xe_device *xe)
{
return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm);
}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 923cdf72a816..0298037edae4 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -46,6 +46,10 @@ struct xe_devcoredump_snapshot {
struct xe_sched_job_snapshot *job;
/** @vm: Snapshot of VM state */
struct xe_vm_snapshot *vm;
+ /** @read_data_size: size of read data */
+ ssize_t read_data_size;
+ /** @read_data: Read data */
+ void *read_data;
};
/**
--
2.34.1
More information about the dri-devel
mailing list