[PATCH v2] drm/xe: Faster devcoredump

Zanoni, Paulo R paulo.r.zanoni@intel.com
Fri Jul 26 22:01:17 UTC 2024


On Thu, 2024-07-25 at 22:21 -0700, Matthew Brost wrote:
> The current algorithm to read out the devcoredump is O(N*N), where N is
> the size of the coredump, due to the use of drm_coredump_printer in
> xe_devcoredump_read. Switch to an O(N) algorithm which prints the
> devcoredump into a readable format in the snapshot work and update
> xe_devcoredump_read to memcpy from the readable format directly.
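
For anyone wondering where the N*N above comes from: drm_coredump_printer()
tracks an offset through its struct drm_print_iterator and only copies out
bytes at or after iter.start, so every read() at offset O re-formats the
first O bytes of the dump just to throw them away. A minimal sketch of the
old pattern (mirroring the code this patch removes; print_whole_dump() is a
made-up stand-in for the real snapshot printing calls):

#include <drm/drm_print.h>

static ssize_t old_style_read(char *buffer, loff_t offset, size_t count,
			      void *data)
{
	struct drm_print_iterator iter = {
		.data	= buffer,
		.offset	= 0,
		.start	= offset,	/* bytes before this are formatted, then dropped */
		.remain	= count,
	};
	struct drm_printer p = drm_coredump_printer(&iter);

	print_whole_dump(&p, data);	/* O(offset) wasted work per call */

	return count - iter.remain;
}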

I just tested this:

root@martianriver:~# time cp /sys/class/drm/card0/device/devcoredump/data gpu-hang.data

real	0m0.313s
user	0m0.008s
sys	0m0.298s
root@martianriver:~# ls -lh gpu-hang.data
-rw------- 1 root root 221M Jul 26 14:47 gpu-hang.data

Going from an estimated 221 minutes to 0.3 seconds, I'd say it's an improvement.
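
(Back-of-the-envelope: under the old scheme a read at offset O re-formats
the first O bytes, so pulling an N-byte dump in chunks of size c costs
roughly N*N/(2*c) bytes of formatting work in total, versus N with this
patch. At N = 221M that blows up quickly, which matches the
minutes-vs-subsecond difference above.)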

> 
> v2:
>  - Fix double free on devcoredump removal (Testing)
>  - Set read_data_size after snap work flush
>  - Adjust remaining in iterator upon realloc (Testing)
>  - Set read_data upon realloc (Testing)
> 
> Reported-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2408
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_devcoredump.c       | 140 +++++++++++++++++-----
>  drivers/gpu/drm/xe/xe_devcoredump.h       |  13 ++
>  drivers/gpu/drm/xe/xe_devcoredump_types.h |   4 +
>  drivers/gpu/drm/xe/xe_vm.c                |   9 +-
>  drivers/gpu/drm/xe/xe_vm.h                |   4 +-
>  5 files changed, 136 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
> index d8d8ca2c19d3..6af161250a9e 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -66,22 +66,9 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
>  	return &q->gt->uc.guc;
>  }
>  
> -static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
> +static void __xe_devcoredump_read(char *buffer, size_t count,
> +				  struct xe_devcoredump *coredump)
>  {
> -	struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
> -
> -	/* keep going if fw fails as we still want to save the memory and SW data */
> -	if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
> -		xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
> -	xe_vm_snapshot_capture_delayed(ss->vm);
> -	xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
> -	xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
> -}
> -
> -static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> -				   size_t count, void *data, size_t datalen)
> -{
> -	struct xe_devcoredump *coredump = data;
>  	struct xe_device *xe;
>  	struct xe_devcoredump_snapshot *ss;
>  	struct drm_printer p;
> @@ -89,18 +76,12 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
>  	struct timespec64 ts;
>  	int i;
>  
> -	if (!coredump)
> -		return -ENODEV;
> -
>  	xe = coredump_to_xe(coredump);
>  	ss = &coredump->snapshot;
>  
> -	/* Ensure delayed work is captured before continuing */
> -	flush_work(&ss->work);
> -
>  	iter.data = buffer;
>  	iter.offset = 0;
> -	iter.start = offset;
> +	iter.start = 0;
>  	iter.remain = count;
>  
>  	p = drm_coredump_printer(&iter);
> @@ -129,15 +110,86 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
>  			xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
>  						    &p);
>  	drm_printf(&p, "\n**** VM state ****\n");
> -	xe_vm_snapshot_print(coredump->snapshot.vm, &p);
> +	xe_vm_snapshot_print(ss, coredump->snapshot.vm, &p);
>  
> -	return count - iter.remain;
> +	ss->read_data_size = iter.offset;
> +}
> +
> +static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
> +{
> +	int i;
> +
> +	xe_guc_ct_snapshot_free(ss->ct);
> +	ss->ct = NULL;
> +
> +	xe_guc_exec_queue_snapshot_free(ss->ge);
> +	ss->ge = NULL;
> +
> +	xe_sched_job_snapshot_free(ss->job);
> +	ss->job = NULL;
> +
> +	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
> +		if (ss->hwe[i]) {
> +			xe_hw_engine_snapshot_free(ss->hwe[i]);
> +			ss->hwe[i] = NULL;
> +		}
> +
> +	xe_vm_snapshot_free(ss->vm);
> +	ss->vm = NULL;
> +}
> +
> +static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
> +{
> +	struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
> +	struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
> +
> +	/* keep going if fw fails as we still want to save the memory and SW data */
> +	if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
> +		xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
> +	xe_vm_snapshot_capture_delayed(ss->vm);
> +	xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
> +	xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
> +
> +	ss->read_data = kvmalloc(SZ_16M, GFP_USER);
> +	if (!ss->read_data)
> +		return;
> +
> +	ss->read_data_size = SZ_16M;
> +	__xe_devcoredump_read(ss->read_data, SZ_16M, coredump);
> +	xe_devcoredump_snapshot_free(ss);
> +}
> +
> +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> +				   size_t count, void *data, size_t datalen)
> +{
> +	struct xe_devcoredump *coredump = data;
> +	struct xe_devcoredump_snapshot *ss;
> +	ssize_t byte_copied;
> +
> +	if (!coredump)
> +		return -ENODEV;
> +
> +	ss = &coredump->snapshot;
> +
> +	/* Ensure delayed work is captured before continuing */
> +	flush_work(&ss->work);
> +
> +	if (!ss->read_data)
> +		return -ENODEV;
> +
> +	if (offset >= ss->read_data_size)
> +		return 0;
> +
> +	byte_copied = count < ss->read_data_size - offset ? count :
> +		ss->read_data_size - offset;
> +	memcpy(buffer, ss->read_data + offset, byte_copied);
> +
> +	return byte_copied;
>  }
>  
>  static void xe_devcoredump_free(void *data)
>  {
>  	struct xe_devcoredump *coredump = data;
> -	int i;
>  
>  	/* Our device is gone. Nothing to do... */
>  	if (!data || !coredump_to_xe(coredump))
> @@ -145,13 +197,8 @@ static void xe_devcoredump_free(void *data)
>  
>  	cancel_work_sync(&coredump->snapshot.work);
>  
> -	xe_guc_ct_snapshot_free(coredump->snapshot.ct);
> -	xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
> -	xe_sched_job_snapshot_free(coredump->snapshot.job);
> -	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
> -		if (coredump->snapshot.hwe[i])
> -			xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
> -	xe_vm_snapshot_free(coredump->snapshot.vm);
> +	xe_devcoredump_snapshot_free(&coredump->snapshot);
> +	kvfree(coredump->snapshot.read_data);
>  
>  	/* To prevent stale data on next snapshot, clear everything */
>  	memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
> @@ -260,4 +307,33 @@ int xe_devcoredump_init(struct xe_device *xe)
>  {
>  	return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm);
>  }
> +
> +int xe_devcoredump_check_iterator(struct xe_devcoredump_snapshot *ss,
> +				  struct drm_printer *p, size_t len)
> +{
> +	struct drm_print_iterator *iterator = p->arg;
> +	size_t new_size;
> +	void *new_data;
> +
> +	if (ss->read_data_size > iterator->offset + len + SZ_4K)
> +		return 0;
> +
> +	new_size = roundup_pow_of_two(iterator->offset + len + SZ_4K);
> +
> +	new_data = kvmalloc(new_size, GFP_USER);
> +	if (!new_data)
> +		return -ENOMEM;
> +
> +	memcpy(new_data, iterator->data, iterator->offset);
> +
> +	kvfree(iterator->data);
> +
> +	iterator->data = new_data;
> +	iterator->remain += new_size - ss->read_data_size;
> +	ss->read_data_size = new_size;
> +	ss->read_data = new_data;
> +
> +	return 0;
> +}
> +
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
> index e2fa65ce0932..8bbef2244d5c 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.h
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.h
> @@ -6,12 +6,18 @@
>  #ifndef _XE_DEVCOREDUMP_H_
>  #define _XE_DEVCOREDUMP_H_
>  
> +#include <linux/types.h>
> +
> +struct drm_printer;
> +struct xe_devcoredump_snapshot;
>  struct xe_device;
>  struct xe_sched_job;
>  
>  #ifdef CONFIG_DEV_COREDUMP
>  void xe_devcoredump(struct xe_sched_job *job);
>  int xe_devcoredump_init(struct xe_device *xe);
> +int xe_devcoredump_check_iterator(struct xe_devcoredump_snapshot *ss,
> +				  struct drm_printer *p, size_t len);
>  #else
>  static inline void xe_devcoredump(struct xe_sched_job *job)
>  {
> @@ -21,6 +27,13 @@ static inline int xe_devcoredump_init(struct xe_device *xe)
>  {
>  	return 0;
>  }
> +
> +static inline int
> +xe_devcoredump_check_iterator(struct xe_devcoredump_snapshot *ss,
> +			      struct drm_printer *p, size_t len)
> +{
> +	return 0;
> +}
>  #endif
>  
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> index 923cdf72a816..884802e46a98 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
> +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> @@ -46,6 +46,10 @@ struct xe_devcoredump_snapshot {
>  	struct xe_sched_job_snapshot *job;
>  	/** @vm: Snapshot of VM state */
>  	struct xe_vm_snapshot *vm;
> +	/** @read_data_size: size of read data */
> +	size_t read_data_size;
> +	/** @read_data: Read data */
> +	void *read_data;
>  };
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 12d43c35978c..5cf45cb83676 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -25,6 +25,7 @@
>  #include "xe_assert.h"
>  #include "xe_bo.h"
>  #include "xe_device.h"
> +#include "xe_devcoredump.h"
>  #include "xe_drm_client.h"
>  #include "xe_exec_queue.h"
>  #include "xe_gt_pagefault.h"
> @@ -3366,9 +3367,11 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
>  	}
>  }
>  
> -void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
> +void xe_vm_snapshot_print(struct xe_devcoredump_snapshot *ss,
> +			  struct xe_vm_snapshot *snap, struct drm_printer *p)
>  {
>  	unsigned long i, j;
> +	int err;
>  
>  	if (IS_ERR_OR_NULL(snap)) {
>  		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
> @@ -3384,6 +3387,10 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
>  			continue;
>  		}
>  
> +		err = xe_devcoredump_check_iterator(ss, p, snap->snap[i].len);
> +		if (err < 0)
> +			return;
> +
>  		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
>  
>  		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index c864dba35e1d..4537420b9c94 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -19,6 +19,7 @@ struct drm_file;
>  struct ttm_buffer_object;
>  struct ttm_validate_buffer;
>  
> +struct xe_devcoredump_snapshot;
>  struct xe_exec_queue;
>  struct xe_file;
>  struct xe_sync_entry;
> @@ -279,5 +280,6 @@ static inline void vm_dbg(const struct drm_device *dev,
>  
>  struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm);
>  void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
> -void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
> +void xe_vm_snapshot_print(struct xe_devcoredump_snapshot *ss,
> +			  struct xe_vm_snapshot *snap, struct drm_printer *p);
>  void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);


