[PATCH v1 1/7] drm/amdgpu: make devcoredump reading fast

Christian König christian.koenig at amd.com
Wed May 21 12:51:18 UTC 2025


On 5/21/25 11:49, Pierre-Eric Pelloux-Prayer wrote:
> Update the way drm_coredump_printer is used based on its documentation
> and Xe's code: the main idea is to generate the final version in one go
> and then use memcpy to return the chunks requested by the caller of
> amdgpu_devcoredump_read.
> 
> This cuts the time to copy the dump from 40s to ~0s on my machine.

Well, that indeed strongly sounds like something is going wrong, but I'm pretty sure that copying things into a temporary buffer is not the right solution.

My educated guess is that userspace reads the core dump in chunks of pages (or similar). So we end up generating the same dump over and over again and then returning only about 4k of it at a time.

How do you copy the core dump on your box?

Regards,
Christian.

> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c  | 43 +++++++++++++++++--
>  .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h  |  7 +++
>  2 files changed, 46 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
> index 7b50741dc097..de70747a099d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
> @@ -34,6 +34,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
>  }
>  #else
>  
> +#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024)
> +
>  const char *hw_ip_names[MAX_HWIP] = {
>  	[GC_HWIP]		= "GC",
>  	[HDP_HWIP]		= "HDP",
> @@ -196,11 +198,9 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
>  }
>  
>  static ssize_t
> -amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
> -			void *data, size_t datalen)
> +__amdgpu_devcoredump_read(char *buffer, size_t count, struct amdgpu_coredump_info *coredump)
>  {
>  	struct drm_printer p;
> -	struct amdgpu_coredump_info *coredump = data;
>  	struct drm_print_iterator iter;
>  	struct amdgpu_vm_fault_info *fault_info;
>  	struct amdgpu_ip_block *ip_block;
> @@ -208,7 +208,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>  
>  	iter.data = buffer;
>  	iter.offset = 0;
> -	iter.start = offset;
>  	iter.remain = count;
>  
>  	p = drm_coredump_printer(&iter);
> @@ -321,8 +320,44 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>  	return count - iter.remain;
>  }
>  
> +static ssize_t
> +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
> +			void *data, size_t datalen)
> +{
> +	struct amdgpu_coredump_info *coredump = data;
> +	ssize_t byte_copied;
> +
> +	if (!coredump)
> +		return -ENODEV;
> +
> +	if (!coredump->read.buffer) {
> +		/* Do a one-time preparation of the coredump output because
> +		 * repeatedly calling drm_coredump_printer is very slow.
> +		 */
> +		coredump->read.size =
> +			__amdgpu_devcoredump_read(NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump);
> +		coredump->read.buffer = kvmalloc(coredump->read.size, GFP_USER);
> +		if (!coredump->read.buffer)
> +			return -ENODEV;
> +
> +		__amdgpu_devcoredump_read(coredump->read.buffer, coredump->read.size, coredump);
> +	}
> +
> +	if (offset >= coredump->read.size)
> +		return 0;
> +
> +	byte_copied = count < coredump->read.size - offset ? count :
> +		coredump->read.size - offset;
> +	memcpy(buffer, coredump->read.buffer + offset, byte_copied);
> +
> +	return byte_copied;
> +}
> +
>  static void amdgpu_devcoredump_free(void *data)
>  {
> +	struct amdgpu_coredump_info *coredump = data;
> +
> +	kvfree(coredump->read.buffer);
>  	kfree(data);
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
> index ef9772c6bcc9..33f2f6fdfcf7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
> @@ -38,6 +38,13 @@ struct amdgpu_coredump_info {
>  	bool                            skip_vram_check;
>  	bool                            reset_vram_lost;
>  	struct amdgpu_ring              *ring;
> +	/* Readable form of the devcoredump, generated once to speed up
> +	 * reading it (see drm_coredump_printer's documentation).
> +	 */
> +	struct {
> +		ssize_t size;
> +		char *buffer;
> +	} read;
>  };
>  #endif
>  



More information about the amd-gfx mailing list