[PATCH v5 1/4] drm/xe: Add devcoredump chunking
John Harrison
john.c.harrison at intel.com
Wed Apr 16 21:35:42 UTC 2025
On 4/16/2025 1:35 PM, Matthew Brost wrote:
> On Wed, Apr 16, 2025 at 12:30:27PM -0700, John Harrison wrote:
>> On 4/15/2025 10:44 PM, Matthew Brost wrote:
>>> On Tue, Apr 15, 2025 at 04:18:29PM -0700, John Harrison wrote:
>>>> On 4/15/2025 11:39 AM, Matthew Brost wrote:
>>>>> Chunk the devcoredump into 1.5G pieces to avoid hitting the kvmalloc
>>>>> limit of 2G. A simple algorithm reads 1.5G at a time in the
>>>>> xe_devcoredump_read callback as needed.
>>>>>
>>>>> Some memory allocations are changed to GFP_ATOMIC as they are made in
>>>>> xe_devcoredump_read, which holds a lock in the reclaim path. The
>>>>> allocations are small, so in practice they should never fail.
>>>>>
>>>>> v2:
>>>>> - Update commit message wrt gfp atomic (John H)
>>>>>
>>>>> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
>>>>> Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
>>>>> ---
>>>>> drivers/gpu/drm/xe/xe_devcoredump.c | 59 ++++++++++++++++++-----
>>>>> drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 +
>>>>> drivers/gpu/drm/xe/xe_guc_hwconfig.c | 2 +-
>>>>> 3 files changed, 50 insertions(+), 13 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
>>>>> index 81b9d9bb3f57..a9e618abf8ac 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
>>>>> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
>>>>> @@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
>>>>> return &q->gt->uc.guc;
>>>>> }
>>>>> -static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
>>>>> +static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count,
>>>>> + ssize_t start,
>>>>> struct xe_devcoredump *coredump)
>>>>> {
>>>>> struct xe_device *xe;
>>>>> @@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
>>>>> ss = &coredump->snapshot;
>>>>> iter.data = buffer;
>>>>> - iter.start = 0;
>>>>> + iter.start = start;
>>>>> iter.remain = count;
>>>>> p = drm_coredump_printer(&iter);
>>>>> @@ -168,6 +169,8 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
>>>>> ss->vm = NULL;
>>>>> }
>>>>> +#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G)
>>>>> +
>>>>> static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
>>>>> size_t count, void *data, size_t datalen)
>>>>> {
>>>>> @@ -183,6 +186,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
>>>>> /* Ensure delayed work is captured before continuing */
>>>>> flush_work(&ss->work);
>>>>> + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
>>>>> + xe_pm_runtime_get(gt_to_xe(ss->gt));
>>>>> +
>>>>> mutex_lock(&coredump->lock);
>>>>> if (!ss->read.buffer) {
>>>>> @@ -195,12 +201,26 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
>>>>> return 0;
>>>>> }
>>>>> + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX ||
>>>>> + offset < ss->read.chunk_position) {
>>>>> + ss->read.chunk_position =
>>>>> + ALIGN_DOWN(offset, XE_DEVCOREDUMP_CHUNK_MAX);
>>>>> +
>>>>> + __xe_devcoredump_read(ss->read.buffer,
>>>>> + XE_DEVCOREDUMP_CHUNK_MAX,
>>>>> + ss->read.chunk_position, coredump);
>>>>> + }
>>>>> +
>>>>> byte_copied = count < ss->read.size - offset ? count :
>>>>> ss->read.size - offset;
>>>>> - memcpy(buffer, ss->read.buffer + offset, byte_copied);
>>>>> + memcpy(buffer, ss->read.buffer +
>>>>> + (offset % XE_DEVCOREDUMP_CHUNK_MAX), byte_copied);
>>>>> mutex_unlock(&coredump->lock);
>>>>> + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
>>>>> + xe_pm_runtime_put(gt_to_xe(ss->gt));
>>>>> +
>>>>> return byte_copied;
>>>>> }
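
My reading of the chunk handling above, sketched standalone for anyone
following along (hypothetical names, regenerate_into() stands in for the
__xe_devcoredump_read() refill; this is not the actual driver code):

/* Sketch of the chunked read path; CHUNK is 1.5G in the patch. */
#define CHUNK (SZ_512M + SZ_1G)

static ssize_t chunked_read(struct sketch_snapshot *ss, char *out,
			    loff_t offset, size_t count)
{
	/* Refill the cached window if offset falls outside the current
	 * 1.5G chunk. */
	if (offset >= ss->chunk_position + CHUNK ||
	    offset < ss->chunk_position) {
		ss->chunk_position = rounddown(offset, CHUNK);
		regenerate_into(ss->buffer, CHUNK, ss->chunk_position);
	}

	/* Copy from the offset relative to the cached chunk. */
	count = min_t(size_t, count, ss->size - offset);
	memcpy(out, ss->buffer + (offset % CHUNK), count);

	return count;
}

i.e. the whole dump is never held in memory at once, only the 1.5G
window that the current read() offset lands in.
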
>>>>> @@ -254,17 +274,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
>>>>> xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
>>>>> xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
>>>>> - xe_pm_runtime_put(xe);
>>>>> + ss->read.chunk_position = 0;
>>>>> /* Calculate devcoredump size */
>>>>> - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
>>>>> -
>>>>> - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
>>>>> - if (!ss->read.buffer)
>>>>> - return;
>>>>> + ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump);
>>>>> +
>>>>> + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) {
>>>>> + ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX,
>>>>> + GFP_USER);
>>>>> + if (!ss->read.buffer)
>>>>> + goto put_pm;
>>>>> +
>>>>> + __xe_devcoredump_read(ss->read.buffer,
>>>>> + XE_DEVCOREDUMP_CHUNK_MAX,
>>>>> + 0, coredump);
>>>>> + } else {
>>>>> + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
>>>>> + if (!ss->read.buffer)
>>>>> + goto put_pm;
>>>>> +
>>>>> + __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0,
>>>>> + coredump);
>>>>> + xe_devcoredump_snapshot_free(ss);
>>>>> + }
>>>>> - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
>>>>> - xe_devcoredump_snapshot_free(ss);
>>>>> +put_pm:
>>>>> + xe_pm_runtime_put(xe);
>>>>> }
>>>>> static void devcoredump_snapshot(struct xe_devcoredump *coredump,
>>>>> @@ -425,7 +460,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi
>>>>> if (offset & 3)
>>>>> drm_printf(p, "Offset not word aligned: %zu", offset);
>>>>> - line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL);
>>>>> + line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC);
>>>>> if (!line_buff) {
>>>>> drm_printf(p, "Failed to allocate line buffer\n");
>>>>> return;
>>>>> diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
>>>>> index 1a1d16a96b2d..a174385a6d83 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
>>>>> +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
>>>>> @@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot {
>>>>> struct {
>>>>> /** @read.size: size of devcoredump in human readable format */
>>>>> ssize_t size;
>>>>> + /** @read.chunk_position: position of devcoredump chunk */
>>>>> + ssize_t chunk_position;
>>>>> /** @read.buffer: buffer of devcoredump in human readable format */
>>>>> char *buffer;
>>>>> } read;
>>>>> diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
>>>>> index af2c817d552c..21403a250834 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
>>>>> +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
>>>>> @@ -175,7 +175,7 @@ int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val)
>>>>> if (num_dw == 0)
>>>>> return -EINVAL;
>>>>> - hwconfig = kzalloc(size, GFP_KERNEL);
>>>>> + hwconfig = kzalloc(size, GFP_ATOMIC);
>>>> As per earlier comments, this should not be happening while dumping a core
>>>> dump. The only code which uses this is steering related. That implies we are
>>>> reading EU registers while printing out a previously saved core dump. That
>>>> either means we are unnecessarily reading registers during the print or we
>>>> are not capturing state in advance and only actually reading it out when
>>>> printing. Neither of which is good.
>>>>
>>> Ok, I don't have bandwidth to fix that. I'll let this series die then.
>>>
>>> Matt
>> Or maybe just add a FIXME comment? I've been meaning to take a look at this
>> myself but just haven't had the time.
>>
> I guess that is a better option. I can add a FIXME when merging.
>
>> If we need the large file support to allow people to debug issues then we
>> should get it in. Your patch is not causing the bad register accesses (I
> Yes, this is an ask. And no, this patch isn't causing the bad register
> access, it is rather exposing it via a lockdep splat.
>
>> don't think), that is something that is already broken in the driver. But I
>> don't think you should make this change with a comment suggesting it is all
>> fine and happy. The commit description should be more like "FIXME: This
>> should not be happening, but until the unexpected register reads are
>> removed/moved make the alloc atomic so it doesn't go boom" and add a similar
>> comment to the allocation itself.
>>
> Ok, can fix commit too when merging.
>
> Matt
Sounds good.
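
Something along these lines for the comment, maybe (just a sketch of the
wording):

	/*
	 * FIXME: The hwconfig lookup should not be running from the
	 * devcoredump print path at all; the data should have been
	 * captured up front. Until that is fixed, this runs under the
	 * coredump lock in the path of reclaim, so the allocation must
	 * not sleep.
	 */
	hwconfig = kzalloc(size, GFP_ATOMIC);
	if (!hwconfig)
		return -ENOMEM;
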
That said, I've worked out where the dodgy reads are coming from: it's
the GuC backend for the register capture code. It isn't actually doing
register reads, just re-calculating the steering for the register value
it had previously read at capture time. It should be possible to cache
that information as well, rather than re-calculate it at dump time,
which would remove the need for the atomic alloc entirely. Will try to
get something posted later today. Up to you whether you want to hold off
merging for another day or not.
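
Roughly what I have in mind, as a sketch only (hypothetical names; the
real GuC capture structs and steering helpers obviously differ):

/* Resolve the steering once, at capture time, and cache it. */
struct cached_reg_entry {
	u32 offset;
	u32 value;
	u8 group;	/* steering group, resolved at capture time */
	u8 instance;	/* steering instance, resolved at capture time */
};

static void capture_reg(struct xe_gt *gt, struct cached_reg_entry *e,
			u32 offset, u32 value)
{
	e->offset = offset;
	e->value = value;
	/* Steering lookup happens here, while capturing... */
	resolve_steering(gt, offset, &e->group, &e->instance);
}

static void print_reg(struct drm_printer *p, const struct cached_reg_entry *e)
{
	/* ...so the print path only formats cached data - no lookups,
	 * no hwconfig reads, no allocations under the coredump lock. */
	drm_printf(p, "\t[%#x] = %#x (steering %u,%u)\n",
		   e->offset, e->value, e->group, e->instance);
}
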
John.
>
>> John.
>>
>>>> Rather than making allocations atomic when they don't need to be, we should
>>>> be fixing the coredump so that it does a clean separation of save versus
>>>> print.
>>>>
>>>> John.
>>>>
>>>>
>>>>> if (!hwconfig)
>>>>> return -ENOMEM;