[PATCH 2/4] drm/xe/devcoredump: Print errno if VM snapshot was not captured

Maarten Lankhorst maarten.lankhorst at linux.intel.com
Thu Mar 7 10:50:53 UTC 2024



On 2024-03-04 15:05, José Roberto de Souza wrote:
> My testing machine has only 8GB of RAM, and while running piglit tests
> I can sometimes hit the OOM case in the xe_vm_snapshot_capture() snap
> allocation.
> 
> So, to differentiate the OOM from a race between capture and UMDs
> unbinding VMs, I'm adding a '[0].error: -12' line to the devcoredump.
> 
> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_devcoredump.c |  6 ++----
>   drivers/gpu/drm/xe/xe_vm.c          | 13 ++++++++++---
>   2 files changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
> index 0fcd306803236..4ab0feca55cdd 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -117,10 +117,8 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
>   		if (coredump->snapshot.hwe[i])
>   			xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
>   						    &p);
> -	if (coredump->snapshot.vm) {
> -		drm_printf(&p, "\n**** VM state ****\n");
> -		xe_vm_snapshot_print(coredump->snapshot.vm, &p);
> -	}
> +	drm_printf(&p, "\n**** VM state ****\n");
> +	xe_vm_snapshot_print(coredump->snapshot.vm, &p);
>   
>   	return count - iter.remain;
>   }
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index df9360a4c9e8e..f7d20bf9b33a9 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3336,8 +3336,10 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
>   
>   	if (num_snaps)
>   		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
> -	if (!snap)
> +	if (!snap) {
> +		snap = num_snaps ? ERR_PTR(-ENODEV) : ERR_PTR(-ENOMEM);
>   		goto out_unlock;
> +	}

You inverted -ENODEV and -ENOMEM here. Perhaps return earlier for 
!num_snaps instead of a ternary?


>   
>   	snap->num_snaps = num_snaps;
>   	i = 0;
> @@ -3377,7 +3379,7 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
>   
>   void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
>   {
> -	if (!snap)
> +	if (IS_ERR(snap))
>   		return;
>   
>   	for (int i = 0; i < snap->num_snaps; i++) {
> @@ -3434,6 +3436,11 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
>   {
>   	unsigned long i, j;
>   
> +	if (IS_ERR(snap)) {
> +		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
> +		return;
> +	}
> +
>   	for (i = 0; i < snap->num_snaps; i++) {
>   		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
>   
> @@ -3460,7 +3467,7 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
>   {
>   	unsigned long i;
>   
> -	if (!snap)
> +	if (IS_ERR(snap))
>   		return;
>   
>   	for (i = 0; i < snap->num_snaps; i++) {


More information about the Intel-xe mailing list