[Intel-xe] [PATCH 13/14] drm/xe: Convert VM print to snapshot capture and print.

Matthew Brost matthew.brost at intel.com
Tue May 2 08:07:31 UTC 2023


On Wed, Apr 26, 2023 at 04:57:12PM -0400, Rodrigo Vivi wrote:
> The goal is to allow for a snapshot capture to be taken at the time
> of the crash, while the print out can happen at a later time through
> the exposed devcoredump virtual device.
> 
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>

Also thinking out loud here: at some point we are going to need a hook
to dump the entire contents of the VMAs...

I can think of a few options.

1. Flag on the VM creation, dump the entire VM.
2. Flag on VM binds, dump VMAs with the flag set.
3. Have both options.

Thoughts?

Matt

> ---
>  drivers/gpu/drm/xe/xe_guc_submit.c |   2 +-
>  drivers/gpu/drm/xe/xe_vm.c         | 137 +++++++++++++++++++++++++----
>  drivers/gpu/drm/xe/xe_vm.h         |   6 +-
>  drivers/gpu/drm/xe/xe_vm_types.h   |  18 ++++
>  4 files changed, 143 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 74659d0a69b3..ac98bc1843e8 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -753,7 +753,7 @@ static void simple_error_capture(struct xe_engine *e)
>  				continue;
>  			xe_hw_engine_print(hwe, &p);
>  		}
> -		xe_analyze_vm(&p, e->vm, e->gt->info.id);
> +		xe_vm_print(&p, e->vm, e->gt->info.id);
>  		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
>  		dma_fence_end_signalling(cookie);
>  	}
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 4cffdb84680a..075640dbdff0 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3369,38 +3369,139 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
>  	return 0;
>  }
>  
> -int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
> +/**
> + * xe_vm_snapshot_capture - Take a quick snapshot of the VM.
> + * @vm: Xe VM
> + * @gt_id: GT id number
> + *
> + * This can be printed out in a later stage like during dev_coredump
> + * analysis.
> + *
> + * Returns: a Xe VM snapshot object that must be freed by the
> + * 	    caller, using `xe_vm_snapshot_free`.
> + */
> +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm, int gt_id)
>  {
> +	struct xe_vm_snapshot *snapshot;
>  	struct rb_node *node;
> -	bool is_vram;
> -	uint64_t addr;
> +	int i = 0;
> +
> +	snapshot = kzalloc(sizeof(struct xe_vm_snapshot), GFP_ATOMIC);
> +
> +	if (!down_read_trylock(&vm->lock))
> +		return snapshot;
> +
> +	snapshot->acquired = true;
> +
> +	for (node = rb_first(&vm->vmas); node; node = rb_next(node))
> +		snapshot->num_nodes++;
> +
> +	snapshot->vm_nodes = kmalloc_array(snapshot->num_nodes,
> +					   sizeof(struct vm_node_snapshot),
> +					   GFP_ATOMIC);
>  
> -	if (!down_read_trylock(&vm->lock)) {
> -		drm_printf(p, " Failed to acquire VM lock to dump capture");
> -		return 0;
> -	}
>  	if (vm->pt_root[gt_id]) {
> -		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_vram);
> -		drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
> +		snapshot->vm_root = kzalloc(sizeof(struct vm_node_snapshot),
> +				      GFP_ATOMIC);
> +		snapshot->vm_root->addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0,
> +					       GEN8_PAGE_SIZE,
> +					       &snapshot->vm_root->is_vram);
>  	}
>  
>  	for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
>  		struct xe_vma *vma = to_xe_vma(node);
> -		bool is_userptr = xe_vma_is_userptr(vma);
> +		snapshot->vm_nodes[i].is_userptr = xe_vma_is_userptr(vma);
>  
> -		if (is_userptr) {
> +		if (snapshot->vm_nodes[i].is_userptr) {
>  			struct xe_res_cursor cur;
>  
> -			xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur);
> -			addr = xe_res_dma(&cur);
> +			xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE,
> +					&cur);
> +			snapshot->vm_nodes[i].addr = xe_res_dma(&cur);
>  		} else {
> -			addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_vram);
> +			snapshot->vm_nodes[i].addr = xe_bo_addr(vma->bo, 0,
> +							  GEN8_PAGE_SIZE,
> +							  &snapshot->vm_nodes[i].is_vram);
>  		}
> -		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
> -			   vma->start, vma->end, vma->end - vma->start + 1ull,
> -			   addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS");
> +		snapshot->vm_nodes[i].vma.start = vma->start;
> +		snapshot->vm_nodes[i].vma.end = vma->end;
> +		i++;
>  	}
>  	up_read(&vm->lock);
>  
> -	return 0;
> +	return snapshot;
> +}
> +
> +/**
> + * xe_vm_snapshot_print - Print out a given Xe VM snapshot.
> + * @snapshot: Xe VM snapshot object.
> + * @p: drm_printer where it will be printed out.
> + *
> + * This function prints out a given Xe VM snapshot object.
> + */
> +void xe_vm_snapshot_print(struct xe_vm_snapshot *snapshot,
> +			  struct drm_printer *p)
> +{
> +	int i;
> +
> +	if (!snapshot)
> +		return;
> +
> +	if (!snapshot->acquired) {
> +		drm_printf(p, " Failed to acquire VM lock to dump capture");
> +		return;
> +	}
> +
> +	if (snapshot->vm_root) {
> +		drm_printf(p, " VM root: A:0x%llx %s\n",
> +			   snapshot->vm_root->addr,
> +			   snapshot->vm_root->is_vram ? "VRAM" : "SYS");
> +	}
> +
> +	for (i = 0; snapshot->vm_nodes && i < snapshot->num_nodes; i++)
> +		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
> +			   snapshot->vm_nodes[i].vma.start,
> +			   snapshot->vm_nodes[i].vma.end,
> +			   snapshot->vm_nodes[i].vma.end -
> +			   snapshot->vm_nodes[i].vma.start + 1ull,
> +			   snapshot->vm_nodes[i].addr,
> +			   snapshot->vm_nodes[i].is_userptr ?
> +			   "USR" : snapshot->vm_nodes[i].is_vram ?
> +			   "VRAM" : "SYS");
> +}
> +
> +/**
> + * xe_vm_snapshot_free - Free all allocated objects for a given snapshot.
> + * @snapshot: Xe VM snapshot object.
> + *
> + * This function frees all the memory that was allocated at capture
> + * time.
> + */
> +void xe_vm_snapshot_free(struct xe_vm_snapshot *snapshot)
> +{
> +	if (!snapshot)
> +		return;
> +
> +	if (snapshot->vm_root)
> +		kfree(snapshot->vm_root);
> +	if (snapshot->vm_nodes)
> +		kfree(snapshot->vm_nodes);
> +	kfree(snapshot);
> +}
> +
> +/**
> + * xe_vm_print - Xe VM Print.
> + * @p: drm_printer
> + * @vm: Xe VM
> + * @gt_id: GT id number
> + *
> + * This function quickly captures a snapshot and immediately prints it out.
> + */
> +void xe_vm_print(struct drm_printer *p, struct xe_vm *vm, int gt_id)
> +{
> +	struct xe_vm_snapshot *snapshot;
> +
> +	snapshot = xe_vm_snapshot_capture(vm, gt_id);
> +	xe_vm_snapshot_print(snapshot, p);
> +	xe_vm_snapshot_free(snapshot);
>  }
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 748dc16ebed9..924884b36469 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -145,7 +145,11 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm,
>  void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
>  			     enum dma_resv_usage usage);
>  
> -int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
> +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm, int gt_id);
> +void xe_vm_snapshot_print(struct xe_vm_snapshot *snapshot,
> +			  struct drm_printer *p);
> +void xe_vm_snapshot_free(struct xe_vm_snapshot *snapshot);
> +void xe_vm_print(struct drm_printer *p, struct xe_vm *vm, int gt_id);
>  
>  #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
>  #define vm_dbg drm_dbg
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index fada7896867f..18e79b6a2182 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -149,6 +149,24 @@ struct xe_vma {
>  	} extobj;
>  };
>  
> +
> +struct vm_node_snapshot {
> +	bool is_userptr;
> +	bool is_vram;
> +	struct {
> +		u64 start;
> +		u64 end;
> +	} vma;
> +	u64 addr;
> +};
> +
> +struct xe_vm_snapshot {
> +	bool acquired;
> +	struct vm_node_snapshot *vm_root;
> +	struct vm_node_snapshot *vm_nodes;
> +	int num_nodes;
> +};
> +
>  struct xe_device;
>  
>  #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
> -- 
> 2.39.2
> 


More information about the Intel-xe mailing list