[Intel-xe] [PATCH 13/14] drm/xe: Convert VM print to snapshot capture and print.
Matthew Brost
matthew.brost at intel.com
Tue May 2 07:50:22 UTC 2023
On Wed, Apr 26, 2023 at 04:57:12PM -0400, Rodrigo Vivi wrote:
> The goal is to allow for a snapshot capture to be taken at the time
> of the crash, while the print out can happen at a later time through
> the exposed devcoredump virtual device.
>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
This is an example of a patch I'd like staged behind the GPUVA changes
as GPUVA will change all of this...
Matt
> ---
> drivers/gpu/drm/xe/xe_guc_submit.c | 2 +-
> drivers/gpu/drm/xe/xe_vm.c | 137 +++++++++++++++++++++++++----
> drivers/gpu/drm/xe/xe_vm.h | 6 +-
> drivers/gpu/drm/xe/xe_vm_types.h | 18 ++++
> 4 files changed, 143 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 74659d0a69b3..ac98bc1843e8 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -753,7 +753,7 @@ static void simple_error_capture(struct xe_engine *e)
> continue;
> xe_hw_engine_print(hwe, &p);
> }
> - xe_analyze_vm(&p, e->vm, e->gt->info.id);
> + xe_vm_print(&p, e->vm, e->gt->info.id);
> xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
> dma_fence_end_signalling(cookie);
> }
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 4cffdb84680a..075640dbdff0 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3369,38 +3369,139 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
> return 0;
> }
>
> -int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
> +/**
> + * xe_vm_snapshot_capture - Take a quick snapshot of the VM.
> + * @vm: Xe VM
> + * @gt_id: GT id number
> + *
> + * The snapshot can be printed out at a later stage, such as during
> + * dev_coredump analysis.
> + *
> + * Returns: a Xe VM snapshot object that must be freed by the
> + * caller, using `xe_vm_snapshot_free`.
> + */
> +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm, int gt_id)
> {
> + struct xe_vm_snapshot *snapshot;
> struct rb_node *node;
> - bool is_vram;
> - uint64_t addr;
> + int i = 0;
> +
> + snapshot = kzalloc(sizeof(struct xe_vm_snapshot), GFP_ATOMIC);
> +
> + if (!down_read_trylock(&vm->lock))
> + return snapshot;
> +
> + snapshot->acquired = true;
> +
> + for (node = rb_first(&vm->vmas); node; node = rb_next(node))
> + snapshot->num_nodes++;
> +
> + snapshot->vm_nodes = kmalloc_array(snapshot->num_nodes,
> + sizeof(struct vm_node_snapshot),
> + GFP_ATOMIC);
>
> - if (!down_read_trylock(&vm->lock)) {
> - drm_printf(p, " Failed to acquire VM lock to dump capture");
> - return 0;
> - }
> if (vm->pt_root[gt_id]) {
> - addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_vram);
> - drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
> + snapshot->vm_root = kzalloc(sizeof(struct vm_node_snapshot),
> + GFP_ATOMIC);
> + snapshot->vm_root->addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0,
> + GEN8_PAGE_SIZE,
> + &snapshot->vm_root->is_vram);
> }
>
> for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
> struct xe_vma *vma = to_xe_vma(node);
> - bool is_userptr = xe_vma_is_userptr(vma);
> + snapshot->vm_nodes[i].is_userptr = xe_vma_is_userptr(vma);
>
> - if (is_userptr) {
> + if (snapshot->vm_nodes[i].is_userptr) {
> struct xe_res_cursor cur;
>
> - xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur);
> - addr = xe_res_dma(&cur);
> + xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE,
> + &cur);
> + snapshot->vm_nodes[i].addr = xe_res_dma(&cur);
> } else {
> - addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_vram);
> + snapshot->vm_nodes[i].addr = xe_bo_addr(vma->bo, 0,
> + GEN8_PAGE_SIZE,
> + &snapshot->vm_nodes[i].is_vram);
> }
> - drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
> - vma->start, vma->end, vma->end - vma->start + 1ull,
> - addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS");
> + snapshot->vm_nodes[i].vma.start = vma->start;
> + snapshot->vm_nodes[i].vma.end = vma->end;
> + i++;
> }
> up_read(&vm->lock);
>
> - return 0;
> + return snapshot;
> +}
> +
> +/**
> + * xe_vm_snapshot_print - Print out a given Xe VM snapshot.
> + * @snapshot: Xe VM snapshot object.
> + * @p: drm_printer where it will be printed out.
> + *
> + * This function prints out a given Xe VM snapshot object.
> + */
> +void xe_vm_snapshot_print(struct xe_vm_snapshot *snapshot,
> + struct drm_printer *p)
> +{
> + int i;
> +
> + if (!snapshot)
> + return;
> +
> + if (!snapshot->acquired) {
> + drm_printf(p, " Failed to acquire VM lock to dump capture");
> + return;
> + }
> +
> + if (snapshot->vm_root) {
> + drm_printf(p, " VM root: A:0x%llx %s\n",
> + snapshot->vm_root->addr,
> + snapshot->vm_root->is_vram ? "VRAM" : "SYS");
> + }
> +
> + for (i = 0; snapshot->vm_nodes && i < snapshot->num_nodes; i++)
> + drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
> + snapshot->vm_nodes[i].vma.start,
> + snapshot->vm_nodes[i].vma.end,
> + snapshot->vm_nodes[i].vma.end -
> + snapshot->vm_nodes[i].vma.start + 1ull,
> + snapshot->vm_nodes[i].addr,
> + snapshot->vm_nodes[i].is_userptr ?
> + "USR" : snapshot->vm_nodes[i].is_vram ?
> + "VRAM" : "SYS");
> +}
> +
> +/**
> + * xe_vm_snapshot_free - Free all allocated objects for a given snapshot.
> + * @snapshot: Xe VM snapshot object.
> + *
> + * This function frees all the memory that was allocated at capture
> + * time.
> + */
> +void xe_vm_snapshot_free(struct xe_vm_snapshot *snapshot)
> +{
> + if (!snapshot)
> + return;
> +
> + if (snapshot->vm_root)
> + kfree(snapshot->vm_root);
> + if (snapshot->vm_nodes)
> + kfree(snapshot->vm_nodes);
> + kfree(snapshot);
> +}
> +
> +/**
> + * xe_vm_print - Xe VM Print.
> + * @p: drm_printer
> + * @vm: Xe VM
> + * @gt_id: GT id number
> + *
> + * This function quickly captures a snapshot and immediately prints it out.
> + */
> +void xe_vm_print(struct drm_printer *p, struct xe_vm *vm, int gt_id)
> +{
> + struct xe_vm_snapshot *snapshot;
> +
> + snapshot = xe_vm_snapshot_capture(vm, gt_id);
> + xe_vm_snapshot_print(snapshot, p);
> + xe_vm_snapshot_free(snapshot);
> }
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 748dc16ebed9..924884b36469 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -145,7 +145,11 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm,
> void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
> enum dma_resv_usage usage);
>
> -int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
> +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm, int gt_id);
> +void xe_vm_snapshot_print(struct xe_vm_snapshot *snapshot,
> + struct drm_printer *p);
> +void xe_vm_snapshot_free(struct xe_vm_snapshot *snapshot);
> +void xe_vm_print(struct drm_printer *p, struct xe_vm *vm, int gt_id);
>
> #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
> #define vm_dbg drm_dbg
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index fada7896867f..18e79b6a2182 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -149,6 +149,24 @@ struct xe_vma {
> } extobj;
> };
>
> +
> +struct vm_node_snapshot {
> + bool is_userptr;
> + bool is_vram;
> + struct {
> + u64 start;
> + u64 end;
> + } vma;
> + u64 addr;
> +};
> +
> +struct xe_vm_snapshot {
> + bool acquired;
> + struct vm_node_snapshot *vm_root;
> + struct vm_node_snapshot *vm_nodes;
> + int num_nodes;
> +};
> +
> struct xe_device;
>
> #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
> --
> 2.39.2
>
More information about the Intel-xe
mailing list