[Intel-xe] [PATCH 13/14] drm/xe: Convert VM print to snapshot capture and print.

Rodrigo Vivi rodrigo.vivi at intel.com
Wed Apr 26 20:57:12 UTC 2023


The goal is to allow for a snapshot capture to be taken at the time
of the crash, while the print out can happen at a later time through
the exposed devcoredump virtual device.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c |   2 +-
 drivers/gpu/drm/xe/xe_vm.c         | 137 +++++++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_vm.h         |   6 +-
 drivers/gpu/drm/xe/xe_vm_types.h   |  18 ++++
 4 files changed, 143 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 74659d0a69b3..ac98bc1843e8 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -753,7 +753,7 @@ static void simple_error_capture(struct xe_engine *e)
 				continue;
 			xe_hw_engine_print(hwe, &p);
 		}
-		xe_analyze_vm(&p, e->vm, e->gt->info.id);
+		xe_vm_print(&p, e->vm, e->gt->info.id);
 		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
 		dma_fence_end_signalling(cookie);
 	}
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4cffdb84680a..075640dbdff0 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3369,38 +3369,139 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	return 0;
 }
 
-int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+/**
+ * xe_vm_snapshot_capture - Take a quick snapshot of the HW Engine.
+ * @vm: Xe VM
+ * @gt_id: GT id number
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a Xe VM snapshot object that must be freed by the
+ * 	    caller, using `xe_vm_snapshot_free`.
+ */
+struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm, int gt_id)
 {
+	struct xe_vm_snapshot *snapshot;
 	struct rb_node *node;
-	bool is_vram;
-	uint64_t addr;
+	int i = 0;
+
+	snapshot = kzalloc(sizeof(struct xe_vm_snapshot), GFP_ATOMIC);
+
+	if (!down_read_trylock(&vm->lock))
+		return snapshot;
+
+	snapshot->acquired = true;
+
+	for (node = rb_first(&vm->vmas); node; node = rb_next(node))
+		snapshot->num_nodes++;
+
+	snapshot->vm_nodes = kmalloc_array(snapshot->num_nodes,
+					   sizeof(struct vm_node_snapshot),
+					   GFP_ATOMIC);
 
-	if (!down_read_trylock(&vm->lock)) {
-		drm_printf(p, " Failed to acquire VM lock to dump capture");
-		return 0;
-	}
 	if (vm->pt_root[gt_id]) {
-		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_vram);
-		drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
+		snapshot->vm_root = kzalloc(sizeof(struct vm_node_snapshot),
+				      GFP_ATOMIC);
+		snapshot->vm_root->addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0,
+					       GEN8_PAGE_SIZE,
+					       &snapshot->vm_root->is_vram);
 	}
 
 	for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
 		struct xe_vma *vma = to_xe_vma(node);
-		bool is_userptr = xe_vma_is_userptr(vma);
+		snapshot->vm_nodes[i].is_userptr = xe_vma_is_userptr(vma);
 
-		if (is_userptr) {
+		if (snapshot->vm_nodes[i].is_userptr) {
 			struct xe_res_cursor cur;
 
-			xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur);
-			addr = xe_res_dma(&cur);
+			xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE,
+					&cur);
+			snapshot->vm_nodes[i].addr = xe_res_dma(&cur);
 		} else {
-			addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_vram);
+			snapshot->vm_nodes[i].addr = xe_bo_addr(vma->bo, 0,
+							  GEN8_PAGE_SIZE,
+							  &snapshot->vm_nodes[i].is_vram);
 		}
-		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
-			   vma->start, vma->end, vma->end - vma->start + 1ull,
-			   addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS");
+		snapshot->vm_nodes[i].vma.start = vma->start;
+		snapshot->vm_nodes[i].vma.end = vma->end;
+		i++;
 	}
 	up_read(&vm->lock);
 
-	return 0;
+	return snapshot;
+}
+
+/**
+ * xe_vm_snapshot_print - Print out a given Xe HW Engine snapshot.
+ * @snapshot: Xe VM snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given Xe HW Engine snapshot object.
+ */
+void xe_vm_snapshot_print(struct xe_vm_snapshot *snapshot,
+			  struct drm_printer *p)
+{
+	int i;
+
+	if (!snapshot)
+		return;
+
+	if (!snapshot->acquired) {
+		drm_printf(p, " Failed to acquire VM lock to dump capture");
+		return;
+	}
+
+	if (snapshot->vm_root) {
+		drm_printf(p, " VM root: A:0x%llx %s\n",
+			   snapshot->vm_root->addr,
+			   snapshot->vm_root->is_vram ? "VRAM" : "SYS");
+	}
+
+	for (i = 0; snapshot->vm_nodes && i < snapshot->num_nodes; i++)
+		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
+			   snapshot->vm_nodes[i].vma.start,
+			   snapshot->vm_nodes[i].vma.end,
+			   snapshot->vm_nodes[i].vma.end -
+			   snapshot->vm_nodes[i].vma.start + 1ull,
+			   snapshot->vm_nodes[i].addr,
+			   snapshot->vm_nodes[i].is_userptr ?
+			   "USR" : snapshot->vm_nodes[i].is_vram ?
+			   "VRAM" : "SYS");
+}
+
+/**
+ * xe_vm_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: Xe VM snapshot object.
+ *
+ * This function free all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_vm_snapshot_free(struct xe_vm_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	if (snapshot->vm_root)
+		kfree(snapshot->vm_root);
+	if (snapshot->vm_nodes)
+		kfree(snapshot->vm_nodes);
+	kfree(snapshot);
+}
+
+/**
+ * xe_vm_print - Xe VM Print.
+ * @p: drm_printer
+ * @vm: Xe VM
+ * @gt_id: GT id number
+ *
+ * This function quickly capture a snapshot and immediately print it out.
+ */
+void xe_vm_print(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+{
+	struct xe_vm_snapshot *snapshot;
+
+	snapshot = xe_vm_snapshot_capture(vm, gt_id);
+	xe_vm_snapshot_print(snapshot, p);
+	xe_vm_snapshot_free(snapshot);
 }
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 748dc16ebed9..924884b36469 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -145,7 +145,11 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm,
 void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
 			     enum dma_resv_usage usage);
 
-int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
+struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm, int gt_id);
+void xe_vm_snapshot_print(struct xe_vm_snapshot *snapshot,
+			  struct drm_printer *p);
+void xe_vm_snapshot_free(struct xe_vm_snapshot *snapshot);
+void xe_vm_print(struct drm_printer *p, struct xe_vm *vm, int gt_id);
 
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
 #define vm_dbg drm_dbg
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index fada7896867f..18e79b6a2182 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -149,6 +149,24 @@ struct xe_vma {
 	} extobj;
 };
 
+
+struct vm_node_snapshot {
+	bool is_userptr;
+	bool is_vram;
+	struct {
+		u64 start;
+		u64 end;
+	} vma;
+	u64 addr;
+};
+
+struct xe_vm_snapshot {
+	bool acquired;
+	struct vm_node_snapshot *vm_root;
+	struct vm_node_snapshot *vm_nodes;
+	int num_nodes;
+};
+
 struct xe_device;
 
 #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
-- 
2.39.2



More information about the Intel-xe mailing list