[PATCH v2 2/3] drm/xe/devcoredump: Print errno if VM snapshot was not captured

José Roberto de Souza jose.souza at intel.com
Thu Mar 7 13:52:28 UTC 2024


My testing machine has only 8GB of RAM, and while running piglit tests
I sometimes hit the OOM case in the xe_vm_snapshot_capture() snap
allocation.

So, to differentiate the OOM from the race between capture and UMDs
unbinding VMs, I'm adding a '[0].error: -12' (-ENOMEM) line to the devcoredump.

v2:
- fix returned errno values

Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
Signed-off-by: José Roberto de Souza <jose.souza at intel.com>
---
 drivers/gpu/drm/xe/xe_devcoredump.c |  6 ++----
 drivers/gpu/drm/xe/xe_vm.c          | 13 ++++++++++---
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 0fcd306803236..4ab0feca55cdd 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -117,10 +117,8 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 		if (coredump->snapshot.hwe[i])
 			xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
 						    &p);
-	if (coredump->snapshot.vm) {
-		drm_printf(&p, "\n**** VM state ****\n");
-		xe_vm_snapshot_print(coredump->snapshot.vm, &p);
-	}
+	drm_printf(&p, "\n**** VM state ****\n");
+	xe_vm_snapshot_print(coredump->snapshot.vm, &p);
 
 	return count - iter.remain;
 }
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index df9360a4c9e8e..41066e99230ab 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3336,8 +3336,10 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
 
 	if (num_snaps)
 		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
-	if (!snap)
+	if (!snap) {
+		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
 		goto out_unlock;
+	}
 
 	snap->num_snaps = num_snaps;
 	i = 0;
@@ -3377,7 +3379,7 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
 
 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
 {
-	if (!snap)
+	if (IS_ERR(snap))
 		return;
 
 	for (int i = 0; i < snap->num_snaps; i++) {
@@ -3434,6 +3436,11 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
 {
 	unsigned long i, j;
 
+	if (IS_ERR(snap)) {
+		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
+		return;
+	}
+
 	for (i = 0; i < snap->num_snaps; i++) {
 		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
 
@@ -3460,7 +3467,7 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
 {
 	unsigned long i;
 
-	if (!snap)
+	if (IS_ERR(snap))
 		return;
 
 	for (i = 0; i < snap->num_snaps; i++) {
-- 
2.44.0



More information about the Intel-xe mailing list