[PATCH v2 3/4] drm/amdgpu: update the error logging for more information

Sunil Khatri sunil.khatri at amd.com
Tue Apr 15 11:25:49 UTC 2025


Add the process name and pid to the userqueue error
messages so that each error can be attributed to the
process that triggered it, making the logs more useful
for debugging.

Sample log:
[   42.444297] [drm:amdgpu_userqueue_wait_for_signal [amdgpu]] *ERROR* Timed out waiting for fence f=000000001c74d978 for comm:Xwayland pid:3427
[   42.444669] [drm:amdgpu_userqueue_suspend [amdgpu]] *ERROR* Not suspending userqueue, timeout waiting for comm:Xwayland pid:3427
[   42.824729] [drm:amdgpu_userqueue_wait_for_signal [amdgpu]] *ERROR* Timed out waiting for fence f=0000000074407d3e for comm:systemd-logind pid:1058
[   42.825082] [drm:amdgpu_userqueue_suspend [amdgpu]] *ERROR* Not suspending userqueue, timeout waiting for comm:systemd-logind pid:1058

Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 47 +++++++++++++++----
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 1867520ba258..eb96c12e02e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -38,12 +38,17 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
 	struct amdgpu_device *adev = uq_mgr->adev;
 	const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
 	struct dma_fence *f = queue->last_fence;
+	struct drm_file *file;
+	char proc_log[50];
 	int ret;
 
 	if (f && !dma_fence_is_signaled(f)) {
 		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
 		if (ret <= 0) {
-			DRM_ERROR("Timed out waiting for fence f=%p\n", f);
+			file = uq_mgr->file;
+			drm_process_info(file, proc_log, sizeof(proc_log));
+			DRM_ERROR("Timed out waiting for fence f=%p for %s\n",
+				  f, proc_log);
 			return;
 		}
 	}
@@ -431,6 +436,8 @@ amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr)
 	const struct amdgpu_userq_funcs *userq_funcs;
 	struct amdgpu_usermode_queue *queue;
 	int queue_id;
+	struct drm_file *file;
+	char proc_log[50];
 	int ret = 0;
 
 	/* Resume all the queues for this process */
@@ -439,8 +446,13 @@ amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr)
 		ret |= userq_funcs->map(uq_mgr, queue);
 	}
 
-	if (ret)
-		DRM_ERROR("Failed to map all the queues\n");
+	if (ret) {
+		file = uq_mgr->file;
+		drm_process_info(file, proc_log, sizeof(proc_log));
+		DRM_ERROR("Failed to map all the queue for %s\n",
+			  proc_log);
+		}
+
 	return ret;
 }
 
@@ -589,6 +601,8 @@ amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr)
 	const struct amdgpu_userq_funcs *userq_funcs;
 	struct amdgpu_usermode_queue *queue;
 	int queue_id;
+	struct drm_file *file;
+	char proc_log[50];
 	int ret = 0;
 
 	/* Try to unmap all the queues in this process ctx */
@@ -597,8 +611,13 @@ amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr)
 		ret += userq_funcs->unmap(uq_mgr, queue);
 	}
 
-	if (ret)
-		DRM_ERROR("Couldn't unmap all the queues\n");
+	if (ret) {
+		file = uq_mgr->file;
+		drm_process_info(file, proc_log, sizeof(proc_log));
+		DRM_ERROR("Couldn't unmap all the queues for %s\n",
+			  proc_log);
+		}
+
 	return ret;
 }
 
@@ -606,6 +625,8 @@ static int
 amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
 {
 	struct amdgpu_usermode_queue *queue;
+	struct drm_file *file;
+	char proc_log[50];
 	int queue_id, ret;
 
 	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
@@ -615,7 +636,10 @@ amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
 			continue;
 		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
 		if (ret <= 0) {
-			DRM_ERROR("Timed out waiting for fence f=%p\n", f);
+			file = uq_mgr->file;
+			drm_process_info(file, proc_log, sizeof(proc_log));
+			DRM_ERROR("Timed out waiting for fence f=%p for %s\n",
+				  f, proc_log);
 			return -ETIMEDOUT;
 		}
 	}
@@ -628,19 +652,26 @@ amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr,
 			 struct amdgpu_eviction_fence *ev_fence)
 {
 	int ret;
+	struct drm_file *file;
+	char proc_log[50];
 	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
 	struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
 
 	/* Wait for any pending userqueue fence work to finish */
 	ret = amdgpu_userqueue_wait_for_signal(uq_mgr);
 	if (ret) {
-		DRM_ERROR("Not suspending userqueue, timeout waiting for work\n");
+		file = uq_mgr->file;
+		drm_process_info(file, proc_log, sizeof(proc_log));
+		DRM_ERROR("Not suspending userqueue, timeout waiting for %s\n",
+			  proc_log);
 		return;
 	}
 
 	ret = amdgpu_userqueue_suspend_all(uq_mgr);
 	if (ret) {
-		DRM_ERROR("Failed to evict userqueue\n");
+		file = uq_mgr->file;
+		drm_process_info(file, proc_log, sizeof(proc_log));
+		DRM_ERROR("Failed to evict userqueue for %s\n", proc_log);
 		return;
 	}
 
-- 
2.34.1



More information about the dri-devel mailing list