[PATCH] drm/xe: Do not access xe file when updating exec queue runtime

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Fri May 24 18:49:58 UTC 2024


The current code is running into a use after free case where xe file is
closed before the exec queue runtime can be updated. This is occuring in
the xe_file_close path. To fix that, do not access xe file when updating
the exec queue runtime. Instead store the exec queue runtime locally in
the exec queue object and accumulate it when the user dumps the drm
client stats. We know that the xe file is valid when user is dumping the
runtime stats for the drm client, so this effectively removes the
dependency on xe file object in xe_exec_queue_update_runtime().

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1908
Fixes: 6109f24f87d7 ("drm/xe: Add helper to accumulate exec queue runtime")
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
---
 drivers/gpu/drm/xe/xe_drm_client.c       | 4 +++-
 drivers/gpu/drm/xe/xe_exec_queue.c       | 5 +----
 drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 ++
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index af404c9e5cc0..a6a15562b8f7 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -251,8 +251,10 @@ static void show_runtime(struct drm_printer *p, struct drm_file *file)
 
 	/* Accumulate all the exec queues from this client */
 	mutex_lock(&xef->exec_queue.lock);
-	xa_for_each(&xef->exec_queue.xa, i, q)
+	xa_for_each(&xef->exec_queue.xa, i, q) {
 		xe_exec_queue_update_runtime(q);
+		xef->runtime[q->class] += q->runtime - xef->runtime[q->class];
+	}
 	mutex_unlock(&xef->exec_queue.lock);
 
 	/* Get the total GPU cycles */
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 0fd61fb4d104..a0775a96efb1 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -765,7 +765,6 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
  */
 void xe_exec_queue_update_runtime(struct xe_exec_queue *q)
 {
-	struct xe_file *xef;
 	struct xe_lrc *lrc;
 	u32 old_ts, new_ts;
 
@@ -777,8 +776,6 @@ void xe_exec_queue_update_runtime(struct xe_exec_queue *q)
 	if (!q->vm || !q->vm->xef)
 		return;
 
-	xef = q->vm->xef;
-
 	/*
 	 * Only sample the first LRC. For parallel submission, all of them are
 	 * scheduled together and we compensate that below by multiplying by
@@ -789,7 +786,7 @@ void xe_exec_queue_update_runtime(struct xe_exec_queue *q)
 	 */
 	lrc = &q->lrc[0];
 	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
-	xef->runtime[q->class] += (new_ts - old_ts) * q->width;
+	q->runtime += (new_ts - old_ts) * q->width;
 }
 
 void xe_exec_queue_kill(struct xe_exec_queue *q)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index ee78d497d838..085a7dce7565 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -151,6 +151,8 @@ struct xe_exec_queue {
 	 * Protected by @vm's resv. Unused if @vm == NULL.
 	 */
 	u64 tlb_flush_seqno;
+	/** @runtime: hw engine class runtime in ticks for this exec queue*/
+	u64 runtime;
 	/** @lrc: logical ring context for this exec queue */
 	struct xe_lrc lrc[];
 };
-- 
2.34.1



More information about the Intel-xe mailing list