[PATCH 1/2] drm/xe: Fix fault on fd close after unbind

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Thu Dec 19 20:24:32 UTC 2024


On Tue, Dec 17, 2024 at 09:31:21PM -0800, Lucas De Marchi wrote:
>If userspace holds an fd open, unbinds the device and then closes the fd,
>the driver shouldn't try to access the hardware. Protect the hardware access
>in xe_exec_queue_update_run_ticks() by using drm_dev_enter()/drm_dev_exit().
>This fixes the following page fault:
>
><6> [IGT] xe_wedged: exiting, ret=98
><1> BUG: unable to handle page fault for address: ffffc901bc5e508c
><1> #PF: supervisor read access in kernel mode
><1> #PF: error_code(0x0000) - not-present page
>...
><4>   xe_lrc_update_timestamp+0x1c/0xd0 [xe]
><4>   xe_exec_queue_update_run_ticks+0x50/0xb0 [xe]
><4>   xe_exec_queue_fini+0x16/0xb0 [xe]
><4>   __guc_exec_queue_fini_async+0xc4/0x190 [xe]
><4>   guc_exec_queue_fini_async+0xa0/0xe0 [xe]
><4>   guc_exec_queue_fini+0x23/0x40 [xe]
><4>   xe_exec_queue_destroy+0xb3/0xf0 [xe]
><4>   xe_file_close+0xd4/0x1a0 [xe]
><4>   drm_file_free+0x210/0x280 [drm]
><4>   drm_close_helper.isra.0+0x6d/0x80 [drm]
><4>   drm_release_noglobal+0x20/0x90 [drm]
>
>Fixes: 83db047d9425 ("drm/xe: Stop accumulating LRC timestamp on job_free")
>Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3421
>Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>

LGTM,

Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>

Thanks,
Umesh
>---
> drivers/gpu/drm/xe/xe_exec_queue.c | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
>diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
>index 9c94be5719008..8005530b5e51e 100644
>--- a/drivers/gpu/drm/xe/xe_exec_queue.c
>+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
>@@ -8,6 +8,7 @@
> #include <linux/nospec.h>
>
> #include <drm/drm_device.h>
>+#include <drm/drm_drv.h>
> #include <drm/drm_file.h>
> #include <uapi/drm/xe_drm.h>
>
>@@ -768,9 +769,11 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
>  */
> void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
> {
>+	struct xe_device *xe = gt_to_xe(q->gt);
> 	struct xe_file *xef;
> 	struct xe_lrc *lrc;
> 	u32 old_ts, new_ts;
>+	int idx;
>
> 	/*
> 	 * Jobs that are run during driver load may use an exec_queue, but are
>@@ -780,6 +783,10 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
> 	if (!q->vm || !q->vm->xef)
> 		return;
>
>+	/* Synchronize with unbind while holding the xe file open */
>+	if (!drm_dev_enter(&xe->drm, &idx))
>+		return;
>+
> 	xef = q->vm->xef;
>
> 	/*
>@@ -793,6 +800,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
> 	lrc = q->lrc[0];
> 	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
> 	xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
>+
>+	drm_dev_exit(idx);
> }
>
> /**
>-- 
>2.47.0
>


More information about the Intel-xe mailing list