[PATCH v2 4/4] drm/xe: Fix use after free when client stats are captured

Matthew Brost matthew.brost at intel.com
Tue Jul 9 20:46:14 UTC 2024


On Mon, Jul 08, 2024 at 05:28:35PM -0700, Umesh Nerlige Ramappa wrote:
> xe_file_close triggers an asynchronous queue cleanup and then frees up
> the xef object. Since queue cleanup flushes all pending jobs and the KMD
> stores client usage stats into the xef object after jobs are flushed, we
> see a use-after-free for the xef object. Resolve this by taking a
> reference to xef from xe_exec_queue.
> 
> While at it, revert an earlier change that contained a partial
> workaround for this issue.
> 
> v2:
> - Take a ref to xef even for the VM bind queue (Matt)
> - Squash patches relevant to that fix and workaround (Lucas)
> 
> Fixes: ce62827bc294 ("drm/xe: Do not access xe file when updating exec queue run_ticks")
> Fixes: 6109f24f87d7 ("drm/xe: Add helper to accumulate exec queue runtime")
> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1908

I haven't looked at Lucas's suggestions for the Fixes tags / patch
ordering, so I'd double-check with him that this meets his expectations.

The patch itself LGTM. With that:
Reviewed-by: Matthew Brost <matthew.brost at intel.com>
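
For anyone following along, the fix leans on the xe_file reference
counting added earlier in the series: the exec queue takes a reference
at create time and drops it in __xe_exec_queue_free(), so the xef object
(including its run_ticks[] array) outlives the asynchronous queue
cleanup. Roughly, the helpers look like the sketch below; the kref field
and release callback names are illustrative, not copied from the tree:

#include <linux/kref.h>
#include <linux/slab.h>

struct xe_file {
	/* Lifetime of the per-client state, including run_ticks[] */
	struct kref refcount;
	/* ... other per-client state ... */
};

/* Release callback invoked once the last reference is dropped */
static void xe_file_destroy(struct kref *ref)
{
	struct xe_file *xef = container_of(ref, struct xe_file, refcount);

	kfree(xef);
}

static inline struct xe_file *xe_file_get(struct xe_file *xef)
{
	kref_get(&xef->refcount);
	return xef;
}

static inline void xe_file_put(struct xe_file *xef)
{
	kref_put(&xef->refcount, xe_file_destroy);
}

With that in place, xe_file_close() dropping its reference no longer
frees xef while queue cleanup is still in flight; the final reference is
dropped from __xe_exec_queue_free() once the last queue goes away, so
xe_exec_queue_update_run_ticks() can safely write into xef->run_ticks[].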

> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_drm_client.c       |  5 +----
>  drivers/gpu/drm/xe/xe_exec_queue.c       | 10 +++++++++-
>  drivers/gpu/drm/xe/xe_exec_queue_types.h |  7 +++----
>  3 files changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
> index 6a26923fa10e..7ddd59908334 100644
> --- a/drivers/gpu/drm/xe/xe_drm_client.c
> +++ b/drivers/gpu/drm/xe/xe_drm_client.c
> @@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
>  
>  	/* Accumulate all the exec queues from this client */
>  	mutex_lock(&xef->exec_queue.lock);
> -	xa_for_each(&xef->exec_queue.xa, i, q) {
> +	xa_for_each(&xef->exec_queue.xa, i, q)
>  		xe_exec_queue_update_run_ticks(q);
> -		xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks;
> -		q->old_run_ticks = q->run_ticks;
> -	}
>  	mutex_unlock(&xef->exec_queue.lock);
>  
>  	/* Get the total GPU cycles */
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 3336a01a1006..69867a7b7c77 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
>  {
>  	if (q->vm)
>  		xe_vm_put(q->vm);
> +
> +	if (q->xef)
> +		xe_file_put(q->xef);
> +
>  	kfree(q);
>  }
>  
> @@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>  		goto kill_exec_queue;
>  
>  	args->exec_queue_id = id;
> +	q->xef = xe_file_get(xef);
>  
>  	return 0;
>  
> @@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
>   */
>  void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
>  {
> +	struct xe_file *xef;
>  	struct xe_lrc *lrc;
>  	u32 old_ts, new_ts;
>  
> @@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
>  	if (!q->vm || !q->vm->xef)
>  		return;
>  
> +	xef = q->vm->xef;
> +
>  	/*
>  	 * Only sample the first LRC. For parallel submission, all of them are
>  	 * scheduled together and we compensate that below by multiplying by
> @@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
>  	 */
>  	lrc = q->lrc[0];
>  	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
> -	q->run_ticks += (new_ts - old_ts) * q->width;
> +	xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
>  }
>  
>  void xe_exec_queue_kill(struct xe_exec_queue *q)
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index ded9f9396429..b5343cdd0632 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -38,6 +38,9 @@ enum xe_exec_queue_priority {
>   * a kernel object.
>   */
>  struct xe_exec_queue {
> +	/** @xef: Back pointer to xe file if this is a user created exec queue */
> +	struct xe_file *xef;
> +
>  	/** @gt: graphics tile this exec queue can submit to */
>  	struct xe_gt *gt;
>  	/**
> @@ -139,10 +142,6 @@ struct xe_exec_queue {
>  	 * Protected by @vm's resv. Unused if @vm == NULL.
>  	 */
>  	u64 tlb_flush_seqno;
> -	/** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */
> -	u64 old_run_ticks;
> -	/** @run_ticks: hw engine class run time in ticks for this exec queue */
> -	u64 run_ticks;
>  	/** @lrc: logical ring context for this exec queue */
>  	struct xe_lrc *lrc[];
>  };
> -- 
> 2.38.1
> 

