[PATCH 3/7] drm/xe: Relax runtime pm protection during execution
Thomas Hellström
thomas.hellstrom at linux.intel.com
Mon May 6 11:57:22 UTC 2024
On Fri, 2024-05-03 at 15:13 -0400, Rodrigo Vivi wrote:
> In the regular use case scenario, user space will create an
> exec queue and keep it alive, reusing it until it is done
> with that kind of workload.
>
> For the regular desktop cases, it means that the exec_queue
> remains alive even in idle scenarios where the display goes
> off. This is unacceptable since it would block runtime PM
> indefinitely, preventing deeper Package C-states. That would
> be a pointless drain of power.
>
> So, let's limit the protection to the moments when we are
> actually sending jobs for execution.
>
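If I read the end state correctly, the rpm reference now follows the
job lifetime instead of the exec_queue lifetime, roughly (my sketch of
the resulting pattern, not literal code from the patch):

	job = xe_sched_job_create(q, batch_addr); /* takes a noresume rpm ref */
	...
	xe_sched_job_put(job); /* final put drops the ref in job destroy */

so a cached-but-idle queue no longer pins the device.
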
> This patch also introduces a protection for guc submit fini,
> which is no longer protected by the exec_queue lifetime.
Nit: Please use imperative language in the commit message ^^^
>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: Francois Dugast <francois.dugast at intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
Otherwise LGTM.
Reviewed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_exec_queue.c | 14 --------------
> drivers/gpu/drm/xe/xe_guc_submit.c | 3 +++
> drivers/gpu/drm/xe/xe_sched_job.c | 10 +++-------
> 3 files changed, 6 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 395de93579fa..33b03605a1d1 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -106,7 +106,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
>
> static int __xe_exec_queue_init(struct xe_exec_queue *q)
> {
> - struct xe_device *xe = gt_to_xe(q->gt);
> int i, err;
>
> for (i = 0; i < q->width; ++i) {
> @@ -119,17 +118,6 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
> if (err)
> goto err_lrc;
>
> - /*
> - * Normally the user vm holds an rpm ref to keep the device
> - * awake, and the context holds a ref for the vm, however for
> - * some engines we use the kernels migrate vm underneath which offers no
> - * such rpm ref, or we lack a vm. Make sure we keep a ref here, so we
> - * can perform GuC CT actions when needed. Caller is expected to have
> - * already grabbed the rpm ref outside any sensitive locks.
> - */
> - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
> - xe_pm_runtime_get_noresume(xe);
> -
> return 0;
>
> err_lrc:
> @@ -216,8 +204,6 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
>
> for (i = 0; i < q->width; ++i)
> xe_lrc_finish(q->lrc + i);
> - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
> - xe_pm_runtime_put(gt_to_xe(q->gt));
> __xe_exec_queue_free(q);
> }
>
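For context on what's being dropped here: the old condition pinned rpm
only for non-permanent queues that either carry EXEC_QUEUE_FLAG_VM or
have no vm at all, but it pinned it for the queue's whole lifetime, and
with user space caching queues across idle periods that lifetime is
effectively unbounded. The before/after of the reference span, as I
understand it:

	/* Before: ref held for the queue's entire life */
	__xe_exec_queue_init():  xe_pm_runtime_get_noresume(xe);
	xe_exec_queue_fini():    xe_pm_runtime_put(xe);

	/* After: ref held per job instead (see xe_sched_job.c below) */
	xe_sched_job_create():   xe_pm_runtime_get_noresume(xe);
	xe_sched_job_destroy():  xe_pm_runtime_put(xe);
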
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index d274a139010b..86baebd49c19 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -35,6 +35,7 @@
> #include "xe_macros.h"
> #include "xe_map.h"
> #include "xe_mocs.h"
> +#include "xe_pm.h"
> #include "xe_ring_ops_types.h"
> #include "xe_sched_job.h"
> #include "xe_trace.h"
> @@ -1071,6 +1072,7 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
> struct xe_exec_queue *q = ge->q;
> struct xe_guc *guc = exec_queue_to_guc(q);
>
> + xe_pm_runtime_get(guc_to_xe(guc));
> trace_xe_exec_queue_destroy(q);
>
> if (xe_exec_queue_is_lr(q))
> @@ -1081,6 +1083,7 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
>
> kfree(ge);
> xe_exec_queue_fini(q);
> + xe_pm_runtime_put(guc_to_xe(guc));
> }
>
> static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
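
This hunk is the key one for the second paragraph of the commit
message: __guc_exec_queue_fini_async() runs from a worker after the
last queue reference is gone, and releasing the context talks to the
GuC over CT, which needs the device awake. The getter choice also looks
right to me, as I understand the xe_pm helpers:

	/* May run with the device asleep; resumes it if needed. */
	xe_pm_runtime_get(xe);

	/*
	 * Only valid while something else already keeps the device
	 * awake; bumps the usage count without triggering a resume.
	 */
	xe_pm_runtime_get_noresume(xe);
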
> diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
> index cd8a2fba5438..a4e030f5e019 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job.c
> +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> @@ -158,11 +158,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> for (i = 0; i < width; ++i)
> job->batch_addr[i] = batch_addr[i];
>
> - /* All other jobs require a VM to be open which has a ref */
> - if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
> - xe_pm_runtime_get_noresume(job_to_xe(job));
> - xe_device_assert_mem_access(job_to_xe(job));
> -
> + xe_pm_runtime_get_noresume(job_to_xe(job));
> trace_xe_sched_job_create(job);
> return job;
>
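With the conditional gone, every job pins rpm from create to destroy,
which is what makes removing the per-queue reference safe. The
_noresume variant still suffices because job creation only happens on
paths where the caller has already woken the device, along the lines
of (hypothetical, condensed call site):

	xe_pm_runtime_get(xe);                    /* caller wakes the device */
	job = xe_sched_job_create(q, batch_addr); /* noresume ref is enough */
	...
	xe_pm_runtime_put(xe);                    /* job keeps its own ref */
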
> @@ -191,13 +187,13 @@ void xe_sched_job_destroy(struct kref *ref)
> {
> struct xe_sched_job *job =
> container_of(ref, struct xe_sched_job, refcount);
> + struct xe_device *xe = job_to_xe(job);
>
> - if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
> - xe_pm_runtime_put(job_to_xe(job));
> xe_exec_queue_put(job->q);
> dma_fence_put(job->fence);
> drm_sched_job_cleanup(&job->drm);
> job_free(job);
> + xe_pm_runtime_put(xe);
> }
>
> void xe_sched_job_set_error(struct xe_sched_job *job, int error)
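
One detail worth a comment in the destroy hunk: caching xe up front is
required, since job_free() releases the job memory and evaluating
job_to_xe(job) afterwards for the final put would be a use-after-free:

	struct xe_device *xe = job_to_xe(job); /* cache before job_free() */
	...
	job_free(job);         /* job memory is gone after this point */
	xe_pm_runtime_put(xe); /* must use the cached pointer */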