[PATCH v2 1/1] drm/xe: Build PM into GuC CT layer

Thu Jul 18 17:12:10 UTC 2024

On 18/07/2024 16:52, Matthew Brost wrote:
> Take PM ref when a G2H is outstanding, drop when it is received.
> 
> To safely ensure we have PM ref when in the GuC CT layer, a PM ref needs
> to be held when scheduler messages are pending too.
> 
> v2:
>   - Add outer PM protections to xe_file_close (CI)
> 
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Nirmoy Das <nirmoy.das at intel.com>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_device.c     | 4 ++++
>   drivers/gpu/drm/xe/xe_guc_ct.c     | 5 +++++
>   drivers/gpu/drm/xe/xe_guc_submit.c | 4 ++++
>   3 files changed, 13 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 06cebaffb451..b68ab474e1a0 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -101,6 +101,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
>   	struct xe_exec_queue *q;
>   	unsigned long idx;
>   
> +	xe_pm_runtime_get(xe);
> +
>   	/*
>   	 * No need for exec_queue.lock here as there is no contention for it
>   	 * when FD is closing as IOCTLs presumably can't be modifying the
> @@ -126,6 +128,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
>   
>   	xe_drm_client_put(xef->client);
>   	kfree(xef);
> +
> +	xe_pm_runtime_put(xe);
>   }
>   
>   static const struct drm_ioctl_desc xe_ioctls[] = {
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 7d2e937da1d8..560f7e4bf290 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
>   	xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
>   		     state == XE_GUC_CT_STATE_STOPPED);
>   
> +	while (ct->g2h_outstanding--)
> +		xe_pm_runtime_put(ct_to_xe(ct));
>   	ct->g2h_outstanding = 0;
>   	ct->state = state;
>   
> @@ -501,6 +503,8 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
>   
>   		ct->ctbs.g2h.info.space -= g2h_len;
>   		ct->g2h_outstanding += num_g2h;
> +		while (num_g2h--)
> +			xe_pm_runtime_get_noresume(ct_to_xe(ct));

Could we potentially just grab the PM ref on the 0->1 transition of
g2h_outstanding and only release it on the 1->0 transition? The rpm code
grabs a device-level lock on every call, though I'm not sure how big of a
deal that is here.

>   	}
>   }
>   
> @@ -512,6 +516,7 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
>   
>   	ct->ctbs.g2h.info.space += g2h_len;
>   	--ct->g2h_outstanding;
> +	xe_pm_runtime_put(ct_to_xe(ct));
>   }
>   
>   static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 860405527115..993d0344dc88 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -1402,6 +1402,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
>   	default:
>   		XE_WARN_ON("Unknown message type");
>   	}
> +
> +	xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data)));
>   }
>   
>   static const struct drm_sched_backend_ops drm_sched_ops = {
> @@ -1492,6 +1494,8 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q)
>   static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
>   				   u32 opcode)
>   {
> +	xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
> +
>   	INIT_LIST_HEAD(&msg->link);
>   	msg->opcode = opcode;
>   	msg->private_data = q;