[PATCH v2 2/2] drm/xe/guc: Allow CTB G2H processing without G2H IRQ

Matthew Brost matthew.brost at intel.com
Thu Jun 6 16:14:16 UTC 2024


On Thu, Jun 06, 2024 at 03:06:39PM +0200, Michal Wajdeczko wrote:
> During early initialization, in the xe_guc_min_load_for_hwconfig()
> function, we are successfully enabling CTB communication, but it
> will only allow us to send non-blocking H2G messages, as due to
> not yet enabled IRQs, including G2H IRQs, we will not notice any
> new G2H message sent by the GuC, including replies to our blocking
> H2G request messages. And those successful replies are mandatory
> for the VF drivers to continue normal operations.
> 
> As attempt to workaround this driver initialization ordering issue,
> introduce special safe-mode CTB worker, that will periodically
> trigger G2H processing, like original IRQ handler, in case no
> MSI/MSIX IRQs were enabled on the driver yet. Once we detect that
> IRQ were enabled, we will stop this worker.
> 
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>

This seems like a fairly reasonable workaround to carry until if / when
we can get the driver load ordering correct. Also it has potential use
on platform bringup if IRQs are broken.

With all of this:
Reviewed-by: Matthew Brost <matthew.brost at intel.com>

> ---
> v2: use code from g2h worker instead of g2h irq handler (Matt)
> ---
>  drivers/gpu/drm/xe/xe_guc_ct.c       | 43 ++++++++++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_guc_ct_types.h |  2 ++
>  2 files changed, 45 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 08cf355e2e56..936b63483e96 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -126,7 +126,9 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
>  	xa_destroy(&ct->fence_lookup);
>  }
>  
> +static void receive_g2h(struct xe_guc_ct *ct);
>  static void g2h_worker_func(struct work_struct *w);
> +static void safe_mode_worker_func(struct work_struct *w);
>  
>  static void primelockdep(struct xe_guc_ct *ct)
>  {
> @@ -155,6 +157,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
>  	spin_lock_init(&ct->fast_lock);
>  	xa_init(&ct->fence_lookup);
>  	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
> +	INIT_DELAYED_WORK(&ct->safe_mode_worker,  safe_mode_worker_func);
>  	init_waitqueue_head(&ct->wq);
>  	init_waitqueue_head(&ct->g2h_fence_wq);
>  
> @@ -321,6 +324,42 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
>  	mutex_unlock(&ct->lock);
>  }
>  
> +static bool ct_needs_safe_mode(struct xe_guc_ct *ct)
> +{
> +	return !pci_dev_msi_enabled(to_pci_dev(ct_to_xe(ct)->drm.dev));
> +}
> +
> +static bool ct_restart_safe_mode_worker(struct xe_guc_ct *ct)
> +{
> +	if (!ct_needs_safe_mode(ct))
> +		return false;
> +
> +	queue_delayed_work(ct->g2h_wq, &ct->safe_mode_worker, HZ / 10);
> +	return true;
> +}
> +
> +static void safe_mode_worker_func(struct work_struct *w)
> +{
> +	struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, safe_mode_worker.work);
> +
> +	receive_g2h(ct);
> +
> +	if (!ct_restart_safe_mode_worker(ct))
> +		xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode canceled\n");
> +}
> +
> +static void ct_enter_safe_mode(struct xe_guc_ct *ct)
> +{
> +	if (ct_restart_safe_mode_worker(ct))
> +		xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode enabled\n");
> +}
> +
> +static void ct_exit_safe_mode(struct xe_guc_ct *ct)
> +{
> +	if (cancel_delayed_work_sync(&ct->safe_mode_worker))
> +		xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n");
> +}
> +
>  int xe_guc_ct_enable(struct xe_guc_ct *ct)
>  {
>  	struct xe_device *xe = ct_to_xe(ct);
> @@ -350,6 +389,9 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
>  	wake_up_all(&ct->wq);
>  	xe_gt_dbg(gt, "GuC CT communication channel enabled\n");
>  
> +	if (ct_needs_safe_mode(ct))
> +		ct_enter_safe_mode(ct);
> +
>  	return 0;
>  
>  err_out:
> @@ -373,6 +415,7 @@ static void stop_g2h_handler(struct xe_guc_ct *ct)
>  void xe_guc_ct_disable(struct xe_guc_ct *ct)
>  {
>  	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
> +	ct_exit_safe_mode(ct);
>  	stop_g2h_handler(ct);
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> index fede4c6e93cb..761cb9031298 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> @@ -110,6 +110,8 @@ struct xe_guc_ct {
>  	u32 g2h_outstanding;
>  	/** @g2h_worker: worker to process G2H messages */
>  	struct work_struct g2h_worker;
> +	/** @safe_mode_worker: worker to check G2H messages with IRQ disabled */
> +	struct delayed_work safe_mode_worker;
>  	/** @state: CT state */
>  	enum xe_guc_ct_state state;
>  	/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
> -- 
> 2.43.0
> 


More information about the Intel-xe mailing list