[PATCH v2 07/11] drm/xe: Add xe_guc_tlb_invalidation layer

Matthew Brost matthew.brost at intel.com
Wed Jul 10 04:02:02 UTC 2024


On Tue, Jul 09, 2024 at 11:31:59PM +0200, Michal Wajdeczko wrote:
> Hi Matt,
> 
> few nits (as usual)
> 
> On 08.07.2024 06:03, Matthew Brost wrote:
> > Move GuC specific TLB invalidation into dedicated layer.
> > 
> > v2:
> >  - Remove XXX comment
> > 
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >  drivers/gpu/drm/xe/Makefile                   |   1 +
> >  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c   | 121 +--------------
> >  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h   |   4 +-
> >  .../gpu/drm/xe/xe_gt_tlb_invalidation_types.h |  13 ++
> >  drivers/gpu/drm/xe/xe_gt_types.h              |   2 +
> >  drivers/gpu/drm/xe/xe_guc_ct.c                |   2 +-
> >  drivers/gpu/drm/xe/xe_guc_tlb_invalidation.c  | 143 ++++++++++++++++++
> >  drivers/gpu/drm/xe/xe_guc_tlb_invalidation.h  |  18 +++
> >  8 files changed, 186 insertions(+), 118 deletions(-)
> >  create mode 100644 drivers/gpu/drm/xe/xe_guc_tlb_invalidation.c
> >  create mode 100644 drivers/gpu/drm/xe/xe_guc_tlb_invalidation.h
> > 
> > diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> > index 628c245c4822..21f4ddf1a946 100644
> > --- a/drivers/gpu/drm/xe/Makefile
> > +++ b/drivers/gpu/drm/xe/Makefile
> > @@ -83,6 +83,7 @@ xe-y += xe_bb.o \
> >  	xe_guc_log.o \
> >  	xe_guc_pc.o \
> >  	xe_guc_submit.o \
> > +	xe_guc_tlb_invalidation.o \
> >  	xe_heci_gsc.o \
> >  	xe_hw_engine.o \
> >  	xe_hw_engine_class_sysfs.o \
> > diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> > index 79d1ed138db5..4b7b5f8205f9 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> > @@ -7,13 +7,12 @@
> >  
> >  #include "xe_gt_tlb_invalidation.h"
> >  
> > -#include "abi/guc_actions_abi.h"
> >  #include "xe_device.h"
> >  #include "xe_force_wake.h"
> >  #include "xe_gt.h"
> >  #include "xe_gt_printk.h"
> > -#include "xe_guc.h"
> >  #include "xe_guc_ct.h"
> > +#include "xe_guc_tlb_invalidation.h"
> >  #include "xe_mmio.h"
> >  #include "xe_sriov.h"
> >  #include "xe_trace.h"
> > @@ -85,6 +84,7 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
> >  	spin_lock_init(&gt->tlb_invalidation.fence_lock);
> >  	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
> >  			  xe_gt_tlb_fence_timeout);
> > +	gt->tlb_invalidation.ops = xe_guc_tlb_invalidation_get_ops(&gt->uc.guc);
> 
> what about force_execlist=true mode ?
> 

I do this in the follow-up patch [1], in which I add support for multiple
TLB invalidation clients and allow a NULL ops table or individual NULL
ops. Open to reworking how I do that, but take a look there first.

[1] https://patchwork.freedesktop.org/patch/602561/?series=135809&rev=2
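
For reference, the direction in [1] is roughly the following (sketch,
not the exact code from that series):

	static int send_tlb_invalidation_ggtt(struct xe_gt *gt, int seqno)
	{
		/* No client installed, e.g. force_execlist=true */
		if (!gt->tlb_invalidation.ops ||
		    !gt->tlb_invalidation.ops->tlb_invalidation_ggtt)
			return 0;

		return gt->tlb_invalidation.ops->tlb_invalidation_ggtt(gt,
									seqno);
	}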

> shouldn't we have nop_ops (or direct_ops) and let the GuC code replace
> ops when selected/in use ?
> 
> >  
> >  	return drmm_mutex_init(&gt_to_xe(gt)->drm,
> >  			       &gt->tlb_invalidation.seqno_lock);
> > @@ -157,97 +157,16 @@ static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
> >  	return seqno_recv >= seqno;
> >  }
> >  
> > -static int send_tlb_invalidation(struct xe_guc *guc, u32 *action, int len)
> > -{
> > -	struct xe_gt *gt = guc_to_gt(guc);
> > -
> > -	xe_gt_assert(gt, action[1]);	/* Seqno */
> > -	lockdep_assert_held(&gt->tlb_invalidation.seqno_lock);
> > -
> > -	/*
> > -	 * XXX: The seqno algorithm relies on TLB invalidation being processed
> > -	 * in order which they currently are, if that changes the algorithm will
> > -	 * need to be updated.
> > -	 */
> > -
> > -	return xe_guc_ct_send(&guc->ct, action, len,
> > -			      G2H_LEN_DW_TLB_INVALIDATE, 1);
> > -}
> > -
> > -#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
> > -		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
> > -		XE_GUC_TLB_INVAL_FLUSH_CACHE)
> > -
> >  static int send_tlb_invalidation_ggtt(struct xe_gt *gt, int seqno)
> >  {
> > -	u32 action[] = {
> > -		XE_GUC_ACTION_TLB_INVALIDATION,
> > -		seqno,
> > -		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
> > -	};
> > -
> > -	return send_tlb_invalidation(&gt->uc.guc, action, ARRAY_SIZE(action));
> > +	return gt->tlb_invalidation.ops->tlb_invalidation_ggtt(gt, seqno);
> >  }
> >  
> >  static int send_tlb_invalidation_ppgtt(struct xe_gt *gt, u64 start, u64 end,
> >  				       u32 asid, int seqno)
> >  {
> > -#define MAX_TLB_INVALIDATION_LEN	7
> > -	u32 action[MAX_TLB_INVALIDATION_LEN];
> > -	int len = 0;
> > -
> > -	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
> > -	action[len++] = seqno;
> > -	if (!gt_to_xe(gt)->info.has_range_tlb_invalidation) {
> > -		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
> > -	} else {
> > -		u64 orig_start = start;
> > -		u64 length = end - start;
> > -		u64 align;
> > -
> > -		if (length < SZ_4K)
> > -			length = SZ_4K;
> > -
> > -		/*
> > -		 * We need to invalidate a higher granularity if start address
> > -		 * is not aligned to length. When start is not aligned with
> > -		 * length we need to find the length large enough to create an
> > -		 * address mask covering the required range.
> > -		 */
> > -		align = roundup_pow_of_two(length);
> > -		start = ALIGN_DOWN(start, align);
> > -		end = ALIGN(end, align);
> > -		length = align;
> > -		while (start + length < end) {
> > -			length <<= 1;
> > -			start = ALIGN_DOWN(orig_start, length);
> > -		}
> > -
> > -		/*
> > -		 * Minimum invalidation size for a 2MB page that the hardware
> > -		 * expects is 16MB
> > -		 */
> > -		if (length >= SZ_2M) {
> > -			length = max_t(u64, SZ_16M, length);
> > -			start = ALIGN_DOWN(orig_start, length);
> > -		}
> > -
> > -		xe_gt_assert(gt, length >= SZ_4K);
> > -		xe_gt_assert(gt, is_power_of_2(length));
> > -		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
> > -						    ilog2(SZ_2M) + 1)));
> > -		xe_gt_assert(gt, IS_ALIGNED(start, length));
> > -
> > -		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
> > -		action[len++] = asid;
> > -		action[len++] = lower_32_bits(start);
> > -		action[len++] = upper_32_bits(start);
> > -		action[len++] = ilog2(length) - ilog2(SZ_4K);
> > -	}
> > -
> > -	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
> > -
> > -	return send_tlb_invalidation(&gt->uc.guc, action, len);
> > +	return gt->tlb_invalidation.ops->tlb_invalidation_ppgtt(gt, start, end,
> > +								asid, seqno);
> >  }
> >  
> >  static void xe_gt_tlb_invalidation_fence_prep(struct xe_gt *gt,
> > @@ -278,10 +197,6 @@ static void xe_gt_tlb_invalidation_fence_prep(struct xe_gt *gt,
> >  		gt->tlb_invalidation.seqno = 1;
> >  }
> >  
> > -#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
> > -		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
> > -		XE_GUC_TLB_INVAL_FLUSH_CACHE)
> > -
> >  static int __xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt,
> >  					 struct xe_gt_tlb_invalidation_fence *fence)
> >  {
> > @@ -420,7 +335,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> >   *
> >   * Update recv seqno, signal any GT TLB invalidation fences, and restart TDR
> >   */
> > -static void xe_gt_tlb_invalidation_done_handler(struct xe_gt *gt, int seqno)
> > +void xe_gt_tlb_invalidation_done_handler(struct xe_gt *gt, int seqno)
> >  {
> >  	struct xe_device *xe = gt_to_xe(gt);
> >  	struct xe_gt_tlb_invalidation_fence *fence, *next;
> > @@ -469,30 +384,6 @@ static void xe_gt_tlb_invalidation_done_handler(struct xe_gt *gt, int seqno)
> >  	spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
> >  }
> >  
> > -/**
> > - * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
> > - * @guc: guc
> > - * @msg: message indicating TLB invalidation done
> > - * @len: length of message
> > - *
> > - * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
> > - * invalidation fences for seqno. Algorithm for this depends on seqno being
> > - * received in-order and asserts this assumption.
> > - *
> > - * Return: 0 on success, -EPROTO for malformed messages.
> > - */
> > -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
> > -{
> > -	struct xe_gt *gt = guc_to_gt(guc);
> > -
> > -	if (unlikely(len != 1))
> > -		return -EPROTO;
> > -
> > -	xe_gt_tlb_invalidation_done_handler(gt, msg[0]);
> > -
> > -	return 0;
> > -}
> > -
> >  static const char *
> >  invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
> >  {
> > diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> > index cbf49b3d0265..ee532ad64aac 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> > @@ -11,7 +11,6 @@
> >  #include "xe_gt_tlb_invalidation_types.h"
> >  
> >  struct xe_gt;
> > -struct xe_guc;
> >  struct xe_vma;
> >  
> >  int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
> > @@ -23,11 +22,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> >  int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> >  				 struct xe_gt_tlb_invalidation_fence *fence,
> >  				 u64 start, u64 end, u32 asid);
> > -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
> >  
> >  void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
> >  				       struct xe_gt_tlb_invalidation_fence *fence);
> >  
> > +void xe_gt_tlb_invalidation_done_handler(struct xe_gt *gt, int seqno);
> > +
> >  static inline void
> >  xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
> >  {
> > diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
> > index 934c828efe31..1abb8692d14b 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
> > @@ -8,6 +8,8 @@
> >  
> >  #include <linux/dma-fence.h>
> >  
> > +struct xe_gt;
> > +
> >  /**
> >   * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
> >   *
> > @@ -25,4 +27,15 @@ struct xe_gt_tlb_invalidation_fence {
> >  	ktime_t invalidation_time;
> >  };
> >  
> > +/**
> > + * struct xe_gt_tlb_invalidation_ops - Xe GT TLB invalidation operations
> > + */
> > +struct xe_gt_tlb_invalidation_ops {
> > +	/** @tlb_invalidation_ggtt: TLB invalidation GGTT */
> > +	int (*tlb_invalidation_ggtt)(struct xe_gt *gt, int seqno);
> > +	/** @tlb_invalidation_ppgtt: TLB invalidation PPGTT */
> > +	int (*tlb_invalidation_ppgtt)(struct xe_gt *gt, u64 start, u64 end,
> > +				      u32 asid, int seqno);
> 
> do we need full "tlb_invalidation" prefix here ?
> maybe just:
> 
> 	int (*ggtt)(struct xe_gt *gt, int seqno);
> 	int (*ppgtt)(struct xe_gt *gt, u64 start, u64 end,
> 
> as this will used like:
> 
> 	return gt->tlb_invalidation.ops->ggtt(gt, seqno);
> or
> 	return gt->tlb_invalidation.ops->ppgtt(gt, start, end,...
> 

Good idea, will change.
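
i.e. (sketch of the renamed ops):

	/** struct xe_gt_tlb_invalidation_ops - Xe GT TLB invalidation operations */
	struct xe_gt_tlb_invalidation_ops {
		/** @ggtt: GGTT TLB invalidation */
		int (*ggtt)(struct xe_gt *gt, int seqno);
		/** @ppgtt: PPGTT TLB invalidation */
		int (*ppgtt)(struct xe_gt *gt, u64 start, u64 end,
			     u32 asid, int seqno);
	};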

> 
> > +};
> > +
> >  #endif
> > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> > index fb280f058cdc..4b9740a68457 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > @@ -169,6 +169,8 @@ struct xe_gt {
> >  
> >  	/** @tlb_invalidation: TLB invalidation state */
> >  	struct {
> > +		/** @tlb_invalidation.ops: TLB invalidation ops */
> > +		const struct xe_gt_tlb_invalidation_ops *ops;
> >  		/** @tlb_invalidation.seqno_lock: TLB invalidation seqno lock */
> >  		struct mutex seqno_lock;
> >  		/**
> > diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> > index 7d2e937da1d8..b09423590f62 100644
> > --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> > +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> > @@ -23,10 +23,10 @@
> >  #include "xe_gt_printk.h"
> >  #include "xe_gt_sriov_pf_control.h"
> >  #include "xe_gt_sriov_pf_monitor.h"
> > -#include "xe_gt_tlb_invalidation.h"
> >  #include "xe_guc.h"
> >  #include "xe_guc_relay.h"
> >  #include "xe_guc_submit.h"
> > +#include "xe_guc_tlb_invalidation.h"
> >  #include "xe_map.h"
> >  #include "xe_pm.h"
> >  #include "xe_trace_guc.h"
> > diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_guc_tlb_invalidation.c
> > new file mode 100644
> > index 000000000000..b6fd61bb77ba
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/xe_guc_tlb_invalidation.c
> > @@ -0,0 +1,143 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2024 Intel Corporation
> > + */
> > +
> > +#include "abi/guc_actions_abi.h"
> > +
> > +#include "xe_assert.h"
> > +#include "xe_device.h"
> > +#include "xe_gt.h"
> > +#include "xe_gt_printk.h"
> > +#include "xe_gt_tlb_invalidation.h"
> > +#include "xe_guc.h"
> > +#include "xe_guc_ct.h"
> > +#include "xe_guc_tlb_invalidation.h"
> > +
> > +static int send_tlb_invalidation(struct xe_guc *guc, u32 *action, int len)
> 
> const u32 *action
> 
> and len should be unsigned
> 

Yep.
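
i.e. (sketch of the updated helper; xe_guc_ct_send takes a const u32 *
already, IIRC, so this should drop right in):

	static int send_tlb_invalidation(struct xe_guc *guc, const u32 *action,
					 unsigned int len)
	{
		struct xe_gt *gt = guc_to_gt(guc);

		xe_gt_assert(gt, action[1]);	/* Seqno */
		lockdep_assert_held(&gt->tlb_invalidation.seqno_lock);

		return xe_guc_ct_send(&guc->ct, action, len,
				      G2H_LEN_DW_TLB_INVALIDATE, 1);
	}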

> > +{
> > +	struct xe_gt *gt = guc_to_gt(guc);
> > +
> > +	xe_gt_assert(gt, action[1]);	/* Seqno */
> 
> it would be nice to have that "1" as part of the GuC ABI,
> maybe at least as:
> 
> #define H2G_TLB_INVALIDATE_MSG_1_SEQNO	GUC_HXG_REQUEST_MSG_n_DATAn
> 
> xe_gt_assert(gt, FIELD_GET(H2G_TLB_INVALIDATE_MSG_1_SEQNO, action[1]));
> 

Yep.

> > +	lockdep_assert_held(&gt->tlb_invalidation.seqno_lock);
> > +
> > +	return xe_guc_ct_send(&guc->ct, action, len,
> > +			      G2H_LEN_DW_TLB_INVALIDATE, 1);
> > +}
> > +
> > +#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
> > +		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
> > +		XE_GUC_TLB_INVAL_FLUSH_CACHE)
> 
> maybe it's time to convert SHIFT-based GuC ABI definitions into
> mask-based and use FIELD_PREP as we do elsewhere
> 

Can do this. I'll likely do it in a patch after this one, as existing
code uses these definitions. Can always squash that patch into this one
if preferred.
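
Roughly (sketch only; the mask layouts below are assumed from the
existing SHIFT defines, not the actual ABI header):

	#include <linux/bitfield.h>

	/* Assumed field layout: type in bits 7:0, mode in bits 11:8 */
	#define XE_GUC_TLB_INVAL_TYPE_MASK	GENMASK(7, 0)
	#define XE_GUC_TLB_INVAL_MODE_MASK	GENMASK(11, 8)

	#define MAKE_INVAL_OP(type) \
		(FIELD_PREP(XE_GUC_TLB_INVAL_TYPE_MASK, type) | \
		 FIELD_PREP(XE_GUC_TLB_INVAL_MODE_MASK, \
			    XE_GUC_TLB_INVAL_MODE_HEAVY) | \
		 XE_GUC_TLB_INVAL_FLUSH_CACHE)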

> > +
> > +static int send_tlb_invalidation_ggtt(struct xe_gt *gt, int seqno)
> 
> shouldn't we take "guc" instead of "gt" ? see below
>

Nope, as this is a vfunc.
 
> > +{
> > +	u32 action[] = {
> > +		XE_GUC_ACTION_TLB_INVALIDATION,
> > +		seqno,
> > +		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
> > +	};
> > +
> > +	return send_tlb_invalidation(&gt->uc.guc, action, ARRAY_SIZE(action));
> > +}
> > +
> > +static int send_tlb_invalidation_ppgtt(struct xe_gt *gt, u64 start, u64 end,
> > +				       u32 asid, int seqno)
> > +{
> > +#define MAX_TLB_INVALIDATION_LEN	7
> 
> this should be defined as part of the GuC ABI
>

Sure. 
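
e.g. in abi/guc_actions_abi.h (name/placement illustrative):

	#define XE_GUC_TLB_INVALIDATION_MSG_MAX_LEN	7u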
 
> > +	u32 action[MAX_TLB_INVALIDATION_LEN];
> > +	int len = 0;
> > +
> > +	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
> > +	action[len++] = seqno;
> > +	if (!gt_to_xe(gt)->info.has_range_tlb_invalidation) {
> > +		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
> > +	} else {
> > +		u64 orig_start = start;
> > +		u64 length = end - start;
> > +		u64 align;
> > +
> > +		if (length < SZ_4K)
> > +			length = SZ_4K;
> > +
> > +		/*
> > +		 * We need to invalidate a higher granularity if start address
> > +		 * is not aligned to length. When start is not aligned with
> > +		 * length we need to find the length large enough to create an
> > +		 * address mask covering the required range.
> > +		 */
> > +		align = roundup_pow_of_two(length);
> > +		start = ALIGN_DOWN(start, align);
> > +		end = ALIGN(end, align);
> > +		length = align;
> > +		while (start + length < end) {
> > +			length <<= 1;
> > +			start = ALIGN_DOWN(orig_start, length);
> > +		}
> > +
> > +		/*
> > +		 * Minimum invalidation size for a 2MB page that the hardware
> > +		 * expects is 16MB
> > +		 */
> > +		if (length >= SZ_2M) {
> > +			length = max_t(u64, SZ_16M, length);
> > +			start = ALIGN_DOWN(orig_start, length);
> > +		}
> > +
> > +		xe_gt_assert(gt, length >= SZ_4K);
> > +		xe_gt_assert(gt, is_power_of_2(length));
> > +		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
> > +						    ilog2(SZ_2M) + 1)));
> > +		xe_gt_assert(gt, IS_ALIGNED(start, length));
> > +
> > +		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
> > +		action[len++] = asid;
> > +		action[len++] = lower_32_bits(start);
> > +		action[len++] = upper_32_bits(start);
> > +		action[len++] = ilog2(length) - ilog2(SZ_4K);
> > +	}
> > +
> > +	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
> > +
> > +	return send_tlb_invalidation(&gt->uc.guc, action, len);
> > +}
> > +
> > +static const struct xe_gt_tlb_invalidation_ops guc_tlb_invalidation_ops = {
> > +	.tlb_invalidation_ppgtt = send_tlb_invalidation_ppgtt,
> > +	.tlb_invalidation_ggtt = send_tlb_invalidation_ggtt,
> > +};
> 
> maybe it's overkill, but what about using the following code layout:
> 
> // pure GuC code
> static int guc_send_tlb_invalidation(guc, action, len)
> {
> }
> 
> // pure GuC code
> static int guc_send_tlb_invalidation_ggtt(guc, seqno)
> {
> 	return guc_send_tlb_invalidation(guc, ...);
> }
> 
> // ops glue code
> static int tlb_invalidation_ggtt_by_guc(gt, seqno)
> {
> 	return guc_send_tlb_invalidation_ggtt(guc, ...);
> }
> 
> const struct xe_gt_tlb_invalidation_ops guc_tlb_invalidation_ops = {
> 	.ggtt = tlb_invalidation_ggtt_by_guc,
> };
> 

I'm going to lean towards overkill.

> 
> > +
> > +/**
> > + * xe_guc_tlb_invalidation_get_ops - Get Guc TLB invalidation ops
> > + * @guc: guc
> > + *
> > + * Return: xe_gt_tlb_invalidation_ops for GuC TLB invalidation
> > + */
> > +const struct xe_gt_tlb_invalidation_ops*
> > +xe_guc_tlb_invalidation_get_ops(struct xe_guc *guc)
> 
> since we don't use "guc" here and don't have any logic behind this
> function, so maybe easier/cleaner would be just to expose ops directly
> in the header as:
> 
> extern const struct xe_gt_tlb_invalidation_ops guc_tlb_invalidation_ops;
>

No. I think my idea was that in this layer we could detect the GuC
version and potentially install a different set of vfuncs.

As I type this, we probably want a different set of vfuncs based on
xe->info.has_range_tlb_invalidation in the current code base.
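
Something along these lines (sketch; the two ops tables here are
hypothetical):

	const struct xe_gt_tlb_invalidation_ops *
	xe_guc_tlb_invalidation_get_ops(struct xe_guc *guc)
	{
		struct xe_device *xe = gt_to_xe(guc_to_gt(guc));

		if (xe->info.has_range_tlb_invalidation)
			return &guc_tlb_invalidation_range_ops;

		return &guc_tlb_invalidation_full_ops;
	}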
 
> or since we may want to postpone GuC ops registration until we know we
> will use the GuC, expose function in xe_gt_tlb_invalidation.h:
> 
> void xe_gt_tlb_invalidation_set_ops(
> 	struct xe_gt *gt,
> 	const struct xe_gt_tlb_invalidation_ops *ops);
> 

I think this is probably overkill, but it's something to keep in mind if
we need to decouple setting the ops from TLB invalidation init (e.g. if
we don't know which set of ops to install until a later point in driver
load).

> > +{
> > +	return &guc_tlb_invalidation_ops;
> > +}
> > +
> > +/**
> > + * xe_guc_tlb_invalidation_done_handler - GuC TLB invalidation done handler
> > + * @guc: guc
> > + * @msg: message indicating TLB invalidation done
> > + * @len: length of message
> > + *
> > + * Parse seqno of TLB invalidation, 
> 
> only this part seems to be valid, all below belongs more to the
> description of the xe_gt_tlb_invalidation_done_handler() that maybe we
> should explicitly refer to ?
>

Let me rewrite this kernel doc, as it is just copy/paste that doesn't
really apply here.
 
> > wake any waiters for seqno, and signal any
> > + * invalidation fences for seqno. Algorithm for this depends on seqno being
> > + * received in-order and asserts this assumption.
> > + *
> > + * Return: 0 on success, -EPROTO for malformed messages.
> > + */
> > +int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
> > +{
> > +	struct xe_gt *gt = guc_to_gt(guc);
> > +
> > +	if (unlikely(len != 1))
> 
> this magic "1" should be part of the GuC ABI
>

Sure.
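
e.g. (define name illustrative):

	#define XE_GUC_TLB_INVALIDATION_DONE_MSG_LEN	1u

	if (unlikely(len != XE_GUC_TLB_INVALIDATION_DONE_MSG_LEN))
		return -EPROTO;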
 
> > +		return -EPROTO;
> > +
> > +	xe_gt_tlb_invalidation_done_handler(gt, msg[0]);
> > +
> > +	return 0;
> > +}
> > diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_guc_tlb_invalidation.h
> > new file mode 100644
> > index 000000000000..44408aae6955
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/xe_guc_tlb_invalidation.h
> > @@ -0,0 +1,18 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2024 Intel Corporation
> > + */
> > +
> > +#ifndef _XE_GUC_TLB_INVALIDATION_H_
> > +#define _XE_GUC_TLB_INVALIDATION_H_
> > +
> > +struct xe_guc;
> 
> forward decls should be _after_ any includes
>

Yep.
 
> > +
> > +#include <linux/types.h>
> > +
> > +const struct xe_gt_tlb_invalidation_ops*
> > +xe_guc_tlb_invalidation_get_ops(struct xe_guc *guc);
> > +
> > +int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
> > +
> > +#endif	/* _XE_GUC_TLB_INVALIDATION_H_ */
> 
> IIRC in Xe we don't use decorations for closing #endif of include guard

Will delete.

Matt

