[Intel-gfx] [PATCH v8 3/7] drm/i915: Define and use GuC and CTB TLB invalidation routines
Andi Shyti
andi.shyti at linux.intel.com
Mon Oct 9 11:40:54 UTC 2023
Hi,
...
> > @@ -131,11 +132,23 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
> > return;
> > with_intel_gt_pm_if_awake(gt, wakeref) {
> > + struct intel_guc *guc = &gt->uc.guc;
> > +
> > mutex_lock(&gt->tlb.invalidate_lock);
> > if (tlb_seqno_passed(gt, seqno))
> > goto unlock;
> > - mmio_invalidate_full(gt);
> > + if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
> > + /*
> > + * Only perform GuC TLB invalidation if GuC is ready.
> > + * If GuC is not ready, then there are no TLBs to
> > + * invalidate. Ergo, skip invalidation.
> > + */
> > + if (intel_guc_is_ready(guc))
> > + intel_guc_invalidate_tlb_engines(guc);
>
> What was the answer to John's question on why it is okay to just skip and
> not maybe fall back to mmio?
maybe this can be written as:

	if (HAS_GUC_TLB_INVALIDATION(gt->i915) &&
	    intel_guc_is_ready(guc))
		intel_guc_invalidate_tlb_engines(guc);
	else
		mmio_invalidate_full(gt);

so that we never skip the invalidation: whenever the GuC path is not
available (or GuC is not ready yet) we fall back to the mmio one.
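For completeness, a rough sketch (untested, and assuming the unlock
path below stays as it is) of how the whole body under the wakeref
would then look:

	with_intel_gt_pm_if_awake(gt, wakeref) {
		struct intel_guc *guc = &gt->uc.guc;

		mutex_lock(&gt->tlb.invalidate_lock);
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		/* Fall back to mmio whenever the GuC path is unavailable. */
		if (HAS_GUC_TLB_INVALIDATION(gt->i915) &&
		    intel_guc_is_ready(guc))
			intel_guc_invalidate_tlb_engines(guc);
		else
			mmio_invalidate_full(gt);

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}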
> > + } else {
> > + mmio_invalidate_full(gt);
> > + }
> > write_seqcount_invalidate(&gt->tlb.seqno);
> > unlock:
...
> > + /*
> > + * The full GT reset will have cleared the TLB caches and flushed the
> > + * G2H message queue; we can release all the blocked waiters.
> > + *
> > + * This is safe to do unlocked because the xarray is not dependent
> > + * on the GT reset, and there's a separate execution path for TLB
> > + * invalidations on GT reset, and there's a large window of time
> > + * between the GT reset and GuC becoming available.
> > + */
> > + xa_for_each(&guc->tlb_lookup, i, wait)
> > + wake_up(&wait->wq);
>
> If you are confident there can be no failures to wake up someone, who maybe
> just added themselves to the xarray (via put pages for instance), while
> reset in ongoing. Or even removed themselves after say timing out the wait
> and so freed their entry...
I guess you are suggesting here to take a spinlock around this. The
reset is protected by the uncore spinlock, but I don't really
see it colliding with the reset, to be honest.
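If we wanted to close that window anyway, a minimal sketch (untested,
relying only on the xarray's own lock, which I'm assuming is enough
here) would be:

	/*
	 * Sketch: hold the xarray's internal lock so that entries cannot
	 * be added or freed while we walk the array and wake the waiters.
	 */
	xa_lock_irq(&guc->tlb_lookup);
	xa_for_each(&guc->tlb_lookup, i, wait)
		wake_up(&wait->wq);
	xa_unlock_irq(&guc->tlb_lookup);

The _irq variant just to match the xa_alloc_cyclic_irq() users of the
same lock.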
> > }
> > static void guc_cancel_context_requests(struct intel_context *ce)
> > @@ -1948,6 +1962,50 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
> > static void destroyed_worker_func(struct work_struct *w);
> > static void reset_fail_worker_func(struct work_struct *w);
> > +static int init_tlb_lookup(struct intel_guc *guc)
> > +{
> > + struct intel_guc_tlb_wait *wait;
> > + int err;
> > +
> > + if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915))
> > + return 0;
> > +
> > + xa_init_flags(&guc->tlb_lookup, XA_FLAGS_ALLOC);
> > +
> > + wait = kzalloc(sizeof(*wait), GFP_KERNEL);
> > + if (!wait)
> > + return -ENOMEM;
> > +
> > + init_waitqueue_head(&wait->wq);
> > +
> > + /* Preallocate a shared id for use under memory pressure. */
> > + err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait,
> > + xa_limit_32b, &guc->next_seqno, GFP_KERNEL);
> > + /* Only error if out of memory, not when busy (list full)*/
> > + if (err == -ENOMEM) {
> > + kfree(wait);
> > + return err;
> > + }
>
> I agreed with John here that only looking at ENOMEM reads odd and I did not
> see that answered. Did I miss it?
xa_alloc_cyclic_irq() can also fail with -EBUSY... so I think
this is more a matter...
> Otherwise, I _know_ it is not likely to get any other error having *just*
> created a new xarray, but still, why not simply catch all? It is not like
> the slot fallback code at runtime would handle guc->serial_slot being
> empty?! It appears it would just explode in guc_send_invalidate_tlb if it
> would hit it..
... of deciding what we want to do with such errors. E.g. Jonathan
decided here not to fail but to ignore the error. Should we fail
on every error?
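If we do want to be strict, the simplest option is to bail out on any
failure from xa_alloc_cyclic_irq(), e.g. (just a sketch):

	err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait,
				  xa_limit_32b, &guc->next_seqno, GFP_KERNEL);
	if (err < 0) {
		/* xa_alloc_cyclic_irq() returns 0 or 1 on success. */
		kfree(wait);
		return err;
	}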
> > +
> > + return 0;
> > +}
> > +
> > +static void fini_tlb_lookup(struct intel_guc *guc)
> > +{
> > + struct intel_guc_tlb_wait *wait;
> > +
> > + if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915))
> > + return;
> > +
> > + wait = xa_load(&guc->tlb_lookup, guc->serial_slot);
> > + if (wait) {
> > + guc_dbg(guc, "fini_tlb_lookup: Unexpected item in tlb_lookup\n");
>
> Hm wait, why is this unexpected when init_tlb_lookup() pre-allocated that
> entry? Who frees it? guc_send_invalidate_tlb() does not appear to.
I think this links with my answer above, right? With the
refactoring of the if's so that we don't skip the TLB invalidation.
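In any case, since init_tlb_lookup() always preallocates the serial
slot, finding it here at fini time looks like the normal case, so
perhaps the debug message can simply go away (sketch only, assuming
nothing else ever frees that slot):

	/* Free the slot preallocated in init_tlb_lookup(). */
	wait = xa_load(&guc->tlb_lookup, guc->serial_slot);
	kfree(wait);

	xa_destroy(&guc->tlb_lookup);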
> > + kfree(wait);
> > + }
> > +
> > + xa_destroy(&guc->tlb_lookup);
> > +}
> > +
> > /*
> > * Set up the memory resources to be shared with the GuC (via the GGTT)
> > * at firmware loading time.
...
> > +int intel_guc_tlb_invalidation_done(struct intel_guc *guc, u32 size, u32 len, u32 seqno)
> > +{
> > + /* Check for underflow */
> > + if (unlikely(len < 1 || len > size))
> > + return -EPROTO;
>
> These check are not valid for any message/action type ct_process_request()
> can receive?
You mean discriminating by payload? Jonathan... you know the
details here?