[PATCH v3 2/2] drm/xe: Use dma-fence array for media GT TLB invalidations in PT code

Matthew Brost matthew.brost at intel.com
Fri Aug 23 15:38:54 UTC 2024


On Fri, Aug 23, 2024 at 08:40:40AM +0200, Christian König wrote:
> Am 23.08.24 um 06:54 schrieb Matthew Brost:
> > Using a chain fence is problematic as these cannot be installed in
> > timeout drm sync objects. Use a dma-fence-array instead at the cost of
> > an extra failure point.
> 
> Mhm, IIRC we converted chain objects into dma-fence-arrays while installing
> them into a timeline.
> 
> Doesn't that work any more?
> 

Thanks for the quick feedback.

As is, installing a dma-fence-chain into a timeline sync doesn't work.

The 'fence' returned from 'xe_pt_update_ops_run' is installed here [1]
as the 'fence' argument. This blows up here [2] [3]. It does suggest in
[3] to use a dma-fence-array which is what I'm doing. 

The issue with using a dma-fence-array is that it adds another failure
point if dma_fence_array_create is called after collecting multiple
fences from TLB invalidations. Also, we hold a lock in xe_pt_update_ops_run
which is in the reclaim path, so calling dma_fence_array_create isn't
allowed under that lock.

I suppose we could drop that lock and directly wait on the TLB invalidation
fences if dma_fence_array_create fails, but to me it makes more sense to
prealloc the dma-fence-array and populate it later. I saw your response to
my first patch about how this could be problematic; I'm a little confused
by that, so I'm responding there too.

Matt

[1] https://elixir.bootlin.com/linux/v6.10.6/source/drivers/gpu/drm/xe/xe_sync.c#L233
[2] https://elixir.bootlin.com/linux/v6.10.6/source/drivers/gpu/drm/drm_syncobj.c#L349
[3] https://elixir.bootlin.com/linux/v6.10.6/source/drivers/dma-buf/dma-fence-chain.c#L275

> Regards,
> Christian.
> 
> > 
> > Also fixup reserve fence count to include media GT invalidation fence.
> > 
> > v2:
> >   - Fix reserve fence count (Casey Bowman)
> > v3:
> >   - Prealloc dma fence array (CI)
> > 
> > Fixes: 40520283e0fd ("drm/xe: Invalidate media_gt TLBs in PT code")
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >   drivers/gpu/drm/xe/xe_pt.c | 34 ++++++++++++++++++++++++----------
> >   1 file changed, 24 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > index 6c6714af3d5d..2e35444a85b0 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.c
> > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > @@ -3,7 +3,7 @@
> >    * Copyright © 2022 Intel Corporation
> >    */
> > -#include <linux/dma-fence-chain.h>
> > +#include <linux/dma-fence-array.h>
> >   #include "xe_pt.h"
> > @@ -1629,9 +1629,11 @@ xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
> >   static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma)
> >   {
> > +	int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0;
> > +
> >   	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> >   		return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv,
> > -					       xe->info.tile_count);
> > +					       xe->info.tile_count << shift);
> >   	return 0;
> >   }
> > @@ -1818,6 +1820,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
> >   	struct xe_vm_pgtable_update_ops *pt_update_ops =
> >   		&vops->pt_update_ops[tile->id];
> >   	struct xe_vma_op *op;
> > +	int shift = tile->media_gt ? 1 : 0;
> >   	int err;
> >   	lockdep_assert_held(&vops->vm->lock);
> > @@ -1826,7 +1829,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
> >   	xe_pt_update_ops_init(pt_update_ops);
> >   	err = dma_resv_reserve_fences(xe_vm_resv(vops->vm),
> > -				      tile_to_xe(tile)->info.tile_count);
> > +				      tile_to_xe(tile)->info.tile_count << shift);
> >   	if (err)
> >   		return err;
> > @@ -1983,7 +1986,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> >   		&vops->pt_update_ops[tile->id];
> >   	struct dma_fence *fence;
> >   	struct invalidation_fence *ifence = NULL, *mfence = NULL;
> > -	struct dma_fence_chain *chain_fence = NULL;
> > +	struct dma_fence **fences = NULL;
> > +	struct dma_fence_array *cf = NULL;
> >   	struct xe_range_fence *rfence;
> >   	struct xe_vma_op *op;
> >   	int err = 0, i;
> > @@ -2022,8 +2026,13 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> >   				err = -ENOMEM;
> >   				goto free_ifence;
> >   			}
> > -			chain_fence = dma_fence_chain_alloc();
> > -			if (!chain_fence) {
> > +			fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
> > +			if (!fences) {
> > +				err = -ENOMEM;
> > +				goto free_ifence;
> > +			}
> > +			cf = dma_fence_array_alloc(2);
> > +			if (!cf) {
> >   				err = -ENOMEM;
> >   				goto free_ifence;
> >   			}
> > @@ -2068,9 +2077,13 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> >   			invalidation_fence_init(tile->media_gt, mfence, fence,
> >   						pt_update_ops->start,
> >   						pt_update_ops->last, vm->usm.asid);
> > -			dma_fence_chain_init(chain_fence, &ifence->base.base,
> > -					     &mfence->base.base, 0);
> > -			fence = &chain_fence->base;
> > +			fences[0] = &ifence->base.base;
> > +			fences[1] = &mfence->base.base;
> > +			dma_fence_array_arm(cf, 2, fences,
> > +					    vm->composite_fence_ctx,
> > +					    vm->composite_fence_seqno++,
> > +					    false);
> > +			fence = &cf->base;
> >   		} else {
> >   			fence = &ifence->base.base;
> >   		}
> > @@ -2108,7 +2121,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> >   free_rfence:
> >   	kfree(rfence);
> >   free_ifence:
> > -	dma_fence_chain_free(chain_fence);
> > +	kfree(cf);
> > +	kfree(fences);
> >   	kfree(mfence);
> >   	kfree(ifence);
> >   kill_vm_tile1:
> 


More information about the Intel-xe mailing list