[PATCH v3 3/5] drm/xe: Convert multiple bind ops into single job
Matthew Brost
matthew.brost at intel.com
Thu May 30 00:49:49 UTC 2024
On Wed, May 29, 2024 at 06:32:12PM -0600, Zanoni, Paulo R wrote:
> On Wed, 2024-05-29 at 11:31 -0700, Matthew Brost wrote:
> > This aligns with the uAPI, in which an array of binds or a single bind
> > that results in multiple GPUVA ops is considered a single atomic
> > operation.
> >
> > The implementation is roughly:
> > - xe_vma_ops is a list of xe_vma_op (GPUVA op)
> > - each xe_vma_op resolves to 0-3 PT ops
> > - xe_vma_ops creates a single job
> > - if at any point during binding a failure occurs, xe_vma_ops contains
> > the information necessary to unwind the PT and VMA (GPUVA) state
> >
> > v2:
> > - add missing dma-resv slot reservation (CI, testing)
> >
> > Cc: Oak Zeng <oak.zeng at intel.com>
> > Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_bo_types.h | 2 +
> > drivers/gpu/drm/xe/xe_migrate.c | 296 ++++----
> > drivers/gpu/drm/xe/xe_migrate.h | 32 +-
> > drivers/gpu/drm/xe/xe_pt.c | 1108 +++++++++++++++++++-----------
> > drivers/gpu/drm/xe/xe_pt.h | 14 +-
> > drivers/gpu/drm/xe/xe_pt_types.h | 36 +
> > drivers/gpu/drm/xe/xe_vm.c | 519 +++-----------
> > drivers/gpu/drm/xe/xe_vm.h | 2 +
> > drivers/gpu/drm/xe/xe_vm_types.h | 45 +-
> > 9 files changed, 1032 insertions(+), 1022 deletions(-)
> >
>
> (snip)
>
> >
> > -/**
> > - * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
> > - * address range.
> > - * @tile: The tile to bind for.
> > - * @vma: The vma to bind.
> > - * @q: The exec_queue with which to do pipelined page-table updates.
> > - * @syncs: Entries to sync on before binding the built tree to the live vm tree.
> > - * @num_syncs: Number of @sync entries.
> > - * @rebind: Whether we're rebinding this vma to the same address range without
> > - * an unbind in-between.
> > - *
> > - * This function builds a page-table tree (see xe_pt_stage_bind() for more
> > - * information on page-table building), and the xe_vm_pgtable_update entries
> > - * abstracting the operations needed to attach it to the main vm tree. It
> > - * then takes the relevant locks and updates the metadata side of the main
> > - * vm tree and submits the operations for pipelined attachment of the
> > - * gpu page-table to the vm main tree, (which can be done either by the
> > - * cpu and the GPU).
> > - *
> > - * Return: A valid dma-fence representing the pipelined attachment operation
> > - * on success, an error pointer on error.
> > - */
> > -struct dma_fence *
> > -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
> > - struct xe_sync_entry *syncs, u32 num_syncs,
> > - bool rebind)
> > -{
> > - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
> > - struct xe_pt_migrate_pt_update bind_pt_update = {
> > - .base = {
> > - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
> > - .vma = vma,
> > - .tile_id = tile->id,
> > - },
> > - .bind = true,
> > - };
> > - struct xe_vm *vm = xe_vma_vm(vma);
> > - u32 num_entries;
> > - struct dma_fence *fence;
> > - struct invalidation_fence *ifence = NULL;
> > - struct xe_range_fence *rfence;
> > - int err;
> > -
> > - bind_pt_update.locked = false;
> > - xe_bo_assert_held(xe_vma_bo(vma));
> > - xe_vm_assert_held(vm);
> > -
> > - vm_dbg(&xe_vma_vm(vma)->xe->drm,
> > - "Preparing bind, with range [%llx...%llx) engine %p.\n",
> > - xe_vma_start(vma), xe_vma_end(vma), q);
> > -
> > - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries);
> > - if (err)
> > - goto err;
> > -
> > - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
> > - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> > - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
> > - if (err)
> > - goto err;
> > -
> > - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
> > -
> > - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
> > - xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
> > - num_entries);
> > -
> > - /*
> > - * If rebind, we have to invalidate TLB on !LR vms to invalidate
> > - * cached PTEs point to freed memory. on LR vms this is done
> > - * automatically when the context is re-enabled by the rebind worker,
> > - * or in fault mode it was invalidated on PTE zapping.
> > - *
> > - * If !rebind, and scratch enabled VMs, there is a chance the scratch
> > - * PTE is already cached in the TLB so it needs to be invalidated.
> > - * on !LR VMs this is done in the ring ops preceding a batch, but on
> > - * non-faulting LR, in particular on user-space batch buffer chaining,
> > - * it needs to be done here.
> > - */
> > - if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
> > - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> > - if (!ifence)
> > - return ERR_PTR(-ENOMEM);
> > - } else if (rebind && !xe_vm_in_lr_mode(vm)) {
> > - /* We bump also if batch_invalidate_tlb is true */
> > - vm->tlb_flush_seqno++;
>
> This was the only thing actually setting a value of "tlb_flush_seqno"
> to anything in the driver. Now the only remaining place that assigns
> tlb_flush_seqno is xe_sched_job_arm(), where we have:
>
> q->tlb_flush_seqno = vm->tlb_flush_seqno;
>
> (but there seems to be no line ever initializing vm->tlb_flush_seqno)
>
This is initialized to zero on the zalloc of the xe_vm structure.
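For reference, the allocation in xe_vm_create() looks roughly like this
(a sketch, not the exact tree):

	struct xe_vm *vm;

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);	/* zeroed allocation */
	if (!vm)
		return ERR_PTR(-ENOMEM);

The exec queue comes from a zeroed allocation as well, so
vm->tlb_flush_seqno and q->tlb_flush_seqno both start at 0 and only
diverge once a bind bumps the VM's counter.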
> Something may be wrong here. We're not setting initial values but we're
> copying values and checking them in an "if" statement.
>
> The following compiles:
>
Sure, but then job->ring_ops_flush_tlb is no longer set correctly. The idea
here is that, whenever we can, we defer the TLB flush to the next job rather
than issuing a GuC command after changing the page tables. Roughly: on binds
in dma-fence mode we can always defer, in some bind cases in LR mode we can
defer, and on unbinds we always use the GuC.
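For reference, the consumer side that your diff removes from
xe_sched_job_arm() (copied from the xe_sched_job.c hunk quoted below) is
what turns a seqno bump into a deferred flush on the next job:

	if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
	    (vm->batch_invalidate_tlb ||
	     vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
		/* Catch the queue up and flush in this job's ring ops */
		xe_vm_assert_held(vm);
		q->tlb_flush_seqno = vm->tlb_flush_seqno;
		job->ring_ops_flush_tlb = true;
	}

With the seqno comparison gone, a rebind on a dma-fence VM would never
trigger that deferred flush, so just deleting the counter isn't enough.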
This patch broke that behavior in the rebase. The fix in bind_op_prepare()
should look like:
	/*
	 * If rebind, we have to invalidate TLB on !LR vms to invalidate
	 * cached PTEs point to freed memory. on LR vms this is done
-	 * automatically when the context is re-enabled by the rebind
-	 * worker, or in fault mode it was invalidated on PTE zapping.
+	 * automatically when the context is re-enabled by the rebind worker,
+	 * or in fault mode it was invalidated on PTE zapping.
	 *
-	 * If !rebind, and scratch enabled VMs, there is a chance the
-	 * scratch PTE is already cached in the TLB so it needs to be
-	 * invalidated. on !LR VMs this is done in the ring ops
-	 * preceding a batch, but on non-faulting LR, in particular on
-	 * user-space batch buffer chaining, it needs to be done here.
+	 * If !rebind, and scratch enabled VMs, there is a chance the scratch
+	 * PTE is already cached in the TLB so it needs to be invalidated.
+	 * on !LR VMs this is done in the ring ops preceding a batch, but on
+	 * non-faulting LR, in particular on user-space batch buffer chaining,
+	 * it needs to be done here.
	 */
-	pt_update_ops->needs_invalidation |=
-		(pt_op->rebind && !xe_vm_in_lr_mode(vm) &&
-		 !vm->batch_invalidate_tlb) ||
-		(!pt_op->rebind && vm->scratch_pt[tile->id] &&
-		 xe_vm_in_preempt_fence_mode(vm));
+	if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
+	     xe_vm_in_preempt_fence_mode(vm)))
+		pt_update_ops->needs_invalidation = true;
+	else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
+		/* We bump also if batch_invalidate_tlb is true */
+		vm->tlb_flush_seqno++;
Will fix in next rev.
Matt
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index e81704c7c030a..dd51b59e4433f 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -136,11 +136,6 @@ struct xe_exec_queue {
> const struct xe_ring_ops *ring_ops;
> /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
> struct drm_sched_entity *entity;
> - /**
> - * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
> - * Protected by @vm's resv. Unused if @vm == NULL.
> - */
> - u64 tlb_flush_seqno;
> /** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */
> u64 old_run_ticks;
> /** @run_ticks: hw engine class run time in ticks for this exec queue */
> diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
> index 29f3201d7dfac..6c3cb7e295ac7 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job.c
> +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> @@ -254,9 +254,8 @@ void xe_sched_job_arm(struct xe_sched_job *job)
> }
>
> if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
> - (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
> + vm->batch_invalidate_tlb) {
> xe_vm_assert_held(vm);
> - q->tlb_flush_seqno = vm->tlb_flush_seqno;
> job->ring_ops_flush_tlb = true;
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 27d651093d307..03e839efb234e 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -263,11 +263,6 @@ struct xe_vm {
> bool capture_once;
> } error_capture;
>
> - /**
> - * @tlb_flush_seqno: Required TLB flush seqno for the next exec.
> - * protected by the vm resv.
> - */
> - u64 tlb_flush_seqno;
> /** @batch_invalidate_tlb: Always invalidate TLB before batch start */
> bool batch_invalidate_tlb;
> /** @xef: XE file handle for tracking this VM's drm client */
>
>
>