[PATCH v4 4/7] drm/xe: Convert multiple bind ops into single job

Matthew Brost matthew.brost at intel.com
Fri Jun 21 17:06:30 UTC 2024


On Fri, Jun 21, 2024 at 04:23:59PM +0100, Matthew Auld wrote:
> On 18/06/2024 18:15, Matthew Brost wrote:
> > This aligns with the uAPI, where an array of binds or a single bind
> > that results in multiple GPUVA ops is considered a single atomic
> > operation.
> > 
> > The implementation is roughly:
> > - xe_vma_ops is a list of xe_vma_op (GPUVA op)
> > - each xe_vma_op resolves to 0-3 PT ops
> > - xe_vma_ops creates a single job
> > - if at any point during binding a failure occurs, xe_vma_ops contains
> >    the information necessary to unwind the PT and VMA (GPUVA) state
> > 
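To make that flow a little more concrete for anyone jumping into the series
here, a rough sketch of how a caller is expected to drive this per tile
(illustrative only -- the example_* helper below is not part of the patch,
and error unwinding is elided):

	/*
	 * Illustrative sketch only, not part of the patch. One xe_vma_ops
	 * describes the whole atomic (array of) bind(s); per tile it is
	 * lowered into PT update ops and executed as a single job.
	 */
	static struct dma_fence *example_execute_ops(struct xe_vma_ops *vops,
						     struct xe_tile *tile)
	{
		struct dma_fence *fence;
		int err;

		/* Walk the GPUVA ops, each producing 0-3 PT update ops */
		err = xe_pt_update_ops_prepare(tile, vops);
		if (err)
			return ERR_PTR(err);

		/* One job for all PT update ops of this tile */
		fence = xe_pt_update_ops_run(tile, vops);

		/* On failure, vops still holds what is needed to unwind */
		return fence;
	}
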
> > v2:
> >   - add missing dma-resv slot reservation (CI, testing)
> > v4:
> >   - Fix TLB invalidation (Paulo)
> >   - Add missing xe_sched_job_last_fence_add/test_dep check (Inspection)
> > 
> > Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> 
> <snip>
> 
> > +
> > +static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
> > +			   struct xe_vm_pgtable_update_ops *pt_update_ops,
> > +			   struct xe_vma *vma)
> > +{
> > +	u32 current_op = pt_update_ops->current_op;
> > +	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
> > +	struct llist_head *deferred = &pt_update_ops->deferred;
> > +	int err;
> >   	xe_bo_assert_held(xe_vma_bo(vma));
> > -	xe_vm_assert_held(vm);
> >   	vm_dbg(&xe_vma_vm(vma)->xe->drm,
> > -	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
> > -	       xe_vma_start(vma), xe_vma_end(vma), q);
> > -
> > -	num_entries = xe_pt_stage_unbind(tile, vma, entries);
> > -	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
> > +	       "Preparing bind, with range [%llx...%llx)\n",
> > +	       xe_vma_start(vma), xe_vma_end(vma) - 1);
> > -	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
> > -	xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
> > -				   num_entries);
> > +	pt_op->vma = NULL;
> > +	pt_op->bind = true;
> > +	pt_op->rebind = BIT(tile->id) & vma->tile_present;
> > -	err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
> > -	if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> > -		err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
> > +	err = vma_reserve_fences(tile_to_xe(tile), vma);
> >   	if (err)
> > -		return ERR_PTR(err);
> > +		return err;
> > -	ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> > -	if (!ifence)
> > -		return ERR_PTR(-ENOMEM);
> > +	err = xe_pt_prepare_bind(tile, vma, pt_op->entries,
> > +				 &pt_op->num_entries);
> > +	if (!err) {
> > +		xe_tile_assert(tile, pt_op->num_entries <=
> > +			       ARRAY_SIZE(pt_op->entries));
> > +		xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
> > +					pt_op->num_entries, true);
> > -	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
> > -	if (!rfence) {
> > -		kfree(ifence);
> > -		return ERR_PTR(-ENOMEM);
> > +		xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
> > +		++pt_update_ops->current_op;
> > +		pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
> > +
> > +
> > +		/*
> > +		 * If rebind, we have to invalidate TLB on !LR vms to invalidate
> > +		 * cached PTEs point to freed memory. on LR vms this is done
> 
> s/on/On/
> 

Yep.

> > +		 * automatically when the context is re-enabled by the rebind worker,
> > +		 * or in fault mode it was invalidated on PTE zapping.
> > +		 *
> > +		 * If !rebind, and scratch enabled VMs, there is a chance the scratch
> > +		 * PTE is already cached in the TLB so it needs to be invalidated.
> > +		 * on !LR VMs this is done in the ring ops preceding a batch, but on
> 
> ditto
> 

Yep.
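
While at it, restating the rule that comment encodes, since it tends to trip
people up (illustrative only, no behaviour change intended, and the helper
below does not exist in the patch):

	/*
	 * Illustrative restatement of the invalidation decision below:
	 *
	 *   rebind  && !LR VM                      -> bump vm->tlb_flush_seqno
	 *   !rebind && scratch && preempt-fence VM -> needs_invalidation = true
	 *   everything else                        -> nothing extra here
	 */
	static bool example_bind_needs_invalidation_fence(struct xe_vm *vm,
							  bool rebind)
	{
		return !rebind && xe_vm_has_scratch(vm) &&
			xe_vm_in_preempt_fence_mode(vm);
	}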

> > +		 * non-faulting LR, in particular on user-space batch buffer chaining,
> > +		 * it needs to be done here.
> > +		 */
> > +		if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
> > +		     xe_vm_in_preempt_fence_mode(vm)))
> > +			pt_update_ops->needs_invalidation = true;
> > +		else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
> > +			/* We bump also if batch_invalidate_tlb is true */
> > +			vm->tlb_flush_seqno++;
> > +
> > +		/* FIXME: Don't commit right away */
> > +		vma->tile_staged |= BIT(tile->id);
> > +		pt_op->vma = vma;
> > +		xe_pt_commit_bind(vma, pt_op->entries, pt_op->num_entries,
> > +				  pt_op->rebind, deferred);
> >   	}
> > +	return err;
> > +}
> > +
> > +static int unbind_op_prepare(struct xe_tile *tile,
> > +			     struct xe_vm_pgtable_update_ops *pt_update_ops,
> > +			     struct xe_vma *vma)
> > +{
> > +	u32 current_op = pt_update_ops->current_op;
> > +	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
> > +	struct llist_head *deferred = &pt_update_ops->deferred;
> > +	int err;
> > +
> > +	if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id)))
> > +		return 0;
> > +
> > +	xe_bo_assert_held(xe_vma_bo(vma));
> > +
> > +	vm_dbg(&xe_vma_vm(vma)->xe->drm,
> > +	       "Preparing unbind, with range [%llx...%llx)\n",
> > +	       xe_vma_start(vma), xe_vma_end(vma) - 1);
> > +
> >   	/*
> > -	 * Even if we were already evicted and unbind to destroy, we need to
> > -	 * clear again here. The eviction may have updated pagetables at a
> > -	 * lower level, because it needs to be more conservative.
> > +	 * Wait for invalidation to complete. Can corrupt internal page table
> > +	 * state if an invalidation is running while preparing an unbind.
> >   	 */
> > -	fence = xe_migrate_update_pgtables(tile->migrate,
> > -					   vm, NULL, q ? q :
> > -					   vm->q[tile->id],
> > -					   entries, num_entries,
> > -					   syncs, num_syncs,
> > -					   &unbind_pt_update.base);
> > -	if (!IS_ERR(fence)) {
> > -		int err;
> > -
> > -		err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
> > -					    &xe_range_fence_kfree_ops,
> > -					    unbind_pt_update.base.start,
> > -					    unbind_pt_update.base.last, fence);
> > -		if (err)
> > -			dma_fence_wait(fence, false);
> > +	if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma)))
> > +		mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier);
> > -		/* TLB invalidation must be done before signaling unbind */
> > -		err = invalidation_fence_init(tile->primary_gt, ifence, fence,
> > -					      xe_vma_start(vma),
> > -					      xe_vma_end(vma),
> > -					      xe_vma_vm(vma)->usm.asid);
> > -		if (err) {
> > -			dma_fence_put(fence);
> > -			kfree(ifence);
> > -			return ERR_PTR(err);
> > +	pt_op->vma = vma;
> > +	pt_op->bind = false;
> > +	pt_op->rebind = false;
> > +
> > +	err = vma_reserve_fences(tile_to_xe(tile), vma);
> > +	if (err)
> > +		return err;
> > +
> > +	pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries);
> > +
> > +	xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
> > +				pt_op->num_entries, false);
> > +	xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
> > +	++pt_update_ops->current_op;
> > +	pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
> > +	pt_update_ops->needs_invalidation = true;
> > +
> > +	/* FIXME: Don't commit right away */
> > +	xe_pt_commit_unbind(vma, pt_op->entries, pt_op->num_entries,
> > +			    deferred);
> > +
> > +	return 0;
> > +}
> > +
> > +static int op_prepare(struct xe_vm *vm,
> > +		      struct xe_tile *tile,
> > +		      struct xe_vm_pgtable_update_ops *pt_update_ops,
> > +		      struct xe_vma_op *op)
> > +{
> > +	int err = 0;
> > +
> > +	xe_vm_assert_held(vm);
> > +
> > +	switch (op->base.op) {
> > +	case DRM_GPUVA_OP_MAP:
> > +		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
> > +			break;
> > +
> > +		err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
> > +		pt_update_ops->wait_vm_kernel = true;
> > +		break;
> > +	case DRM_GPUVA_OP_REMAP:
> > +		err = unbind_op_prepare(tile, pt_update_ops,
> > +					gpuva_to_vma(op->base.remap.unmap->va));
> > +
> > +		if (!err && op->remap.prev) {
> > +			err = bind_op_prepare(vm, tile, pt_update_ops,
> > +					      op->remap.prev);
> > +			pt_update_ops->wait_vm_bookkeep = true;
> >   		}
> > -		fence = &ifence->base.base;
> > +		if (!err && op->remap.next) {
> > +			err = bind_op_prepare(vm, tile, pt_update_ops,
> > +					      op->remap.next);
> > +			pt_update_ops->wait_vm_bookkeep = true;
> > +		}
> > +		break;
> > +	case DRM_GPUVA_OP_UNMAP:
> > +		err = unbind_op_prepare(tile, pt_update_ops,
> > +					gpuva_to_vma(op->base.unmap.va));
> > +		break;
> > +	case DRM_GPUVA_OP_PREFETCH:
> > +		err = bind_op_prepare(vm, tile, pt_update_ops,
> > +				      gpuva_to_vma(op->base.prefetch.va));
> > +		pt_update_ops->wait_vm_kernel = true;
> > +		break;
> > +	default:
> > +		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > +	}
> > -		/* add shared fence now for pagetable delayed destroy */
> > -		dma_resv_add_fence(xe_vm_resv(vm), fence,
> > -				   DMA_RESV_USAGE_BOOKKEEP);
> > +	return err;
> > +}
> > -		/* This fence will be installed by caller when doing eviction */
> > -		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> > -			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
> > -					   DMA_RESV_USAGE_BOOKKEEP);
> > -		xe_pt_commit_unbind(vma, entries, num_entries,
> > -				    unbind_pt_update.locked ? &deferred : NULL);
> > -		vma->tile_present &= ~BIT(tile->id);
> > -	} else {
> > -		kfree(rfence);
> > -		kfree(ifence);
> > +static void
> > +xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
> > +{
> > +	init_llist_head(&pt_update_ops->deferred);
> > +	pt_update_ops->start = ~0x0ull;
> > +	pt_update_ops->last = 0x0ull;
> > +}
> > +
> > +/**
> > + * xe_pt_update_ops_prepare() - Prepare PT update operations
> > + * @tile: Tile of PT update operations
> > + * @vops: VMA operations
> > + *
> > + * Prepare PT update operations, which includes updating internal PT state,
> > + * allocating memory for page tables, populating the page tables being pruned
> > + * in, and creating PT update operations for leaf insertion / removal.
> > + *
> > + * Return: 0 on success, negative error code on error.
> > + */
> > +int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
> > +{
> > +	struct xe_vm_pgtable_update_ops *pt_update_ops =
> > +		&vops->pt_update_ops[tile->id];
> > +	struct xe_vma_op *op;
> > +	int err;
> > +
> > +	lockdep_assert_held(&vops->vm->lock);
> > +	xe_vm_assert_held(vops->vm);
> > +
> > +	xe_pt_update_ops_init(pt_update_ops);
> > +
> > +	err = dma_resv_reserve_fences(xe_vm_resv(vops->vm),
> > +				      tile_to_xe(tile)->info.tile_count);
> > +	if (err)
> > +		return err;
> > +
> > +	list_for_each_entry(op, &vops->list, link) {
> > +		err = op_prepare(vops->vm, tile, pt_update_ops, op);
> > +
> > +		if (err)
> > +			return err;
> >   	}
> > -	if (!vma->tile_present)
> > -		list_del_init(&vma->combined_links.rebind);
> > +	xe_tile_assert(tile, pt_update_ops->current_op <=
> > +		       pt_update_ops->num_ops);
> > +
> > +	return 0;
> > +}
> > +
> > +static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
> > +			   struct xe_vm_pgtable_update_ops *pt_update_ops,
> > +			   struct xe_vma *vma, struct dma_fence *fence)
> > +{
> > +	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> > +		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
> > +				   pt_update_ops->wait_vm_bookkeep ?
> > +				   DMA_RESV_USAGE_KERNEL :
> > +				   DMA_RESV_USAGE_BOOKKEEP);
> > +	vma->tile_present |= BIT(tile->id);
> > +	vma->tile_staged &= ~BIT(tile->id);
> > +	if (xe_vma_is_userptr(vma)) {
> > +		lockdep_assert_held_read(&vm->userptr.notifier_lock);
> > +		to_userptr_vma(vma)->userptr.initial_bind = true;
> > +	}
> > -	if (unbind_pt_update.locked) {
> > -		xe_tile_assert(tile, xe_vma_is_userptr(vma));
> > +	/*
> > +	 * Kick rebind worker if this bind triggers preempt fences and not in
> > +	 * the rebind worker
> > +	 */
> > +	if (pt_update_ops->wait_vm_bookkeep &&
> > +	    xe_vm_in_preempt_fence_mode(vm) &&
> > +	    !current->mm)
> > +		xe_vm_queue_rebind_worker(vm);
> > +}
> > +
> > +static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
> > +			     struct xe_vm_pgtable_update_ops *pt_update_ops,
> > +			     struct xe_vma *vma, struct dma_fence *fence)
> > +{
> > +	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> > +		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
> > +				   pt_update_ops->wait_vm_bookkeep ?
> > +				   DMA_RESV_USAGE_KERNEL :
> > +				   DMA_RESV_USAGE_BOOKKEEP);
> > +	vma->tile_present &= ~BIT(tile->id);
> > +	if (!vma->tile_present) {
> > +		list_del_init(&vma->combined_links.rebind);
> > +		if (xe_vma_is_userptr(vma)) {
> > +			lockdep_assert_held_read(&vm->userptr.notifier_lock);
> > -		if (!vma->tile_present) {
> >   			spin_lock(&vm->userptr.invalidated_lock);
> >   			list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
> >   			spin_unlock(&vm->userptr.invalidated_lock);
> >   		}
> > -		up_read(&vm->userptr.notifier_lock);
> > -		xe_bo_put_commit(&deferred);
> >   	}
> > +}
> > +
> > +static void op_commit(struct xe_vm *vm,
> > +		      struct xe_tile *tile,
> > +		      struct xe_vm_pgtable_update_ops *pt_update_ops,
> > +		      struct xe_vma_op *op, struct dma_fence *fence)
> > +{
> > +	xe_vm_assert_held(vm);
> > +
> > +	switch (op->base.op) {
> > +	case DRM_GPUVA_OP_MAP:
> > +		if (!op->map.immediate && xe_vm_in_fault_mode(vm))
> > +			break;
> > +
> > +		bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence);
> > +		break;
> > +	case DRM_GPUVA_OP_REMAP:
> > +		unbind_op_commit(vm, tile, pt_update_ops,
> > +				 gpuva_to_vma(op->base.remap.unmap->va), fence);
> > +
> > +		if (op->remap.prev)
> > +			bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
> > +				       fence);
> > +		if (op->remap.next)
> > +			bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
> > +				       fence);
> > +		break;
> > +	case DRM_GPUVA_OP_UNMAP:
> > +		unbind_op_commit(vm, tile, pt_update_ops,
> > +				 gpuva_to_vma(op->base.unmap.va), fence);
> > +		break;
> > +	case DRM_GPUVA_OP_PREFETCH:
> > +		bind_op_commit(vm, tile, pt_update_ops,
> > +			       gpuva_to_vma(op->base.prefetch.va), fence);
> > +		break;
> > +	default:
> > +		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > +	}
> > +}
> > +
> > +static const struct xe_migrate_pt_update_ops migrate_ops = {
> > +	.populate = xe_vm_populate_pgtable,
> > +	.clear = xe_migrate_clear_pgtable_callback,
> > +	.pre_commit = xe_pt_pre_commit,
> > +};
> > +
> > +static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
> > +	.populate = xe_vm_populate_pgtable,
> > +	.clear = xe_migrate_clear_pgtable_callback,
> > +	.pre_commit = xe_pt_userptr_pre_commit,
> > +};
> > +
> > +/**
> > + * xe_pt_update_ops_run() - Run PT update operations
> > + * @tile: Tile of PT update operations
> > + * @vops: VMA operations
> > + *
> > + * Run PT update operations, which includes committing internal PT state
> > + * changes, creating a job for the PT update operations (leaf insertion /
> > + * removal), and installing the job fence in various places.
> > + *
> > + * Return: fence on success, negative ERR_PTR on error.
> > + */
> > +struct dma_fence *
> > +xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > +{
> > +	struct xe_vm *vm = vops->vm;
> > +	struct xe_vm_pgtable_update_ops *pt_update_ops =
> > +		&vops->pt_update_ops[tile->id];
> > +	struct dma_fence *fence;
> > +	struct invalidation_fence *ifence = NULL;
> > +	struct xe_range_fence *rfence;
> > +	struct xe_vma_op *op;
> > +	int err = 0;
> > +	struct xe_migrate_pt_update update = {
> > +		.ops = pt_update_ops->needs_userptr_lock ?
> > +			&userptr_migrate_ops :
> > +			&migrate_ops,
> > +		.vops = vops,
> > +		.tile_id = tile->id
> 
> Nit: I think needs a comma here.
> 

Yep.

> > +	};
> > +
> > +	lockdep_assert_held(&vm->lock);
> > +	xe_vm_assert_held(vm);
> > +
> > +	if (!pt_update_ops->current_op) {
> > +		xe_tile_assert(tile, xe_vm_in_fault_mode(vm));
> > +
> > +		return dma_fence_get_stub();
> > +	}
> > +
> > +	if (pt_update_ops->needs_invalidation) {
> > +		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> > +		if (!ifence)
> > +			return ERR_PTR(-ENOMEM);
> > +	}
> > +
> > +	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
> > +	if (!rfence) {
> > +		err = -ENOMEM;
> > +		goto free_ifence;
> > +	}
> > +
> > +	fence = xe_migrate_update_pgtables(tile->migrate, &update);
> > +	if (IS_ERR(fence)) {
> > +		err = PTR_ERR(fence);
> > +		goto free_rfence;
> > +	}
> > +
> > +	err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
> > +				    &xe_range_fence_kfree_ops,
> > +				    pt_update_ops->start,
> > +				    pt_update_ops->last, fence);
> > +	if (err)
> > +		dma_fence_wait(fence, false);
> 
> Could maybe set err back to zero or don't set it? Just so we don't leave any
> possible booby traps later?
> 

Good idea. Will fix.
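
Roughly like this (untested sketch of the intent -- treat a failed range
fence insert as best effort, fall back to waiting on the fence, and clear
err so nothing later trips over it):

	err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
				    &xe_range_fence_kfree_ops,
				    pt_update_ops->start,
				    pt_update_ops->last, fence);
	if (err) {
		/* Best effort: wait for the fence instead and drop the error */
		dma_fence_wait(fence, false);
		err = 0;
	}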

Matt

> <snip>

