[PATCH v4 4/7] drm/xe: Convert multiple bind ops into single job
Matthew Auld
matthew.auld at intel.com
Fri Jun 21 15:23:59 UTC 2024
On 18/06/2024 18:15, Matthew Brost wrote:
> This aligns with the uAPI of an array of binds or a single bind that
> results in multiple GPUVA ops being considered a single atomic
> operation.
>
> The implementation is roughly:
> - xe_vma_ops is a list of xe_vma_op (GPUVA op)
> - each xe_vma_op resolves to 0-3 PT ops
> - xe_vma_ops creates a single job
> - if at any point during binding a failure occurs, xe_vma_ops contains
> the information necessary to unwind the PT and VMA (GPUVA) state
>
> v2:
> - add missing dma-resv slot reservation (CI, testing)
> v4:
> - Fix TLB invalidation (Paulo)
> - Add missing xe_sched_job_last_fence_add/test_dep check (Inspection)
>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
<snip>
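The high-level summary above matches my reading of the patch. Just to check
my mental model of the layering, a rough standalone sketch below; this is
deliberately simplified, not the actual driver types, and all names and
sizes are illustrative only:

	/* one GPUVA op (map/unmap/remap/prefetch) */
	struct vma_op_model { int op; };

	/* one leaf PT insertion or removal */
	struct pt_update_op_model { int bind; };

	/* one atomic bind/unbind request coming in from the uAPI */
	struct vma_ops_model {
		struct vma_op_model *ops;		/* list of GPUVA ops */
		unsigned int num_vma_ops;
	};

	/* per-tile accumulator; everything staged here becomes a single job */
	struct pt_update_ops_model {
		struct pt_update_op_model ops[64];	/* 0-3 per GPUVA op */
		unsigned int current_op;
	};

	/*
	 * prepare: walk every GPUVA op and stage its PT ops without
	 * submitting anything, so a failure part way through can still be
	 * unwound; run: build a single job from the accumulated PT ops.
	 */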
> +
> +static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
> + struct xe_vm_pgtable_update_ops *pt_update_ops,
> + struct xe_vma *vma)
> +{
> + u32 current_op = pt_update_ops->current_op;
> + struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
> + struct llist_head *deferred = &pt_update_ops->deferred;
> + int err;
>
> xe_bo_assert_held(xe_vma_bo(vma));
> - xe_vm_assert_held(vm);
>
> vm_dbg(&xe_vma_vm(vma)->xe->drm,
> - "Preparing unbind, with range [%llx...%llx) engine %p.\n",
> - xe_vma_start(vma), xe_vma_end(vma), q);
> -
> - num_entries = xe_pt_stage_unbind(tile, vma, entries);
> - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
> + "Preparing bind, with range [%llx...%llx)\n",
> + xe_vma_start(vma), xe_vma_end(vma) - 1);
>
> - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
> - xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
> - num_entries);
> + pt_op->vma = NULL;
> + pt_op->bind = true;
> + pt_op->rebind = BIT(tile->id) & vma->tile_present;
>
> - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
> - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
> + err = vma_reserve_fences(tile_to_xe(tile), vma);
> if (err)
> - return ERR_PTR(err);
> + return err;
>
> - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> - if (!ifence)
> - return ERR_PTR(-ENOMEM);
> + err = xe_pt_prepare_bind(tile, vma, pt_op->entries,
> + &pt_op->num_entries);
> + if (!err) {
> + xe_tile_assert(tile, pt_op->num_entries <=
> + ARRAY_SIZE(pt_op->entries));
> + xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
> + pt_op->num_entries, true);
>
> - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
> - if (!rfence) {
> - kfree(ifence);
> - return ERR_PTR(-ENOMEM);
> + xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
> + ++pt_update_ops->current_op;
> + pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
> +
> +
> + /*
> + * If rebind, we have to invalidate TLB on !LR vms to invalidate
> + * cached PTEs point to freed memory. on LR vms this is done
s/on/On/
> + * automatically when the context is re-enabled by the rebind worker,
> + * or in fault mode it was invalidated on PTE zapping.
> + *
> + * If !rebind, and scratch enabled VMs, there is a chance the scratch
> + * PTE is already cached in the TLB so it needs to be invalidated.
> + * on !LR VMs this is done in the ring ops preceding a batch, but on
ditto
> + * non-faulting LR, in particular on user-space batch buffer chaining,
> + * it needs to be done here.
> + */
> + if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
> + xe_vm_in_preempt_fence_mode(vm)))
> + pt_update_ops->needs_invalidation = true;
> + else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
> + /* We bump also if batch_invalidate_tlb is true */
> + vm->tlb_flush_seqno++;
> +
> + /* FIXME: Don't commit right away */
> + vma->tile_staged |= BIT(tile->id);
> + pt_op->vma = vma;
> + xe_pt_commit_bind(vma, pt_op->entries, pt_op->num_entries,
> + pt_op->rebind, deferred);
> }
>
> + return err;
> +}
> +
> +static int unbind_op_prepare(struct xe_tile *tile,
> + struct xe_vm_pgtable_update_ops *pt_update_ops,
> + struct xe_vma *vma)
> +{
> + u32 current_op = pt_update_ops->current_op;
> + struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
> + struct llist_head *deferred = &pt_update_ops->deferred;
> + int err;
> +
> + if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id)))
> + return 0;
> +
> + xe_bo_assert_held(xe_vma_bo(vma));
> +
> + vm_dbg(&xe_vma_vm(vma)->xe->drm,
> + "Preparing unbind, with range [%llx...%llx)\n",
> + xe_vma_start(vma), xe_vma_end(vma) - 1);
> +
> /*
> - * Even if we were already evicted and unbind to destroy, we need to
> - * clear again here. The eviction may have updated pagetables at a
> - * lower level, because it needs to be more conservative.
> + * Wait for invalidation to complete. Can corrupt internal page table
> + * state if an invalidation is running while preparing an unbind.
> */
> - fence = xe_migrate_update_pgtables(tile->migrate,
> - vm, NULL, q ? q :
> - vm->q[tile->id],
> - entries, num_entries,
> - syncs, num_syncs,
> - &unbind_pt_update.base);
> - if (!IS_ERR(fence)) {
> - int err;
> -
> - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
> - &xe_range_fence_kfree_ops,
> - unbind_pt_update.base.start,
> - unbind_pt_update.base.last, fence);
> - if (err)
> - dma_fence_wait(fence, false);
> + if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma)))
> + mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier);
>
> - /* TLB invalidation must be done before signaling unbind */
> - err = invalidation_fence_init(tile->primary_gt, ifence, fence,
> - xe_vma_start(vma),
> - xe_vma_end(vma),
> - xe_vma_vm(vma)->usm.asid);
> - if (err) {
> - dma_fence_put(fence);
> - kfree(ifence);
> - return ERR_PTR(err);
> + pt_op->vma = vma;
> + pt_op->bind = false;
> + pt_op->rebind = false;
> +
> + err = vma_reserve_fences(tile_to_xe(tile), vma);
> + if (err)
> + return err;
> +
> + pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries);
> +
> + xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
> + pt_op->num_entries, false);
> + xe_pt_update_ops_rfence_interval(pt_update_ops, vma);
> + ++pt_update_ops->current_op;
> + pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
> + pt_update_ops->needs_invalidation = true;
> +
> + /* FIXME: Don't commit right away */
> + xe_pt_commit_unbind(vma, pt_op->entries, pt_op->num_entries,
> + deferred);
> +
> + return 0;
> +}
> +
> +static int op_prepare(struct xe_vm *vm,
> + struct xe_tile *tile,
> + struct xe_vm_pgtable_update_ops *pt_update_ops,
> + struct xe_vma_op *op)
> +{
> + int err = 0;
> +
> + xe_vm_assert_held(vm);
> +
> + switch (op->base.op) {
> + case DRM_GPUVA_OP_MAP:
> + if (!op->map.immediate && xe_vm_in_fault_mode(vm))
> + break;
> +
> + err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma);
> + pt_update_ops->wait_vm_kernel = true;
> + break;
> + case DRM_GPUVA_OP_REMAP:
> + err = unbind_op_prepare(tile, pt_update_ops,
> + gpuva_to_vma(op->base.remap.unmap->va));
> +
> + if (!err && op->remap.prev) {
> + err = bind_op_prepare(vm, tile, pt_update_ops,
> + op->remap.prev);
> + pt_update_ops->wait_vm_bookkeep = true;
> }
> - fence = &ifence->base.base;
> + if (!err && op->remap.next) {
> + err = bind_op_prepare(vm, tile, pt_update_ops,
> + op->remap.next);
> + pt_update_ops->wait_vm_bookkeep = true;
> + }
> + break;
> + case DRM_GPUVA_OP_UNMAP:
> + err = unbind_op_prepare(tile, pt_update_ops,
> + gpuva_to_vma(op->base.unmap.va));
> + break;
> + case DRM_GPUVA_OP_PREFETCH:
> + err = bind_op_prepare(vm, tile, pt_update_ops,
> + gpuva_to_vma(op->base.prefetch.va));
> + pt_update_ops->wait_vm_kernel = true;
> + break;
> + default:
> + drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> + }
>
> - /* add shared fence now for pagetable delayed destroy */
> - dma_resv_add_fence(xe_vm_resv(vm), fence,
> - DMA_RESV_USAGE_BOOKKEEP);
> + return err;
> +}
>
> - /* This fence will be installed by caller when doing eviction */
> - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
> - DMA_RESV_USAGE_BOOKKEEP);
> - xe_pt_commit_unbind(vma, entries, num_entries,
> - unbind_pt_update.locked ? &deferred : NULL);
> - vma->tile_present &= ~BIT(tile->id);
> - } else {
> - kfree(rfence);
> - kfree(ifence);
> +static void
> +xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
> +{
> + init_llist_head(&pt_update_ops->deferred);
> + pt_update_ops->start = ~0x0ull;
> + pt_update_ops->last = 0x0ull;
> +}
> +
> +/**
> + * xe_pt_update_ops_prepare() - Prepare PT update operations
> + * @tile: Tile of PT update operations
> + * @vops: VMA operations
> + *
> + * Prepare PT update operations which includes updating internal PT state,
> + * allocate memory for page tables, populate page table being pruned in, and
> + * create PT update operations for leaf insertion / removal.
> + *
> + * Return: 0 on success, negative error code on error.
> + */
> +int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
> +{
> + struct xe_vm_pgtable_update_ops *pt_update_ops =
> + &vops->pt_update_ops[tile->id];
> + struct xe_vma_op *op;
> + int err;
> +
> + lockdep_assert_held(&vops->vm->lock);
> + xe_vm_assert_held(vops->vm);
> +
> + xe_pt_update_ops_init(pt_update_ops);
> +
> + err = dma_resv_reserve_fences(xe_vm_resv(vops->vm),
> + tile_to_xe(tile)->info.tile_count);
> + if (err)
> + return err;
> +
> + list_for_each_entry(op, &vops->list, link) {
> + err = op_prepare(vops->vm, tile, pt_update_ops, op);
> +
> + if (err)
> + return err;
> }
>
> - if (!vma->tile_present)
> - list_del_init(&vma->combined_links.rebind);
> + xe_tile_assert(tile, pt_update_ops->current_op <=
> + pt_update_ops->num_ops);
> +
> + return 0;
> +}
> +
> +static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
> + struct xe_vm_pgtable_update_ops *pt_update_ops,
> + struct xe_vma *vma, struct dma_fence *fence)
> +{
> + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
> + pt_update_ops->wait_vm_bookkeep ?
> + DMA_RESV_USAGE_KERNEL :
> + DMA_RESV_USAGE_BOOKKEEP);
> + vma->tile_present |= BIT(tile->id);
> + vma->tile_staged &= ~BIT(tile->id);
> + if (xe_vma_is_userptr(vma)) {
> + lockdep_assert_held_read(&vm->userptr.notifier_lock);
> + to_userptr_vma(vma)->userptr.initial_bind = true;
> + }
>
> - if (unbind_pt_update.locked) {
> - xe_tile_assert(tile, xe_vma_is_userptr(vma));
> + /*
> + * Kick rebind worker if this bind triggers preempt fences and not in
> + * the rebind worker
> + */
> + if (pt_update_ops->wait_vm_bookkeep &&
> + xe_vm_in_preempt_fence_mode(vm) &&
> + !current->mm)
> + xe_vm_queue_rebind_worker(vm);
> +}
> +
> +static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
> + struct xe_vm_pgtable_update_ops *pt_update_ops,
> + struct xe_vma *vma, struct dma_fence *fence)
> +{
> + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
> + pt_update_ops->wait_vm_bookkeep ?
> + DMA_RESV_USAGE_KERNEL :
> + DMA_RESV_USAGE_BOOKKEEP);
> + vma->tile_present &= ~BIT(tile->id);
> + if (!vma->tile_present) {
> + list_del_init(&vma->combined_links.rebind);
> + if (xe_vma_is_userptr(vma)) {
> + lockdep_assert_held_read(&vm->userptr.notifier_lock);
>
> - if (!vma->tile_present) {
> spin_lock(&vm->userptr.invalidated_lock);
> list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
> spin_unlock(&vm->userptr.invalidated_lock);
> }
> - up_read(&vm->userptr.notifier_lock);
> - xe_bo_put_commit(&deferred);
> }
> +}
> +
> +static void op_commit(struct xe_vm *vm,
> + struct xe_tile *tile,
> + struct xe_vm_pgtable_update_ops *pt_update_ops,
> + struct xe_vma_op *op, struct dma_fence *fence)
> +{
> + xe_vm_assert_held(vm);
> +
> + switch (op->base.op) {
> + case DRM_GPUVA_OP_MAP:
> + if (!op->map.immediate && xe_vm_in_fault_mode(vm))
> + break;
> +
> + bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence);
> + break;
> + case DRM_GPUVA_OP_REMAP:
> + unbind_op_commit(vm, tile, pt_update_ops,
> + gpuva_to_vma(op->base.remap.unmap->va), fence);
> +
> + if (op->remap.prev)
> + bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
> + fence);
> + if (op->remap.next)
> + bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
> + fence);
> + break;
> + case DRM_GPUVA_OP_UNMAP:
> + unbind_op_commit(vm, tile, pt_update_ops,
> + gpuva_to_vma(op->base.unmap.va), fence);
> + break;
> + case DRM_GPUVA_OP_PREFETCH:
> + bind_op_commit(vm, tile, pt_update_ops,
> + gpuva_to_vma(op->base.prefetch.va), fence);
> + break;
> + default:
> + drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> + }
> +}
> +
> +static const struct xe_migrate_pt_update_ops migrate_ops = {
> + .populate = xe_vm_populate_pgtable,
> + .clear = xe_migrate_clear_pgtable_callback,
> + .pre_commit = xe_pt_pre_commit,
> +};
> +
> +static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
> + .populate = xe_vm_populate_pgtable,
> + .clear = xe_migrate_clear_pgtable_callback,
> + .pre_commit = xe_pt_userptr_pre_commit,
> +};
> +
> +/**
> + * xe_pt_update_ops_run() - Run PT update operations
> + * @tile: Tile of PT update operations
> + * @vops: VMA operations
> + *
> + * Run PT update operations which includes committing internal PT state changes,
> + * creating job for PT update operations for leaf insertion / removal, and
> + * installing job fence in various places.
> + *
> + * Return: fence on success, negative ERR_PTR on error.
> + */
> +struct dma_fence *
> +xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> +{
> + struct xe_vm *vm = vops->vm;
> + struct xe_vm_pgtable_update_ops *pt_update_ops =
> + &vops->pt_update_ops[tile->id];
> + struct dma_fence *fence;
> + struct invalidation_fence *ifence = NULL;
> + struct xe_range_fence *rfence;
> + struct xe_vma_op *op;
> + int err = 0;
> + struct xe_migrate_pt_update update = {
> + .ops = pt_update_ops->needs_userptr_lock ?
> + &userptr_migrate_ops :
> + &migrate_ops,
> + .vops = vops,
> + .tile_id = tile->id
Nit: I think this needs a trailing comma.
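i.e.:

	.tile_id = tile->id,

so that adding another member later doesn't have to touch this line.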
> + };
> +
> + lockdep_assert_held(&vm->lock);
> + xe_vm_assert_held(vm);
> +
> + if (!pt_update_ops->current_op) {
> + xe_tile_assert(tile, xe_vm_in_fault_mode(vm));
> +
> + return dma_fence_get_stub();
> + }
> +
> + if (pt_update_ops->needs_invalidation) {
> + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
> + if (!ifence)
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
> + if (!rfence) {
> + err = -ENOMEM;
> + goto free_ifence;
> + }
> +
> + fence = xe_migrate_update_pgtables(tile->migrate, &update);
> + if (IS_ERR(fence)) {
> + err = PTR_ERR(fence);
> + goto free_rfence;
> + }
> +
> + err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
> + &xe_range_fence_kfree_ops,
> + pt_update_ops->start,
> + pt_update_ops->last, fence);
> + if (err)
> + dma_fence_wait(fence, false);
Could we maybe set err back to zero here, or not assign it in the first
place? Just so we don't leave any possible booby traps later.
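i.e. something like this (untested, just to illustrate what I mean):

	err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
				    &xe_range_fence_kfree_ops,
				    pt_update_ops->start,
				    pt_update_ops->last, fence);
	if (err) {
		/*
		 * Range fence insert failure is not fatal; just wait for
		 * the PT update to complete and carry on.
		 */
		dma_fence_wait(fence, false);
		err = 0;
	}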
<snip>