[Intel-xe] [PATCH] drm/xe: Drop -EAGAIN userptr invalidation dance from VM bind IOCTL
Thomas Hellström
thomas.hellstrom at linux.intel.com
Mon Oct 2 09:00:30 UTC 2023
On 9/27/23 06:31, Matthew Brost wrote:
> This isn't needed as the nexr exec, preempt rebind worker, or page fault
> will issue a rebind. Drop thing complication.
s/thing/the/ ?
Also, considering that not a lot of code can actually be dropped, and
that this dance enables us to build page-tables in the IOCTL rather
than in the rebind worker, doesn't it need reconsidering? Also, the
prefetch OP will then be more likely to succeed...
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_pt.c | 61 +-------------------------------------
> drivers/gpu/drm/xe/xe_vm.c | 10 -------
> 2 files changed, 1 insertion(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index aec469842471..34954d354013 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1042,36 +1042,6 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe,
> {}
> #endif
>
> -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
> -
> -static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
Hmm. Regardless of the way we choose to handle a racing invalidation,
wouldn't it make sense to keep this but perhaps rename it to
inject_inval, to make sure we properly test racing invalidations? See
the sketch below.
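Roughly what I have in mind, as an untested sketch (the helper name is
hypothetical): keep the divisor bookkeeping and the list_move from the
helper being removed below, and just drop the return value since we'd
no longer bubble -EAGAIN out of the pre-commit:

#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT

static void xe_pt_userptr_inject_inval(struct xe_vma *vma)
{
        u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2;
        static u32 count;

        if (count++ % divisor == divisor - 1) {
                struct xe_vm *vm = xe_vma_vm(vma);

                /* Simulate a racing invalidation notifier. */
                vma->userptr.divisor = divisor << 1;
                spin_lock(&vm->userptr.invalidated_lock);
                list_move_tail(&vma->userptr.invalidate_link,
                               &vm->userptr.invalidated);
                spin_unlock(&vm->userptr.invalidated_lock);
        }
}

#else

static void xe_pt_userptr_inject_inval(struct xe_vma *vma)
{
}

#endif
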
> -{
> - u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2;
> - static u32 count;
> -
> - if (count++ % divisor == divisor - 1) {
> - struct xe_vm *vm = xe_vma_vm(vma);
> -
> - vma->userptr.divisor = divisor << 1;
> - spin_lock(&vm->userptr.invalidated_lock);
> - list_move_tail(&vma->userptr.invalidate_link,
> - &vm->userptr.invalidated);
> - spin_unlock(&vm->userptr.invalidated_lock);
> - return true;
> - }
> -
> - return false;
> -}
> -
> -#else
> -
> -static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
> -{
> - return false;
> -}
> -
> -#endif
> -
> /**
> * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks
> * @base: Base we derive from.
> @@ -1139,7 +1109,6 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
> struct xe_pt_migrate_pt_update *userptr_update =
> container_of(pt_update, typeof(*userptr_update), base);
> struct xe_vma *vma = pt_update->vma;
> - unsigned long notifier_seq = vma->userptr.notifier_seq;
> struct xe_vm *vm = xe_vma_vm(vma);
> int err = xe_pt_vm_dependencies(pt_update->job,
> &vm->rftree[pt_update->tile_id],
> @@ -1149,36 +1118,8 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
> if (err)
> return err;
>
> - userptr_update->locked = false;
> -
> - /*
> - * Wait until nobody is running the invalidation notifier, and
> - * since we're exiting the loop holding the notifier lock,
> - * nobody can proceed invalidating either.
> - *
> - * Note that we don't update the vma->userptr.notifier_seq since
> - * we don't update the userptr pages.
> - */
> - do {
> - down_read(&vm->userptr.notifier_lock);
> - if (!mmu_interval_read_retry(&vma->userptr.notifier,
> - notifier_seq))
> - break;
You need to keep the mmu_interval_read_retry() check under the
notifier lock; otherwise page-table updates may race with page-table
zapping. But if we don't do the repin dance, you need to return 0 when
mmu_interval_read_retry() is true.
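I.e. something like this untested sketch for the tail of
xe_pt_userptr_pre_commit() (both branches return 0; the explicit check
only documents where the racing invalidation is detected, under the
lock):

        down_read(&vm->userptr.notifier_lock);
        userptr_update->locked = true;

        /*
         * Checked under the notifier lock so the PT update can't race
         * with page-table zapping. On a racing invalidation the VMA is
         * already on vm->userptr.invalidated, so the next exec, preempt
         * rebind worker or page fault will redo the bind; return 0
         * here, not -EAGAIN.
         */
        if (mmu_interval_read_retry(&vma->userptr.notifier,
                                    vma->userptr.notifier_seq))
                return 0;

        return 0;
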
> -
> - up_read(&vm->userptr.notifier_lock);
> -
> - if (userptr_update->bind)
> - return -EAGAIN;
> -
> - notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
> - } while (true);
> -
> - /* Inject errors to test_whether they are handled correctly */
> - if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) {
> - up_read(&vm->userptr.notifier_lock);
> - return -EAGAIN;
Just return zero here if we don't do the repin.
> - }
>
> + down_read(&vm->userptr.notifier_lock);
> userptr_update->locked = true;
>
> return 0;
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 861d050871bb..147d5f901f24 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2757,7 +2757,6 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> struct drm_exec exec;
> int err;
>
> -retry_userptr:
> drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
> drm_exec_until_all_locked(&exec) {
> err = op_execute(&exec, vm, vma, op);
> @@ -2767,15 +2766,6 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> }
> drm_exec_fini(&exec);
>
> - if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
> - lockdep_assert_held_write(&vm->lock);
> - err = xe_vma_userptr_pin_pages(vma);
> - if (!err)
> - goto retry_userptr;
> -
> - trace_xe_vma_fail(vma);
> - }
> -
> return err;
> }
>