[Intel-xe] [PATCH v2 8/8] drm/xe: VM LRU bulk move
Christopher Snowhill
kode54 at gmail.com
Tue May 23 06:38:19 UTC 2023
On Mon, May 22, 2023 at 10:44 PM Matthew Brost <matthew.brost at intel.com> wrote:
>
> Use the TTM LRU bulk move for BOs tied to a VM. Update the bulk move's
> LRU position on every exec.
>
> v2: Bulk move for compute VMs, remove bulk LRU in xe_gem_object_free
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_bo.c | 24 ++++++++++++++++++++----
> drivers/gpu/drm/xe/xe_bo.h | 4 ++--
> drivers/gpu/drm/xe/xe_dma_buf.c | 2 +-
> drivers/gpu/drm/xe/xe_exec.c | 6 ++++++
> drivers/gpu/drm/xe/xe_vm.c | 4 ++++
> drivers/gpu/drm/xe/xe_vm_types.h | 3 +++
> 6 files changed, 36 insertions(+), 7 deletions(-)
>
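For anyone following along, the TTM machinery this series builds on
reduces to three entry points. A minimal sketch of the lifecycle (my
reading of the TTM API, not code from this series; error handling
omitted):

    /* One bulk-move cursor per VM, shared by all of its BOs. */
    struct ttm_lru_bulk_move bulk;

    ttm_lru_bulk_move_init(&bulk);

    /* At BO creation, with the BO's reservation lock held: attach
     * the BO to the cursor instead of bumping it on the LRU
     * individually.
     */
    ttm_bo_set_bulk_move(&bo->ttm, &bulk);

    /* On every exec, move everything attached to the cursor to the
     * LRU tail with one list splice per memory type and priority,
     * rather than one move per BO. Requires the LRU lock.
     */
    spin_lock(&xe->ttm.lru_lock);
    ttm_lru_bulk_move_tail(&bulk);
    spin_unlock(&xe->ttm.lru_lock);

    /* Before the BO is destroyed, detach it so the cursor never
     * points at a dead object.
     */
    ttm_bo_set_bulk_move(&bo->ttm, NULL);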
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index c82e995df779..22cf65bd63fe 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -968,6 +968,16 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
>
> static void xe_gem_object_free(struct drm_gem_object *obj)
> {
> + struct xe_bo *bo = gem_to_xe_bo(obj);
> +
> + if (bo->vm && !xe_vm_in_fault_mode(bo->vm) && xe_bo_is_user(bo)) {
> + struct ww_acquire_ctx ww;
> +
> + xe_bo_lock(bo, &ww, 0, false);
> + ttm_bo_set_bulk_move(&bo->ttm, NULL);
> + xe_bo_unlock(bo, &ww);
> + }
> +
> /* Our BO reference counting scheme works as follows:
> *
> * The gem object kref is typically used throughout the driver,
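A note on the hunk above: ttm_bo_set_bulk_move() asserts that the BO's
reservation lock is held, hence the xe_bo_lock()/xe_bo_unlock() pair.
Keep in mind what that lock actually is for a VM-private BO (a sketch,
based on the create path below passing &vm->resv as the BO's resv):

    xe_bo_lock(bo, &ww, 0, false);
    /* bo->ttm.base.resv == &bo->vm->resv here, so this lock in the
     * final GEM put is the VM's dma_resv, not a per-BO lock.
     */
    ttm_bo_set_bulk_move(&bo->ttm, NULL);
    xe_bo_unlock(bo, &ww);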
> @@ -1081,8 +1091,8 @@ void xe_bo_free(struct xe_bo *bo)
>
> struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> struct xe_gt *gt, struct dma_resv *resv,
> - size_t size, enum ttm_bo_type type,
> - u32 flags)
> + struct ttm_lru_bulk_move *bulk, size_t size,
> + enum ttm_bo_type type, u32 flags)
> {
> struct ttm_operation_ctx ctx = {
> .interruptible = true,
> @@ -1149,7 +1159,10 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> return ERR_PTR(err);
>
> bo->created = true;
> - ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
> + if (bulk)
> + ttm_bo_set_bulk_move(&bo->ttm, bulk);
> + else
> + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
>
> return bo;
> }
> @@ -1219,7 +1232,10 @@ xe_bo_create_locked_range(struct xe_device *xe,
> }
> }
>
> - bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL, size,
> + bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL,
> + vm && !xe_vm_in_fault_mode(vm) &&
> + flags & XE_BO_CREATE_USER_BIT ?
> + &vm->lru_bulk_move : NULL, size,
> type, flags);
> if (IS_ERR(bo))
> return bo;
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 7e111332c35a..f7562012b836 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -81,8 +81,8 @@ void xe_bo_free(struct xe_bo *bo);
>
> struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> struct xe_gt *gt, struct dma_resv *resv,
> - size_t size, enum ttm_bo_type type,
> - u32 flags);
> + struct ttm_lru_bulk_move *bulk, size_t size,
> + enum ttm_bo_type type, u32 flags);
> struct xe_bo *
> xe_bo_create_locked_range(struct xe_device *xe,
> struct xe_gt *gt, struct xe_vm *vm,
> diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
> index 9b252cc782b7..975dee1f770f 100644
> --- a/drivers/gpu/drm/xe/xe_dma_buf.c
> +++ b/drivers/gpu/drm/xe/xe_dma_buf.c
> @@ -199,7 +199,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
> int ret;
>
> dma_resv_lock(resv, NULL);
> - bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size,
> + bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
> ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
> if (IS_ERR(bo)) {
> ret = PTR_ERR(bo);
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index ff4df00f20a2..b2dcf34af16b 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -395,6 +395,12 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> xe_sched_job_push(job);
> xe_vm_reactivate_rebind(vm);
>
> + if (!err && !xe_vm_no_dma_fences(vm)) {
> + spin_lock(&xe->ttm.lru_lock);
> + ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
> + spin_unlock(&xe->ttm.lru_lock);
> + }
> +
> err_repin:
> if (!xe_vm_no_dma_fences(vm))
> up_read(&vm->userptr.notifier_lock);
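This hunk is the payoff: the LRU position of every BO in the VM is
refreshed with a bounded number of list splices per exec. Without the
cursor, the equivalent maintenance would be a per-BO walk, roughly like
this (hypothetical; the list iteration is made up for illustration):

    spin_lock(&xe->ttm.lru_lock);
    list_for_each_entry(vma, &vm->bo_list, vm_link)  /* hypothetical */
        ttm_bo_move_to_lru_tail(&vma->bo->ttm);
    spin_unlock(&xe->ttm.lru_lock);

Note also that long-running (no-dma-fences) VMs are skipped here; they
get the same ttm_lru_bulk_move_tail() treatment in
preempt_rebind_work_func() in the xe_vm.c hunk below.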
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 0398da1ef1e2..a5d65d0325d6 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -629,6 +629,10 @@ static void preempt_rebind_work_func(struct work_struct *w)
>
> #undef retry_required
>
> + spin_lock(&vm->xe->ttm.lru_lock);
> + ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
> + spin_unlock(&vm->xe->ttm.lru_lock);
> +
> /* Point of no return. */
> arm_preempt_fences(vm, &preempt_fences);
> resume_and_reinstall_preempt_fences(vm);
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index fada7896867f..d3e99f22510d 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -164,6 +164,9 @@ struct xe_vm {
> /** Protects @rebind_list and the page-table structures */
> struct dma_resv resv;
>
> + /** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
> + struct ttm_lru_bulk_move lru_bulk_move;
> +
> u64 size;
> struct rb_root vmas;
>
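One thing I could not spot in this patch: initialization of the new
cursor. If the VM is kzalloc'ed that may be fine in practice, since the
TTM init helper is just a memset, but the explicit form would be
(assuming xe_vm_create() is where this belongs):

    /* During VM construction, before any BO is attached. */
    ttm_lru_bulk_move_init(&vm->lru_bulk_move);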
> --
> 2.34.1
Something in this version of the patch, either moving the bulk-move
cleanup into xe_gem_object_free() or the ttm_lru_bulk_move_tail() calls
added later in the series, causes kmscube to deadlock on exit, forcing
me to reboot the machine. The reboot then got stuck trying to terminate
my user session, so I had to press the reset button.
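
If I had to guess at the gem_free angle (unverified): VM-private BOs
share the VM's reservation object, so the new xe_bo_lock() in
xe_gem_object_free() takes &vm->resv. If the final GEM put can run on a
path that already holds that lock, for instance during VM teardown, it
would self-deadlock along these lines (hypothetical call chain):

    xe_vm_lock(vm, &ww, 0, false);          /* holds &vm->resv     */
        ...
        drm_gem_object_put(gem_obj);        /* drops the last ref  */
            xe_gem_object_free(gem_obj);
                xe_bo_lock(bo, &ww2, 0, false); /* &vm->resv again:
                                                 * stuck           */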