[Intel-xe] [PATCH v2 8/8] drm/xe: VM LRU bulk move

Matthew Brost matthew.brost at intel.com
Wed May 24 14:02:06 UTC 2023


On Tue, May 23, 2023 at 10:01:53PM -0700, Christopher Snowhill wrote:
> On Tue, May 23, 2023 at 6:38 AM Matthew Brost <matthew.brost at intel.com> wrote:
> >
> > On Mon, May 22, 2023 at 11:52:43PM -0700, Christopher Snowhill wrote:
> > > On Mon, May 22, 2023 at 11:38 PM Christopher Snowhill <kode54 at gmail.com> wrote:
> > > >
> > > > On Mon, May 22, 2023 at 10:44 PM Matthew Brost <matthew.brost at intel.com> wrote:
> > > > >
> > > > > Use the TTM LRU bulk move for BOs tied to a VM. Update the bulk
> > > > > move's LRU position on every exec.
> > > > >
> > > > > v2: Bulk move for compute VMs, remove bulk LRU in xe_gem_object_free
> > > > >
> > > > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/xe/xe_bo.c       | 24 ++++++++++++++++++++----
> > > > >  drivers/gpu/drm/xe/xe_bo.h       |  4 ++--
> > > > >  drivers/gpu/drm/xe/xe_dma_buf.c  |  2 +-
> > > > >  drivers/gpu/drm/xe/xe_exec.c     |  6 ++++++
> > > > >  drivers/gpu/drm/xe/xe_vm.c       |  4 ++++
> > > > >  drivers/gpu/drm/xe/xe_vm_types.h |  3 +++
> > > > >  6 files changed, 36 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> > > > > index c82e995df779..22cf65bd63fe 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_bo.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > > > > @@ -968,6 +968,16 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
> > > > >
> > > > >  static void xe_gem_object_free(struct drm_gem_object *obj)
> > > > >  {
> > > > > +       struct xe_bo *bo = gem_to_xe_bo(obj);
> > > > > +
> > > > > +       if (bo->vm && !xe_vm_in_fault_mode(bo->vm) && xe_bo_is_user(bo)) {
> > > > > +               struct ww_acquire_ctx ww;
> > > > > +
> > > > > +               xe_bo_lock(bo, &ww, 0, false);
> > > > > +               ttm_bo_set_bulk_move(&bo->ttm, NULL);
> > > > > +               xe_bo_unlock(bo, &ww);
> > > > > +       }
> > > > > +
> > > > >         /* Our BO reference counting scheme works as follows:
> > > > >          *
> > > > >          * The gem object kref is typically used throughout the driver,
> > > > > @@ -1081,8 +1091,8 @@ void xe_bo_free(struct xe_bo *bo)
> > > > >
> > > > >  struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> > > > >                                     struct xe_gt *gt, struct dma_resv *resv,
> > > > > -                                   size_t size, enum ttm_bo_type type,
> > > > > -                                   u32 flags)
> > > > > +                                   struct ttm_lru_bulk_move *bulk, size_t size,
> > > > > +                                   enum ttm_bo_type type, u32 flags)
> > > > >  {
> > > > >         struct ttm_operation_ctx ctx = {
> > > > >                 .interruptible = true,
> > > > > @@ -1149,7 +1159,10 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> > > > >                 return ERR_PTR(err);
> > > > >
> > > > >         bo->created = true;
> > > > > -       ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
> > > > > +       if (bulk)
> > > > > +               ttm_bo_set_bulk_move(&bo->ttm, bulk);
> > > > > +       else
> > > > > +               ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
> > > > >
> > > > >         return bo;
> > > > >  }
> > > > > @@ -1219,7 +1232,10 @@ xe_bo_create_locked_range(struct xe_device *xe,
> > > > >                 }
> > > > >         }
> > > > >
> > > > > -       bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL, size,
> > > > > +       bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL,
> > > > > +                                  vm && !xe_vm_in_fault_mode(vm) &&
> > > > > +                                  flags & XE_BO_CREATE_USER_BIT ?
> > > > > +                                  &vm->lru_bulk_move : NULL, size,
> > > > >                                    type, flags);
> > > > >         if (IS_ERR(bo))
> > > > >                 return bo;
> > > > > diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> > > > > index 7e111332c35a..f7562012b836 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_bo.h
> > > > > +++ b/drivers/gpu/drm/xe/xe_bo.h
> > > > > @@ -81,8 +81,8 @@ void xe_bo_free(struct xe_bo *bo);
> > > > >
> > > > >  struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> > > > >                                     struct xe_gt *gt, struct dma_resv *resv,
> > > > > -                                   size_t size, enum ttm_bo_type type,
> > > > > -                                   u32 flags);
> > > > > +                                   struct ttm_lru_bulk_move *bulk, size_t size,
> > > > > +                                   enum ttm_bo_type type, u32 flags);
> > > > >  struct xe_bo *
> > > > >  xe_bo_create_locked_range(struct xe_device *xe,
> > > > >                           struct xe_gt *gt, struct xe_vm *vm,
> > > > > diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
> > > > > index 9b252cc782b7..975dee1f770f 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_dma_buf.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_dma_buf.c
> > > > > @@ -199,7 +199,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
> > > > >         int ret;
> > > > >
> > > > >         dma_resv_lock(resv, NULL);
> > > > > -       bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size,
> > > > > +       bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
> > > > >                                    ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
> > > > >         if (IS_ERR(bo)) {
> > > > >                 ret = PTR_ERR(bo);
> > > > > diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> > > > > index ff4df00f20a2..b2dcf34af16b 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_exec.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > > > > @@ -395,6 +395,12 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > > > >         xe_sched_job_push(job);
> > > > >         xe_vm_reactivate_rebind(vm);
> > > > >
> > > > > +       if (!err && !xe_vm_no_dma_fences(vm)) {
> > > > > +               spin_lock(&xe->ttm.lru_lock);
> > > > > +               ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
> > > > > +               spin_unlock(&xe->ttm.lru_lock);
> > > > > +       }
> > > > > +
> > > > >  err_repin:
> > > > >         if (!xe_vm_no_dma_fences(vm))
> > > > >                 up_read(&vm->userptr.notifier_lock);
> > > > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > > > index 0398da1ef1e2..a5d65d0325d6 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > > > @@ -629,6 +629,10 @@ static void preempt_rebind_work_func(struct work_struct *w)
> > > > >
> > > > >  #undef retry_required
> > > > >
> > > > > +       spin_lock(&vm->xe->ttm.lru_lock);
> > > > > +       ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
> > > > > +       spin_unlock(&vm->xe->ttm.lru_lock);
> > > > > +
> > > > >         /* Point of no return. */
> > > > >         arm_preempt_fences(vm, &preempt_fences);
> > > > >         resume_and_reinstall_preempt_fences(vm);
> > > > > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > index fada7896867f..d3e99f22510d 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > @@ -164,6 +164,9 @@ struct xe_vm {
> > > > >         /** Protects @rebind_list and the page-table structures */
> > > > >         struct dma_resv resv;
> > > > >
> > > > > +       /** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
> > > > > +       struct ttm_lru_bulk_move lru_bulk_move;
> > > > > +
> > > > >         u64 size;
> > > > >         struct rb_root vmas;
> > > > >
> > > > > --
> > > > > 2.34.1
> > > >
> > > > Something in this patch version, either moving the cleanup to
> > > > gem_free, or the move_tail later in the patch set, causes kmscube to
> > > > deadlock on terminate, forcing me to reboot the machine. Reboot got
> > > > stuck when it tried to terminate my user session, so I had to press
> > > > the reset button.
> > >
> >
> > Any chance you have a dmesg for the deadlock on terminate? Hoping it
> > gives some clues to the issue.
> >
> > > Verified, substituting patch 8 of 8 from v1 results in cleanly
> > > terminating kmscube.
> >
> > It was Thomas's suggestion to move this from close to free. This is a
> > fairly old patch and I couldn't recall why it was in close rather than
> > free. Now that I think about it, I believe this was originally in free
> > but moved to close to fix a bug; I will try to remember why it needs
> > to be in close.
> >
> > Matt
> 
> It actually goes zombie/defunct on termination. I couldn't backtrace
> it myself, so I had to wait for the kernel to spew forth backtraces of
> its own from the hung kworker processes that were stuck:
> 

This is very helpful, thanks Christopher. I see the problem: we call
xe_bo_put while holding the VM dma-resv lock, which in turn calls
xe_gem_object_free. xe_gem_object_free then tries to acquire the VM
dma-resv again and deadlocks.
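
From the kmscube trace below, the offending chain is roughly this
(simplified; a VM-private BO shares the VM's dma-resv, so locking the
BO means relocking the resv we already hold):

	xe_file_close()
	  xe_vm_close_and_put()		/* takes the VM dma-resv */
	    xe_vma_destroy_late()
	      xe_bo_put()
	        xe_gem_object_free()
	          xe_bo_lock()		/* same dma-resv again -> deadlock */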

I think I'll move this to the close function.
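
Something like the below is what I have in mind (untested sketch; it
just relocates the hunk above from xe_gem_object_free into the gem
close hook, assuming we wire a .close callback into our
drm_gem_object_funcs -- close runs from drm_file_free before
xe_file_close tears down the VM, so the VM dma-resv should not already
be held there):

	static void xe_gem_object_close(struct drm_gem_object *obj,
					struct drm_file *file_priv)
	{
		struct xe_bo *bo = gem_to_xe_bo(obj);

		/* Same condition as in xe_gem_object_free: only user BOs
		 * tied to a non-fault-mode VM are in the VM's bulk move.
		 */
		if (bo->vm && !xe_vm_in_fault_mode(bo->vm) && xe_bo_is_user(bo)) {
			struct ww_acquire_ctx ww;

			xe_bo_lock(bo, &ww, 0, false);
			ttm_bo_set_bulk_move(&bo->ttm, NULL);
			xe_bo_unlock(bo, &ww);
		}
	}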

Matt

> [  245.565723] INFO: task kworker/14:1:130 blocked for more than 122 seconds.
> [  245.566642]       Tainted: P        W  OE 6.3.0-1-drm-xe-next-git-g48a7045a0879-dirty #6
> [  245.567413] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [  245.568207] task:kworker/14:1    state:D stack:0     pid:130 ppid:2      flags:0x00004000
> [  245.568216] Workqueue: events __guc_engine_fini_async [xe]
> [  245.568392] Call Trace:
> [  245.568394]  <TASK>
> [  245.568398]  __schedule+0x443/0x1400
> [  245.568408]  ? check_preempt_curr+0x5e/0x70
> [  245.568415]  ? try_to_wake_up+0x263/0x610
> [  245.568421]  schedule+0x5e/0xd0
> [  245.568426]  schedule_preempt_disabled+0x15/0x30
> [  245.568431]  __ww_mutex_lock.constprop.0+0x539/0x940
> [  245.568439]  ttm_eu_reserve_buffers+0x1f4/0x2f0 [ttm bea26c7978113683d7615187c2f4d33b4f3ad4a6]
> [  245.568462]  xe_vm_lock+0xe3/0x110 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.568653]  xe_lrc_finish+0x62/0x130 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.568827]  xe_engine_fini+0x2f/0x90 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.568989]  process_one_work+0x1c4/0x3d0
> [  245.568996]  worker_thread+0x51/0x390
> [  245.569012]  ? __pfx_worker_thread+0x10/0x10
> [  245.569018]  kthread+0xdb/0x110
> [  245.569022]  ? __pfx_kthread+0x10/0x10
> [  245.569026]  ret_from_fork+0x29/0x50
> [  245.569036]  </TASK>
> [  245.569051] INFO: task kworker/14:3:488 blocked for more than 122 seconds.
> [  245.569963]       Tainted: P        W  OE 6.3.0-1-drm-xe-next-git-g48a7045a0879-dirty #6
> [  245.570782] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [  245.571592] task:kworker/14:3    state:D stack:0     pid:488 ppid:2      flags:0x00004000
> [  245.571599] Workqueue: events __guc_engine_fini_async [xe]
> [  245.571768] Call Trace:
> [  245.571769]  <TASK>
> [  245.571772]  __schedule+0x443/0x1400
> [  245.571777]  ? load_balance+0x18d/0xe20
> [  245.571785]  schedule+0x5e/0xd0
> [  245.571790]  schedule_preempt_disabled+0x15/0x30
> [  245.571794]  __ww_mutex_lock.constprop.0+0x539/0x940
> [  245.571802]  ttm_eu_reserve_buffers+0x1f4/0x2f0 [ttm bea26c7978113683d7615187c2f4d33b4f3ad4a6]
> [  245.571823]  xe_vm_lock+0xe3/0x110 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.572012]  xe_lrc_finish+0x62/0x130 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.572185]  xe_engine_fini+0x2f/0x90 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.572378]  process_one_work+0x1c4/0x3d0
> [  245.572385]  worker_thread+0x51/0x390
> [  245.572391]  ? __pfx_worker_thread+0x10/0x10
> [  245.572396]  kthread+0xdb/0x110
> [  245.572400]  ? __pfx_kthread+0x10/0x10
> [  245.572404]  ret_from_fork+0x29/0x50
> [  245.572412]  </TASK>
> [  245.572552] INFO: task kmscub:traceq0:8581 blocked for more than 122 seconds.
> [  245.573498]       Tainted: P        W  OE 6.3.0-1-drm-xe-next-git-g48a7045a0879-dirty #6
> [  245.574336] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [  245.575151] task:kmscub:traceq0  state:D stack:0     pid:8581 ppid:8170   flags:0x00004002
> [  245.575158] Call Trace:
> [  245.575159]  <TASK>
> [  245.575161]  __schedule+0x443/0x1400
> [  245.575167]  ? asm_sysvec_call_function_single+0x1a/0x20
> [  245.575174]  schedule+0x5e/0xd0
> [  245.575179]  schedule_preempt_disabled+0x15/0x30
> [  245.575183]  __ww_mutex_lock.constprop.0+0x539/0x940
> [  245.575191]  ttm_eu_reserve_buffers+0x1f4/0x2f0 [ttm bea26c7978113683d7615187c2f4d33b4f3ad4a6]
> [  245.575212]  xe_bo_lock+0xaf/0xe0 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.575373]  xe_gem_object_free+0xb4/0x100 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.575532]  xe_vma_destroy_late+0xc4/0xf0 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.575726]  xe_vm_close_and_put+0x191/0x490 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.575913]  ? xa_find+0x92/0x100
> [  245.575921]  xe_file_close+0x196/0x200 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> [  245.576081]  drm_file_free+0x219/0x270
> [  245.576088]  drm_release_noglobal+0x1f/0x70
> [  245.576093]  __fput+0x86/0x250
> [  245.576099]  task_work_run+0x5a/0x90
> [  245.576104]  do_exit+0x342/0xaf0
> [  245.576109]  ? __futex_unqueue+0x29/0x40
> [  245.576115]  ? futex_unqueue+0x3c/0x60
> [  245.576121]  do_group_exit+0x31/0x80
> [  245.576125]  get_signal+0x9a5/0x9e0
> [  245.576133]  arch_do_signal_or_restart+0x3e/0x270
> [  245.576141]  exit_to_user_mode_prepare+0x185/0x1e0
> [  245.576148]  syscall_exit_to_user_mode+0x1b/0x40
> [  245.576155]  do_syscall_64+0x6c/0x90
> [  245.576161]  ? exc_page_fault+0x7c/0x180
> [  245.576166]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
> [  245.576174] RIP: 0033:0x7f8956f44f0e
> [  245.576179] RSP: 002b:00007f892b7fdcb0 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
> [  245.576184] RAX: fffffffffffffe00 RBX: 0000000000000000 RCX: 00007f8956f44f0e
> [  245.576187] RDX: 0000000000000000 RSI: 0000000000000189 RDI: 000055c8c22459c8
> [  245.576190] RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000ffffffff
> [  245.576192] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
> [  245.576194] R13: 000055c8c2245978 R14: 0000000000000000 R15: 000055c8c22459c8
> [  245.576201]  </TASK>

