[Intel-xe] [PATCH v2 8/8] drm/xe: VM LRU bulk move

Thomas Hellström thomas.hellstrom at linux.intel.com
Thu May 25 12:55:23 UTC 2023


On Wed, 2023-05-24 at 14:02 +0000, Matthew Brost wrote:
> On Tue, May 23, 2023 at 10:01:53PM -0700, Christopher Snowhill wrote:
> > On Tue, May 23, 2023 at 6:38 AM Matthew Brost
> > <matthew.brost at intel.com> wrote:
> > > 
> > > On Mon, May 22, 2023 at 11:52:43PM -0700, Christopher Snowhill
> > > wrote:
> > > > On Mon, May 22, 2023 at 11:38 PM Christopher Snowhill
> > > > <kode54 at gmail.com> wrote:
> > > > > 
> > > > > On Mon, May 22, 2023 at 10:44 PM Matthew Brost
> > > > > <matthew.brost at intel.com> wrote:
> > > > > > 
> > > > > > Use the TTM LRU bulk move for BOs tied to a VM. Update the bulk
> > > > > > move's LRU position on every exec.
> > > > > >
> > > > > > v2: Bulk move for compute VMs, remove bulk LRU in xe_gem_object_free
> > > > > >
> > > > > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > > > > ---
> > > > > >  drivers/gpu/drm/xe/xe_bo.c       | 24 ++++++++++++++++++++----
> > > > > >  drivers/gpu/drm/xe/xe_bo.h       |  4 ++--
> > > > > >  drivers/gpu/drm/xe/xe_dma_buf.c  |  2 +-
> > > > > >  drivers/gpu/drm/xe/xe_exec.c     |  6 ++++++
> > > > > >  drivers/gpu/drm/xe/xe_vm.c       |  4 ++++
> > > > > >  drivers/gpu/drm/xe/xe_vm_types.h |  3 +++
> > > > > >  6 files changed, 36 insertions(+), 7 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> > > > > > index c82e995df779..22cf65bd63fe 100644
> > > > > > --- a/drivers/gpu/drm/xe/xe_bo.c
> > > > > > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > > > > > @@ -968,6 +968,16 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
> > > > > >
> > > > > >  static void xe_gem_object_free(struct drm_gem_object *obj)
> > > > > >  {
> > > > > > +       struct xe_bo *bo = gem_to_xe_bo(obj);
> > > > > > +
> > > > > > +       if (bo->vm && !xe_vm_in_fault_mode(bo->vm) && xe_bo_is_user(bo)) {
> > > > > > +               struct ww_acquire_ctx ww;
> > > > > > +
> > > > > > +               xe_bo_lock(bo, &ww, 0, false);
> > > > > > +               ttm_bo_set_bulk_move(&bo->ttm, NULL);
> > > > > > +               xe_bo_unlock(bo, &ww);
> > > > > > +       }
> > > > > > +
> > > > > >         /* Our BO reference counting scheme works as follows:
> > > > > >          *
> > > > > >          * The gem object kref is typically used throughout the driver,
> > > > > > @@ -1081,8 +1091,8 @@ void xe_bo_free(struct xe_bo *bo)
> > > > > >
> > > > > >  struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> > > > > >                                     struct xe_gt *gt, struct dma_resv *resv,
> > > > > > -                                   size_t size, enum ttm_bo_type type,
> > > > > > -                                   u32 flags)
> > > > > > +                                   struct ttm_lru_bulk_move *bulk, size_t size,
> > > > > > +                                   enum ttm_bo_type type, u32 flags)
> > > > > >  {
> > > > > >         struct ttm_operation_ctx ctx = {
> > > > > >                 .interruptible = true,
> > > > > > @@ -1149,7 +1159,10 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> > > > > >                 return ERR_PTR(err);
> > > > > >
> > > > > >         bo->created = true;
> > > > > > -       ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
> > > > > > +       if (bulk)
> > > > > > +               ttm_bo_set_bulk_move(&bo->ttm, bulk);
> > > > > > +       else
> > > > > > +               ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
> > > > > >
> > > > > >         return bo;
> > > > > >  }
> > > > > > @@ -1219,7 +1232,10 @@ xe_bo_create_locked_range(struct xe_device *xe,
> > > > > >                 }
> > > > > >         }
> > > > > >
> > > > > > -       bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL, size,
> > > > > > +       bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL,
> > > > > > +                                  vm && !xe_vm_in_fault_mode(vm) &&
> > > > > > +                                  flags & XE_BO_CREATE_USER_BIT ?
> > > > > > +                                  &vm->lru_bulk_move : NULL, size,
> > > > > >                                    type, flags);
> > > > > >         if (IS_ERR(bo))
> > > > > >                 return bo;
> > > > > > diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> > > > > > index 7e111332c35a..f7562012b836 100644
> > > > > > --- a/drivers/gpu/drm/xe/xe_bo.h
> > > > > > +++ b/drivers/gpu/drm/xe/xe_bo.h
> > > > > > @@ -81,8 +81,8 @@ void xe_bo_free(struct xe_bo *bo);
> > > > > >
> > > > > >  struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> > > > > >                                     struct xe_gt *gt, struct dma_resv *resv,
> > > > > > -                                   size_t size, enum ttm_bo_type type,
> > > > > > -                                   u32 flags);
> > > > > > +                                   struct ttm_lru_bulk_move *bulk, size_t size,
> > > > > > +                                   enum ttm_bo_type type, u32 flags);
> > > > > >  struct xe_bo *
> > > > > >  xe_bo_create_locked_range(struct xe_device *xe,
> > > > > >                           struct xe_gt *gt, struct xe_vm *vm,
> > > > > > diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
> > > > > > index 9b252cc782b7..975dee1f770f 100644
> > > > > > --- a/drivers/gpu/drm/xe/xe_dma_buf.c
> > > > > > +++ b/drivers/gpu/drm/xe/xe_dma_buf.c
> > > > > > @@ -199,7 +199,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
> > > > > >         int ret;
> > > > > >
> > > > > >         dma_resv_lock(resv, NULL);
> > > > > > -       bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size,
> > > > > > +       bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
> > > > > >                                    ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
> > > > > >         if (IS_ERR(bo)) {
> > > > > >                 ret = PTR_ERR(bo);
> > > > > > diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> > > > > > index ff4df00f20a2..b2dcf34af16b 100644
> > > > > > --- a/drivers/gpu/drm/xe/xe_exec.c
> > > > > > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > > > > > @@ -395,6 +395,12 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > > > > >         xe_sched_job_push(job);
> > > > > >         xe_vm_reactivate_rebind(vm);
> > > > > >
> > > > > > +       if (!err && !xe_vm_no_dma_fences(vm)) {
> > > > > > +               spin_lock(&xe->ttm.lru_lock);
> > > > > > +               ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
> > > > > > +               spin_unlock(&xe->ttm.lru_lock);
> > > > > > +       }
> > > > > > +
> > > > > >  err_repin:
> > > > > >         if (!xe_vm_no_dma_fences(vm))
> > > > > >                 up_read(&vm->userptr.notifier_lock);
> > > > > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > > > > index 0398da1ef1e2..a5d65d0325d6 100644
> > > > > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > > > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > > > > @@ -629,6 +629,10 @@ static void preempt_rebind_work_func(struct work_struct *w)
> > > > > >
> > > > > >  #undef retry_required
> > > > > >
> > > > > > +       spin_lock(&vm->xe->ttm.lru_lock);
> > > > > > +       ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
> > > > > > +       spin_unlock(&vm->xe->ttm.lru_lock);
> > > > > > +
> > > > > >         /* Point of no return. */
> > > > > >         arm_preempt_fences(vm, &preempt_fences);
> > > > > >         resume_and_reinstall_preempt_fences(vm);
> > > > > > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > > index fada7896867f..d3e99f22510d 100644
> > > > > > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > > @@ -164,6 +164,9 @@ struct xe_vm {
> > > > > >         /** Protects @rebind_list and the page-table structures */
> > > > > >         struct dma_resv resv;
> > > > > >
> > > > > > +       /** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
> > > > > > +       struct ttm_lru_bulk_move lru_bulk_move;
> > > > > > +
> > > > > >         u64 size;
> > > > > >         struct rb_root vmas;
> > > > > >
> > > > > > --
> > > > > > 2.34.1
> > > > > 
> > > > > Something in this patch version, either moving the cleanup to
> > > > > gem_free, or the move_tail later in the patch set, causes kmscube
> > > > > to deadlock on terminate, forcing me to reboot the machine. Reboot
> > > > > got stuck when it tried to terminate my user session, so I had to
> > > > > press the reset button.
> > > > 
> > > 
> > > Any chance you have a dmesg for the deadlock on terminate? Hoping it
> > > gives some clues to the issue.
> > > 
> > > > Verified, substituting patch 8 of 8 from v1 results in cleanly
> > > > terminating kmscube.
> > > 
> > > It was Thomas's suggestion to move this from close to free; this is a
> > > fairly old patch and I couldn't recall why it was in close rather than
> > > free. Now that I think about it, I believe this was originally in free
> > > but was moved to close to fix a bug. I'll try to remember why it needs
> > > to be in close.
> > > 
> > > Matt
> > 
> > It actually goes zombie/defunct on termination. I couldn't backtrace it
> > myself, so I had to wait for the kernel to print its own backtraces for
> > the hung kworker processes that got stuck:
> > 
> 
> This is very helpful, thanks Christopher. I see the problem: we call
> xe_bo_put() while holding the VM dma-resv, which in turn calls
> xe_gem_object_free(). xe_gem_object_free() then tries to acquire the VM
> dma-resv again and deadlocks.
> 
> I think I'll move this to the close function.
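>
> Roughly along these lines (sketch only; the close hook and its exact
> wiring are assumptions on my side, untested):
>
> static void xe_gem_object_close(struct drm_gem_object *obj,
> 				struct drm_file *file_priv)
> {
> 	struct xe_bo *bo = gem_to_xe_bo(obj);
>
> 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm) && xe_bo_is_user(bo)) {
> 		struct ww_acquire_ctx ww;
>
> 		/* At GEM handle close time nothing in this path should
> 		 * already hold the shared VM resv, so taking the BO lock
> 		 * here shouldn't self-deadlock the way it does from
> 		 * xe_gem_object_free().
> 		 */
> 		xe_bo_lock(bo, &ww, 0, false);
> 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
> 		xe_bo_unlock(bo, &ww);
> 	}
> }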

Ugh, yes. This is why it really should be in the release_notify
callback, where we've individualized the resvs and can easily lock
without sleeping...

In any case, we need to modify TTM to do that, so for now: Ack to
moving it to close().
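
For reference, a rough sketch of what the release_notify variant could
look like once TTM allows it. The hook exists in struct ttm_device_funcs,
but whether ttm_bo_set_bulk_move() may be called from that context as-is
is exactly the TTM change mentioned above, so treat this as illustrative
only:

static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
{
	/* TTM has already individualized the resv at this point, so this
	 * trylock takes the BO's private resv rather than the shared VM
	 * resv and is expected to succeed without sleeping.
	 */
	if (dma_resv_trylock(ttm_bo->base.resv)) {
		ttm_bo_set_bulk_move(ttm_bo, NULL);
		dma_resv_unlock(ttm_bo->base.resv);
	}
}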

/Thomas



> 
> Matt
> 
> > [  245.565723] INFO: task kworker/14:1:130 blocked for more than 122 seconds.
> > [  245.566642]       Tainted: P        W  OE      6.3.0-1-drm-xe-next-git-g48a7045a0879-dirty #6
> > [  245.567413] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > [  245.568207] task:kworker/14:1    state:D stack:0     pid:130   ppid:2      flags:0x00004000
> > [  245.568216] Workqueue: events __guc_engine_fini_async [xe]
> > [  245.568392] Call Trace:
> > [  245.568394]  <TASK>
> > [  245.568398]  __schedule+0x443/0x1400
> > [  245.568408]  ? check_preempt_curr+0x5e/0x70
> > [  245.568415]  ? try_to_wake_up+0x263/0x610
> > [  245.568421]  schedule+0x5e/0xd0
> > [  245.568426]  schedule_preempt_disabled+0x15/0x30
> > [  245.568431]  __ww_mutex_lock.constprop.0+0x539/0x940
> > [  245.568439]  ttm_eu_reserve_buffers+0x1f4/0x2f0 [ttm bea26c7978113683d7615187c2f4d33b4f3ad4a6]
> > [  245.568462]  xe_vm_lock+0xe3/0x110 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.568653]  xe_lrc_finish+0x62/0x130 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.568827]  xe_engine_fini+0x2f/0x90 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.568989]  process_one_work+0x1c4/0x3d0
> > [  245.568996]  worker_thread+0x51/0x390
> > [  245.569012]  ? __pfx_worker_thread+0x10/0x10
> > [  245.569018]  kthread+0xdb/0x110
> > [  245.569022]  ? __pfx_kthread+0x10/0x10
> > [  245.569026]  ret_from_fork+0x29/0x50
> > [  245.569036]  </TASK>
> > [  245.569051] INFO: task kworker/14:3:488 blocked for more than 122 seconds.
> > [  245.569963]       Tainted: P        W  OE      6.3.0-1-drm-xe-next-git-g48a7045a0879-dirty #6
> > [  245.570782] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > [  245.571592] task:kworker/14:3    state:D stack:0     pid:488   ppid:2      flags:0x00004000
> > [  245.571599] Workqueue: events __guc_engine_fini_async [xe]
> > [  245.571768] Call Trace:
> > [  245.571769]  <TASK>
> > [  245.571772]  __schedule+0x443/0x1400
> > [  245.571777]  ? load_balance+0x18d/0xe20
> > [  245.571785]  schedule+0x5e/0xd0
> > [  245.571790]  schedule_preempt_disabled+0x15/0x30
> > [  245.571794]  __ww_mutex_lock.constprop.0+0x539/0x940
> > [  245.571802]  ttm_eu_reserve_buffers+0x1f4/0x2f0 [ttm bea26c7978113683d7615187c2f4d33b4f3ad4a6]
> > [  245.571823]  xe_vm_lock+0xe3/0x110 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.572012]  xe_lrc_finish+0x62/0x130 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.572185]  xe_engine_fini+0x2f/0x90 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.572378]  process_one_work+0x1c4/0x3d0
> > [  245.572385]  worker_thread+0x51/0x390
> > [  245.572391]  ? __pfx_worker_thread+0x10/0x10
> > [  245.572396]  kthread+0xdb/0x110
> > [  245.572400]  ? __pfx_kthread+0x10/0x10
> > [  245.572404]  ret_from_fork+0x29/0x50
> > [  245.572412]  </TASK>
> > [  245.572552] INFO: task kmscub:traceq0:8581 blocked for more than 122 seconds.
> > [  245.573498]       Tainted: P        W  OE      6.3.0-1-drm-xe-next-git-g48a7045a0879-dirty #6
> > [  245.574336] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > [  245.575151] task:kmscub:traceq0  state:D stack:0     pid:8581  ppid:8170   flags:0x00004002
> > [  245.575158] Call Trace:
> > [  245.575159]  <TASK>
> > [  245.575161]  __schedule+0x443/0x1400
> > [  245.575167]  ? asm_sysvec_call_function_single+0x1a/0x20
> > [  245.575174]  schedule+0x5e/0xd0
> > [  245.575179]  schedule_preempt_disabled+0x15/0x30
> > [  245.575183]  __ww_mutex_lock.constprop.0+0x539/0x940
> > [  245.575191]  ttm_eu_reserve_buffers+0x1f4/0x2f0 [ttm bea26c7978113683d7615187c2f4d33b4f3ad4a6]
> > [  245.575212]  xe_bo_lock+0xaf/0xe0 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.575373]  xe_gem_object_free+0xb4/0x100 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.575532]  xe_vma_destroy_late+0xc4/0xf0 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.575726]  xe_vm_close_and_put+0x191/0x490 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.575913]  ? xa_find+0x92/0x100
> > [  245.575921]  xe_file_close+0x196/0x200 [xe 35c5fcb454ded180c3570d3957d73eb524fe3f0d]
> > [  245.576081]  drm_file_free+0x219/0x270
> > [  245.576088]  drm_release_noglobal+0x1f/0x70
> > [  245.576093]  __fput+0x86/0x250
> > [  245.576099]  task_work_run+0x5a/0x90
> > [  245.576104]  do_exit+0x342/0xaf0
> > [  245.576109]  ? __futex_unqueue+0x29/0x40
> > [  245.576115]  ? futex_unqueue+0x3c/0x60
> > [  245.576121]  do_group_exit+0x31/0x80
> > [  245.576125]  get_signal+0x9a5/0x9e0
> > [  245.576133]  arch_do_signal_or_restart+0x3e/0x270
> > [  245.576141]  exit_to_user_mode_prepare+0x185/0x1e0
> > [  245.576148]  syscall_exit_to_user_mode+0x1b/0x40
> > [  245.576155]  do_syscall_64+0x6c/0x90
> > [  245.576161]  ? exc_page_fault+0x7c/0x180
> > [  245.576166]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
> > [  245.576174] RIP: 0033:0x7f8956f44f0e
> > [  245.576179] RSP: 002b:00007f892b7fdcb0 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
> > [  245.576184] RAX: fffffffffffffe00 RBX: 0000000000000000 RCX: 00007f8956f44f0e
> > [  245.576187] RDX: 0000000000000000 RSI: 0000000000000189 RDI: 000055c8c22459c8
> > [  245.576190] RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000ffffffff
> > [  245.576192] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
> > [  245.576194] R13: 000055c8c2245978 R14: 0000000000000000 R15: 000055c8c22459c8
> > [  245.576201]  </TASK>


