[PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

Christian König christian.koenig at amd.com
Mon Nov 6 13:33:57 UTC 2023


On 06.11.23 at 08:56, Tatsuyuki Ishi wrote:
>
>> On Oct 31, 2023, at 23:07, Christian König <christian.koenig at amd.com> 
>> wrote:
>>
>> On 31.10.23 at 14:59, Bas Nieuwenhuizen wrote:
>>>
>>>
>>> On Tue, Oct 31, 2023 at 2:57 PM Christian König 
>>> <christian.koenig at amd.com> wrote:
>>>
>>>     On 31.10.23 at 14:40, Tatsuyuki Ishi wrote:
>>>     > The current amdgpu_gem_va_update_vm only tries to perform
>>>     > updates for the BO specified in the GEM ioctl; however, when a
>>>     > binding is split, the adjacent bindings also need to be updated.
>>>     > Such updates currently end up getting deferred until the next
>>>     > submission, which causes stalls.
>>>
>>>     Yeah, that is a necessity. The hardware simply doesn't support
>>>     what you try to do here in all cases.
>>>
>>>
>>> What can the hardware not do here? Is this just needing to wait for 
>>> TLB flushes before we can free pagetables? Can we just delay that?
>>
>> On some hardware generations (especially Navi1x, but also everything 
>> older than Polaris) you can't invalidate the TLB while it is in use.
>>
>> For Polaris and older it just means that you don't have a guarantee 
>> that the shader can't access the memory any more. So delaying the 
>> free operation helps here.
>>
>> But for Navi1x it's a workaround for a hardware bug. If you try to 
>> invalidate the TLB while it is in use you can potentially trigger 
>> memory accesses to random addresses.
>>
>> That's why we still delay TLB invalidations to the next CS and use a 
>> new VMID for each submission instead of invalidating the old one.
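Roughly, the rule the VMID allocator has to enforce can be modeled like this. This is a simplified stand-alone sketch with made-up names (`vmid_model`, `vmid_can_reuse`), not the actual amdgpu_vmid.c code:

```c
#include <stdbool.h>
#include <stdint.h>

/* Simplified model of the constraint described above; the names are
 * invented and the real logic lives in amdgpu_vmid.c. */

struct vmid_model {
	uint64_t flushed_seq; /* page-table update generation this VMID last flushed */
	bool in_use;          /* the engine may still translate through this VMID */
};

/* A flush is needed when page-table updates happened since the last
 * flush for this VMID (modeled here by a simple sequence counter). */
static bool vmid_needs_flush(const struct vmid_model *id, uint64_t tlb_seq)
{
	return id->flushed_seq != tlb_seq;
}

/* Reuse is safe if no flush is needed, or if the flush can actually be
 * issued: either the hardware tolerates flushing an in-use TLB
 * (concurrent_flush), or the VMID has gone idle. */
static bool vmid_can_reuse(const struct vmid_model *id, uint64_t tlb_seq,
			   bool concurrent_flush)
{
	if (!vmid_needs_flush(id, tlb_seq))
		return true;
	return concurrent_flush || !id->in_use;
}
```

On hardware without concurrent flush support (Navi1x in particular), a busy VMID with stale translations simply cannot be reused, which is why each submission grabs a fresh or idle VMID instead.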
>
> Thanks for the information. I looked into the VMID allocation logic 
> and I see that if concurrent_flush is false, then we defer any flush 
> (or VMID reuse that requires a flush) until that VMID is idle.
>
> What patch #3 ends up doing is just performing the PT update right 
> away. Any required TLB update is deferred by amdgpu_vm_update_range 
> through the increment of tlb_seq, and amdgpu_vmid.c is responsible for 
> doing the actual TLB flush in a manner that does not trigger the bug.
>
> Can you confirm that this would be fine for the current hardware?

Yeah, that should work. I'm just thinking about the UAPI a bit; we should 
probably improve this to use something like a drm_syncobj instead of 
just a flag, to be future proof.

Christian.
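For reference, the drain pattern patch #3 uses for the new dirty list (drop the status lock around each potentially sleeping BO update, then re-take it and re-read the list head) looks roughly like this in isolation. This is a stand-alone sketch with invented names, using a pthread mutex in place of the kernel spinlock:

```c
#include <pthread.h>
#include <stddef.h>

/* Stand-alone sketch of the drain loop in amdgpu_gem_va_update_vm: the
 * list is only touched under the lock, but the per-entry update (which
 * may sleep in the real driver) runs with the lock dropped.  The direct
 * pop below stands in for the state-list move that amdgpu_vm_bo_update
 * performs internally. */

struct node {
	struct node *next;
	int updated;
};

struct dirty_list {
	pthread_mutex_t lock; /* stands in for vm->status_lock */
	struct node *head;
};

static int update_entry(struct node *n) /* may sleep in the real driver */
{
	n->updated = 1;
	return 0;
}

static int drain_dirty(struct dirty_list *l)
{
	int r;

	pthread_mutex_lock(&l->lock);
	while (l->head) {
		struct node *n = l->head;

		l->head = n->next; /* real code: entry moves to another state list */
		pthread_mutex_unlock(&l->lock);

		r = update_entry(n);
		if (r)
			return r;
		pthread_mutex_lock(&l->lock);
	}
	pthread_mutex_unlock(&l->lock);
	return 0;
}
```

Because each iteration removes the entry it is about to update before dropping the lock, the loop makes forward progress even if other threads add to the list concurrently.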

>
> Tatsuyuki.
>
>>
>> I'm currently working on changing that for Navi2x and newer (maybe 
>> Vega as well), but this is something you can really only do on some 
>> hw generations after validating that it works.
>>
>> Regards,
>> Christian.
>>
>>>
>>>
>>>     So this approach won't work in general.
>>>
>>>     Regards,
>>>     Christian.
>>>
>>>     >
>>>     > Introduce a new state "dirty", shared between per-VM BOs and
>>>     traditional
>>>     > BOs, containing all BOs that have pending updates in `invalids`.
>>>     > amdgpu_gem_va_update_vm will now simply flush any pending
>>>     updates for BOs
>>>     > in the dirty state.
>>>     >
>>>     > Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki at gmail.com>
>>>     > ---
>>>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>>>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
>>>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>>>     >   3 files changed, 63 insertions(+), 24 deletions(-)
>>>     >
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>     > index a1b15d0d6c48..01d3a97248b0 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>     > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>>>     >    * vital here, so they are not reported back to userspace.
>>>     >    */
>>>     >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>>     > -                                 struct amdgpu_vm *vm,
>>>     > -                                 struct amdgpu_bo_va *bo_va,
>>>     > -                                 uint32_t operation)
>>>     > +                                 struct amdgpu_vm *vm)
>>>     >   {
>>>     > +     struct amdgpu_bo_va *bo_va;
>>>     >       int r;
>>>     >
>>>     >       if (!amdgpu_vm_ready(vm))
>>>     > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>>     >       if (r)
>>>     >               goto error;
>>>     >
>>>     > -     if (operation == AMDGPU_VA_OP_MAP ||
>>>     > -         operation == AMDGPU_VA_OP_REPLACE) {
>>>     > +     spin_lock(&vm->status_lock);
>>>     > +     while (!list_empty(&vm->dirty)) {
>>>     > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>>>     > +                                      base.vm_status);
>>>     > +             spin_unlock(&vm->status_lock);
>>>     > +
>>>     >               r = amdgpu_vm_bo_update(adev, bo_va, false);
>>>     >               if (r)
>>>     >                       goto error;
>>>     > +             spin_lock(&vm->status_lock);
>>>     >       }
>>>     > +     spin_unlock(&vm->status_lock);
>>>     >
>>>     >       r = amdgpu_vm_update_pdes(adev, vm, false);
>>>     >
>>>     > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>>     >               break;
>>>     >       }
>>>     >       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
>>>     > -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>>>     > -                                     args->operation);
>>>     > +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>>>     >
>>>     >   error:
>>>     >       drm_exec_fini(&exec);
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>     > index dd6f72e2a1d6..01d31891cd05 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>     > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evict
>>>     >       spin_unlock(&vm_bo->vm->status_lock);
>>>     >   }
>>>     >
>>>     > +/**
>>>     > + * amdgpu_vm_bo_dirty - vm_bo is dirty
>>>     > + *
>>>     > + * @vm_bo: vm_bo which is dirty
>>>     > + *
>>>     > + * State for normal and per VM BOs that are not moved, but have new entries in
>>>     > + * bo_va->invalids.
>>>     > + */
>>>     > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
>>>     > +{
>>>     > +     spin_lock(&vm_bo->vm->status_lock);
>>>     > +     list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
>>>     > +     spin_unlock(&vm_bo->vm->status_lock);
>>>     > +}
>>>     > +
>>>     >   /**
>>>     >    * amdgpu_vm_bo_moved - vm_bo is moved
>>>     >    *
>>>     > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
>>>     >       list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
>>>     >               amdgpu_vm_bo_get_memory(bo_va, stats);
>>>     >
>>>     > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
>>>     > +             amdgpu_vm_bo_get_memory(bo_va, stats);
>>>     > +
>>>     >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
>>>     >               amdgpu_vm_bo_get_memory(bo_va, stats);
>>>     >
>>>     > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>>     >                       dma_resv_unlock(resv);
>>>     >               spin_lock(&vm->status_lock);
>>>     >       }
>>>     > +
>>>     > +     while (!list_empty(&vm->dirty)) {
>>>     > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>>>     > +                                      base.vm_status);
>>>     > +             spin_unlock(&vm->status_lock);
>>>     > +
>>>     > +             r = amdgpu_vm_bo_update(adev, bo_va, false);
>>>     > +             if (r)
>>>     > +                     return r;
>>>     > +             spin_lock(&vm->status_lock);
>>>     > +     }
>>>     >       spin_unlock(&vm->status_lock);
>>>     >
>>>     >       return 0;
>>>     > @@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>>>     >                                   struct amdgpu_bo_va_mapping *mapping)
>>>     >   {
>>>     >       struct amdgpu_vm *vm = bo_va->base.vm;
>>>     > -     struct amdgpu_bo *bo = bo_va->base.bo;
>>>     >
>>>     >       mapping->bo_va = bo_va;
>>>     >       list_add(&mapping->list, &bo_va->invalids);
>>>     >       amdgpu_vm_it_insert(mapping, &vm->va);
>>>     > +     if (!bo_va->base.moved)
>>>     > +             amdgpu_vm_bo_dirty(&bo_va->base);
>>>     >
>>>     >       if (mapping->flags & AMDGPU_PTE_PRT)
>>>     >               amdgpu_vm_prt_get(adev);
>>>     >
>>>     > -     if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>>     > -         !bo_va->base.moved) {
>>>     > -             amdgpu_vm_bo_moved(&bo_va->base);
>>>     > -     }
>>>     >       trace_amdgpu_vm_bo_map(bo_va, mapping);
>>>     >   }
>>>     >
>>>     >
>>>     > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>>     >                       before->flags = tmp->flags;
>>>     >                       before->bo_va = tmp->bo_va;
>>>     >                       list_add(&before->list, &tmp->bo_va->invalids);
>>>     > +                     if (!tmp->bo_va->base.moved)
>>>     > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>>     >               }
>>>     >
>>>     >               /* Remember mapping split at the end */
>>>     > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>>     >                       after->flags = tmp->flags;
>>>     >                       after->bo_va = tmp->bo_va;
>>>     >                       list_add(&after->list, &tmp->bo_va->invalids);
>>>     > +                     if (!tmp->bo_va->base.moved)
>>>     > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>>     >               }
>>>     >
>>>     >               list_del(&tmp->list);
>>>     > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>>     >
>>>     >       /* Insert partial mapping before the range */
>>>     >       if (!list_empty(&before->list)) {
>>>     > -             struct amdgpu_bo *bo = before->bo_va->base.bo;
>>>     > -
>>>     >               amdgpu_vm_it_insert(before, &vm->va);
>>>     >               if (before->flags & AMDGPU_PTE_PRT)
>>>     >                       amdgpu_vm_prt_get(adev);
>>>     > -
>>>     > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>>     > -                 !before->bo_va->base.moved)
>>>     > -                     amdgpu_vm_bo_moved(&before->bo_va->base);
>>>     >       } else {
>>>     >               kfree(before);
>>>     >       }
>>>     >
>>>     >       /* Insert partial mapping after the range */
>>>     >       if (!list_empty(&after->list)) {
>>>     > -             struct amdgpu_bo *bo = after->bo_va->base.bo;
>>>     > -
>>>     >               amdgpu_vm_it_insert(after, &vm->va);
>>>     >               if (after->flags & AMDGPU_PTE_PRT)
>>>     >                       amdgpu_vm_prt_get(adev);
>>>     > -
>>>     > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>>     > -                 !after->bo_va->base.moved)
>>>     > -                     amdgpu_vm_bo_moved(&after->bo_va->base);
>>>     >       } else {
>>>     >               kfree(after);
>>>     >       }
>>>     > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>>>     >       INIT_LIST_HEAD(&vm->evicted);
>>>     >       INIT_LIST_HEAD(&vm->relocated);
>>>     >       INIT_LIST_HEAD(&vm->moved);
>>>     > +     INIT_LIST_HEAD(&vm->dirty);
>>>     >       INIT_LIST_HEAD(&vm->idle);
>>>     >       INIT_LIST_HEAD(&vm->invalidated);
>>>     >       spin_lock_init(&vm->status_lock);
>>>     > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>>     >   {
>>>     >       struct amdgpu_bo_va *bo_va, *tmp;
>>>     >       u64 total_idle = 0;
>>>     > +     u64 total_dirty = 0;
>>>     >       u64 total_relocated = 0;
>>>     >       u64 total_moved = 0;
>>>     >       u64 total_invalidated = 0;
>>>     >       u64 total_done = 0;
>>>     >       unsigned int total_idle_objs = 0;
>>>     > +     unsigned int total_dirty_objs = 0;
>>>     >       unsigned int total_relocated_objs = 0;
>>>     >       unsigned int total_moved_objs = 0;
>>>     >       unsigned int total_invalidated_objs = 0;
>>>     > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>>     >       total_idle_objs = id;
>>>     >       id = 0;
>>>     >
>>>     > +     seq_puts(m, "\tDirty BOs:\n");
>>>     > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
>>>     > +             if (!bo_va->base.bo)
>>>     > +                     continue;
>>>     > +             total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
>>>     > +     }
>>>     > +     total_dirty_objs = id;
>>>     > +     id = 0;
>>>     > +
>>>     >       seq_puts(m, "\tRelocated BOs:\n");
>>>     >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
>>>     >               if (!bo_va->base.bo)
>>>     > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>>     >
>>>     >       seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
>>>     >                  total_idle_objs);
>>>     > +     seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n", total_dirty,
>>>     > +                total_dirty_objs);
>>>     >       seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
>>>     >                  total_relocated_objs);
>>>     >       seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>     > index d9ab97eabda9..f91d4fcf80b8 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>     > @@ -276,6 +276,9 @@ struct amdgpu_vm {
>>>     >       /* per VM BOs moved, but not yet updated in the PT */
>>>     >       struct list_head        moved;
>>>     >
>>>     > +     /* normal and per VM BOs that are not moved, but have new PT entries */
>>>     > +     struct list_head        dirty;
>>>     > +
>>>     >       /* All BOs of this VM not currently in the state machine */
>>>     >       struct list_head        idle;
>>>     >
>>>     >
>>>
>>
>
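As context for the new list: the per-BO state machine in amdgpu_vm.c keeps each BO on exactly one status list at a time, and every transition is a single list_move under vm->status_lock; the patch just adds "dirty" as one more state. That invariant can be modeled in isolation like this (a stand-alone sketch with invented names and the lock elided; only the state names mirror the real lists):

```c
#include <stddef.h>

/* Model of the state machine the patch extends: a BO lives on exactly
 * one status list, and a transition unlinks it from its current list
 * and pushes it onto the target list (in the kernel this is a single
 * list_move under vm->status_lock). */

enum bo_state { BO_EVICTED, BO_RELOCATED, BO_MOVED, BO_DIRTY, BO_IDLE, BO_NSTATES };

struct bo_model {
	struct bo_model *next; /* singly linked here for brevity */
	enum bo_state state;
};

struct vm_model {
	struct bo_model *lists[BO_NSTATES];
};

static void bo_set_state(struct vm_model *vm, struct bo_model *bo,
			 enum bo_state to)
{
	struct bo_model **pp = &vm->lists[bo->state];

	while (*pp != bo) /* unlink from the current state list */
		pp = &(*pp)->next;
	*pp = bo->next;

	bo->next = vm->lists[to]; /* push onto the target state list */
	vm->lists[to] = bo;
	bo->state = to;
}
```

Because membership in a list *is* the state, code like amdgpu_gem_va_update_vm can find all work of one kind by draining one list, which is exactly what the new dirty list enables for pending mapping updates.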

