[PATCH 1/2] [WIP] drm/ttm: add waiter list to prevent out-of-order allocation
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Jan 26 12:43:42 UTC 2018
> After my investigation, this issue should be a defect of the TTM design
> itself, which breaks scheduling balance.
Yeah, but again: this is intended design that we can't change easily.
Regards,
Christian.
On 26.01.2018 at 13:36, Zhou, David (ChunMing) wrote:
> I am off work, so I am replying from my phone; the format may not be plain text.
>
> back to topic itself:
> the problem indeed happens on the amdgpu driver: someone reported to me
> that when an application runs two instances, their performance differs.
> I also reproduced the issue with a unit test (bo_eviction_test). They
> always think our scheduler isn't working as expected.
>
> After my investigation, this issue should be a defect of the TTM design
> itself, which breaks scheduling balance.
>
> Further, if we run containers on our GPU, container A could get a high
> score while container B gets a low score with the same benchmark.
>
> So this is a bug that we need to fix.
>
> Regards,
> David Zhou
>
> Sent from my Smartisan Nut Pro
>
> Christian König <ckoenig.leichtzumerken at gmail.com> wrote on 26.01.2018
> at 6:31 PM:
>
> On 26.01.2018 at 11:22, Chunming Zhou wrote:
> > there is a scheduling balance issue around get_node, like:
> > a. process A allocates all of the memory and uses it for submissions.
> > b. process B tries to allocate memory and has to wait for process A's
> > BO to become idle during eviction.
> > c. process A completes its job; process B's eviction puts process A's
> > BO node back, but in the meantime process C comes in to allocate a BO,
> > gets a node directly and successfully, and does its submission;
> > process B then waits again for process C's BO to become idle.
> > d. repeating the above steps, process B can be delayed much longer.
> >
> > a later allocation must not get ahead of an earlier one for the same place.
>
> Again NAK to the whole approach.
>
> At least with amdgpu the problem you described above never occurs,
> because evictions are pipelined operations. We only ever block waiting
> for deleted regions to become free.
>
> But independent of that, incoming memory requests that arrive while we
> make room through eviction are intended to be served first.
>
> Changing that is certainly a no-go, because it would favor memory-hungry
> applications over small clients.
>
> Regards,
> Christian.
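
To make "pipelined evictions" concrete: the buffer that takes over an
evicted range does not CPU-wait for the eviction copy, it just inherits
the manager's move fence, along the lines of the existing
ttm_bo_add_move_fence() referenced in the hunk below. A simplified
sketch, illustrative only and not the verbatim kernel code:

static int example_add_move_fence(struct ttm_buffer_object *bo,
                                  struct ttm_mem_type_manager *man)
{
        struct dma_fence *fence;
        int ret;

        /* Fence of the last pipelined move/eviction in this domain. */
        spin_lock(&man->move_lock);
        fence = dma_fence_get(man->move);
        spin_unlock(&man->move_lock);

        if (!fence)
                return 0;

        /*
         * Make the new user of the range depend on the eviction copy on
         * the GPU timeline instead of blocking the CPU here.
         */
        ret = reservation_object_reserve_shared(bo->resv);
        if (ret) {
                dma_fence_put(fence);
                return ret;
        }
        reservation_object_add_shared_fence(bo->resv, fence);

        dma_fence_put(bo->moving);
        bo->moving = fence;
        return 0;
}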
>
> >
> > Change-Id: I3daa892e50f82226c552cc008a29e55894a98f18
> > Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
> > ---
> > drivers/gpu/drm/ttm/ttm_bo.c | 69 +++++++++++++++++++++++++++++++++++++++--
> > include/drm/ttm/ttm_bo_api.h | 7 +++++
> > include/drm/ttm/ttm_bo_driver.h | 7 +++++
> > 3 files changed, 80 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index d33a6bb742a1..558ec2cf465d 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -841,6 +841,58 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
> > return 0;
> > }
> >
> > +static void ttm_man_init_waiter(struct ttm_bo_waiter *waiter,
> > + struct ttm_buffer_object *bo,
> > + const struct ttm_place *place)
> > +{
> > + waiter->tbo = bo;
> > + memcpy((void *)&waiter->place, (void *)place, sizeof(*place));
> > + INIT_LIST_HEAD(&waiter->list);
> > +}
> > +
> > +static void ttm_man_add_waiter(struct ttm_mem_type_manager *man,
> > + struct ttm_bo_waiter *waiter)
> > +{
> > + if (!waiter)
> > + return;
> > + spin_lock(&man->wait_lock);
> > + list_add_tail(&waiter->list, &man->waiter_list);
> > + spin_unlock(&man->wait_lock);
> > +}
> > +
> > +static void ttm_man_del_waiter(struct ttm_mem_type_manager *man,
> > + struct ttm_bo_waiter *waiter)
> > +{
> > + if (!waiter)
> > + return;
> > + spin_lock(&man->wait_lock);
> > + if (!list_empty(&waiter->list))
> > + list_del(&waiter->list);
> > + spin_unlock(&man->wait_lock);
> > + kfree(waiter);
> > +}
> > +
> > +int ttm_man_check_bo(struct ttm_mem_type_manager *man,
> > + struct ttm_buffer_object *bo,
> > + const struct ttm_place *place)
> > +{
> > + struct ttm_bo_waiter *waiter, *tmp;
> > +
> > + spin_lock(&man->wait_lock);
> > + list_for_each_entry_safe(waiter, tmp, &man->waiter_list, list) {
> > + if ((bo != waiter->tbo) &&
> > + ((place->fpfn >= waiter->place.fpfn &&
> > + place->fpfn <= waiter->place.lpfn) ||
> > + (place->lpfn <= waiter->place.lpfn && place->lpfn >=
> > + waiter->place.fpfn)))
> > + goto later_bo;
> > + }
> > + spin_unlock(&man->wait_lock);
> > + return true;
> > +later_bo:
> > + spin_unlock(&man->wait_lock);
> > + return false;
> > +}
> > /**
> > * Repeatedly evict memory from the LRU for @mem_type until we create enough
> > * space, or we've evicted everything and there isn't enough space.
> > @@ -853,17 +905,26 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
> > {
> > struct ttm_bo_device *bdev = bo->bdev;
> > struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> > + struct ttm_bo_waiter waiter;
> > int ret;
> >
> > + ttm_man_init_waiter(&waiter, bo, place);
> > + ttm_man_add_waiter(man, &waiter);
> > do {
> > ret = (*man->func->get_node)(man, bo, place, mem);
> > - if (unlikely(ret != 0))
> > + if (unlikely(ret != 0)) {
> > + ttm_man_del_waiter(man, &waiter);
> > return ret;
> > - if (mem->mm_node)
> > + }
> > + if (mem->mm_node) {
> > + ttm_man_del_waiter(man, &waiter);
> > break;
> > + }
> > ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
> > - if (unlikely(ret != 0))
> > + if (unlikely(ret != 0)) {
> > + ttm_man_del_waiter(man, &waiter);
> > return ret;
> > + }
> > } while (1);
> > mem->mem_type = mem_type;
> > return ttm_bo_add_move_fence(bo, man, mem);
> > @@ -1450,6 +1511,8 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
> > man->use_io_reserve_lru = false;
> > mutex_init(&man->io_reserve_mutex);
> > spin_lock_init(&man->move_lock);
> > + spin_lock_init(&man->wait_lock);
> > + INIT_LIST_HEAD(&man->waiter_list);
> > INIT_LIST_HEAD(&man->io_reserve_lru);
> >
> > ret = bdev->driver->init_mem_type(bdev, type, man);
> > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> > index 2cd025c2abe7..0fce4dbd02e7 100644
> > --- a/include/drm/ttm/ttm_bo_api.h
> > +++ b/include/drm/ttm/ttm_bo_api.h
> > @@ -40,6 +40,7 @@
> > #include <linux/mm.h>
> > #include <linux/bitmap.h>
> > #include <linux/reservation.h>
> > +#include <drm/ttm/ttm_placement.h>
> >
> > struct ttm_bo_device;
> >
> > @@ -232,6 +233,12 @@ struct ttm_buffer_object {
> > struct mutex wu_mutex;
> > };
> >
> > +struct ttm_bo_waiter {
> > + struct ttm_buffer_object *tbo;
> > + struct ttm_place place;
> > + struct list_head list;
> > +};
> > +
> > /**
> > * struct ttm_bo_kmap_obj
> > *
> > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> > index 9b417eb2df20..dc6b8b4c9e06 100644
> > --- a/include/drm/ttm/ttm_bo_driver.h
> > +++ b/include/drm/ttm/ttm_bo_driver.h
> > @@ -293,6 +293,10 @@ struct ttm_mem_type_manager {
> > bool io_reserve_fastpath;
> > spinlock_t move_lock;
> >
> > + /* waiters in list */
> > + spinlock_t wait_lock;
> > + struct list_head waiter_list;
> > +
> > /*
> > * Protected by @io_reserve_mutex:
> > */
> > @@ -748,6 +752,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
> > struct ttm_mem_reg *mem,
> > struct ttm_operation_ctx *ctx);
> >
> > +int ttm_man_check_bo(struct ttm_mem_type_manager *man,
> > + struct ttm_buffer_object *bo,
> > + const struct ttm_place *place);
> > void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem);
> > void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo,
> > struct ttm_mem_reg *mem);
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
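
For completeness, a rough sketch of how a driver's get_node() callback
could consult the new ttm_man_check_bo() helper; the actual hook-up is
not in this patch (presumably that is patch 2/2), so the function name
and wiring below are hypothetical illustration only:

/* Hypothetical example, not part of this patch. */
static int example_mgr_get_node(struct ttm_mem_type_manager *man,
                                struct ttm_buffer_object *bo,
                                const struct ttm_place *place,
                                struct ttm_mem_reg *mem)
{
        /*
         * Another BO is already queued waiting for an overlapping
         * range: report "no node yet" so ttm_bo_mem_force_space()
         * keeps evicting instead of letting this request jump ahead.
         */
        if (!ttm_man_check_bo(man, bo, place)) {
                mem->mm_node = NULL;
                return 0;
        }

        /* ... normal drm_mm / driver-specific allocation here ... */
        return 0;
}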