[PATCH RFC v4 13/16] drm, cgroup: Allow more aggressive memory reclaim

Kenny Ho y2kenny at gmail.com
Thu Aug 29 14:39:42 UTC 2019


Yes, and I think it has quite a lot of coupling with mm's page and
pressure mechanisms.  My current thought is to just copy the API but
have a separate implementation of "ttm_shrinker" and
"ttm_shrinker_control" or something like that.  I am certainly happy
to listen to additional feedbacks and suggestions.

Regards,
Kenny


On Thu, Aug 29, 2019 at 10:12 AM Koenig, Christian
<Christian.Koenig at amd.com> wrote:
>
> Yeah, that's also a really good idea as well.
>
> The problem with the shrinker API is that it only applies to system memory currently.
>
> So you won't have a distinction which domain you need to evict stuff from.
>
> Regards,
> Christian.
>
> Am 29.08.19 um 16:07 schrieb Kenny Ho:
>
> Thanks for the feedback Christian.  I am still digging into this one.  Daniel suggested leveraging the Shrinker API for the functionality of this commit in RFC v3 but I am still trying to figure it out how/if ttm fit with shrinker (though the idea behind the shrinker API seems fairly straightforward as far as I understand it currently.)
>
> Regards,
> Kenny
>
> On Thu, Aug 29, 2019 at 3:08 AM Koenig, Christian <Christian.Koenig at amd.com> wrote:
>>
>> Am 29.08.19 um 08:05 schrieb Kenny Ho:
>> > Allow DRM TTM memory manager to register a work_struct, such that, when
>> > a drmcgrp is under memory pressure, memory reclaiming can be triggered
>> > immediately.
>> >
>> > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
>> > Signed-off-by: Kenny Ho <Kenny.Ho at amd.com>
>> > ---
>> >   drivers/gpu/drm/ttm/ttm_bo.c    | 49 +++++++++++++++++++++++++++++++++
>> >   include/drm/drm_cgroup.h        | 16 +++++++++++
>> >   include/drm/ttm/ttm_bo_driver.h |  2 ++
>> >   kernel/cgroup/drm.c             | 30 ++++++++++++++++++++
>> >   4 files changed, 97 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> > index d7e3d3128ebb..72efae694b7e 100644
>> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> > @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type)
>> >   }
>> >   EXPORT_SYMBOL(ttm_bo_evict_mm);
>> >
>> > +static void ttm_bo_reclaim_wq(struct work_struct *work)
>> > +{
>> > +     struct ttm_operation_ctx ctx = {
>> > +             .interruptible = false,
>> > +             .no_wait_gpu = false,
>> > +             .flags = TTM_OPT_FLAG_FORCE_ALLOC
>> > +     };
>> > +     struct ttm_mem_type_manager *man =
>> > +         container_of(work, struct ttm_mem_type_manager, reclaim_wq);
>> > +     struct ttm_bo_device *bdev = man->bdev;
>> > +     struct dma_fence *fence;
>> > +     int mem_type;
>> > +     int ret;
>> > +
>> > +     for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
>> > +             if (&bdev->man[mem_type] == man)
>> > +                     break;
>> > +
>> > +     WARN_ON(mem_type >= TTM_NUM_MEM_TYPES);
>> > +     if (mem_type >= TTM_NUM_MEM_TYPES)
>> > +             return;
>> > +
>> > +     if (!drmcg_mem_pressure_scan(bdev, mem_type))
>> > +             return;
>> > +
>> > +     ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL);
>> > +     if (ret)
>> > +             return;
>> > +
>> > +     spin_lock(&man->move_lock);
>> > +     fence = dma_fence_get(man->move);
>> > +     spin_unlock(&man->move_lock);
>> > +
>> > +     if (fence) {
>> > +             ret = dma_fence_wait(fence, false);
>> > +             dma_fence_put(fence);
>> > +     }
>>
>> Why do you want to block for the fence here? That is a rather bad idea
>> and would break pipe-lining.
>>
>> Apart from that I don't think we should put that into TTM.
>>
>> Instead drmcg_register_device_mm() should get a function pointer which
>> is called from a work item when the group is under pressure.
>>
>> TTM can then provides the function which can be called, but the actually
>> registration is job of the device and not TTM.
>>
>> Regards,
>> Christian.
>>
>> > +
>> > +}
>> > +
>> >   int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>> >                       unsigned long p_size)
>> >   {
>> > @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>> >               INIT_LIST_HEAD(&man->lru[i]);
>> >       man->move = NULL;
>> >
>> > +     pr_err("drmcg %p type %d\n", bdev->ddev, type);
>> > +
>> > +     if (type <= TTM_PL_VRAM) {
>> > +             INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
>> > +             drmcg_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
>> > +     }
>> > +
>> >       return 0;
>> >   }
>> >   EXPORT_SYMBOL(ttm_bo_init_mm);
>> > @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
>> >               man = &bdev->man[i];
>> >               if (man->has_type) {
>> >                       man->use_type = false;
>> > +                     drmcg_unregister_device_mm(bdev->ddev, i);
>> > +                     cancel_work_sync(&man->reclaim_wq);
>> >                       if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
>> >                               ret = -EBUSY;
>> >                               pr_err("DRM memory manager type %d is not clean\n",
>> > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
>> > index c11df388fdf2..6d9707e1eb72 100644
>> > --- a/include/drm/drm_cgroup.h
>> > +++ b/include/drm/drm_cgroup.h
>> > @@ -5,6 +5,7 @@
>> >   #define __DRM_CGROUP_H__
>> >
>> >   #include <linux/cgroup_drm.h>
>> > +#include <linux/workqueue.h>
>> >   #include <drm/ttm/ttm_bo_api.h>
>> >   #include <drm/ttm/ttm_bo_driver.h>
>> >
>> > @@ -25,12 +26,17 @@ struct drmcg_props {
>> >       s64                     mem_bw_avg_bytes_per_us_default;
>> >
>> >       s64                     mem_highs_default[TTM_PL_PRIV+1];
>> > +
>> > +     struct work_struct      *mem_reclaim_wq[TTM_PL_PRIV];
>> >   };
>> >
>> >   #ifdef CONFIG_CGROUP_DRM
>> >
>> >   void drmcg_device_update(struct drm_device *device);
>> >   void drmcg_device_early_init(struct drm_device *device);
>> > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type,
>> > +             struct work_struct *wq);
>> > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type);
>> >   bool drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
>> >               size_t size);
>> >   void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
>> > @@ -53,6 +59,16 @@ static inline void drmcg_device_early_init(struct drm_device *device)
>> >   {
>> >   }
>> >
>> > +static inline void drmcg_register_device_mm(struct drm_device *dev,
>> > +             unsigned int type, struct work_struct *wq)
>> > +{
>> > +}
>> > +
>> > +static inline void drmcg_unregister_device_mm(struct drm_device *dev,
>> > +             unsigned int type)
>> > +{
>> > +}
>> > +
>> >   static inline void drmcg_try_chg_bo_alloc(struct drmcg *drmcg,
>> >               struct drm_device *dev, size_t size)
>> >   {
>> > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>> > index e1a805d65b83..529cef92bcf6 100644
>> > --- a/include/drm/ttm/ttm_bo_driver.h
>> > +++ b/include/drm/ttm/ttm_bo_driver.h
>> > @@ -205,6 +205,8 @@ struct ttm_mem_type_manager {
>> >        * Protected by @move_lock.
>> >        */
>> >       struct dma_fence *move;
>> > +
>> > +     struct work_struct reclaim_wq;
>> >   };
>> >
>> >   /**
>> > diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
>> > index 04fb9a398740..0ea7f0619e25 100644
>> > --- a/kernel/cgroup/drm.c
>> > +++ b/kernel/cgroup/drm.c
>> > @@ -804,6 +804,29 @@ void drmcg_device_early_init(struct drm_device *dev)
>> >   }
>> >   EXPORT_SYMBOL(drmcg_device_early_init);
>> >
>> > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type,
>> > +             struct work_struct *wq)
>> > +{
>> > +     if (dev == NULL || type >= TTM_PL_PRIV)
>> > +             return;
>> > +
>> > +     mutex_lock(&drmcg_mutex);
>> > +     dev->drmcg_props.mem_reclaim_wq[type] = wq;
>> > +     mutex_unlock(&drmcg_mutex);
>> > +}
>> > +EXPORT_SYMBOL(drmcg_register_device_mm);
>> > +
>> > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type)
>> > +{
>> > +     if (dev == NULL || type >= TTM_PL_PRIV)
>> > +             return;
>> > +
>> > +     mutex_lock(&drmcg_mutex);
>> > +     dev->drmcg_props.mem_reclaim_wq[type] = NULL;
>> > +     mutex_unlock(&drmcg_mutex);
>> > +}
>> > +EXPORT_SYMBOL(drmcg_unregister_device_mm);
>> > +
>> >   /**
>> >    * drmcg_try_chg_bo_alloc - charge GEM buffer usage for a device and cgroup
>> >    * @drmcg: the DRM cgroup to be charged to
>> > @@ -1013,6 +1036,13 @@ void drmcg_mem_track_move(struct ttm_buffer_object *old_bo, bool evict,
>> >
>> >               ddr->mem_bw_stats[DRMCG_MEM_BW_ATTR_BYTE_CREDIT]
>> >                       -= move_in_bytes;
>> > +
>> > +             if (dev->drmcg_props.mem_reclaim_wq[new_mem_type]
>> > +                     != NULL &&
>> > +                     ddr->mem_stats[new_mem_type] >
>> > +                             ddr->mem_highs[new_mem_type])
>> > +                     schedule_work(dev->
>> > +                             drmcg_props.mem_reclaim_wq[new_mem_type]);
>> >       }
>> >       mutex_unlock(&dev->drmcg_mutex);
>> >   }
>>
>


More information about the amd-gfx mailing list