[PATCH 1/3] drm/ttm: rework ttm_tt page limit v3

Wed Feb 3 12:18:26 UTC 2021

Am 03.02.21 um 12:26 schrieb Daniel Vetter:
> On Thu, Jan 28, 2021 at 02:16:02PM +0100, Christian König wrote:
>> TTM implements a rather extensive accounting of allocated memory.
>>
>> There are two reasons for this:
>> 1. It tries to block userspace allocating a huge number of very small
>>     BOs without accounting for the kmalloced memory.
>>
>> 2. Make sure we don't over allocate and run into an OOM situation
>>     during swapout while trying to handle the memory shortage.
>>
>> This is only partially a good idea. First of all it is perfectly
>> valid for an application to use all of system memory, limiting it to
>> 50% is not really acceptable.
>>
>> What we need to take care of is that the application is held
>> accountable for the memory it allocated. This is what control
>> mechanisms like memcg and the normal Linux page accounting already do.
>>
>> Making sure that we don't run into an OOM situation while trying to
>> cope with a memory shortage is still a good idea, but this is also
>> not very well implemented since it means another opportunity of
>> recursion from the driver back into TTM.
>>
>> So start to rework all of this by implementing a shrinker callback which
>> allows for TT object to be swapped out if necessary.
>>
>> v2: Switch from limit to shrinker callback.
>> v3: fix gfp mask handling, use atomic for swapable_pages, add debugfs
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c        |   4 +-
>>   drivers/gpu/drm/ttm/ttm_memory.c    |   7 +-
>>   drivers/gpu/drm/ttm/ttm_tt.c        | 111 ++++++++++++++++++++++++++--
>>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |   2 +-
>>   include/drm/ttm/ttm_bo_api.h        |   2 +-
>>   include/drm/ttm/ttm_tt.h            |   6 +-
>>   6 files changed, 117 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 20256797f3a6..643befc1a6f2 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -1219,7 +1219,7 @@ EXPORT_SYMBOL(ttm_bo_wait);
>>    * A buffer object shrink method that tries to swap out the first
>>    * buffer object on the bo_global::swap_lru list.
>>    */
>> -int ttm_bo_swapout(struct ttm_operation_ctx *ctx)
>> +int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>>   {
>>   	struct ttm_global *glob = &ttm_glob;
>>   	struct ttm_buffer_object *bo;
>> @@ -1302,7 +1302,7 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx)
>>   	if (bo->bdev->funcs->swap_notify)
>>   		bo->bdev->funcs->swap_notify(bo);
>>   
>> -	ret = ttm_tt_swapout(bo->bdev, bo->ttm);
>> +	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>>   out:
>>   
>>   	/**
>> diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
>> index a3bfbd9cea68..634a85c2dc4c 100644
>> --- a/drivers/gpu/drm/ttm/ttm_memory.c
>> +++ b/drivers/gpu/drm/ttm/ttm_memory.c
>> @@ -37,6 +37,7 @@
>>   #include <linux/slab.h>
>>   #include <linux/swap.h>
>>   #include <drm/ttm/ttm_pool.h>
>> +#include <drm/ttm/ttm_tt.h>
>>   
>>   #include "ttm_module.h"
>>   
>> @@ -276,9 +277,9 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
>>   
>>   	while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
>>   		spin_unlock(&glob->lock);
>> -		ret = ttm_bo_swapout(ctx);
>> +		ret = ttm_bo_swapout(ctx, GFP_KERNEL);
>>   		spin_lock(&glob->lock);
>> -		if (unlikely(ret != 0))
>> +		if (unlikely(ret < 0))
>>   			break;
>>   	}
>>   
>> @@ -453,6 +454,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
>>   			zone->name, (unsigned long long)zone->max_mem >> 10);
>>   	}
>>   	ttm_pool_mgr_init(glob->zone_kernel->max_mem/(2*PAGE_SIZE));
>> +	ttm_tt_mgr_init();
>>   	return 0;
>>   out_no_zone:
>>   	ttm_mem_global_release(glob);
>> @@ -466,6 +468,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
>>   
>>   	/* let the page allocator first stop the shrink work. */
>>   	ttm_pool_mgr_fini();
>> +	ttm_tt_mgr_fini();
>>   
>>   	flush_workqueue(glob->swap_queue);
>>   	destroy_workqueue(glob->swap_queue);
>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>> index 7782d5393c7c..b67795de228d 100644
>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>> @@ -38,6 +38,11 @@
>>   #include <drm/drm_cache.h>
>>   #include <drm/ttm/ttm_bo_driver.h>
>>   
>> +#include "ttm_module.h"
>> +
>> +static struct shrinker mm_shrinker;
>> +static atomic_long_t swapable_pages;
>> +
>>   /*
>>    * Allocates a ttm structure for the given BO.
>>    */
>> @@ -223,32 +228,41 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
>>   	return ret;
>>   }
>>   
>> -int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm)
>> +/**
>> + * ttm_tt_swapout - swap out tt object
>> + *
>> + * @bdev: TTM device structure.
>> + * @ttm: The struct ttm_tt.
>> + * @gfp_flags: Flags to use for memory allocation.
>> + *
>> + * Swapout a TT object to a shmem_file, return number of pages swapped out or
>> + * negative error code.
>> + */
>> +int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm,
>> +		   gfp_t gfp_flags)
>>   {
>> +	loff_t size = (loff_t)ttm->num_pages << PAGE_SHIFT;
>>   	struct address_space *swap_space;
>>   	struct file *swap_storage;
>>   	struct page *from_page;
>>   	struct page *to_page;
>> -	gfp_t gfp_mask;
>>   	int i, ret;
>>   
>> -	swap_storage = shmem_file_setup("ttm swap",
>> -					ttm->num_pages << PAGE_SHIFT,
>> -					0);
>> +	swap_storage = shmem_file_setup("ttm swap", size, 0);
>>   	if (IS_ERR(swap_storage)) {
>>   		pr_err("Failed allocating swap storage\n");
>>   		return PTR_ERR(swap_storage);
>>   	}
>>   
>>   	swap_space = swap_storage->f_mapping;
>> -	gfp_mask = mapping_gfp_mask(swap_space);
>> +	gfp_flags &= mapping_gfp_mask(swap_space);
>>   
>>   	for (i = 0; i < ttm->num_pages; ++i) {
>>   		from_page = ttm->pages[i];
>>   		if (unlikely(from_page == NULL))
>>   			continue;
>>   
>> -		to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask);
>> +		to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_flags);
>>   		if (IS_ERR(to_page)) {
>>   			ret = PTR_ERR(to_page);
>>   			goto out_err;
>> @@ -263,7 +277,7 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm)
>>   	ttm->swap_storage = swap_storage;
>>   	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
>>   
>> -	return 0;
>> +	return ttm->num_pages;
>>   
>>   out_err:
>>   	fput(swap_storage);
>> @@ -280,6 +294,8 @@ static void ttm_tt_add_mapping(struct ttm_device *bdev, struct ttm_tt *ttm)
>>   
>>   	for (i = 0; i < ttm->num_pages; ++i)
>>   		ttm->pages[i]->mapping = bdev->dev_mapping;
>> +
>> +	atomic_long_add(ttm->num_pages, &swapable_pages);
>>   }
>>   
>>   int ttm_tt_populate(struct ttm_device *bdev,
>> @@ -326,6 +342,8 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
>>   		(*page)->mapping = NULL;
>>   		(*page++)->index = 0;
>>   	}
>> +
>> +	atomic_long_sub(ttm->num_pages, &swapable_pages);
>>   }
>>   
>>   void ttm_tt_unpopulate(struct ttm_device *bdev,
>> @@ -341,3 +359,80 @@ void ttm_tt_unpopulate(struct ttm_device *bdev,
>>   		ttm_pool_free(&bdev->pool, ttm);
>>   	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
>>   }
>> +
>> +/* As long as pages are available make sure to release at least one */
>> +static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
>> +					  struct shrink_control *sc)
>> +{
>> +	struct ttm_operation_ctx ctx = {
>> +		.no_wait_gpu = false
>> +	};
>> +	int ret;
>> +
>> +	if (!(sc->gfp_mask & __GFP_FS))
>> +		return SHRINK_EMPTY;
> These two checks here still look like cargo cult to me. I thought the
> gfp_mask you're getting is for numa/zone-aware shrinking, which we're not
> doing. __GFP_FS in the shrinker is a bug.
>
> Maybe convert to WARN_ON to convince yourself, test, then remove? If you
> ever get __GFP_FS context in a shrinker lockdep will start screaming real
> fast :-)

WARN_ON work for me as well. I just couldn't find any code which would 
prevent that.

But I agree that it doesn't make much sense from the top level idea.

>
> With that addressed:
>
> Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>

Thanks,
Christian.

>
>> +
>> +	ret = ttm_bo_swapout(&ctx, GFP_NOFS);
>> +	return ret < 0 ? SHRINK_EMPTY : ret;
>> +}
>> +
>> +/* Return the number of pages available or SHRINK_EMPTY if we have none */
>> +static unsigned long ttm_tt_shrinker_count(struct shrinker *shrink,
>> +					   struct shrink_control *sc)
>> +{
>> +	unsigned long num_pages;
>> +
>> +	if (!(sc->gfp_mask & __GFP_FS))
>> +		return SHRINK_EMPTY;
>> +
>> +	num_pages = atomic_long_read(&swapable_pages);
>> +	return num_pages ? num_pages : SHRINK_EMPTY;
>> +}
>> +
>> +#ifdef CONFIG_DEBUG_FS
>> +
>> +/* Test the shrinker functions and dump the result */
>> +static int ttm_tt_debugfs_shrink_show(struct seq_file *m, void *data)
>> +{
>> +	struct shrink_control sc = { .gfp_mask = GFP_KERNEL };
>> +
>> +	fs_reclaim_acquire(GFP_KERNEL);
>> +	seq_printf(m, "%lu/%lu\n", ttm_tt_shrinker_count(&mm_shrinker, &sc),
>> +		   ttm_tt_shrinker_scan(&mm_shrinker, &sc));
>> +	fs_reclaim_release(GFP_KERNEL);
>> +
>> +	return 0;
>> +}
>> +DEFINE_SHOW_ATTRIBUTE(ttm_tt_debugfs_shrink);
>> +
>> +#endif
>> +
>> +
>> +
>> +/**
>> + * ttm_tt_mgr_init - register with the MM shrinker
>> + *
>> + * Register with the MM shrinker for swapping out BOs.
>> + */
>> +int ttm_tt_mgr_init(void)
>> +{
>> +#ifdef CONFIG_DEBUG_FS
>> +	debugfs_create_file("tt_shrink", 0400, ttm_debugfs_root, NULL,
>> +			    &ttm_tt_debugfs_shrink_fops);
>> +#endif
>> +
>> +	mm_shrinker.count_objects = ttm_tt_shrinker_count;
>> +	mm_shrinker.scan_objects = ttm_tt_shrinker_scan;
>> +	mm_shrinker.seeks = 1;
>> +	return register_shrinker(&mm_shrinker);
>> +}
>> +
>> +/**
>> + * ttm_tt_mgr_fini - unregister our MM shrinker
>> + *
>> + * Unregisters the MM shrinker.
>> + */
>> +void ttm_tt_mgr_fini(void)
>> +{
>> +	unregister_shrinker(&mm_shrinker);
>> +}
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
>> index b454d80c273e..710ba5169a74 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
>> @@ -1383,7 +1383,7 @@ static int vmw_pm_freeze(struct device *kdev)
>>   	vmw_execbuf_release_pinned_bo(dev_priv);
>>   	vmw_resource_evict_all(dev_priv);
>>   	vmw_release_device_early(dev_priv);
>> -	while (ttm_bo_swapout(&ctx) == 0);
>> +	while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
>>   	if (dev_priv->enable_fb)
>>   		vmw_fifo_resource_dec(dev_priv);
>>   	if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>> index 62734db0b421..1297a8fb7ccb 100644
>> --- a/include/drm/ttm/ttm_bo_api.h
>> +++ b/include/drm/ttm/ttm_bo_api.h
>> @@ -569,7 +569,7 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
>>   		  const char __user *wbuf, char __user *rbuf,
>>   		  size_t count, loff_t *f_pos, bool write);
>>   
>> -int ttm_bo_swapout(struct ttm_operation_ctx *ctx);
>> +int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
>>   
>>   /**
>>    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>> index 0020a0588985..cce57fb49e2c 100644
>> --- a/include/drm/ttm/ttm_tt.h
>> +++ b/include/drm/ttm/ttm_tt.h
>> @@ -135,7 +135,8 @@ void ttm_tt_destroy_common(struct ttm_device *bdev, struct ttm_tt *ttm);
>>    * Swap in a previously swap out ttm_tt.
>>    */
>>   int ttm_tt_swapin(struct ttm_tt *ttm);
>> -int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm);
>> +int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm,
>> +		   gfp_t gfp_flags);
>>   
>>   /**
>>    * ttm_tt_populate - allocate pages for a ttm
>> @@ -155,6 +156,9 @@ int ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_oper
>>    */
>>   void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm);
>>   
>> +int ttm_tt_mgr_init(void);
>> +void ttm_tt_mgr_fini(void);
>> +
>>   #if IS_ENABLED(CONFIG_AGP)
>>   #include <linux/agp_backend.h>
>>   
>> -- 
>> 2.25.1
>>