[PATCH RESEND v3 2/3] drm/ttm: Reduce the number of used allocation orders for TTM pages

Tue Apr 11 13:45:42 UTC 2023

On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote:
> Am 11.04.23 um 11:51 schrieb Daniel Vetter:
> > On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
> > > When swapping out, we will split multi-order pages both in order to
> > > move them to the swap-cache and to be able to return memory to the
> > > swap cache as soon as possible on a page-by-page basis.
> > > Reduce the page max order to the system PMD size, as we can then be nicer
> > > to the system and avoid splitting gigantic pages.
> > > 
> > > Looking forward to when we might be able to swap out PMD size folios
> > > without splitting, this will also be a benefit.
> > > 
> > > v2:
> > > - Include all orders up to the PMD size (Christian König)
> > > v3:
> > > - Avoid compilation errors for architectures with special PFN_SHIFTs
> > > 
> > > Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > Reviewed-by: Christian König <christian.koenig at amd.com>
> > Apparently this fails on ppc build testing. Please supply build fix asap
> > (or I guess we need to revert). I'm kinda not clear why this only showed
> > up when I merged the drm-misc-next pr into drm-next ...
> 
> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant
> on this particular platform.
> 
> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems
> to be a constant.
> 
> So how exactly can that here break?

There's some in-flight patches to rework MAX_ORDER and other things in
linux-next, maybe it's recent? If you check out linux-next then you need
to reapply the patch (since sfr reverted it).
-Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > ---
> > >   drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
> > >   1 file changed, 19 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> > > index dfce896c4bae..18c342a919a2 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > @@ -47,6 +47,11 @@
> > >   #include "ttm_module.h"
> > > +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
> > > +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
> > > +/* Some architectures have a weird PMD_SHIFT */
> > > +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
> > > +
> > >   /**
> > >    * struct ttm_pool_dma - Helper object for coherent DMA mappings
> > >    *
> > > @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
> > >   static atomic_long_t allocated_pages;
> > > -static struct ttm_pool_type global_write_combined[MAX_ORDER];
> > > -static struct ttm_pool_type global_uncached[MAX_ORDER];
> > > +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
> > > +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
> > > -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> > > -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> > > +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
> > > +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
> > >   static spinlock_t shrinker_lock;
> > >   static struct list_head shrinker_list;
> > > @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> > >   	else
> > >   		gfp_flags |= GFP_HIGHUSER;
> > > -	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
> > > +	for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
> > >   	     num_pages;
> > >   	     order = min_t(unsigned int, order, __fls(num_pages))) {
> > >   		struct ttm_pool_type *pt;
> > > @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> > >   	if (use_dma_alloc) {
> > >   		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > -			for (j = 0; j < MAX_ORDER; ++j)
> > > +			for (j = 0; j < TTM_DIM_ORDER; ++j)
> > >   				ttm_pool_type_init(&pool->caching[i].orders[j],
> > >   						   pool, i, j);
> > >   	}
> > > @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
> > >   	if (pool->use_dma_alloc) {
> > >   		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > -			for (j = 0; j < MAX_ORDER; ++j)
> > > +			for (j = 0; j < TTM_DIM_ORDER; ++j)
> > >   				ttm_pool_type_fini(&pool->caching[i].orders[j]);
> > >   	}
> > > @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
> > >   	unsigned int i;
> > >   	seq_puts(m, "\t ");
> > > -	for (i = 0; i < MAX_ORDER; ++i)
> > > +	for (i = 0; i < TTM_DIM_ORDER; ++i)
> > >   		seq_printf(m, " ---%2u---", i);
> > >   	seq_puts(m, "\n");
> > >   }
> > > @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
> > >   {
> > >   	unsigned int i;
> > > -	for (i = 0; i < MAX_ORDER; ++i)
> > > +	for (i = 0; i < TTM_DIM_ORDER; ++i)
> > >   		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
> > >   	seq_puts(m, "\n");
> > >   }
> > > @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> > >   {
> > >   	unsigned int i;
> > > +	BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
> > > +	BUILD_BUG_ON(TTM_DIM_ORDER < 1);
> > > +
> > >   	if (!page_pool_size)
> > >   		page_pool_size = num_pages;
> > >   	spin_lock_init(&shrinker_lock);
> > >   	INIT_LIST_HEAD(&shrinker_list);
> > > -	for (i = 0; i < MAX_ORDER; ++i) {
> > > +	for (i = 0; i < TTM_DIM_ORDER; ++i) {
> > >   		ttm_pool_type_init(&global_write_combined[i], NULL,
> > >   				   ttm_write_combined, i);
> > >   		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
> > > @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
> > >   {
> > >   	unsigned int i;
> > > -	for (i = 0; i < MAX_ORDER; ++i) {
> > > +	for (i = 0; i < TTM_DIM_ORDER; ++i) {
> > >   		ttm_pool_type_fini(&global_write_combined[i]);
> > >   		ttm_pool_type_fini(&global_uncached[i]);
> > > -- 
> > > 2.39.2
> > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch