[PATCH] drm/ttm: optimize the pool shrinker a bit

Daniel Vetter <daniel at ffwll.ch>
Thu Apr 8 11:31:54 UTC 2021


On Thu, Apr 08, 2021 at 01:17:32PM +0200, Christian König wrote:
> Am 08.04.21 um 13:08 schrieb Daniel Vetter:
> > On Thu, Apr 01, 2021 at 03:54:13PM +0200, Christian König wrote:
> > > Switch back to using a spinlock by moving the IOMMU unmap outside
> > > of the locked region.
> > > 
> > > Signed-off-by: Christian König <christian.koenig at amd.com>
> > > ---
> > >   drivers/gpu/drm/ttm/ttm_pool.c | 40 +++++++++++++++-------------------
> > >   include/linux/shrinker.h       |  1 +
> > >   mm/vmscan.c                    | 10 +++++++++
> > >   3 files changed, 29 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> > > index cb38b1a17b09..a8b4abe687ce 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > @@ -70,7 +70,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER];
> > >   static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> > >   static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> > > -static struct mutex shrinker_lock;
> > > +static spinlock_t shrinker_lock;
> > >   static struct list_head shrinker_list;
> > >   static struct shrinker mm_shrinker;
> > > @@ -263,9 +263,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
> > >   	spin_lock_init(&pt->lock);
> > >   	INIT_LIST_HEAD(&pt->pages);
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	list_add_tail(&pt->shrinker_list, &shrinker_list);
> > > -	mutex_unlock(&shrinker_lock);
> > > +	spin_unlock(&shrinker_lock);
> > >   }
> > >   /* Remove a pool_type from the global shrinker list and free all pages */
> > > @@ -273,9 +273,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
> > >   {
> > >   	struct page *p;
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	list_del(&pt->shrinker_list);
> > > -	mutex_unlock(&shrinker_lock);
> > > +	spin_unlock(&shrinker_lock);
> > >   	while ((p = ttm_pool_type_take(pt)))
> > >   		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> > > @@ -313,24 +313,19 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
> > >   static unsigned int ttm_pool_shrink(void)
> > >   {
> > >   	struct ttm_pool_type *pt;
> > > -	unsigned int num_freed;
> > >   	struct page *p;
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
> > > +	list_move_tail(&pt->shrinker_list, &shrinker_list);
> > > +	spin_unlock(&shrinker_lock);
> > >   	p = ttm_pool_type_take(pt);
> > > -	if (p) {
> > > -		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> > > -		num_freed = 1 << pt->order;
> > > -	} else {
> > > -		num_freed = 0;
> > > -	}
> > > -
> > > -	list_move_tail(&pt->shrinker_list, &shrinker_list);
> > > -	mutex_unlock(&shrinker_lock);
> > > +	if (!p)
> > > +		return 0;
> > > -	return num_freed;
> > > +	ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> > > +	return 1 << pt->order;
> > >   }
> > >   /* Return the allocation order based for a page */
> > > @@ -530,6 +525,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
> > >   			for (j = 0; j < MAX_ORDER; ++j)
> > >   				ttm_pool_type_fini(&pool->caching[i].orders[j]);
> > >   	}
> > > +	sync_shrinkers();
> > >   }
> > >   /* As long as pages are available make sure to release at least one */
> > > @@ -604,7 +600,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
> > >   {
> > >   	ttm_pool_debugfs_header(m);
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	seq_puts(m, "wc\t:");
> > >   	ttm_pool_debugfs_orders(global_write_combined, m);
> > >   	seq_puts(m, "uc\t:");
> > > @@ -613,7 +609,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
> > >   	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
> > >   	seq_puts(m, "uc 32\t:");
> > >   	ttm_pool_debugfs_orders(global_dma32_uncached, m);
> > > -	mutex_unlock(&shrinker_lock);
> > > +	spin_unlock(&shrinker_lock);
> > >   	ttm_pool_debugfs_footer(m);
> > > @@ -640,7 +636,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
> > >   	ttm_pool_debugfs_header(m);
> > > -	mutex_lock(&shrinker_lock);
> > > +	spin_lock(&shrinker_lock);
> > >   	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
> > >   		seq_puts(m, "DMA ");
> > >   		switch (i) {
> > > @@ -656,7 +652,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
> > >   		}
> > >   		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
> > >   	}
> > > -	mutex_unlock(&shrinker_lock);
> > > +	spin_unlock(&shrinker_lock);
> > >   	ttm_pool_debugfs_footer(m);
> > >   	return 0;
> > > @@ -693,7 +689,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> > >   	if (!page_pool_size)
> > >   		page_pool_size = num_pages;
> > > -	mutex_init(&shrinker_lock);
> > > +	spin_lock_init(&shrinker_lock);
> > >   	INIT_LIST_HEAD(&shrinker_list);
> > >   	for (i = 0; i < MAX_ORDER; ++i) {
> > > diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> > > index 0f80123650e2..6b75dc372fce 100644
> > > --- a/include/linux/shrinker.h
> > > +++ b/include/linux/shrinker.h
> > > @@ -92,4 +92,5 @@ extern void register_shrinker_prepared(struct shrinker *shrinker);
> > >   extern int register_shrinker(struct shrinker *shrinker);
> > >   extern void unregister_shrinker(struct shrinker *shrinker);
> > >   extern void free_prealloced_shrinker(struct shrinker *shrinker);
> > > +extern void sync_shrinkers(void);
> > >   #endif
> > > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > > index 562e87cbd7a1..46cd9c215d73 100644
> > > --- a/mm/vmscan.c
> > > +++ b/mm/vmscan.c
> > > @@ -408,6 +408,16 @@ void unregister_shrinker(struct shrinker *shrinker)
> > >   }
> > >   EXPORT_SYMBOL(unregister_shrinker);
> > > +/**
> > > + * sync_shrinkers - Wait for all running shrinkers to complete.
> > > + */
> > > +void sync_shrinkers(void)
> > This one should probably be in its own patch, with a bit more commit
> > message about why we need it and all that. I'd assume that just
> > unregistering the shrinker should sync everything we needed to sync
> > already, and for other sync needs we can do locking within our own
> > shrinker?
> 
> Correct. The reason we need the barrier is that we need to destroy the
> device (during hotplug) before the shrinker is unregistered (during module
> unload).
> 
> Going to separate that, write something up in the commit message and send it
> to the appropriate audience.
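
If I'm following, the ordering you're worried about is roughly this
(device_fini() here is a made-up driver call site for illustration; the
rest are the functions from the patch):

  /* hotplug path, long before module unload */
  device_fini()
      ttm_pool_fini(pool)
          ttm_pool_type_fini(pt)      /* unlinks pt, frees its pages */
          sync_shrinkers()            /* new: waits for in-flight shrinks */

  /* concurrently, reclaim on another CPU */
  shrink_slab()
      ttm_pool_shrinker_scan()
          ttm_pool_shrink()           /* picked pt off shrinker_list and
                                         dropped the spinlock, so it may
                                         still be touching pt here */

  /* only at module unload, too late for the hotplug case */
  ttm_pool_mgr_fini()
      unregister_shrinker(&mm_shrinker)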

Hm, why do we need that? Either way this sounds like an orthogonal series
for the hotunplug work, not just a shrinker optimization.
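
(For reference, my assumption above is that unregister_shrinker() already
acts as such a barrier; from a quick look at vmscan.c it does roughly:

	void unregister_shrinker(struct shrinker *shrinker)
	{
		...
		down_write(&shrinker_rwsem);
		list_del(&shrinker->list);
		up_write(&shrinker_rwsem);
		...
	}

i.e. the same write-side pass through shrinker_rwsem that sync_shrinkers()
adds, just tied to shrinker lifetime rather than pool lifetime.)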
-Daniel

> 
> Thanks,
> Christian.
> 
> > -Daniel
> > 
> > > +{
> > > +	down_write(&shrinker_rwsem);
> > > +	up_write(&shrinker_rwsem);
> > > +}
> > > +EXPORT_SYMBOL(sync_shrinkers);
> > > +
> > >   #define SHRINK_BATCH 128
> > >   static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> > > -- 
> > > 2.25.1
> > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

