[Intel-gfx] [RFC PATCH 07/16] drm/ttm: Reduce the number of used allocation orders for TTM pages
Christian König
christian.koenig at amd.com
Wed Feb 15 17:42:37 UTC 2023
Am 15.02.23 um 17:13 schrieb Thomas Hellström:
> When swapping out, we will split multi-order pages both in order to
> move them to the swap-cache and to be able to return memory to the
> swap cache as soon as possible on a page-by-page basis.
> By reducing the page max order to the system PMD size, we can be nicer
> to the system and avoid splitting gigantic pages.
> On top of this we also
> include the 64K page size in the page sizes tried, since that appears to
> be a common size for GPU applications.
Please completely drop that. This is just nonsense spilling in from the
Windows drivers.
Christian.
>
> Looking forward to when we might be able to swap out PMD size folios
> without splitting, this will also be a benefit.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
> drivers/gpu/drm/ttm/ttm_pool.c | 58 ++++++++++++++++++++++++++--------
> 1 file changed, 45 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index 1cc7591a9542..8787fb6a218b 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -31,6 +31,8 @@
> * cause they are rather slow compared to alloc_pages+map.
> */
>
> +#define pr_fmt(fmt) "[TTM POOL] " fmt
> +
> #include <linux/module.h>
> #include <linux/dma-mapping.h>
> #include <linux/debugfs.h>
> @@ -47,6 +49,18 @@
>
> #include "ttm_module.h"
>
> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
> +#define TTM_64K_ORDER (16 - PAGE_SHIFT)
> +#if (TTM_MAX_ORDER < TTM_64K_ORDER)
> +#undef TTM_MAX_ORDER
> +#define TTM_MAX_ORDER TTM_64K_ORDER
> +#endif
> +#if ((MAX_ORDER - 1) < TTM_MAX_ORDER)
> +#undef TTM_MAX_ORDER
> +#define TTM_MAX_ORDER (MAX_ORDER - 1)
> +#endif
> +#define TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
> +
> /**
> * struct ttm_pool_dma - Helper object for coherent DMA mappings
> *
> @@ -65,16 +79,18 @@ module_param(page_pool_size, ulong, 0644);
>
> static atomic_long_t allocated_pages;
>
> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
> -static struct ttm_pool_type global_uncached[MAX_ORDER];
> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
>
> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
>
> static spinlock_t shrinker_lock;
> static struct list_head shrinker_list;
> static struct shrinker mm_shrinker;
>
> +static unsigned int ttm_pool_orders[] = {TTM_MAX_ORDER, 0, 0};
> +
> /* Allocate pages of size 1 << order with the given gfp_flags */
> static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
> unsigned int order)
> @@ -400,6 +416,17 @@ static void __ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt,
> }
> }
>
> +static unsigned int ttm_pool_select_order(unsigned int order, pgoff_t num_pages)
> +{
> + unsigned int *cur_order = ttm_pool_orders;
> +
> + order = min_t(unsigned int, __fls(num_pages), order);
> + while (order < *cur_order)
> + ++cur_order;
> +
> + return *cur_order;
> +}
> +
> /**
> * ttm_pool_alloc - Fill a ttm_tt object
> *
> @@ -439,9 +466,8 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> else
> gfp_flags |= GFP_HIGHUSER;
>
> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
> - num_pages;
> - order = min_t(unsigned int, order, __fls(num_pages))) {
> + order = ttm_pool_select_order(ttm_pool_orders[0], num_pages);
> + for (; num_pages; order = ttm_pool_select_order(order, num_pages)) {
> struct ttm_pool_type *pt;
>
> page_caching = tt->caching;
> @@ -558,7 +584,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>
> if (use_dma_alloc) {
> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> - for (j = 0; j < MAX_ORDER; ++j)
> + for (j = 0; j < TTM_DIM_ORDER; ++j)
> ttm_pool_type_init(&pool->caching[i].orders[j],
> pool, i, j);
> }
> @@ -578,7 +604,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
>
> if (pool->use_dma_alloc) {
> for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> - for (j = 0; j < MAX_ORDER; ++j)
> + for (j = 0; j < TTM_DIM_ORDER; ++j)
> ttm_pool_type_fini(&pool->caching[i].orders[j]);
> }
>
> @@ -632,7 +658,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
> unsigned int i;
>
> seq_puts(m, "\t ");
> - for (i = 0; i < MAX_ORDER; ++i)
> + for (i = 0; i < TTM_DIM_ORDER; ++i)
> seq_printf(m, " ---%2u---", i);
> seq_puts(m, "\n");
> }
> @@ -643,7 +669,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
> {
> unsigned int i;
>
> - for (i = 0; i < MAX_ORDER; ++i)
> + for (i = 0; i < TTM_DIM_ORDER; ++i)
> seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
> seq_puts(m, "\n");
> }
> @@ -749,10 +775,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> if (!page_pool_size)
> page_pool_size = num_pages;
>
> + if (TTM_64K_ORDER < TTM_MAX_ORDER)
> + ttm_pool_orders[1] = TTM_64K_ORDER;
> +
> + pr_debug("Used orders are %u %u %u\n", ttm_pool_orders[0],
> + ttm_pool_orders[1], ttm_pool_orders[2]);
> +
> spin_lock_init(&shrinker_lock);
> INIT_LIST_HEAD(&shrinker_list);
>
> - for (i = 0; i < MAX_ORDER; ++i) {
> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
> ttm_pool_type_init(&global_write_combined[i], NULL,
> ttm_write_combined, i);
> ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
> @@ -785,7 +817,7 @@ void ttm_pool_mgr_fini(void)
> {
> unsigned int i;
>
> - for (i = 0; i < MAX_ORDER; ++i) {
> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
> ttm_pool_type_fini(&global_write_combined[i]);
> ttm_pool_type_fini(&global_uncached[i]);
>
More information about the Intel-gfx mailing list