[RFC PATCH v5 11/14] drm/ttm: Provide helpers for shrinking

Thomas Hellström thomas.hellstrom at linux.intel.com
Sun Feb 12 13:26:58 UTC 2023


Provide TTM pool helpers to shrink a struct ttm_tt by backing its system
pages up to swap, to restore the contents again when the ttm_tt is
repopulated, and to release any remaining swap entries when it is
destroyed.

Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
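
A rough driver-side usage sketch (reviewer notes, not part of the patch):
the new helper is intended to be called once a buffer object's backing
store is no longer bound to the GPU. The headers, the ttm_buffer_object
argument and the locking assumptions below are illustration only; this
patch itself only adds ttm_tt_shrink(), ttm_pool_shrink_tt() and
ttm_pool_release_shrunken().

  #include <drm/ttm/ttm_bo.h>
  #include <drm/ttm/ttm_device.h>
  #include <drm/ttm/ttm_tt.h>

  /* Hypothetical helper: try to shrink the backing store of one idle bo. */
  static long example_shrink_one_bo(struct ttm_buffer_object *bo)
  {
  	long shrunk = 0;

  	/* Assumed: the caller holds the bo reservation and the bo is idle. */
  	if (bo->ttm && ttm_tt_is_populated(bo->ttm))
  		shrunk = ttm_tt_shrink(bo->bdev, bo->ttm);

  	/*
  	 * ttm_tt_shrink() clears TTM_TT_FLAG_PRIV_POPULATED and returns the
  	 * number of pages given back to the system, or a negative error code
  	 * (-EBUSY if the tt cannot be shrunk at all).
  	 */
  	return shrunk;
  }

The shrunken state is transparent to drivers: a later ttm_pool_alloc() on a
tt with TTM_TT_FLAG_PRIV_SHRUNKEN set allocates fresh pages and copies the
swapped-out contents back in, and ttm_tt_fini() drops any remaining swap
entries through ttm_pool_release_shrunken().
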
 drivers/gpu/drm/Kconfig        |   1 +
 drivers/gpu/drm/ttm/ttm_pool.c | 375 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/ttm/ttm_tt.c   |  15 +-
 include/drm/ttm/ttm_pool.h     |   4 +
 include/drm/ttm/ttm_tt.h       |   6 +
 5 files changed, 380 insertions(+), 21 deletions(-)
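
As background for the helpers near the top of ttm_pool.c: the pool stores
swp_entry_t values directly in the ttm_tt page array by shifting the value
left by one and tagging bit 0, relying on real struct page pointers always
being at least 2-byte aligned so that their bit 0 is zero. A small
userspace toy (not kernel code; the names merely mirror the kernel helpers)
showing that the encoding round-trips:

  #include <assert.h>
  #include <stdio.h>

  /* Userspace stand-ins for swp_entry_t and struct page. */
  typedef struct { unsigned long val; } swp_entry_t;
  struct page;

  static struct page *swap_to_page_ptr(swp_entry_t swap)
  {
  	return (struct page *)(swap.val << 1 | 1);	/* tag bit 0 */
  }

  static swp_entry_t page_ptr_to_swap(const struct page *p)
  {
  	return (swp_entry_t){ .val = (unsigned long)p >> 1 };
  }

  static int page_ptr_is_swap(const struct page *p)
  {
  	return (unsigned long)p & 1;	/* real pages are >= 2-byte aligned */
  }

  int main(void)
  {
  	swp_entry_t swap = { .val = 0x1234 };
  	struct page *p = swap_to_page_ptr(swap);

  	assert(page_ptr_is_swap(p));
  	assert(page_ptr_to_swap(p).val == swap.val);
  	printf("swap entry round-trip ok\n");
  	return 0;
  }
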

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index dc0f94f02a82..1efd33411a92 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -196,6 +196,7 @@ source "drivers/gpu/drm/display/Kconfig"
 config DRM_TTM
 	tristate
 	depends on DRM && MMU
+	select SWAP_BACKUP_FOLIO
 	help
 	  GPU memory management subsystem for devices with multiple
 	  GPU memory types. Will be enabled automatically if a device driver
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index dd1d0a1425a0..eaa52072ecaf 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -38,6 +38,7 @@
 #include <linux/debugfs.h>
 #include <linux/highmem.h>
 #include <linux/sched/mm.h>
+#include <linux/swap.h>
 
 #ifdef CONFIG_X86
 #include <asm/set_memory.h>
@@ -49,6 +50,18 @@
 
 #include "ttm_module.h"
 
+#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
+#define TTM_64K_ORDER (16 - PAGE_SHIFT)
+#if (TTM_MAX_ORDER < TTM_64K_ORDER)
+#undef TTM_MAX_ORDER
+#define TTM_MAX_ORDER TTM_64K_ORDER
+#endif
+#if (TTM_MAX_ORDER > MAX_ORDER - 1)
+#undef TTM_MAX_ORDER
+#define TTM_MAX_ORDER MAX_ORDER - 1
+#endif
+#define TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
+
 /**
  * struct ttm_pool_dma - Helper object for coherent DMA mappings
  *
@@ -60,23 +73,33 @@ struct ttm_pool_dma {
 	unsigned long vaddr;
 };
 
+/**
+ * struct ttm_pool_tt_restore - State representing restore from swap.
+ * @alloced_pages: Total number of already allocated pages for the ttm_tt.
+ * @restored_pages: Number of (sub) pages restored from swap for this
+ *                  chunk of 1 << @order pages.
+ * @first_page: The ttm page ptr corresponding to @old_pages[0].
+ * @old_pages: Backup copy of the page pointers that were replaced by the
+ *             new page allocation.
+ *
+ * Recovery from swap space might fail, leaving only part of the ttm_tt
+ * recovered. In order not to lose any data (yet), keep information
+ * around that allows us to restart a failed ttm swap-space recovery.
+ */
+struct ttm_pool_tt_restore {
+	struct page **first_page;
+	struct page *old_pages[1 << TTM_MAX_ORDER];
+	struct ttm_pool *pool;
+	pgoff_t alloced_pages;
+	pgoff_t restored_pages;
+	unsigned int order;
+};
+
 static unsigned long page_pool_size;
 
 MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
 module_param(page_pool_size, ulong, 0644);
 
-#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
-#define TTM_64K_ORDER (16 - PAGE_SHIFT)
-#if (TTM_MAX_ORDER < TTM_64K_ORDER)
-#undef TTM_MAX_ORDER
-#define TTM_MAX_ORDER TTM_64K_ORDER
-#endif
-#if (TTM_MAX_ORDER > MAX_ORDER - 1)
-#undef TTM_MAX_ORDER
-#define TTM_MAX_ORDER MAX_ORDER - 1
-#endif
-#define TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
-
 static atomic_long_t allocated_pages;
 
 static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
@@ -91,6 +114,23 @@ static struct shrinker mm_shrinker;
 
 static unsigned int ttm_pool_orders[] = {TTM_MAX_ORDER, 0, 0};
 
+static struct page *ttm_pool_swap_to_page_ptr(swp_entry_t swap)
+{
+	return (struct page *)(swap.val << 1 | 1);
+}
+
+static swp_entry_t ttm_pool_page_ptr_to_swap(const struct page *p)
+{
+	swp_entry_t swap = {.val = ((unsigned long)p) >> 1};
+
+	return swap;
+}
+
+static bool ttm_pool_page_ptr_is_swap(const struct page *p)
+{
+	return ((unsigned long)p) & 1;
+}
+
 /* Allocate pages of size 1 << order with the given gfp_flags */
 static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
 					unsigned int order)
@@ -361,11 +401,99 @@ static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
 	return p->private;
 }
 
+/*
+ * To be able to insert single pages into the swap cache directly,
+ * we need to split multi-order page allocations and make them look
+ * like single page-allocations.
+ */
+static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
+{
+	unsigned int order = ttm_pool_page_order(pool, p);
+	pgoff_t nr;
+
+	if (!order)
+		return;
+
+	split_page(p, order);
+	nr = 1UL << order;
+	while (nr--)
+		(p++)->private = 0;
+}
+
+/**
+ * DOC: Partial shrinking and restoration of a struct ttm_tt.
+ *
+ * Swapout using swap_backup_folio() and swapin using swap_copy_folio() may
+ * fail: the former most likely due to lack of swap space or memory, the
+ * latter due to lack of memory or because of signal interruption during waits.
+ *
+ * Swapout failure is handled by using a ttm_tt pages vector that can hold
+ * both swap entries and page pointers. This has to be taken into account
+ * when restoring such a ttm_tt from swap, and when freeing it while swapped
+ * out. When restoring, for simplicity, new pages are allocated from the
+ * pool, the contents of any old pages are copied in, and the old pages are
+ * then released.
+ *
+ * For swapin failures, the struct ttm_pool_tt_restore holds sufficient state
+ * to resume an interrupted restore, and that structure is freed once the
+ * restoration is complete. If the struct ttm_tt is destroyed while a valid
+ * struct ttm_pool_tt_restore is still attached, that structure is also
+ * properly released.
+ */
+
+static bool ttm_pool_restore_valid(const struct ttm_pool_tt_restore *restore)
+{
+	return restore && restore->restored_pages < (1 << restore->order);
+}
+
+static int ttm_pool_swapin(struct ttm_pool_tt_restore *restore,
+			   struct ttm_operation_ctx *ctx)
+{
+	unsigned int i, nr = 1 << restore->order;
+	int ret = 0;
+
+	if (!ttm_pool_restore_valid(restore))
+		return 0;
+
+	for (i = restore->restored_pages; i < nr; ++i) {
+		struct page *p = restore->old_pages[i];
+
+		if (ttm_pool_page_ptr_is_swap(p)) {
+			swp_entry_t swap = ttm_pool_page_ptr_to_swap(p);
+
+			if (swap.val == 0)
+				continue;
+
+			ret = swap_copy_folio(swap, restore->first_page[i], 0,
+					      ctx->interruptible);
+			if (ret)
+				break;
+
+			swap_drop_folio(swap);
+		} else if (p) {
+			/*
+			 * We could probably avoid splitting the old page
+			 * using clever logic, but ATM we don't care.
+			 */
+			ttm_pool_split_for_swap(restore->pool, p);
+			copy_highpage(restore->first_page[i], p);
+			__free_pages(p, 0);
+		}
+
+		restore->restored_pages++;
+		restore->old_pages[i] = NULL;
+		cond_resched();
+	}
+
+	return ret;
+}
+
 /* Called when we got a page, either from a pool or newly allocated */
 static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
 				   struct page *p, dma_addr_t **dma_addr,
 				   unsigned long *num_pages,
-				   struct page ***pages)
+				   struct page ***pages,
+				   struct ttm_pool_tt_restore *restore)
 {
 	unsigned int i;
 	int r;
@@ -376,6 +504,16 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
 			return r;
 	}
 
+	if (restore) {
+		memcpy(restore->old_pages, *pages,
+		       (1 << order) * sizeof(*restore->old_pages));
+		memset(*pages, 0, (1 << order) * sizeof(**pages));
+		restore->order = order;
+		restore->restored_pages = 0;
+		restore->first_page = *pages;
+		restore->alloced_pages += 1UL << order;
+	}
+
 	*num_pages -= 1 << order;
 	for (i = 1 << order; i; --i, ++(*pages), ++p)
 		**pages = p;
@@ -410,6 +548,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 		   struct ttm_operation_ctx *ctx)
 {
 	unsigned long num_pages = tt->num_pages;
+	unsigned long start_page;
 	dma_addr_t *dma_addr = tt->dma_address;
 	struct page **caching = tt->pages;
 	struct page **pages = tt->pages;
@@ -433,6 +572,27 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 		gfp_flags |= GFP_HIGHUSER;
 
 	order = ttm_pool_select_order(ttm_pool_orders[0], num_pages);
+
+	if (tt->page_flags & TTM_TT_FLAG_PRIV_SHRUNKEN) {
+		if (!tt->restore) {
+			tt->restore = kvzalloc(sizeof(*tt->restore),
+					       GFP_KERNEL);
+			if (!tt->restore)
+				return -ENOMEM;
+		} else if (ttm_pool_restore_valid(tt->restore)) {
+			struct ttm_pool_tt_restore *restore = tt->restore;
+
+			order = restore->order;
+			num_pages -= restore->alloced_pages;
+			pages += restore->alloced_pages;
+			ttm_pool_swapin(restore, ctx);
+			if (ttm_pool_restore_valid(tt->restore))
+				return -EAGAIN;
+		}
+
+		tt->restore->pool = pool;
+	}
+
 	for (; num_pages; order = ttm_pool_select_order(order, num_pages)) {
 		struct ttm_pool_type *pt;
 
@@ -448,10 +608,17 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 				r = ttm_pool_page_allocated(pool, order, p,
 							    &dma_addr,
 							    &num_pages,
-							    &pages);
+							    &pages,
+							    tt->restore);
 				if (r)
 					goto error_free_page;
 
+				if (ttm_pool_restore_valid(tt->restore)) {
+					r = ttm_pool_swapin(tt->restore, ctx);
+					if (r)
+						return r;
+				}
+
 				if (num_pages < (1 << order))
 					break;
 
@@ -470,9 +637,17 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 					goto error_free_page;
 			}
 			r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
-						    &num_pages, &pages);
+						    &num_pages, &pages,
+						    tt->restore);
 			if (r)
 				goto error_free_page;
+
+			if (ttm_pool_restore_valid(tt->restore)) {
+				r = ttm_pool_swapin(tt->restore, ctx);
+				if (r)
+					return r;
+			}
+
 			if (PageHighMem(p))
 				caching = pages;
 		}
@@ -491,14 +666,24 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	if (r)
 		goto error_free_all;
 
+	if (tt->restore) {
+		kvfree(tt->restore);
+		tt->restore = NULL;
+	}
+
+	if (tt->page_flags & TTM_TT_FLAG_PRIV_SHRUNKEN)
+		tt->page_flags &= ~(TTM_TT_FLAG_PRIV_SHRUNKEN |
+				    TTM_TT_FLAG_SWAPPED);
+
 	return 0;
 
 error_free_page:
 	ttm_pool_free_page(pool, tt->caching, order, p);
 
 error_free_all:
+	start_page = tt->restore ? tt->restore->alloced_pages : 0;
 	num_pages = tt->num_pages - num_pages;
-	for (i = 0; i < num_pages; ) {
+	for (i = start_page; i < num_pages; ) {
 		order = ttm_pool_page_order(pool, tt->pages[i]);
 		ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
 		i += 1 << order;
@@ -519,12 +704,21 @@ EXPORT_SYMBOL(ttm_pool_alloc);
 void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
 {
 	unsigned int i;
+	unsigned int num_pages;
 
-	for (i = 0; i < tt->num_pages; ) {
+	for (i = 0; i < tt->num_pages; i += num_pages) {
 		struct page *p = tt->pages[i];
-		unsigned int order, num_pages;
+		unsigned int order;
 		struct ttm_pool_type *pt;
 
+		num_pages = 1;
+		if (!p)
+			continue;
+
+		/* Swap entries are released by ttm_pool_release_shrunken(). */
+		if (WARN_ON_ONCE(ttm_pool_page_ptr_is_swap(p)))
+			continue;
+
 		order = ttm_pool_page_order(pool, p);
 		num_pages = 1ULL << order;
 		if (tt->dma_address)
@@ -536,8 +730,6 @@ void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
 		else
 			ttm_pool_free_page(pool, tt->caching, order,
 					   tt->pages[i]);
-
-		i += num_pages;
 	}
 
 	while (atomic_long_read(&allocated_pages) > page_pool_size)
@@ -545,6 +737,149 @@ void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
 }
 EXPORT_SYMBOL(ttm_pool_free);
 
+void ttm_pool_release_shrunken(struct ttm_tt *tt)
+{
+	struct ttm_pool_tt_restore *restore;
+	pgoff_t i, num_pages;
+	unsigned int order;
+	swp_entry_t swap;
+
+	if (!(tt->page_flags & TTM_TT_FLAG_PRIV_SHRUNKEN))
+		return;
+
+	restore = tt->restore;
+	/* Release an interrupted populate. */
+	if (ttm_pool_restore_valid(restore)) {
+		unsigned long nr = 1UL << restore->order;
+
+		for (i = restore->restored_pages; i < nr; ++i) {
+			struct page *p = restore->old_pages[i];
+
+			if (ttm_pool_page_ptr_is_swap(p)) {
+				swap = ttm_pool_page_ptr_to_swap(p);
+				if (swap.val == 0)
+					continue;
+
+				swap_drop_folio(swap);
+			} else if (p) {
+				ttm_pool_split_for_swap(restore->pool, p);
+				__free_pages(p, 0);
+			}
+		}
+	}
+
+	/* Release remaining pages or swap entries */
+	for (i = 0; i < tt->num_pages; i += num_pages) {
+		struct page *p = tt->pages[i];
+
+		num_pages = 1;
+		if (ttm_pool_page_ptr_is_swap(p)) {
+			swap = ttm_pool_page_ptr_to_swap(p);
+			if (swap.val == 0)
+				continue;
+
+			swap_drop_folio(swap);
+		} else if (p) {
+			order = p->private;
+			__free_pages(p, order);
+			num_pages = 1UL << order;
+		}
+	}
+
+	if (restore) {
+		kvfree(restore);
+		tt->restore = NULL;
+	}
+
+	tt->page_flags &= ~(TTM_TT_FLAG_PRIV_SHRUNKEN | TTM_TT_FLAG_SWAPPED);
+}
+
+long ttm_pool_shrink_tt(struct ttm_pool *pool, struct ttm_tt *ttm)
+{
+	struct page *page;
+	struct folio *folio;
+	swp_entry_t swap;
+	gfp_t alloc_gfp;
+	gfp_t gfp;
+	int ret = 0;
+	pgoff_t shrunken = 0;
+	pgoff_t i, num_pages;
+	bool purge = ttm_tt_purgeable(ttm);
+
+	if ((!get_nr_swap_pages() && !purge) ||
+	    pool->use_dma_alloc ||
+	    (ttm->page_flags & TTM_TT_FLAG_PRIV_SHRUNKEN))
+		return -EBUSY;
+
+#ifdef CONFIG_X86
+	/* Anything returned to the system needs to be cached. */
+	if (ttm->caching != ttm_cached)
+		set_pages_array_wb(ttm->pages, ttm->num_pages);
+#endif
+
+	if (pool->use_dma32)
+		gfp = GFP_DMA32;
+	else
+		gfp = GFP_HIGHUSER;
+
+	alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN;
+	if (current_is_kswapd())
+		alloc_gfp |= __GFP_NOMEMALLOC;
+
+	if (ttm->dma_address || purge) {
+		for (i = 0; i < ttm->num_pages; i += num_pages) {
+			unsigned int order;
+
+			num_pages = 1;
+			page = ttm->pages[i];
+			if (unlikely(!page))
+				continue;
+			order = ttm_pool_page_order(pool, page);
+			num_pages = 1UL << order;
+			if (ttm->dma_address)
+				ttm_pool_unmap(pool, ttm->dma_address[i],
+					       num_pages);
+			if (purge) {
+				shrunken += num_pages;
+				__free_pages(page, order);
+				memset(ttm->pages + i, 0,
+				       num_pages * sizeof(*ttm->pages));
+			}
+		}
+	}
+
+	if (purge)
+		return shrunken;
+
+	for (i = 0; i < ttm->num_pages; ++i) {
+		page = ttm->pages[i];
+		if (unlikely(!page))
+			continue;
+
+		ttm_pool_split_for_swap(pool, page);
+
+		folio = page_folio(page);
+		folio_mark_dirty(folio);
+		swap = swap_backup_folio(folio, false, gfp, alloc_gfp);
+		if (swap.val) {
+			ttm->pages[i] = ttm_pool_swap_to_page_ptr(swap);
+			folio_put(folio);
+			shrunken++;
+		} else {
+			/* We allow partially shrunken tts */
+			ret = -ENOMEM;
+			break;
+		}
+		cond_resched();
+	}
+
+	if (shrunken)
+		ttm->page_flags |= (TTM_TT_FLAG_PRIV_SHRUNKEN |
+				    TTM_TT_FLAG_SWAPPED);
+
+	return shrunken ? shrunken : ret;
+}
+
 /**
  * ttm_pool_init - Initialize a pool
  *
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index ff7bad12ba0e..14a3d0a1549f 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -241,9 +241,9 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm,
 	ttm->caching = ttm_cached;
 	ttm->page_flags = page_flags;
 	ttm->dma_address = NULL;
-	ttm->swap_storage = NULL;
 	ttm->sg = bo->sg;
 	ttm->caching = caching;
+	ttm->restore = NULL;
 }
 
 int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
@@ -268,6 +268,8 @@ void ttm_tt_fini(struct ttm_tt *ttm)
 		fput(ttm->swap_storage);
 	ttm->swap_storage = NULL;
 
+	ttm_pool_release_shrunken(ttm);
+
 	if (ttm->pages)
 		kvfree(ttm->pages);
 	else
@@ -338,6 +340,17 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
 	return ret;
 }
 
+long ttm_tt_shrink(struct ttm_device *bdev, struct ttm_tt *tt)
+{
+	long ret = ttm_pool_shrink_tt(&bdev->pool, tt);
+
+	if (ret > 0)
+		tt->page_flags &= ~TTM_TT_FLAG_PRIV_POPULATED;
+
+	return ret;
+}
+EXPORT_SYMBOL(ttm_tt_shrink);
+
 /**
  * ttm_tt_swapout - swap out tt object
  * @bdev: TTM device structure.
diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
index c1200552892e..bfe14138a992 100644
--- a/include/drm/ttm/ttm_pool.h
+++ b/include/drm/ttm/ttm_pool.h
@@ -86,6 +86,10 @@ void ttm_pool_fini(struct ttm_pool *pool);
 
 int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);
 
+void ttm_pool_release_shrunken(struct ttm_tt *tt);
+
+long ttm_pool_shrink_tt(struct ttm_pool *pool, struct ttm_tt *ttm);
+
 int ttm_pool_mgr_init(unsigned long num_pages);
 void ttm_pool_mgr_fini(void);
 
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index abb17527f76c..9d529e07fb8b 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -37,6 +37,7 @@ struct ttm_tt;
 struct ttm_resource;
 struct ttm_buffer_object;
 struct ttm_operation_ctx;
+struct ttm_pool_tt_restore;
 
 /**
  * struct ttm_tt - This is a structure holding the pages, caching- and aperture
@@ -89,6 +90,7 @@ struct ttm_tt {
 #define TTM_TT_FLAG_EXTERNAL_MAPPABLE	BIT(3)
 #define TTM_TT_FLAG_DONTNEED		BIT(4)
 
+#define TTM_TT_FLAG_PRIV_SHRUNKEN	BIT(30)
 #define TTM_TT_FLAG_PRIV_POPULATED	BIT(31)
 	uint32_t page_flags;
 	/** @num_pages: Number of pages in the page array. */
@@ -104,6 +106,8 @@ struct ttm_tt {
 	 * ttm_caching.
 	 */
 	enum ttm_caching caching;
+	/** @restore: Swap restore state. Drivers must not touch this. */
+	struct ttm_pool_tt_restore *restore;
 };
 
 /**
@@ -226,6 +230,8 @@ void ttm_tt_mgr_fini(void);
 struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt,
 					    struct ttm_tt *tt);
 
+long ttm_tt_shrink(struct ttm_device *bdev, struct ttm_tt *tt);
+
 /**
  * ttm_tt_purgeable() - Whether a struct ttm_tt's contents is purgeable
  * @tt: The struct ttm_tt to consider.
-- 
2.34.1


