[RFC PATCH v2 11/13] drm/ttm: Provide helpers for shrinking
Thomas Hellström
thomas.hellstrom at linux.intel.com
Thu Feb 9 18:26:30 UTC 2023
Provide TTM Pool helpers for system page shrinking.
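The new interface consists of ttm_pool_shrink_tt(), which backs the
pool-allocated pages of a struct ttm_tt up to swap space using
swap_backup_folio(), or frees them directly if the ttm_tt is purgeable,
and ttm_pool_release_shrunken(), which drops any remaining pages and
swap entries of a shrunken ttm_tt. ttm_pool_alloc() learns to restore a
shrunken ttm_tt, keeping restart state in a struct ttm_pool_tt_restore
so that an interrupted restore can be resumed. Finally, ttm_tt_shrink()
wraps the pool helper and marks the ttm_tt unpopulated on success.

A driver shrinker could use the helper roughly as in the sketch below.
This is illustrative only; my_driver_shrink_bo() and the surrounding
locking and idling requirements are assumptions and not part of this
patch:

	/* Hypothetical driver-side sketch, not part of this patch. */
	static long my_driver_shrink_bo(struct ttm_buffer_object *bo)
	{
		long shrunken;

		/* Assumes the bo is reserved, idle and unbound from the GPU. */
		if (!bo->ttm || !ttm_tt_is_populated(bo->ttm))
			return 0;

		shrunken = ttm_tt_shrink(bo->bdev, bo->ttm);

		/* Report the number of system pages made reclaimable. */
		return shrunken > 0 ? shrunken : 0;
	}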
Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
drivers/gpu/drm/Kconfig | 1 +
drivers/gpu/drm/ttm/ttm_pool.c | 373 +++++++++++++++++++++++++++++++--
drivers/gpu/drm/ttm/ttm_tt.c | 15 +-
include/drm/ttm/ttm_pool.h | 4 +
include/drm/ttm/ttm_tt.h | 6 +
5 files changed, 379 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index dc0f94f02a82..1efd33411a92 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -196,6 +196,7 @@ source "drivers/gpu/drm/display/Kconfig"
config DRM_TTM
tristate
depends on DRM && MMU
+ select SWAP_BACKUP_FOLIO
help
GPU memory management subsystem for devices with multiple
GPU memory types. Will be enabled automatically if a device driver
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 0787574699c4..078c6f6bd085 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -38,6 +38,7 @@
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/sched/mm.h>
+#include <linux/swap.h>
#ifdef CONFIG_X86
#include <asm/set_memory.h>
@@ -49,6 +50,17 @@
#include "ttm_module.h"
+#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
+#define TTM_64K_ORDER (16 - PAGE_SHIFT)
+#if (TTM_MAX_ORDER < TTM_64K_ORDER)
+#undef TTM_MAX_ORDER
+#define TTM_MAX_ORDER TTM_64K_ORDER
+#endif
+#if (TTM_MAX_ORDER > MAX_ORDER)
+#undef TTM_MAX_ORDER
+#define TTM_MAX_ORDER MAX_ORDER
+#endif
+
/**
* struct ttm_pool_dma - Helper object for coherent DMA mappings
*
@@ -60,22 +72,33 @@ struct ttm_pool_dma {
unsigned long vaddr;
};
+/**
+ * struct ttm_pool_tt_restore - State representing restore from swap.
+ * @first_page: Pointer into the ttm_tt page array to the first (new) page of
+ * the chunk currently being restored, corresponding to @old_pages[0].
+ * @old_pages: Backup copy of the page pointers that were replaced by the new
+ * page allocation.
+ * @pool: The pool used for the page allocation.
+ * @alloced_pages: Total number of already allocated pages for the ttm_tt.
+ * @restored_pages: Number of (sub) pages restored from swap for this
+ * chunk of 1 << @order pages.
+ * @order: The page allocation order of the current chunk.
+ *
+ * Recovery from swap space might fail when we've recovered less than the
+ * full ttm_tt. In order not to lose any data (yet), keep information
+ * around that allows us to restart a failed ttm swap-space recovery.
+ */
+struct ttm_pool_tt_restore {
+ struct page **first_page;
+ struct page *old_pages[1 << TTM_MAX_ORDER];
+ struct ttm_pool *pool;
+ pgoff_t alloced_pages;
+ pgoff_t restored_pages;
+ unsigned int order;
+};
+
static unsigned long page_pool_size;
MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
module_param(page_pool_size, ulong, 0644);
-#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
-#define TTM_64K_ORDER (16 - PAGE_SHIFT)
-#if (TTM_MAX_ORDER < TTM_64K_ORDER)
-#undef TTM_MAX_ORDER
-#define TTM_MAX_ORDER TTM_64K_ORDER
-#endif
-#if (TTM_MAX_ORDER > MAX_ORDER)
-#undef TTM_MAX_ORDER
-#define TTM_MAX_ORDER MAX_ORDER
-#endif
-
static atomic_long_t allocated_pages;
static struct ttm_pool_type global_write_combined[TTM_MAX_ORDER];
@@ -90,6 +113,23 @@ static struct shrinker mm_shrinker;
static unsigned int ttm_pool_orders[] = {TTM_MAX_ORDER, 0, 0};
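+/*
+ * Swap entries for shrunken pages are stored directly in the ttm_tt page
+ * array as tagged pointers: Bit 0 set distinguishes a swp_entry_t from a
+ * real struct page pointer, and the swap entry value occupies the remaining
+ * bits, shifted up by one.
+ */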
+static struct page *ttm_pool_swap_to_page_ptr(swp_entry_t swap)
+{
+ return (struct page *)(swap.val << 1 | 1);
+}
+
+static swp_entry_t ttm_pool_page_ptr_to_swap(const struct page *p)
+{
+ swp_entry_t swap = {.val = ((unsigned long)p) >> 1};
+
+ return swap;
+}
+
+static bool ttm_pool_page_ptr_is_swap(const struct page *p)
+{
+ return ((unsigned long)p) & 1;
+}
+
/* Allocate pages of size 1 << order with the given gfp_flags */
static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
unsigned int order)
@@ -360,11 +400,99 @@ static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
return p->private;
}
+/*
+ * To be able to insert single pages into the swap cache directly,
+ * we need to split multi-order page allocations and make them look
+ * like single page-allocations.
+ */
+static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
+{
+ unsigned int order = ttm_pool_page_order(pool, p);
+ pgoff_t nr;
+
+ if (!order)
+ return;
+
+ split_page(p, order);
+ nr = 1UL << order;
+ while (nr--)
+ (p++)->private = 0;
+}
+
+/**
+ * DOC: Partial shrinking and restoration of a struct ttm_tt.
+ *
+ * Swapout using swap_backup_folio() and swapin using swap_copy_folio() may fail.
+ * The former most likely due to lack of swap-space or memory, the latter due
+ * to lack of memory or because of signal interruption during waits.
+ *
+ * Swapout failure is easily handled by using a ttm_tt page vector that holds
+ * both swap entries and page pointers. This has to be taken into account when
+ * restoring such a ttm_tt from swap, and when freeing it while swapped out.
+ * When restoring, for simplicity, new pages are actually allocated from the
+ * pool and the contents of any old pages are copied in and then the old pages
+ * are released.
+ *
+ * For swapin failures, the struct ttm_pool_tt_restore holds sufficient state
+ * to be able to resume an interrupted restore, and that structure is freed once
+ * the restoration is complete. If the struct ttm_tt is destroyed while there
+ * is a valid struct ttm_pool_tt_restore attached, that is also properly taken
+ * care of.
+ */
+
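+/* Return true if sub-pages of the current restore chunk remain to be restored. */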
+static bool ttm_pool_restore_valid(const struct ttm_pool_tt_restore *restore)
+{
+ return restore && restore->restored_pages < (1 << restore->order);
+}
+
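+/*
+ * Restore the contents of the latest allocated chunk of 1 << restore->order
+ * pages: Copy each sub-page back from its swap entry, or from the old page if
+ * backing it up to swap previously failed, and then drop the swap entry or
+ * free the old page. Copying may fail due to memory pressure or, if the
+ * operation is interruptible, a pending signal.
+ */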
+static int ttm_pool_swapin(struct ttm_pool_tt_restore *restore,
+ struct ttm_operation_ctx *ctx)
+{
+ unsigned int i, nr = 1 << restore->order;
+ int ret = 0;
+
+ if (!ttm_pool_restore_valid(restore))
+ return 0;
+
+ for (i = restore->restored_pages; i < nr; ++i) {
+ struct page *p = restore->old_pages[i];
+
+ if (ttm_pool_page_ptr_is_swap(p)) {
+ swp_entry_t swap = ttm_pool_page_ptr_to_swap(p);
+
+ if (swap.val == 0)
+ continue;
+
+ ret = swap_copy_folio(swap, restore->first_page[i], 0,
+ ctx->interruptible);
+ if (ret)
+ break;
+
+ swap_drop_folio(swap);
+ } else if (p) {
+ /*
+ * We could probably avoid splitting the old page
+ * using clever logic, but ATM we don't care.
+ */
+ ttm_pool_split_for_swap(restore->pool, p);
+ copy_highpage(restore->first_page[i], p);
+ __free_pages(p, 0);
+ }
+
+ restore->restored_pages++;
+ restore->old_pages[i] = NULL;
+ cond_resched();
+ }
+
+ return ret;
+}
+
/* Called when we got a page, either from a pool or newly allocated */
static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
struct page *p, dma_addr_t **dma_addr,
unsigned long *num_pages,
- struct page ***pages)
+ struct page ***pages,
+ struct ttm_pool_tt_restore *restore)
{
unsigned int i;
int r;
@@ -375,6 +503,16 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
return r;
}
+ if (restore) {
+ memcpy(restore->old_pages, *pages,
+ (1 << order) * sizeof(*restore->old_pages));
+ memset(*pages, 0, (1 << order) * sizeof(**pages));
+ restore->order = order;
+ restore->restored_pages = 0;
+ restore->first_page = *pages;
+ restore->alloced_pages += 1UL << order;
+ }
+
*num_pages -= 1 << order;
for (i = 1 << order; i; --i, ++(*pages), ++p)
**pages = p;
@@ -409,6 +547,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
struct ttm_operation_ctx *ctx)
{
unsigned long num_pages = tt->num_pages;
+ unsigned long start_page;
dma_addr_t *dma_addr = tt->dma_address;
struct page **caching = tt->pages;
struct page **pages = tt->pages;
@@ -432,6 +571,27 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
gfp_flags |= GFP_HIGHUSER;
order = ttm_pool_select_order(ttm_pool_orders[0], num_pages);
+
+ if (tt->page_flags & TTM_TT_FLAG_PRIV_SHRUNKEN) {
+ if (!tt->restore) {
+ tt->restore = kvzalloc(sizeof(*tt->restore),
+ GFP_KERNEL);
+ if (!tt->restore)
+ return -ENOMEM;
+ } else if (ttm_pool_restore_valid(tt->restore)) {
+ struct ttm_pool_tt_restore *restore = tt->restore;
+
+ order = restore->order;
+ num_pages -= restore->alloced_pages;
+ pages += restore->alloced_pages;
+ ttm_pool_swapin(restore, ctx);
+ if (ttm_pool_restore_valid(tt->restore))
+ return -EAGAIN;
+ }
+
+ tt->restore->pool = pool;
+ }
+
for (; num_pages; order = ttm_pool_select_order(order, num_pages)) {
struct ttm_pool_type *pt;
@@ -447,10 +607,17 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
r = ttm_pool_page_allocated(pool, order, p,
&dma_addr,
&num_pages,
- &pages);
+ &pages,
+ tt->restore);
if (r)
goto error_free_page;
+ if (ttm_pool_restore_valid(tt->restore)) {
+ r = ttm_pool_swapin(tt->restore, ctx);
+ if (r)
+ return r;
+ }
+
if (num_pages < (1 << order))
break;
@@ -469,9 +636,17 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
goto error_free_page;
}
r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
- &num_pages, &pages);
+ &num_pages, &pages,
+ tt->restore);
if (r)
goto error_free_page;
+
+ if (ttm_pool_restore_valid(tt->restore)) {
+ r = ttm_pool_swapin(tt->restore, ctx);
+ if (r)
+ return r;
+ }
+
if (PageHighMem(p))
caching = pages;
}
@@ -490,14 +665,24 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
if (r)
goto error_free_all;
+ if (tt->restore) {
+ kvfree(tt->restore);
+ tt->restore = NULL;
+ }
+
+ if (tt->page_flags & TTM_TT_FLAG_PRIV_SHRUNKEN)
+ tt->page_flags &= ~(TTM_TT_FLAG_PRIV_SHRUNKEN |
+ TTM_TT_FLAG_SWAPPED);
+
return 0;
error_free_page:
ttm_pool_free_page(pool, tt->caching, order, p);
error_free_all:
+ start_page = tt->restore ? tt->restore->alloced_pages : 0;
num_pages = tt->num_pages - num_pages;
- for (i = 0; i < num_pages; ) {
+ for (i = start_page; i < num_pages; ) {
order = ttm_pool_page_order(pool, tt->pages[i]);
ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
i += 1 << order;
@@ -518,12 +703,21 @@ EXPORT_SYMBOL(ttm_pool_alloc);
void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
{
unsigned int i;
+ unsigned int num_pages;
- for (i = 0; i < tt->num_pages; ) {
+ for (i = 0; i < tt->num_pages; i += num_pages) {
struct page *p = tt->pages[i];
- unsigned int order, num_pages;
+ unsigned int order;
struct ttm_pool_type *pt;
+ num_pages = 1;
+ if (!p)
+ continue;
+
+ if (WARN_ON_ONCE(ttm_pool_page_ptr_is_swap(p)))
+ continue;
+
order = ttm_pool_page_order(pool, p);
num_pages = 1ULL << order;
if (tt->dma_address)
@@ -535,8 +729,6 @@ void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
else
ttm_pool_free_page(pool, tt->caching, order,
tt->pages[i]);
-
- i += num_pages;
}
while (atomic_long_read(&allocated_pages) > page_pool_size)
@@ -544,6 +736,149 @@ void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
}
EXPORT_SYMBOL(ttm_pool_free);
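+/**
+ * ttm_pool_release_shrunken() - Release content of a shrunken struct ttm_tt
+ * @tt: The struct ttm_tt.
+ *
+ * Drop any swap entries and free any remaining pages of a shrunken ttm_tt,
+ * including the old pages and swap entries still held by an interrupted
+ * restore.
+ */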
+void ttm_pool_release_shrunken(struct ttm_tt *tt)
+{
+ struct ttm_pool_tt_restore *restore;
+ pgoff_t i, num_pages;
+ unsigned int order;
+ swp_entry_t swap;
+
+ if (!(tt->page_flags & TTM_TT_FLAG_PRIV_SHRUNKEN))
+ return;
+
+ restore = tt->restore;
+ /* Release an interrupted populate. */
+ if (ttm_pool_restore_valid(restore)) {
+ unsigned long nr = 1UL << restore->order;
+
+ for (i = restore->restored_pages; i < nr; ++i) {
+ struct page *p = restore->old_pages[i];
+
+ if (ttm_pool_page_ptr_is_swap(p)) {
+ swap = ttm_pool_page_ptr_to_swap(p);
+ if (swap.val == 0)
+ continue;
+
+ swap_drop_folio(swap);
+ } else if (p) {
+ ttm_pool_split_for_swap(restore->pool, p);
+ __free_pages(p, 0);
+ }
+ }
+ }
+
+ /* Release remaining pages or swap entries */
+ for (i = 0; i < tt->num_pages; i += num_pages) {
+ struct page *p = tt->pages[i];
+
+ num_pages = 1;
+ if (ttm_pool_page_ptr_is_swap(p)) {
+ swap = ttm_pool_page_ptr_to_swap(p);
+ if (swap.val == 0)
+ continue;
+
+ swap_drop_folio(swap);
+ } else if (p) {
+ order = p->private;
+ __free_pages(p, order);
+ num_pages = 1UL << order;
+ }
+ }
+
+ if (restore) {
+ kvfree(restore);
+ tt->restore = NULL;
+ }
+
+ tt->page_flags &= ~(TTM_TT_FLAG_PRIV_SHRUNKEN | TTM_TT_FLAG_SWAPPED);
+}
+
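+/**
+ * ttm_pool_shrink_tt() - Shrink a struct ttm_tt by backing it up to swap
+ * @pool: The pool the ttm_tt was allocated from.
+ * @ttm: The struct ttm_tt.
+ *
+ * If the ttm_tt is purgeable, its pages are freed immediately. Otherwise
+ * each page is copied to swap space using swap_backup_folio(). Partially
+ * shrunken ttm_tts are allowed.
+ *
+ * Return: Number of pages shrunken, or negative error code on failure.
+ */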
+long ttm_pool_shrink_tt(struct ttm_pool *pool, struct ttm_tt *ttm)
+{
+ struct page *page;
+ struct folio *folio;
+ swp_entry_t swap;
+ gfp_t alloc_gfp;
+ gfp_t gfp;
+ int ret = 0;
+ pgoff_t shrunken = 0;
+ pgoff_t i, num_pages;
+ bool purge = ttm_tt_purgeable(ttm);
+
+ if ((!get_nr_swap_pages() && !purge) ||
+ pool->use_dma_alloc ||
+ (ttm->page_flags & TTM_TT_FLAG_PRIV_SHRUNKEN))
+ return -EBUSY;
+
+#ifdef CONFIG_X86
+ /* Anything returned to the system needs to be cached. */
+ if (ttm->caching != ttm_cached)
+ set_pages_array_wb(ttm->pages, ttm->num_pages);
+#endif
+
+ if (pool->use_dma32)
+ gfp = GFP_DMA32;
+ else
+ gfp = GFP_HIGHUSER;
+
+ alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN;
+ if (current_is_kswapd())
+ alloc_gfp |= __GFP_NOMEMALLOC;
+
+ if (ttm->dma_address || purge) {
+ for (i = 0; i < ttm->num_pages; i += num_pages) {
+ unsigned int order;
+
+ page = ttm->pages[i];
+ if (unlikely(!page))
+ continue;
+
+ order = ttm_pool_page_order(pool, page);
+ num_pages = 1UL << order;
+ if (ttm->dma_address)
+ ttm_pool_unmap(pool, ttm->dma_address[i],
+ num_pages);
+ if (purge) {
+ shrunken += num_pages;
+ __free_pages(page, order);
+ memset(ttm->pages + i, 0,
+ num_pages * sizeof(*ttm->pages));
+ }
+ }
+ }
+
+ if (purge)
+ return shrunken;
+
+ for (i = 0; i < ttm->num_pages; ++i) {
+ page = ttm->pages[i];
+ if (unlikely(!page))
+ continue;
+
+ ttm_pool_split_for_swap(pool, page);
+
+ folio = page_folio(page);
+ folio_mark_dirty(folio);
+ swap = swap_backup_folio(folio, false, gfp, alloc_gfp);
+ if (swap.val) {
+ ttm->pages[i] = ttm_pool_swap_to_page_ptr(swap);
+ folio_put(folio);
+ shrunken++;
+ } else {
+ /* We allow partially shrunken tts */
+ ret = -ENOMEM;
+ break;
+ }
+ cond_resched();
+ }
+
+ if (shrunken)
+ ttm->page_flags |= (TTM_TT_FLAG_PRIV_SHRUNKEN |
+ TTM_TT_FLAG_SWAPPED);
+
+ return shrunken ? shrunken : ret;
+}
+
/**
* ttm_pool_init - Initialize a pool
*
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index ff7bad12ba0e..14a3d0a1549f 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -241,9 +241,9 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm,
ttm->caching = ttm_cached;
ttm->page_flags = page_flags;
ttm->dma_address = NULL;
- ttm->swap_storage = NULL;
ttm->sg = bo->sg;
ttm->caching = caching;
+ ttm->restore = NULL;
}
int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
@@ -268,6 +268,8 @@ void ttm_tt_fini(struct ttm_tt *ttm)
fput(ttm->swap_storage);
ttm->swap_storage = NULL;
+ ttm_pool_release_shrunken(ttm);
+
if (ttm->pages)
kvfree(ttm->pages);
else
@@ -338,6 +340,17 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
return ret;
}
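+/**
+ * ttm_tt_shrink() - Helper for driver shrinkers to shrink a struct ttm_tt
+ * @bdev: The TTM device.
+ * @tt: The struct ttm_tt.
+ *
+ * Shrink the ttm_tt using ttm_pool_shrink_tt() and mark it unpopulated if
+ * any pages were successfully shrunken.
+ *
+ * Return: Number of pages shrunken, or negative error code on failure.
+ */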
+long ttm_tt_shrink(struct ttm_device *bdev, struct ttm_tt *tt)
+{
+ long ret = ttm_pool_shrink_tt(&bdev->pool, tt);
+
+ if (ret > 0)
+ tt->page_flags &= ~TTM_TT_FLAG_PRIV_POPULATED;
+
+ return ret;
+}
+EXPORT_SYMBOL(ttm_tt_shrink);
+
/**
* ttm_tt_swapout - swap out tt object
* @bdev: TTM device structure.
diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
index c1200552892e..bfe14138a992 100644
--- a/include/drm/ttm/ttm_pool.h
+++ b/include/drm/ttm/ttm_pool.h
@@ -86,6 +86,10 @@ void ttm_pool_fini(struct ttm_pool *pool);
int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);
+void ttm_pool_release_shrunken(struct ttm_tt *tt);
+
+long ttm_pool_shrink_tt(struct ttm_pool *pool, struct ttm_tt *ttm);
+
int ttm_pool_mgr_init(unsigned long num_pages);
void ttm_pool_mgr_fini(void);
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index abb17527f76c..9d529e07fb8b 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -37,6 +37,7 @@ struct ttm_tt;
struct ttm_resource;
struct ttm_buffer_object;
struct ttm_operation_ctx;
+struct ttm_pool_tt_restore;
/**
* struct ttm_tt - This is a structure holding the pages, caching- and aperture
@@ -89,6 +90,7 @@ struct ttm_tt {
#define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3)
#define TTM_TT_FLAG_DONTNEED BIT(4)
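+/* The underlying pages have been backed up to swap (or freed if purgeable). */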
+#define TTM_TT_FLAG_PRIV_SHRUNKEN BIT(30)
#define TTM_TT_FLAG_PRIV_POPULATED BIT(31)
uint32_t page_flags;
/** @num_pages: Number of pages in the page array. */
@@ -104,6 +106,8 @@ struct ttm_tt {
* ttm_caching.
*/
enum ttm_caching caching;
+ /** @restore: Swap restore state. Drivers keep off. */
+ struct ttm_pool_tt_restore *restore;
};
/**
@@ -226,6 +230,8 @@ void ttm_tt_mgr_fini(void);
struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt,
struct ttm_tt *tt);
+long ttm_tt_shrink(struct ttm_device *bdev, struct ttm_tt *tt);
+
/**
* ttm_tt_purgeable() - Whether a struct ttm_tt's contents is purgeable
* @tt: The struct ttm_tt to consider.
--
2.34.1