[PATCH 1/3] drm/ttm: rework ttm_tt page limit

Tue Nov 17 14:06:13 UTC 2020

TTM implements a rather extensive accounting of allocated memory.

There are two reasons for this:
1. It tries to block userspace allocating a huge number of very small
   BOs without accounting for the kmalloced memory.

2. Make sure we don't over allocate and run into an OOM situation
   during swapout while trying to handle the memory shortage.

This is only partially a good idea. First of all it is perfectly
valid for an application to use all of system memory, limiting it to
50% is not really acceptable.

What we need to take care of is that the application is held
accountable for the memory it allocated. This is what control
mechanisms like memcg and the normal Linux page accounting already do.

Making sure that we don't run into an OOM situation while trying to
cope with a memory shortage is still a good idea, but this is also
not very well implemented since it means another opportunity of
recursion from the driver back into TTM.

So start to rework all of this by accounting the memory before
allocating the pages and making sure that we never go over a certain
limit pages locked by TTM in the ttm_tt objects.

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/ttm/ttm_memory.c | 17 ++++++++-
 drivers/gpu/drm/ttm/ttm_tt.c     | 64 +++++++++++++++++++++++++++++---
 include/drm/ttm/ttm_tt.h         |  2 +
 3 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index 5ed1fc8f2ace..c53136048630 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -38,6 +38,7 @@
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <drm/ttm/ttm_pool.h>
+#include <drm/ttm/ttm_tt.h>
 
 #define TTM_MEMORY_ALLOC_RETRIES 4
 
@@ -414,10 +415,11 @@ static int ttm_mem_init_dma32_zone(struct ttm_mem_global *glob,
 
 int ttm_mem_global_init(struct ttm_mem_global *glob)
 {
+	uint64_t num_pages, num_dma32_pages;
+	struct ttm_mem_zone *zone;
 	struct sysinfo si;
 	int ret;
 	int i;
-	struct ttm_mem_zone *zone;
 
 	spin_lock_init(&glob->lock);
 	glob->swap_queue = create_singlethread_workqueue("ttm_swap");
@@ -452,6 +454,19 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
 			zone->name, (unsigned long long)zone->max_mem >> 10);
 	}
 	ttm_pool_mgr_init(glob->zone_kernel->max_mem/(2*PAGE_SIZE));
+
+	/* We generally allow TTM to allocate a maximum of 50% of
+	 * the available system memory.
+	 */
+	num_pages = (u64)si.totalram * si.mem_unit;
+	num_pages = (num_pages * 50 / 100) >> PAGE_SHIFT;
+
+	/* But for DMA32 we limit ourself to only use 2GiB maximum. */
+	num_dma32_pages = (u64)(si.totalram - si.totalhigh) * si.mem_unit;
+	num_dma32_pages = min(num_dma32_pages, 2ULL << 30) >> PAGE_SHIFT;
+
+	ttm_tt_mgr_init(num_pages, num_dma32_pages);
+
 	return 0;
 out_no_zone:
 	ttm_mem_global_release(glob);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index cfd633c7e764..c7b71c9861a6 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -38,6 +38,19 @@
 #include <drm/drm_cache.h>
 #include <drm/ttm/ttm_bo_driver.h>
 
+static unsigned long ttm_pages_limit;
+
+MODULE_PARM_DESC(pages_limit, "Limit for the allocated pages");
+module_param_named(pages_limit, ttm_pages_limit, ulong, 0644);
+
+static unsigned long ttm_dma32_pages_limit;
+
+MODULE_PARM_DESC(dma32_pages_limit, "Limit for the allocated DMA32 pages");
+module_param_named(dma32_pages_limit, ttm_dma32_pages_limit, ulong, 0644);
+
+static atomic_long_t ttm_pages_allocated;
+static atomic_long_t ttm_dma32_pages_allocated;
+
 /**
  * Allocates a ttm structure for the given BO.
  */
@@ -295,8 +308,8 @@ static void ttm_tt_add_mapping(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
 		ttm->pages[i]->mapping = bdev->dev_mapping;
 }
 
-int ttm_tt_populate(struct ttm_bo_device *bdev,
-		    struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
+int ttm_tt_populate(struct ttm_bo_device *bdev, struct ttm_tt *ttm,
+		    struct ttm_operation_ctx *ctx)
 {
 	int ret;
 
@@ -306,12 +319,25 @@ int ttm_tt_populate(struct ttm_bo_device *bdev,
 	if (ttm_tt_is_populated(ttm))
 		return 0;
 
+	atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
+	if (bdev->pool.use_dma32)
+		atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated);
+
+	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
+	       atomic_long_read(&ttm_dma32_pages_allocated) >
+	       ttm_dma32_pages_limit) {
+
+		ret = ttm_bo_swapout(ctx);
+		if (ret)
+			goto error;
+	}
+
 	if (bdev->driver->ttm_tt_populate)
 		ret = bdev->driver->ttm_tt_populate(bdev, ttm, ctx);
 	else
 		ret = ttm_pool_alloc(&bdev->pool, ttm, ctx);
 	if (ret)
-		return ret;
+		goto error;
 
 	ttm_tt_add_mapping(bdev, ttm);
 	ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED;
@@ -324,6 +350,12 @@ int ttm_tt_populate(struct ttm_bo_device *bdev,
 	}
 
 	return 0;
+
+error:
+	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
+	if (bdev->pool.use_dma32)
+		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
+	return ret;
 }
 EXPORT_SYMBOL(ttm_tt_populate);
 
@@ -341,8 +373,7 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
 	}
 }
 
-void ttm_tt_unpopulate(struct ttm_bo_device *bdev,
-		       struct ttm_tt *ttm)
+void ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt *ttm)
 {
 	if (!ttm_tt_is_populated(ttm))
 		return;
@@ -352,5 +383,28 @@ void ttm_tt_unpopulate(struct ttm_bo_device *bdev,
 		bdev->driver->ttm_tt_unpopulate(bdev, ttm);
 	else
 		ttm_pool_free(&bdev->pool, ttm);
+
+	atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
+	if (bdev->pool.use_dma32)
+		atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated);
+
 	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
 }
+
+/**
+ * ttm_tt_mgr_init - set defaults for the number of pages
+ *
+ * @num_pages: global limit of system pages to use
+ * @num_dma32_pages: global limit of DMA32 pages to use
+ *
+ * Set reasonable defaults for the number of allocated pages.
+ */
+void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages)
+{
+	if (!ttm_pages_limit)
+		ttm_pages_limit = num_pages;
+
+	if (!ttm_dma32_pages_limit)
+		ttm_dma32_pages_limit = num_dma32_pages;
+
+}
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index da27e9d8fa64..05ca43f13884 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -157,6 +157,8 @@ int ttm_tt_populate(struct ttm_bo_device *bdev, struct ttm_tt *ttm, struct ttm_o
  */
 void ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt *ttm);
 
+void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages);
+
 #if IS_ENABLED(CONFIG_AGP)
 #include <linux/agp_backend.h>
 
-- 
2.25.1