[PATCH] drm: Add support for two-ended allocation

Lauri Kasanen cand at gmx.com
Mon Mar 31 08:00:49 PDT 2014


Clients like i915 need to segregate cache domains within the GTT which
can lead to small amounts of fragmentation. By allocating the uncached
buffers from the bottom and the cacheable buffers from the top, we can
reduce the amount of wasted space and also optimize allocation of the
mappable portion of the GTT to only those buffers that require CPU
access through the GTT.

For other drivers, allocating small bos from one end and large ones
from the other reduces fragmentation, keeping the remaining free space
in fewer, larger holes.
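
As a rough sketch (illustration only, not part of this patch), a
driver can pick the direction per allocation with the new flag pair;
the DRM_MM_BOTTOMUP and DRM_MM_TOPDOWN helpers added below expand to
the matching search and allocator flags:

static int example_insert_bo(struct drm_mm *mm, struct drm_mm_node *node,
			     unsigned long size, unsigned alignment)
{
	/* Hypothetical helper (sizes in bytes): small bos go bottom-up,
	 * large ones top-down. The 512 KiB cutoff mirrors the threshold
	 * radeon uses below.
	 */
	if (size < 512 * 1024)
		return drm_mm_insert_node_generic(mm, node, size,
						  alignment, 0,
						  DRM_MM_BOTTOMUP);

	return drm_mm_insert_node_generic(mm, node, size, alignment, 0,
					  DRM_MM_TOPDOWN);
}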

Based on drm_mm work by Chris Wilson.

--

Radeon uses a 512 KiB threshold.

This decreases evictions by up to 20% by improving the fragmentation
pattern. No harm was measured in the normal cases where everything
fits in VRAM (PTS gaming suite).

Some of the VRAM-fitting cases even improved slightly (openarena,
urban terror).

512 KiB was measured to be the optimal threshold for 3D workloads
common on radeon. Other drivers may need different thresholds
according to their workloads.
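
As an example (hypothetical foo driver, not part of this patch),
another TTM driver would opt in by passing its threshold as the new
final argument to ttm_bo_device_init():

	/* Hypothetical driver: enable two-ended allocation by passing
	 * a non-zero threshold in bytes; evictable buffers larger than
	 * this are then placed top-down by ttm_bo_man_get_node().
	 */
	ret = ttm_bo_device_init(&foo->ttm.bdev,
				 foo->ttm.bo_global_ref.ref.object,
				 &foo_bo_driver,
				 dev->anon_inode->i_mapping,
				 DRM_FILE_PAGE_OFFSET,
				 true,
				 1024 * 1024); /* tuned per driver */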

Signed-off-by: Lauri Kasanen <cand at gmx.com>
---
 drivers/gpu/drm/ast/ast_ttm.c         |  3 +-
 drivers/gpu/drm/bochs/bochs_mm.c      |  3 +-
 drivers/gpu/drm/cirrus/cirrus_ttm.c   |  3 +-
 drivers/gpu/drm/drm_mm.c              | 56 +++++++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem.c       |  3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  3 +-
 drivers/gpu/drm/mgag200/mgag200_ttm.c |  3 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c |  2 +-
 drivers/gpu/drm/qxl/qxl_ttm.c         |  2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c   |  3 +-
 drivers/gpu/drm/ttm/ttm_bo.c          |  4 ++-
 drivers/gpu/drm/ttm/ttm_bo_manager.c  | 16 ++++++++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |  2 +-
 include/drm/drm_mm.h                  | 29 +++++++++++++++---
 include/drm/ttm/ttm_bo_driver.h       |  7 ++++-
 15 files changed, 105 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index b824622..61f9e39 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -262,7 +262,8 @@ int ast_mm_init(struct ast_private *ast)
 				 &ast_bo_driver,
 				 dev->anon_inode->i_mapping,
 				 DRM_FILE_PAGE_OFFSET,
-				 true);
+				 true,
+				 0);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index f488be5..9dfd24a 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -228,7 +228,8 @@ int bochs_mm_init(struct bochs_device *bochs)
 				 &bochs_bo_driver,
 				 bochs->dev->anon_inode->i_mapping,
 				 DRM_FILE_PAGE_OFFSET,
-				 true);
+				 true,
+				 0);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index 92e6b77..74e8e21 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -262,7 +262,8 @@ int cirrus_mm_init(struct cirrus_device *cirrus)
 				 &cirrus_bo_driver,
 				 dev->anon_inode->i_mapping,
 				 DRM_FILE_PAGE_OFFSET,
-				 true);
+				 true,
+				 0);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index a2d45b74..1728bcc 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -102,7 +102,8 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
 static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 				 struct drm_mm_node *node,
 				 unsigned long size, unsigned alignment,
-				 unsigned long color)
+				 unsigned long color,
+				 enum drm_mm_allocator_flags flags)
 {
 	struct drm_mm *mm = hole_node->mm;
 	unsigned long hole_start = drm_mm_hole_node_start(hole_node);
@@ -115,12 +116,22 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 	if (mm->color_adjust)
 		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
 
+	if (flags & DRM_MM_CREATE_TOP)
+		adj_start = adj_end - size;
+
 	if (alignment) {
 		unsigned tmp = adj_start % alignment;
-		if (tmp)
-			adj_start += alignment - tmp;
+		if (tmp) {
+			if (flags & DRM_MM_CREATE_TOP)
+				adj_start -= tmp;
+			else
+				adj_start += alignment - tmp;
+		}
 	}
 
+	BUG_ON(adj_start < hole_start);
+	BUG_ON(adj_end > hole_end);
+
 	if (adj_start == hole_start) {
 		hole_node->hole_follows = 0;
 		list_del(&hole_node->hole_stack);
@@ -215,16 +226,17 @@ EXPORT_SYMBOL(drm_mm_reserve_node);
 int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
 			       unsigned long size, unsigned alignment,
 			       unsigned long color,
-			       enum drm_mm_search_flags flags)
+			       enum drm_mm_search_flags sflags,
+			       enum drm_mm_allocator_flags aflags)
 {
 	struct drm_mm_node *hole_node;
 
 	hole_node = drm_mm_search_free_generic(mm, size, alignment,
-					       color, flags);
+					       color, sflags);
 	if (!hole_node)
 		return -ENOSPC;
 
-	drm_mm_insert_helper(hole_node, node, size, alignment, color);
+	drm_mm_insert_helper(hole_node, node, size, alignment, color, aflags);
 	return 0;
 }
 EXPORT_SYMBOL(drm_mm_insert_node_generic);
@@ -233,7 +245,8 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 				       struct drm_mm_node *node,
 				       unsigned long size, unsigned alignment,
 				       unsigned long color,
-				       unsigned long start, unsigned long end)
+				       unsigned long start, unsigned long end,
+				       enum drm_mm_allocator_flags flags)
 {
 	struct drm_mm *mm = hole_node->mm;
 	unsigned long hole_start = drm_mm_hole_node_start(hole_node);
@@ -248,13 +261,20 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 	if (adj_end > end)
 		adj_end = end;
 
+	if (flags & DRM_MM_CREATE_TOP)
+		adj_start = adj_end - size;
+
 	if (mm->color_adjust)
 		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
 
 	if (alignment) {
 		unsigned tmp = adj_start % alignment;
-		if (tmp)
-			adj_start += alignment - tmp;
+		if (tmp) {
+			if (flags & DRM_MM_CREATE_TOP)
+				adj_start -= tmp;
+			else
+				adj_start += alignment - tmp;
+		}
 	}
 
 	if (adj_start == hole_start) {
@@ -271,6 +291,8 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 	INIT_LIST_HEAD(&node->hole_stack);
 	list_add(&node->node_list, &hole_node->node_list);
 
+	BUG_ON(node->start < start);
+	BUG_ON(node->start < adj_start);
 	BUG_ON(node->start + node->size > adj_end);
 	BUG_ON(node->start + node->size > end);
 
@@ -298,21 +320,23 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
  * 0 on success, -ENOSPC if there's no suitable hole.
  */
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
-					unsigned long size, unsigned alignment, unsigned long color,
+					unsigned long size, unsigned alignment,
+					unsigned long color,
 					unsigned long start, unsigned long end,
-					enum drm_mm_search_flags flags)
+					enum drm_mm_search_flags sflags,
+					enum drm_mm_allocator_flags aflags)
 {
 	struct drm_mm_node *hole_node;
 
 	hole_node = drm_mm_search_free_in_range_generic(mm,
 							size, alignment, color,
-							start, end, flags);
+							start, end, sflags);
 	if (!hole_node)
 		return -ENOSPC;
 
 	drm_mm_insert_helper_range(hole_node, node,
 				   size, alignment, color,
-				   start, end);
+				   start, end, aflags);
 	return 0;
 }
 EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic);
@@ -391,7 +415,8 @@ static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 	best = NULL;
 	best_size = ~0UL;
 
-	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+	__drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
+			       flags & DRM_MM_SEARCH_BELOW) {
 		if (mm->color_adjust) {
 			mm->color_adjust(entry, color, &adj_start, &adj_end);
 			if (adj_end <= adj_start)
@@ -432,7 +457,8 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
 	best = NULL;
 	best_size = ~0UL;
 
-	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+	__drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
+			       flags & DRM_MM_SEARCH_BELOW) {
 		if (adj_start < start)
 			adj_start = start;
 		if (adj_end > end)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9c52f68..489bc33 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3270,7 +3270,8 @@ search_free:
 	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
 						  size, alignment,
 						  obj->cache_level, 0, gtt_max,
-						  DRM_MM_SEARCH_DEFAULT);
+						  DRM_MM_SEARCH_DEFAULT,
+						  DRM_MM_CREATE_DEFAULT);
 	if (ret) {
 		ret = i915_gem_evict_something(dev, vm, size, alignment,
 					       obj->cache_level, flags);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 63a6dc7..dd72d35 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1072,7 +1072,8 @@ alloc:
 						  &ppgtt->node, GEN6_PD_SIZE,
 						  GEN6_PD_ALIGN, 0,
 						  0, dev_priv->gtt.base.total,
-						  DRM_MM_SEARCH_DEFAULT);
+						  DRM_MM_SEARCH_DEFAULT,
+						  DRM_MM_CREATE_DEFAULT);
 	if (ret == -ENOSPC && !retried) {
 		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 5a00e90..6844b24 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -262,7 +262,8 @@ int mgag200_mm_init(struct mga_device *mdev)
 				 &mgag200_bo_driver,
 				 dev->anon_inode->i_mapping,
 				 DRM_FILE_PAGE_OFFSET,
-				 true);
+				 true,
+				 0);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index ab0228f..3fef97c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -384,7 +384,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 				  &nouveau_bo_driver,
 				  dev->anon_inode->i_mapping,
 				  DRM_FILE_PAGE_OFFSET,
-				  bits <= 32 ? true : false);
+				  bits <= 32 ? true : false, 0);
 	if (ret) {
 		NV_ERROR(drm, "error initialising bo driver, %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 29c02e0..8401a00 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -495,7 +495,7 @@ int qxl_ttm_init(struct qxl_device *qdev)
 			       qdev->mman.bo_global_ref.ref.object,
 			       &qxl_bo_driver,
 			       qdev->ddev->anon_inode->i_mapping,
-			       DRM_FILE_PAGE_OFFSET, 0);
+			       DRM_FILE_PAGE_OFFSET, 0, 0);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index c8a8a51..1aef339 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -710,7 +710,8 @@ int radeon_ttm_init(struct radeon_device *rdev)
 			       &radeon_bo_driver,
 			       rdev->ddev->anon_inode->i_mapping,
 			       DRM_FILE_PAGE_OFFSET,
-			       rdev->need_dma32);
+			       rdev->need_dma32,
+			       512 * 1024);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9df79ac..caf7cd3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1453,7 +1453,8 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 		       struct ttm_bo_driver *driver,
 		       struct address_space *mapping,
 		       uint64_t file_page_offset,
-		       bool need_dma32)
+		       bool need_dma32,
+		       uint32_t alloc_threshold)
 {
 	int ret = -EINVAL;
 
@@ -1476,6 +1477,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	bdev->dev_mapping = mapping;
 	bdev->glob = glob;
 	bdev->need_dma32 = need_dma32;
+	bdev->alloc_threshold = alloc_threshold;
 	bdev->val_seq = 0;
 	spin_lock_init(&bdev->fence_lock);
 	mutex_lock(&glob->device_list_mutex);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index c58eba33..db9fcb4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -55,6 +55,7 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 	struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
 	struct drm_mm *mm = &rman->mm;
 	struct drm_mm_node *node = NULL;
+	enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT;
 	unsigned long lpfn;
 	int ret;
 
@@ -65,12 +66,21 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 	node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node)
 		return -ENOMEM;
+	/*
+	 * If the driver requested a threshold, use two-ended allocation.
+	 * Pinned buffers require bottom-up allocation.
+	 */
+	if (man->bdev->alloc_threshold &&
+		!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT) &&
+		man->bdev->alloc_threshold < (mem->num_pages * PAGE_SIZE))
+		aflags = DRM_MM_CREATE_TOP;
 
 	spin_lock(&rman->lock);
-	ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
-					  mem->page_alignment,
+	ret = drm_mm_insert_node_in_range_generic(mm, node, mem->num_pages,
+					  mem->page_alignment, 0,
 					  placement->fpfn, lpfn,
-					  DRM_MM_SEARCH_BEST);
+					  DRM_MM_SEARCH_BEST,
+					  aflags);
 	spin_unlock(&rman->lock);
 
 	if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c700958..59a50dd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -725,7 +725,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 				 &vmw_bo_driver,
 				 dev->anon_inode->i_mapping,
 				 VMWGFX_FILE_PAGE_OFFSET,
-				 false);
+				 false, 0);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed initializing TTM buffer object driver.\n");
 		goto out_err1;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 8b6981a..f2bc79b 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -47,8 +47,17 @@
 enum drm_mm_search_flags {
 	DRM_MM_SEARCH_DEFAULT =		0,
 	DRM_MM_SEARCH_BEST =		1 << 0,
+	DRM_MM_SEARCH_BELOW =		1 << 1,
 };
 
+enum drm_mm_allocator_flags {
+	DRM_MM_CREATE_DEFAULT =		0,
+	DRM_MM_CREATE_TOP =		1 << 0,
+};
+
+#define DRM_MM_BOTTOMUP DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT
+#define DRM_MM_TOPDOWN DRM_MM_SEARCH_BELOW, DRM_MM_CREATE_TOP
+
 struct drm_mm_node {
 	struct list_head node_list;
 	struct list_head hole_stack;
@@ -195,6 +204,14 @@ static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
 	     1 : 0; \
 	     entry = list_entry(entry->hole_stack.next, struct drm_mm_node, hole_stack))
 
+#define __drm_mm_for_each_hole(entry, mm, hole_start, hole_end, backwards) \
+	for (entry = list_entry((backwards) ? (mm)->hole_stack.prev : (mm)->hole_stack.next, struct drm_mm_node, hole_stack); \
+	     &entry->hole_stack != &(mm)->hole_stack ? \
+	     hole_start = drm_mm_hole_node_start(entry), \
+	     hole_end = drm_mm_hole_node_end(entry), \
+	     1 : 0; \
+	     entry = list_entry((backwards) ? entry->hole_stack.prev : entry->hole_stack.next, struct drm_mm_node, hole_stack))
+
 /*
  * Basic range manager support (drm_mm.c)
  */
@@ -205,7 +222,8 @@ int drm_mm_insert_node_generic(struct drm_mm *mm,
 			       unsigned long size,
 			       unsigned alignment,
 			       unsigned long color,
-			       enum drm_mm_search_flags flags);
+			       enum drm_mm_search_flags sflags,
+			       enum drm_mm_allocator_flags aflags);
 /**
  * drm_mm_insert_node - search for space and insert @node
  * @mm: drm_mm to allocate from
@@ -228,7 +246,8 @@ static inline int drm_mm_insert_node(struct drm_mm *mm,
 				     unsigned alignment,
 				     enum drm_mm_search_flags flags)
 {
-	return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags);
+	return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags,
+					  DRM_MM_CREATE_DEFAULT);
 }
 
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
@@ -238,7 +257,8 @@ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
 					unsigned long color,
 					unsigned long start,
 					unsigned long end,
-					enum drm_mm_search_flags flags);
+					enum drm_mm_search_flags sflags,
+					enum drm_mm_allocator_flags aflags);
 /**
  * drm_mm_insert_node_in_range - ranged search for space and insert @node
  * @mm: drm_mm to allocate from
@@ -266,7 +286,8 @@ static inline int drm_mm_insert_node_in_range(struct drm_mm *mm,
 					      enum drm_mm_search_flags flags)
 {
 	return drm_mm_insert_node_in_range_generic(mm, node, size, alignment,
-						   0, start, end, flags);
+						   0, start, end, flags,
+						   DRM_MM_CREATE_DEFAULT);
 }
 
 void drm_mm_remove_node(struct drm_mm_node *node);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 5d8aabe..f5fe6df 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -565,6 +565,7 @@ struct ttm_bo_device {
 	struct delayed_work wq;
 
 	bool need_dma32;
+	uint32_t alloc_threshold;
 };
 
 /**
@@ -751,6 +752,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
  * @file_page_offset: Offset into the device address space that is available
  * for buffer data. This ensures compatibility with other users of the
  * address space.
+ * @alloc_threshold: If non-zero, use this as the threshold for two-ended
+ * allocation.
  *
  * Initializes a struct ttm_bo_device:
  * Returns:
@@ -760,7 +763,9 @@ extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
 			      struct ttm_bo_global *glob,
 			      struct ttm_bo_driver *driver,
 			      struct address_space *mapping,
-			      uint64_t file_page_offset, bool need_dma32);
+			      uint64_t file_page_offset,
+			      bool need_dma32,
+			      uint32_t alloc_threshold);
 
 /**
  * ttm_bo_unmap_virtual
-- 
1.8.3.1


