[Intel-gfx] [PATCH 13/49] drm/i915: Split batch pool into size buckets

Fri Mar 27 04:01:45 PDT 2015

Now with the trimmed memcpy before the command parser, we try to
allocate many different sizes of batches, predominantly one or two
pages. We can therefore speed up searching for a good sized batch by
keeping the objects of buckets of roughly the same size.

v2: Add a comment about bucket sizes

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c        | 46 ++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_drv.h            |  2 +-
 drivers/gpu/drm/i915/i915_gem.c            |  2 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 49 +++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_batch_pool.h |  2 +-
 5 files changed, 64 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1c803ffe4a3b..b37b7c2ae5e2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -378,15 +378,17 @@ static void print_batch_pool_stats(struct seq_file *m,
 	struct drm_i915_gem_object *obj;
 	struct file_stats stats;
 	struct intel_engine_cs *ring;
-	int i;
+	int i, j;
 
 	memset(&stats, 0, sizeof(stats));
 
 	for_each_ring(ring, dev_priv, i) {
-		list_for_each_entry(obj,
-				    &ring->batch_pool.cache_list,
-				    batch_pool_list)
-			per_file_stats(0, obj, &stats);
+		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link)
+				per_file_stats(0, obj, &stats);
+		}
 	}
 
 	print_file_stats(m, "batch pool", stats);
@@ -618,26 +620,38 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 	struct intel_engine_cs *ring;
-	int count = 0;
-	int ret, i;
+	int total = 0;
+	int ret, i, j;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
 	for_each_ring(ring, dev_priv, i) {
-		seq_printf(m, "%s cache:\n", ring->name);
-		list_for_each_entry(obj,
-				    &ring->batch_pool.cache_list,
-				    batch_pool_list) {
-			seq_puts(m, "   ");
-			describe_obj(m, obj);
-			seq_putc(m, '\n');
-			count++;
+		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+			int count;
+
+			count = 0;
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link)
+				count++;
+			seq_printf(m, "%s cache[%d]: %d objects\n",
+				   ring->name, j, count);
+
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link) {
+				seq_puts(m, "   ");
+				describe_obj(m, obj);
+				seq_putc(m, '\n');
+			}
+
+			total += count;
 		}
 	}
 
-	seq_printf(m, "total: %d\n", count);
+	seq_printf(m, "total: %d\n", total);
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 84dcf391b244..896aae1c10ac 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1909,7 +1909,7 @@ struct drm_i915_gem_object {
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
-	struct list_head batch_pool_list;
+	struct list_head batch_pool_link;
 
 	/**
 	 * This is set if the object is on the active lists (has pending
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a99e434126ba..413b0e78b161 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4413,7 +4413,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
-	INIT_LIST_HEAD(&obj->batch_pool_list);
+	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 1287abf55b84..7bf2f3f2968e 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -47,8 +47,12 @@
 void i915_gem_batch_pool_init(struct drm_device *dev,
 			      struct i915_gem_batch_pool *pool)
 {
+	int n;
+
 	pool->dev = dev;
-	INIT_LIST_HEAD(&pool->cache_list);
+
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
+		INIT_LIST_HEAD(&pool->cache_list[n]);
 }
 
 /**
@@ -59,16 +63,20 @@ void i915_gem_batch_pool_init(struct drm_device *dev,
  */
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 {
+	int n;
+
 	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
 
-	while (!list_empty(&pool->cache_list)) {
-		struct drm_i915_gem_object *obj =
-			list_first_entry(&pool->cache_list,
-					 struct drm_i915_gem_object,
-					 batch_pool_list);
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+		while (!list_empty(&pool->cache_list[n])) {
+			struct drm_i915_gem_object *obj =
+				list_first_entry(&pool->cache_list[n],
+						 struct drm_i915_gem_object,
+						 batch_pool_link);
 
-		list_del(&obj->batch_pool_list);
-		drm_gem_object_unreference(&obj->base);
+			list_del(&obj->batch_pool_link);
+			drm_gem_object_unreference(&obj->base);
+		}
 	}
 }
 
@@ -91,28 +99,33 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 {
 	struct drm_i915_gem_object *obj = NULL;
 	struct drm_i915_gem_object *tmp, *next;
+	struct list_head *list;
+	int n;
 
 	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
 
-	list_for_each_entry_safe(tmp, next,
-				 &pool->cache_list, batch_pool_list) {
+	/* Compute a power-of-two bucket, but throw everything greater than
+	 * 16KiB into the same bucket: i.e. the the buckets hold objects of
+	 * (1 page, 2 pages, 4 pages, 8+ pages).
+	 */
+	n = fls(size >> PAGE_SHIFT) - 1;
+	if (n >= ARRAY_SIZE(pool->cache_list))
+		n = ARRAY_SIZE(pool->cache_list) - 1;
+	list = &pool->cache_list[n];
+
+	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
 		if (tmp->active)
 			break;
 
 		/* While we're looping, do some clean up */
 		if (tmp->madv == __I915_MADV_PURGED) {
-			list_del(&tmp->batch_pool_list);
+			list_del(&tmp->batch_pool_link);
 			drm_gem_object_unreference(&tmp->base);
 			continue;
 		}
 
-		/*
-		 * Select a buffer that is at least as big as needed
-		 * but not 'too much' bigger. A better way to do this
-		 * might be to bucket the pool objects based on size.
-		 */
-		if (tmp->base.size >= size && tmp->base.size <= 2 * size) {
+		if (tmp->base.size >= size) {
 			obj = tmp;
 			break;
 		}
@@ -132,7 +145,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 		obj->madv = I915_MADV_DONTNEED;
 	}
 
-	list_move_tail(&obj->batch_pool_list, &pool->cache_list);
+	list_move_tail(&obj->batch_pool_link, list);
 	i915_gem_object_pin_pages(obj);
 	return obj;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 5ed70ef6a887..848e90703eed 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -29,7 +29,7 @@
 
 struct i915_gem_batch_pool {
 	struct drm_device *dev;
-	struct list_head cache_list;
+	struct list_head cache_list[4];
 };
 
 /* i915_gem_batch_pool.c */
-- 
2.1.4