[Intel-gfx] [PATCH 5/6] drm/i915: Split batch pool into size buckets
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Mar 19 10:35:35 PDT 2015
On 03/09/2015 09:55 AM, Chris Wilson wrote:
> Now with the trimmed memcpy before the command parser, we try to
> allocate many different sizes of batches, predominantly one or two
> pages. We can therefore speed up searching for a good-sized batch by
> keeping the objects in buckets of roughly the same size.
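
A minimal user-space sketch of the idea, just to make the speed-up concrete
(hypothetical names, not the i915 API): one free list per size class, so a
lookup only walks objects that are already roughly the right size instead of
scanning every cached batch for a size match:

#include <stdio.h>

#define NUM_BUCKETS 4

struct batch_obj {
        struct batch_obj *next;
        unsigned long size;
};

struct batch_pool {
        struct batch_obj *cache[NUM_BUCKETS]; /* one free list per size bucket */
};

/* Take the first cached object of the requested size class, if any. */
static struct batch_obj *pool_get(struct batch_pool *pool, int bucket)
{
        struct batch_obj *obj = pool->cache[bucket];

        if (obj)
                pool->cache[bucket] = obj->next;
        return obj; /* NULL means the caller allocates a fresh object */
}

/* Return an object to its bucket for later reuse. */
static void pool_put(struct batch_pool *pool, int bucket, struct batch_obj *obj)
{
        obj->next = pool->cache[bucket];
        pool->cache[bucket] = obj;
}

int main(void)
{
        struct batch_pool pool = { { NULL } };
        struct batch_obj one_page = { NULL, 4096 };

        pool_put(&pool, 0, &one_page);
        printf("reused: %p\n", (void *)pool_get(&pool, 0));
        printf("bucket now empty: %p\n", (void *)pool_get(&pool, 0));
        return 0;
}

The real pool in the patch below keeps the same per-bucket lists, but built on
struct list_head with LRU ordering and purge handling.
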
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 46 +++++++++++++++++++-----------
> drivers/gpu/drm/i915/i915_drv.h | 2 +-
> drivers/gpu/drm/i915/i915_gem.c | 2 +-
> drivers/gpu/drm/i915/i915_gem_batch_pool.c | 45 +++++++++++++++++------------
> drivers/gpu/drm/i915/i915_gem_batch_pool.h | 2 +-
> 5 files changed, 60 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d8ca5c89647c..042ad2fec484 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -378,15 +378,17 @@ static void print_batch_pool_stats(struct seq_file *m,
> struct drm_i915_gem_object *obj;
> struct file_stats stats;
> struct intel_engine_cs *ring;
> - int i;
> + int i, j;
>
> memset(&stats, 0, sizeof(stats));
>
> for_each_ring(ring, dev_priv, i) {
> - list_for_each_entry(obj,
> - &ring->batch_pool.cache_list,
> - batch_pool_list)
> - per_file_stats(0, obj, &stats);
> + for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
> + list_for_each_entry(obj,
> + &ring->batch_pool.cache_list[j],
> + batch_pool_link)
> + per_file_stats(0, obj, &stats);
> + }
> }
>
> print_file_stats(m, "batch pool", stats);
> @@ -624,26 +626,38 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct drm_i915_gem_object *obj;
> struct intel_engine_cs *ring;
> - int count = 0;
> - int ret, i;
> + int total = 0;
> + int ret, i, j;
>
> ret = mutex_lock_interruptible(&dev->struct_mutex);
> if (ret)
> return ret;
>
> for_each_ring(ring, dev_priv, i) {
> - seq_printf(m, "%s cache:\n", ring->name);
> - list_for_each_entry(obj,
> - &ring->batch_pool.cache_list,
> - batch_pool_list) {
> - seq_puts(m, " ");
> - describe_obj(m, obj);
> - seq_putc(m, '\n');
> - count++;
> + for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
> + int count;
> +
> + count = 0;
> + list_for_each_entry(obj,
> + &ring->batch_pool.cache_list[j],
> + batch_pool_link)
> + count++;
> + seq_printf(m, "%s cache[%d]: %d objects\n",
> + ring->name, j, count);
> +
> + list_for_each_entry(obj,
> + &ring->batch_pool.cache_list[j],
> + batch_pool_link) {
> + seq_puts(m, " ");
> + describe_obj(m, obj);
> + seq_putc(m, '\n');
> + }
> +
> + total += count;
> }
> }
>
> - seq_printf(m, "total: %d\n", count);
> + seq_printf(m, "total: %d\n", total);
>
> mutex_unlock(&dev->struct_mutex);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9fe1274cfe59..5c7b89bd138d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1893,7 +1893,7 @@ struct drm_i915_gem_object {
> /** Used in execbuf to temporarily hold a ref */
> struct list_head obj_exec_link;
>
> - struct list_head batch_pool_list;
> + struct list_head batch_pool_link;
>
> /**
> * This is set if the object is on the active lists (has pending
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index efb5545251c7..a0c35f80836c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4439,7 +4439,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
> INIT_LIST_HEAD(&obj->ring_list);
> INIT_LIST_HEAD(&obj->obj_exec_link);
> INIT_LIST_HEAD(&obj->vma_list);
> - INIT_LIST_HEAD(&obj->batch_pool_list);
> + INIT_LIST_HEAD(&obj->batch_pool_link);
>
> obj->ops = ops;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index 1287abf55b84..23374352d8eb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -47,8 +47,12 @@
> void i915_gem_batch_pool_init(struct drm_device *dev,
> struct i915_gem_batch_pool *pool)
> {
> + int n;
> +
> pool->dev = dev;
> - INIT_LIST_HEAD(&pool->cache_list);
> +
> + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
> + INIT_LIST_HEAD(&pool->cache_list[n]);
> }
>
> /**
> @@ -59,16 +63,20 @@ void i915_gem_batch_pool_init(struct drm_device *dev,
> */
> void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
> {
> + int n;
> +
> WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
>
> - while (!list_empty(&pool->cache_list)) {
> - struct drm_i915_gem_object *obj =
> - list_first_entry(&pool->cache_list,
> - struct drm_i915_gem_object,
> - batch_pool_list);
> + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
> + while (!list_empty(&pool->cache_list[n])) {
> + struct drm_i915_gem_object *obj =
> + list_first_entry(&pool->cache_list[n],
> + struct drm_i915_gem_object,
> + batch_pool_link);
>
> - list_del(&obj->batch_pool_list);
> - drm_gem_object_unreference(&obj->base);
> + list_del(&obj->batch_pool_link);
> + drm_gem_object_unreference(&obj->base);
> + }
> }
> }
>
> @@ -91,28 +99,29 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
> {
> struct drm_i915_gem_object *obj = NULL;
> struct drm_i915_gem_object *tmp, *next;
> + struct list_head *list;
> + int n;
>
> WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
>
> - list_for_each_entry_safe(tmp, next,
> - &pool->cache_list, batch_pool_list) {
> + n = fls(size >> PAGE_SHIFT) - 1;
Is this better in some way than size / PAGE_SIZE - 1? Or have I gone from
trusting compiler smartness too little to trusting it too much? :)
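
For what it's worth, the two don't compute the same thing: fls() gives a
log2-scaled index, so each bucket covers a power-of-two range of page counts
rather than one bucket per page. A minimal user-space sketch of the mapping
as I read it (fls() stood in by __builtin_clz(), so GCC/Clang assumed;
PAGE_SHIFT hard-coded to 12):

#include <stdio.h>

#define PAGE_SHIFT 12
#define NUM_BUCKETS 4

/* User-space stand-in for the kernel's fls(): position of the most
 * significant set bit, 1-based, 0 for x == 0. */
static int fls_emul(unsigned int x)
{
        return x ? 32 - __builtin_clz(x) : 0;
}

static int bucket_index(unsigned long size)
{
        int n = fls_emul(size >> PAGE_SHIFT) - 1; /* ~log2(page count) */

        if (n >= NUM_BUCKETS)
                n = NUM_BUCKETS - 1; /* big batches share the last bucket */
        return n;
}

int main(void)
{
        unsigned long pages;

        /* 1 page -> 0, 2-3 pages -> 1, 4-7 pages -> 2, 8+ pages -> 3 */
        for (pages = 1; pages <= 16; pages++)
                printf("%2lu pages -> bucket %d\n", pages,
                       bucket_index(pages << PAGE_SHIFT));
        return 0;
}

So with four buckets the classes come out as 1 page, 2-3 pages, 4-7 pages and
8+ pages, if I'm reading the clamp correctly.
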
> + if (n >= ARRAY_SIZE(pool->cache_list))
> + n = ARRAY_SIZE(pool->cache_list) - 1;
> + list = &pool->cache_list[n];
> +
> + list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
> /* The batches are strictly LRU ordered */
> if (tmp->active)
> break;
>
> /* While we're looping, do some clean up */
> if (tmp->madv == __I915_MADV_PURGED) {
> - list_del(&tmp->batch_pool_list);
> + list_del(&tmp->batch_pool_link);
> drm_gem_object_unreference(&tmp->base);
> continue;
> }
>
> - /*
> - * Select a buffer that is at least as big as needed
> - * but not 'too much' bigger. A better way to do this
> - * might be to bucket the pool objects based on size.
> - */
> - if (tmp->base.size >= size && tmp->base.size <= 2 * size) {
> + if (tmp->base.size >= size) {
> obj = tmp;
> break;
> }
> @@ -132,7 +141,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
> obj->madv = I915_MADV_DONTNEED;
> }
>
> - list_move_tail(&obj->batch_pool_list, &pool->cache_list);
> + list_move_tail(&obj->batch_pool_link, list);
> i915_gem_object_pin_pages(obj);
> return obj;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> index 5ed70ef6a887..848e90703eed 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> @@ -29,7 +29,7 @@
>
> struct i915_gem_batch_pool {
> struct drm_device *dev;
> - struct list_head cache_list;
> + struct list_head cache_list[4];
> };
>
> /* i915_gem_batch_pool.c */
>
Pretty straightforward, and I'm trusting you on the bucket sizes,
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko