[Intel-gfx] [PATCH 3/6] drm/i915: Split the batch pool by engine
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Mar 18 09:51:58 PDT 2015
On 03/09/2015 09:55 AM, Chris Wilson wrote:
> I woke up one morning and found 50k objects sitting in the batch pool
> and every search seemed to iterate the entire list... Painting the
> screen in oils would provide a more fluid display.
>
> One issue with the current design is that we only check for retirements
> on the current ring when preparing to submit a new batch. This means
> that we can have thousands of "active" batches on another ring that we
> have to walk over. The simplest way to avoid that is to split the pools
> per ring and then our LRU execution ordering will also ensure that the
> inactive buffers remain at the front.
Is the retire work handler not keeping up? Regularly or under some
specific circumstances?
> v2: execlists still requires duplicate code.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 33 ++++++++++++++++++------------
> drivers/gpu/drm/i915/i915_dma.c | 1 -
> drivers/gpu/drm/i915/i915_drv.h | 8 --------
> drivers/gpu/drm/i915/i915_gem.c | 2 --
> drivers/gpu/drm/i915/i915_gem_batch_pool.c | 3 ++-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +--
> drivers/gpu/drm/i915/intel_lrc.c | 1 +
> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++
> drivers/gpu/drm/i915/intel_ringbuffer.h | 8 ++++++++
> 9 files changed, 34 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d2e6475b0466..d8ca5c89647c 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -377,13 +377,17 @@ static void print_batch_pool_stats(struct seq_file *m,
> {
> struct drm_i915_gem_object *obj;
> struct file_stats stats;
> + struct intel_engine_cs *ring;
> + int i;
>
> memset(&stats, 0, sizeof(stats));
>
> - list_for_each_entry(obj,
> - &dev_priv->mm.batch_pool.cache_list,
> - batch_pool_list)
> - per_file_stats(0, obj, &stats);
> + for_each_ring(ring, dev_priv, i) {
> + list_for_each_entry(obj,
> + &ring->batch_pool.cache_list,
> + batch_pool_list)
> + per_file_stats(0, obj, &stats);
> + }
>
> print_file_stats(m, "batch pool", stats);
> }
> @@ -619,21 +623,24 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
> struct drm_device *dev = node->minor->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct drm_i915_gem_object *obj;
> + struct intel_engine_cs *ring;
> int count = 0;
> - int ret;
> + int ret, i;
>
> ret = mutex_lock_interruptible(&dev->struct_mutex);
> if (ret)
> return ret;
>
> - seq_puts(m, "cache:\n");
> - list_for_each_entry(obj,
> - &dev_priv->mm.batch_pool.cache_list,
> - batch_pool_list) {
> - seq_puts(m, " ");
> - describe_obj(m, obj);
> - seq_putc(m, '\n');
> - count++;
> + for_each_ring(ring, dev_priv, i) {
> + seq_printf(m, "%s cache:\n", ring->name);
> + list_for_each_entry(obj,
> + &ring->batch_pool.cache_list,
> + batch_pool_list) {
> + seq_puts(m, " ");
> + describe_obj(m, obj);
> + seq_putc(m, '\n');
> + count++;
> + }
> }
>
> seq_printf(m, "total: %d\n", count);
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 8e914303b831..70e050ac02b4 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1062,7 +1062,6 @@ int i915_driver_unload(struct drm_device *dev)
>
> mutex_lock(&dev->struct_mutex);
> i915_gem_cleanup_ringbuffer(dev);
> - i915_gem_batch_pool_fini(&dev_priv->mm.batch_pool);
> i915_gem_context_fini(dev);
> mutex_unlock(&dev->struct_mutex);
> i915_gem_cleanup_stolen(dev);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b62e3c478221..9fe1274cfe59 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -37,7 +37,6 @@
> #include "intel_bios.h"
> #include "intel_ringbuffer.h"
> #include "intel_lrc.h"
> -#include "i915_gem_batch_pool.h"
> #include "i915_gem_gtt.h"
> #include "i915_gem_render_state.h"
> #include <linux/io-mapping.h>
> @@ -1149,13 +1148,6 @@ struct i915_gem_mm {
> */
> struct list_head unbound_list;
>
> - /*
> - * A pool of objects to use as shadow copies of client batch buffers
> - * when the command parser is enabled. Prevents the client from
> - * modifying the batch contents after software parsing.
> - */
> - struct i915_gem_batch_pool batch_pool;
> -
> /** Usable portion of the GTT for GEM */
> unsigned long stolen_base; /* limited to low memory (32-bit) */
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4d8fb3f4a7a1..9f33005bdfd1 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5039,8 +5039,6 @@ i915_gem_load(struct drm_device *dev)
> dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
> register_oom_notifier(&dev_priv->mm.oom_notifier);
>
> - i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool);
> -
> mutex_init(&dev_priv->fb_tracking.lock);
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index 21f3356cc0ab..1287abf55b84 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -96,8 +96,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>
> list_for_each_entry_safe(tmp, next,
> &pool->cache_list, batch_pool_list) {
> + /* The batches are strictly LRU ordered */
> if (tmp->active)
> - continue;
> + break;
This hunk would be a better fit for 2/6, correct?
> /* While we're looping, do some clean up */
> if (tmp->madv == __I915_MADV_PURGED) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 36f8f05928d1..8129a8c75a21 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1152,12 +1152,11 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
> u32 batch_len,
> bool is_master)
> {
> - struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
> struct drm_i915_gem_object *shadow_batch_obj;
> struct i915_vma *vma;
> int ret;
>
> - shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
> + shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
> PAGE_ALIGN(batch_len));
> if (IS_ERR(shadow_batch_obj))
> return shadow_batch_obj;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index fcb074bd55dc..a6d4d5502188 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1421,6 +1421,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
> ring->dev = dev;
> INIT_LIST_HEAD(&ring->active_list);
> INIT_LIST_HEAD(&ring->request_list);
> + i915_gem_batch_pool_init(dev, &ring->batch_pool);
> init_waitqueue_head(&ring->irq_queue);
>
> INIT_LIST_HEAD(&ring->execlist_queue);
Is there a corresponding cleanup part (batch pool fini) for intel_lrc.c?
Regards,
Tvrtko
More information about the Intel-gfx
mailing list