[Intel-gfx] [PATCH 3/6] drm/i915: Split the batch pool by engine
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Mar 18 09:51:58 PDT 2015
On 03/09/2015 09:55 AM, Chris Wilson wrote:
> I woke up one morning and found 50k objects sitting in the batch pool
> and every search seemed to iterate the entire list... Painting the
> screen in oils would provide a more fluid display.
>
> One issue with the current design is that we only check for retirements
> on the current ring when preparing to submit a new batch. This means
> that we can have thousands of "active" batches on another ring that we
> have to walk over. The simplest way to avoid that is to split the pools
> per ring and then our LRU execution ordering will also ensure that the
> inactive buffers remain at the front.
Is the retire work handler not keeping up? Regularly or under some
specific circumstances?
> v2: execlists still requires duplicate code.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 33 ++++++++++++++++++------------
> drivers/gpu/drm/i915/i915_dma.c | 1 -
> drivers/gpu/drm/i915/i915_drv.h | 8 --------
> drivers/gpu/drm/i915/i915_gem.c | 2 --
> drivers/gpu/drm/i915/i915_gem_batch_pool.c | 3 ++-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +--
> drivers/gpu/drm/i915/intel_lrc.c | 1 +
> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++
> drivers/gpu/drm/i915/intel_ringbuffer.h | 8 ++++++++
> 9 files changed, 34 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d2e6475b0466..d8ca5c89647c 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -377,13 +377,17 @@ static void print_batch_pool_stats(struct seq_file *m,
> {
> struct drm_i915_gem_object *obj;
> struct file_stats stats;
> + struct intel_engine_cs *ring;
> + int i;
>
> memset(&stats, 0, sizeof(stats));
>
> - list_for_each_entry(obj,
> - &dev_priv->mm.batch_pool.cache_list,
> - batch_pool_list)
> - per_file_stats(0, obj, &stats);
> + for_each_ring(ring, dev_priv, i) {
> + list_for_each_entry(obj,
> + &ring->batch_pool.cache_list,
> + batch_pool_list)
> + per_file_stats(0, obj, &stats);
> + }
>
> print_file_stats(m, "batch pool", stats);
> }
> @@ -619,21 +623,24 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
> struct drm_device *dev = node->minor->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct drm_i915_gem_object *obj;
> + struct intel_engine_cs *ring;
> int count = 0;
> - int ret;
> + int ret, i;
>
> ret = mutex_lock_interruptible(&dev->struct_mutex);
> if (ret)
> return ret;
>
> - seq_puts(m, "cache:\n");
> - list_for_each_entry(obj,
> - &dev_priv->mm.batch_pool.cache_list,
> - batch_pool_list) {
> - seq_puts(m, " ");
> - describe_obj(m, obj);
> - seq_putc(m, '\n');
> - count++;
> + for_each_ring(ring, dev_priv, i) {
> + seq_printf(m, "%s cache:\n", ring->name);
> + list_for_each_entry(obj,
> + &ring->batch_pool.cache_list,
> + batch_pool_list) {
> + seq_puts(m, " ");
> + describe_obj(m, obj);
> + seq_putc(m, '\n');
> + count++;
> + }
> }
>
> seq_printf(m, "total: %d\n", count);
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 8e914303b831..70e050ac02b4 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1062,7 +1062,6 @@ int i915_driver_unload(struct drm_device *dev)
>
> mutex_lock(&dev->struct_mutex);
> i915_gem_cleanup_ringbuffer(dev);
> - i915_gem_batch_pool_fini(&dev_priv->mm.batch_pool);
> i915_gem_context_fini(dev);
> mutex_unlock(&dev->struct_mutex);
> i915_gem_cleanup_stolen(dev);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b62e3c478221..9fe1274cfe59 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -37,7 +37,6 @@
> #include "intel_bios.h"
> #include "intel_ringbuffer.h"
> #include "intel_lrc.h"
> -#include "i915_gem_batch_pool.h"
> #include "i915_gem_gtt.h"
> #include "i915_gem_render_state.h"
> #include <linux/io-mapping.h>
> @@ -1149,13 +1148,6 @@ struct i915_gem_mm {
> */
> struct list_head unbound_list;
>
> - /*
> - * A pool of objects to use as shadow copies of client batch buffers
> - * when the command parser is enabled. Prevents the client from
> - * modifying the batch contents after software parsing.
> - */
> - struct i915_gem_batch_pool batch_pool;
> -
> /** Usable portion of the GTT for GEM */
> unsigned long stolen_base; /* limited to low memory (32-bit) */
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4d8fb3f4a7a1..9f33005bdfd1 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5039,8 +5039,6 @@ i915_gem_load(struct drm_device *dev)
> dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
> register_oom_notifier(&dev_priv->mm.oom_notifier);
>
> - i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool);
> -
> mutex_init(&dev_priv->fb_tracking.lock);
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index 21f3356cc0ab..1287abf55b84 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -96,8 +96,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>
> list_for_each_entry_safe(tmp, next,
> &pool->cache_list, batch_pool_list) {
> + /* The batches are strictly LRU ordered */
> if (tmp->active)
> - continue;
> + break;
This hunk would be a better fit for 2/6, correct?
> /* While we're looping, do some clean up */
> if (tmp->madv == __I915_MADV_PURGED) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 36f8f05928d1..8129a8c75a21 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1152,12 +1152,11 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
> u32 batch_len,
> bool is_master)
> {
> - struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
> struct drm_i915_gem_object *shadow_batch_obj;
> struct i915_vma *vma;
> int ret;
>
> - shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
> + shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
> PAGE_ALIGN(batch_len));
> if (IS_ERR(shadow_batch_obj))
> return shadow_batch_obj;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index fcb074bd55dc..a6d4d5502188 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1421,6 +1421,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
> ring->dev = dev;
> INIT_LIST_HEAD(&ring->active_list);
> INIT_LIST_HEAD(&ring->request_list);
> + i915_gem_batch_pool_init(dev, &ring->batch_pool);
> init_waitqueue_head(&ring->irq_queue);
>
> INIT_LIST_HEAD(&ring->execlist_queue);
Is there a corresponding cleanup part (batch pool fini) for intel_lrc.c?
Regards,
Tvrtko
More information about the Intel-gfx
mailing list