[Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand

akash goel akash.goels at gmail.com
Mon Nov 17 15:38:01 CET 2014


Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel <akash.goels at gmail.com>

On Thu, Nov 13, 2014 at 3:58 PM, Thomas Daniel <thomas.daniel at intel.com>
wrote:

> From: Oscar Mateo <oscar.mateo at intel.com>
>
> Up until now, we have pinned every logical ring context backing object
> during creation, and left it pinned until destruction. This made my life
> easier, but it's a harmful thing to do, because we cause fragmentation
> of the GGTT (and, eventually, we would run out of space).
>
> This patch makes the pinning on-demand: the backing objects of the two
> contexts that are written to the ELSP are pinned right before submission
> and unpinned once the hardware is done with them. The only context that
> is still pinned regardless is the global default one, so that the HWS can
> still be accessed in the same way (ring->status_page).
>
> v2: In the early version of this patch, we were pinning the context as
> we put it into the ELSP: on the one hand, this is very efficient because
> only a maximum two contexts are pinned at any given time, but on the other
> hand, we cannot really pin in interrupt time :(
>
> v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> Do not unpin default context in free_request.
>
> v4: Break out pin and unpin into functions.  Fix style problems reported
> by checkpatch
>
> v5: Remove unpin_lock as all pinning and unpinning is done with the struct
> mutex already locked.  Add WARN_ONs to make sure this is the case in
> future.
>
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c |   12 +++++-
>  drivers/gpu/drm/i915/i915_drv.h     |    1 +
>  drivers/gpu/drm/i915/i915_gem.c     |   39 +++++++++++++-------
>  drivers/gpu/drm/i915/intel_lrc.c    |   69 +++++++++++++++++++++++++++++------
>  drivers/gpu/drm/i915/intel_lrc.h    |    4 ++
>  5 files changed, 98 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e60d5c2..6eaf813 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
>                                 continue;
>
>                         if (ctx_obj) {
> -                               struct page *page = i915_gem_object_get_page(ctx_obj, 1);
> -                               uint32_t *reg_state = kmap_atomic(page);
> +                               struct page *page;
> +                               uint32_t *reg_state;
>                                 int j;
>
> +                               i915_gem_obj_ggtt_pin(ctx_obj,
> +                                               GEN8_LR_CONTEXT_ALIGN, 0);
> +
> +                               page = i915_gem_object_get_page(ctx_obj, 1);
> +                               reg_state = kmap_atomic(page);
> +
>                                 seq_printf(m, "CONTEXT: %s %u\n", ring->name,
>                                            intel_execlists_ctx_id(ctx_obj));
>
> @@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
>                                 }
>                                 kunmap_atomic(reg_state);
>
> +                               i915_gem_object_ggtt_unpin(ctx_obj);
> +
>                                 seq_putc(m, '\n');
>                         }
>                 }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 059330c..3c7299d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -655,6 +655,7 @@ struct intel_context {
>         struct {
>                 struct drm_i915_gem_object *state;
>                 struct intel_ringbuffer *ringbuf;
> +               int unpin_count;
>         } engine[I915_NUM_RINGS];
>
>         struct list_head link;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 408afe7..2ee6996 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
>
>  static void i915_gem_free_request(struct drm_i915_gem_request *request)
>  {
> +       struct intel_context *ctx = request->ctx;
> +
>         list_del(&request->list);
>         i915_gem_request_remove_from_client(request);
>
> -       if (request->ctx)
> -               i915_gem_context_unreference(request->ctx);
> +       if (i915.enable_execlists && ctx) {
> +               struct intel_engine_cs *ring = request->ring;
>
> +               if (ctx != ring->default_context)
> +                       intel_lr_context_unpin(ring, ctx);
> +               i915_gem_context_unreference(ctx);
> +       }
>         kfree(request);
>  }
>
> @@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>         }
>
>         /*
> +        * Clear the execlists queue up before freeing the requests, as those
> +        * are the ones that keep the context and ringbuffer backing objects
> +        * pinned in place.
> +        */
> +       while (!list_empty(&ring->execlist_queue)) {
> +               struct intel_ctx_submit_request *submit_req;
> +
> +               submit_req = list_first_entry(&ring->execlist_queue,
> +                               struct intel_ctx_submit_request,
> +                               execlist_link);
> +               list_del(&submit_req->execlist_link);
> +               intel_runtime_pm_put(dev_priv);
> +               i915_gem_context_unreference(submit_req->ctx);
> +               kfree(submit_req);
> +       }
> +
> +       /*
>          * We must free the requests after all the corresponding objects have
>          * been moved off active lists. Which is the same order as the normal
>          * retire_requests function does. This is important if object hold
> @@ -2570,18 +2593,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>                 i915_gem_free_request(request);
>         }
>
> -       while (!list_empty(&ring->execlist_queue)) {
> -               struct intel_ctx_submit_request *submit_req;
> -
> -               submit_req = list_first_entry(&ring->execlist_queue,
> -                               struct intel_ctx_submit_request,
> -                               execlist_link);
> -               list_del(&submit_req->execlist_link);
> -               intel_runtime_pm_put(dev_priv);
> -               i915_gem_context_unreference(submit_req->ctx);
> -               kfree(submit_req);
> -       }
> -
>         /* These may not have been flush before the reset, do so now */
>         kfree(ring->preallocated_lazy_request);
>         ring->preallocated_lazy_request = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 906b985..f7fa0f7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -139,8 +139,6 @@
>  #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
>  #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
>
> -#define GEN8_LR_CONTEXT_ALIGN 4096
> -
>  #define RING_EXECLIST_QFULL            (1 << 0x2)
>  #define RING_EXECLIST1_VALID           (1 << 0x3)
>  #define RING_EXECLIST0_VALID           (1 << 0x4)
> @@ -801,9 +799,40 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
>         execlists_context_queue(ring, ctx, ringbuf->tail);
>  }
>
> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
> +               struct intel_context *ctx)
> +{
> +       struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +       int ret = 0;
> +
> +       WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +       if (ctx->engine[ring->id].unpin_count++ == 0) {
> +               ret = i915_gem_obj_ggtt_pin(ctx_obj,
> +                               GEN8_LR_CONTEXT_ALIGN, 0);
> +               if (ret)
> +                       ctx->engine[ring->id].unpin_count = 0;
> +       }
> +
> +       return ret;
> +}
> +
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> +               struct intel_context *ctx)
> +{
> +       struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +
> +       if (ctx_obj) {
> +               WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +               if (--ctx->engine[ring->id].unpin_count == 0)
> +                       i915_gem_object_ggtt_unpin(ctx_obj);
> +       }
> +}
> +
>  static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
>                                     struct intel_context *ctx)
>  {
> +       int ret;
> +
>         if (ring->outstanding_lazy_seqno)
>                 return 0;
>
> @@ -814,6 +843,14 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
>                 if (request == NULL)
>                         return -ENOMEM;
>
> +               if (ctx != ring->default_context) {
> +                       ret = intel_lr_context_pin(ring, ctx);
> +                       if (ret) {
> +                               kfree(request);
> +                               return ret;
> +                       }
> +               }
> +
>                 /* Hold a reference to the context this request belongs to
>                  * (we will need it when the time comes to emit/retire the
>                  * request).
> @@ -1626,12 +1663,16 @@ void intel_lr_context_free(struct intel_context *ctx)
>
>         for (i = 0; i < I915_NUM_RINGS; i++) {
>                 struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> -               struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
>
>                 if (ctx_obj) {
> +                       struct intel_ringbuffer *ringbuf =
> +                                       ctx->engine[i].ringbuf;
> +                       struct intel_engine_cs *ring = ringbuf->ring;
> +
>                         intel_destroy_ringbuffer_obj(ringbuf);
>                         kfree(ringbuf);
> -                       i915_gem_object_ggtt_unpin(ctx_obj);
> +                       if (ctx == ring->default_context)
> +                               i915_gem_object_ggtt_unpin(ctx_obj);
>                         drm_gem_object_unreference(&ctx_obj->base);
>                 }
>         }
> @@ -1695,6 +1736,7 @@ static int lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
>  int intel_lr_context_deferred_create(struct intel_context *ctx,
>                                      struct intel_engine_cs *ring)
>  {
> +       const bool is_global_default_ctx = (ctx == ring->default_context);
>         struct drm_device *dev = ring->dev;
>         struct drm_i915_gem_object *ctx_obj;
>         uint32_t context_size;
> @@ -1714,18 +1756,22 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>                 return ret;
>         }
>
> -       ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> -       if (ret) {
> -               DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
> -               drm_gem_object_unreference(&ctx_obj->base);
> -               return ret;
> +       if (is_global_default_ctx) {
> +               ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> +               if (ret) {
> +                       DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
> +                                       ret);
> +                       drm_gem_object_unreference(&ctx_obj->base);
> +                       return ret;
> +               }
>         }
>
>         ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
>         if (!ringbuf) {
>                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
>                                 ring->name);
> -               i915_gem_object_ggtt_unpin(ctx_obj);
> +               if (is_global_default_ctx)
> +                       i915_gem_object_ggtt_unpin(ctx_obj);
>                 drm_gem_object_unreference(&ctx_obj->base);
>                 ret = -ENOMEM;
>                 return ret;
> @@ -1787,7 +1833,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>
>  error:
>         kfree(ringbuf);
> -       i915_gem_object_ggtt_unpin(ctx_obj);
> +       if (is_global_default_ctx)
> +               i915_gem_object_ggtt_unpin(ctx_obj);
>         drm_gem_object_unreference(&ctx_obj->base);
>         return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 84bbf19..14b216b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -24,6 +24,8 @@
>  #ifndef _INTEL_LRC_H_
>  #define _INTEL_LRC_H_
>
> +#define GEN8_LR_CONTEXT_ALIGN 4096
> +
>  /* Execlists regs */
>  #define RING_ELSP(ring)                        ((ring)->mmio_base+0x230)
>  #define RING_EXECLIST_STATUS(ring)     ((ring)->mmio_base+0x234)
> @@ -67,6 +69,8 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
>  void intel_lr_context_free(struct intel_context *ctx);
>  int intel_lr_context_deferred_create(struct intel_context *ctx,
>                                      struct intel_engine_cs *ring);
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> +               struct intel_context *ctx);
>
>  /* Execlists */
> int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20141117/6f70367f/attachment.html>


More information about the Intel-gfx mailing list