[Intel-gfx] [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand

akash goel akash.goels at gmail.com
Tue Nov 18 06:18:37 CET 2014


Reviewed the patch and it looks fine.
Reviewed-by: Akash Goel <akash.goels at gmail.com>


On Thu, Nov 13, 2014 at 3:58 PM, Thomas Daniel <thomas.daniel at intel.com>
wrote:

> Same as with the context, pinning to GGTT regardless is harmful (it
> badly fragments the GGTT and can even exhaust it).
>
> Unfortunately, this case is also more complex than the previous one
> because we need to map and access the ringbuffer in several places
> along the execbuffer path (and we cannot make do by leaving the
> default ringbuffer pinned, as before). Also, the context object
> itself contains a pointer to the ringbuffer address that we have to
> keep updated if we are going to allow the ringbuffer to move around.
>
> v2: Same as with the context pinning, we cannot really do it during
> an interrupt. Also, pin the default ringbuffers objects regardless
> (makes error capture a lot easier).
>
> v3: Rebased. Take a pin reference of the ringbuffer for each item
> in the execlist request queue because the hardware may still be using
> the ringbuffer after the MI_USER_INTERRUPT to notify the seqno update
> is executed.  The ringbuffer must remain pinned until the context save
> is complete.  No longer pin and unpin ringbuffer in
> populate_lr_context() - this transient address is meaningless and the
> pinning can cause a sleep while atomic.
>
> v4: Moved ringbuffer pin and unpin into the lr_context_pin functions.
> Downgraded pinning check BUG_ONs to WARN_ONs.
>
> v5: Reinstated WARN_ONs for unexpected execlist states.  Removed unused
> variable.
>
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        |  102
> +++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   85 +++++++++++++++-----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    3 +
>  3 files changed, 128 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> b/drivers/gpu/drm/i915/intel_lrc.c
> index f7fa0f7..ca20f91 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -202,6 +202,9 @@ enum {
>  };
>  #define GEN8_CTX_ID_SHIFT 32
>
> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
> +               struct intel_context *ctx);
> +
>  /**
>   * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
>   * @dev: DRM device.
> @@ -339,7 +342,9 @@ static void execlists_elsp_write(struct
> intel_engine_cs *ring,
>         spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>  }
>
> -static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj,
> u32 tail)
> +static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
> +                                   struct drm_i915_gem_object *ring_obj,
> +                                   u32 tail)
>  {
>         struct page *page;
>         uint32_t *reg_state;
> @@ -348,6 +353,7 @@ static int execlists_ctx_write_tail(struct
> drm_i915_gem_object *ctx_obj, u32 tai
>         reg_state = kmap_atomic(page);
>
>         reg_state[CTX_RING_TAIL+1] = tail;
> +       reg_state[CTX_RING_BUFFER_START+1] =
> i915_gem_obj_ggtt_offset(ring_obj);
>
>         kunmap_atomic(reg_state);
>
> @@ -358,21 +364,25 @@ static int execlists_submit_context(struct
> intel_engine_cs *ring,
>                                     struct intel_context *to0, u32 tail0,
>                                     struct intel_context *to1, u32 tail1)
>  {
> -       struct drm_i915_gem_object *ctx_obj0;
> +       struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
> +       struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
>         struct drm_i915_gem_object *ctx_obj1 = NULL;
> +       struct intel_ringbuffer *ringbuf1 = NULL;
>
> -       ctx_obj0 = to0->engine[ring->id].state;
>         BUG_ON(!ctx_obj0);
>         WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
> +       WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
>
> -       execlists_ctx_write_tail(ctx_obj0, tail0);
> +       execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
>
>         if (to1) {
> +               ringbuf1 = to1->engine[ring->id].ringbuf;
>                 ctx_obj1 = to1->engine[ring->id].state;
>                 BUG_ON(!ctx_obj1);
>                 WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
> +               WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
>
> -               execlists_ctx_write_tail(ctx_obj1, tail1);
> +               execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
>         }
>
>         execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
> @@ -524,6 +534,10 @@ static int execlists_context_queue(struct
> intel_engine_cs *ring,
>                 return -ENOMEM;
>         req->ctx = to;
>         i915_gem_context_reference(req->ctx);
> +
> +       if (to != ring->default_context)
> +               intel_lr_context_pin(ring, to);
> +
>         req->ring = ring;
>         req->tail = tail;
>
> @@ -544,7 +558,7 @@ static int execlists_context_queue(struct
> intel_engine_cs *ring,
>
>                 if (to == tail_req->ctx) {
>                         WARN(tail_req->elsp_submitted != 0,
> -                            "More than 2 already-submitted reqs
> queued\n");
> +                               "More than 2 already-submitted reqs
> queued\n");
>                         list_del(&tail_req->execlist_link);
>                         list_add_tail(&tail_req->execlist_link,
>                                 &ring->execlist_retired_req_list);
> @@ -732,6 +746,12 @@ void intel_execlists_retire_requests(struct
> intel_engine_cs *ring)
>         spin_unlock_irqrestore(&ring->execlist_lock, flags);
>
>         list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> +               struct intel_context *ctx = req->ctx;
> +               struct drm_i915_gem_object *ctx_obj =
> +                               ctx->engine[ring->id].state;
> +
> +               if (ctx_obj && (ctx != ring->default_context))
> +                       intel_lr_context_unpin(ring, ctx);
>                 intel_runtime_pm_put(dev_priv);
>                 i915_gem_context_unreference(req->ctx);
>                 list_del(&req->execlist_link);
> @@ -803,6 +823,7 @@ static int intel_lr_context_pin(struct intel_engine_cs
> *ring,
>                 struct intel_context *ctx)
>  {
>         struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>         int ret = 0;
>
>         WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> @@ -810,21 +831,35 @@ static int intel_lr_context_pin(struct
> intel_engine_cs *ring,
>                 ret = i915_gem_obj_ggtt_pin(ctx_obj,
>                                 GEN8_LR_CONTEXT_ALIGN, 0);
>                 if (ret)
> -                       ctx->engine[ring->id].unpin_count = 0;
> +                       goto reset_unpin_count;
> +
> +               ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
> +               if (ret)
> +                       goto unpin_ctx_obj;
>         }
>
>         return ret;
> +
> +unpin_ctx_obj:
> +       i915_gem_object_ggtt_unpin(ctx_obj);
> +reset_unpin_count:
> +       ctx->engine[ring->id].unpin_count = 0;
> +
> +       return ret;
>  }
>
>  void intel_lr_context_unpin(struct intel_engine_cs *ring,
>                 struct intel_context *ctx)
>  {
>         struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
>
>         if (ctx_obj) {
>                 WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> -               if (--ctx->engine[ring->id].unpin_count == 0)
> +               if (--ctx->engine[ring->id].unpin_count == 0) {
> +                       intel_unpin_ringbuffer_obj(ringbuf);
>                         i915_gem_object_ggtt_unpin(ctx_obj);
> +               }
>         }
>  }
>
> @@ -1541,7 +1576,6 @@ populate_lr_context(struct intel_context *ctx,
> struct drm_i915_gem_object *ctx_o
>  {
>         struct drm_device *dev = ring->dev;
>         struct drm_i915_private *dev_priv = dev->dev_private;
> -       struct drm_i915_gem_object *ring_obj = ringbuf->obj;
>         struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
>         struct page *page;
>         uint32_t *reg_state;
> @@ -1587,7 +1621,9 @@ populate_lr_context(struct intel_context *ctx,
> struct drm_i915_gem_object *ctx_o
>         reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
>         reg_state[CTX_RING_TAIL+1] = 0;
>         reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
> -       reg_state[CTX_RING_BUFFER_START+1] =
> i915_gem_obj_ggtt_offset(ring_obj);
> +       /* Ring buffer start address is not known until the buffer is
> pinned.
> +        * It is written to the context image in execlists_update_context()
> +        */
>         reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
>         reg_state[CTX_RING_BUFFER_CONTROL+1] =
>                         ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) |
> RING_VALID;
> @@ -1669,10 +1705,12 @@ void intel_lr_context_free(struct intel_context
> *ctx)
>                                         ctx->engine[i].ringbuf;
>                         struct intel_engine_cs *ring = ringbuf->ring;
>
> +                       if (ctx == ring->default_context) {
> +                               intel_unpin_ringbuffer_obj(ringbuf);
> +                               i915_gem_object_ggtt_unpin(ctx_obj);
> +                       }
>                         intel_destroy_ringbuffer_obj(ringbuf);
>                         kfree(ringbuf);
> -                       if (ctx == ring->default_context)
> -                               i915_gem_object_ggtt_unpin(ctx_obj);
>                         drm_gem_object_unreference(&ctx_obj->base);
>                 }
>         }
> @@ -1770,11 +1808,8 @@ int intel_lr_context_deferred_create(struct
> intel_context *ctx,
>         if (!ringbuf) {
>                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
>                                 ring->name);
> -               if (is_global_default_ctx)
> -                       i915_gem_object_ggtt_unpin(ctx_obj);
> -               drm_gem_object_unreference(&ctx_obj->base);
>                 ret = -ENOMEM;
> -               return ret;
> +               goto error_unpin_ctx;
>         }
>
>         ringbuf->ring = ring;
> @@ -1787,22 +1822,30 @@ int intel_lr_context_deferred_create(struct
> intel_context *ctx,
>         ringbuf->space = ringbuf->size;
>         ringbuf->last_retired_head = -1;
>
> -       /* TODO: For now we put this in the mappable region so that we can
> reuse
> -        * the existing ringbuffer code which ioremaps it. When we start
> -        * creating many contexts, this will no longer work and we must
> switch
> -        * to a kmapish interface.
> -        */
> -       ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> -       if (ret) {
> -               DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s:
> %d\n",
> +       if (ringbuf->obj == NULL) {
> +               ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> +               if (ret) {
> +                       DRM_DEBUG_DRIVER(
> +                               "Failed to allocate ringbuffer obj %s:
> %d\n",
>                                 ring->name, ret);
> -               goto error;
> +                       goto error_free_rbuf;
> +               }
> +
> +               if (is_global_default_ctx) {
> +                       ret = intel_pin_and_map_ringbuffer_obj(dev,
> ringbuf);
> +                       if (ret) {
> +                               DRM_ERROR(
> +                                       "Failed to pin and map ringbuffer
> %s: %d\n",
> +                                       ring->name, ret);
> +                               goto error_destroy_rbuf;
> +                       }
> +               }
> +
>         }
>
>         ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
>         if (ret) {
>                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
> -               intel_destroy_ringbuffer_obj(ringbuf);
>                 goto error;
>         }
>
> @@ -1823,7 +1866,6 @@ int intel_lr_context_deferred_create(struct
> intel_context *ctx,
>                         DRM_ERROR("Init render state failed: %d\n", ret);
>                         ctx->engine[ring->id].ringbuf = NULL;
>                         ctx->engine[ring->id].state = NULL;
> -                       intel_destroy_ringbuffer_obj(ringbuf);
>                         goto error;
>                 }
>                 ctx->rcs_initialized = true;
> @@ -1832,7 +1874,13 @@ int intel_lr_context_deferred_create(struct
> intel_context *ctx,
>         return 0;
>
>  error:
> +       if (is_global_default_ctx)
> +               intel_unpin_ringbuffer_obj(ringbuf);
> +error_destroy_rbuf:
> +       intel_destroy_ringbuffer_obj(ringbuf);
> +error_free_rbuf:
>         kfree(ringbuf);
> +error_unpin_ctx:
>         if (is_global_default_ctx)
>                 i915_gem_object_ggtt_unpin(ctx_obj);
>         drm_gem_object_unreference(&ctx_obj->base);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index a8f72e8..0c4aab1 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1721,13 +1721,42 @@ static int init_phys_status_page(struct
> intel_engine_cs *ring)
>         return 0;
>  }
>
> -void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
>  {
> -       if (!ringbuf->obj)
> -               return;
> -
>         iounmap(ringbuf->virtual_start);
> +       ringbuf->virtual_start = NULL;
>         i915_gem_object_ggtt_unpin(ringbuf->obj);
> +}
> +
> +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> +                                    struct intel_ringbuffer *ringbuf)
> +{
> +       struct drm_i915_private *dev_priv = to_i915(dev);
> +       struct drm_i915_gem_object *obj = ringbuf->obj;
> +       int ret;
> +
> +       ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> +       if (ret)
> +               return ret;
> +
> +       ret = i915_gem_object_set_to_gtt_domain(obj, true);
> +       if (ret) {
> +               i915_gem_object_ggtt_unpin(obj);
> +               return ret;
> +       }
> +
> +       ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
> +                       i915_gem_obj_ggtt_offset(obj), ringbuf->size);
> +       if (ringbuf->virtual_start == NULL) {
> +               i915_gem_object_ggtt_unpin(obj);
> +               return -EINVAL;
> +       }
> +
> +       return 0;
> +}
> +
> +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> +{
>         drm_gem_object_unreference(&ringbuf->obj->base);
>         ringbuf->obj = NULL;
>  }
> @@ -1735,12 +1764,7 @@ void intel_destroy_ringbuffer_obj(struct
> intel_ringbuffer *ringbuf)
>  int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>                                struct intel_ringbuffer *ringbuf)
>  {
> -       struct drm_i915_private *dev_priv = to_i915(dev);
>         struct drm_i915_gem_object *obj;
> -       int ret;
> -
> -       if (ringbuf->obj)
> -               return 0;
>
>         obj = NULL;
>         if (!HAS_LLC(dev))
> @@ -1753,30 +1777,9 @@ int intel_alloc_ringbuffer_obj(struct drm_device
> *dev,
>         /* mark ring buffers as read-only from GPU side by default */
>         obj->gt_ro = 1;
>
> -       ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> -       if (ret)
> -               goto err_unref;
> -
> -       ret = i915_gem_object_set_to_gtt_domain(obj, true);
> -       if (ret)
> -               goto err_unpin;
> -
> -       ringbuf->virtual_start =
> -               ioremap_wc(dev_priv->gtt.mappable_base +
> i915_gem_obj_ggtt_offset(obj),
> -                               ringbuf->size);
> -       if (ringbuf->virtual_start == NULL) {
> -               ret = -EINVAL;
> -               goto err_unpin;
> -       }
> -
>         ringbuf->obj = obj;
> -       return 0;
>
> -err_unpin:
> -       i915_gem_object_ggtt_unpin(obj);
> -err_unref:
> -       drm_gem_object_unreference(&obj->base);
> -       return ret;
> +       return 0;
>  }
>
>  static int intel_init_ring_buffer(struct drm_device *dev,
> @@ -1813,10 +1816,21 @@ static int intel_init_ring_buffer(struct
> drm_device *dev,
>                         goto error;
>         }
>
> -       ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> -       if (ret) {
> -               DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
> ring->name, ret);
> -               goto error;
> +       if (ringbuf->obj == NULL) {
> +               ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
> +               if (ret) {
> +                       DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
> +                                       ring->name, ret);
> +                       goto error;
> +               }
> +
> +               ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
> +               if (ret) {
> +                       DRM_ERROR("Failed to pin and map ringbuffer %s:
> %d\n",
> +                                       ring->name, ret);
> +                       intel_destroy_ringbuffer_obj(ringbuf);
> +                       goto error;
> +               }
>         }
>
>         /* Workaround an erratum on the i830 which causes a hang if
> @@ -1854,6 +1868,7 @@ void intel_cleanup_ring_buffer(struct
> intel_engine_cs *ring)
>         intel_stop_ring_buffer(ring);
>         WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE)
> == 0);
>
> +       intel_unpin_ringbuffer_obj(ringbuf);
>         intel_destroy_ringbuffer_obj(ringbuf);
>         ring->preallocated_lazy_request = NULL;
>         ring->outstanding_lazy_seqno = 0;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h
> b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 8c002d2..365854ad 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -382,6 +382,9 @@ intel_write_status_page(struct intel_engine_cs *ring,
>  #define I915_GEM_HWS_SCRATCH_INDEX     0x30
>  #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX <<
> MI_STORE_DWORD_INDEX_SHIFT)
>
> +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
> +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> +                                    struct intel_ringbuffer *ringbuf);
>  void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
>  int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>                                struct intel_ringbuffer *ringbuf);
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20141118/33d19af8/attachment.html>


More information about the Intel-gfx mailing list