[Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand
Deepak S
deepak.s at intel.com
Tue Nov 18 07:40:51 CET 2014
On Thursday 13 November 2014 03:58 PM, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.mateo at intel.com>
>
> Up until now, we have pinned every logical ring context backing object
> during creation, and left it pinned until destruction. This made my life
> easier, but it's a harmful thing to do, because we cause fragmentation
> of the GGTT (and, eventually, we would run out of space).
>
> This patch makes the pinning on-demand: the backing objects of the two
> contexts that are written to the ELSP are pinned right before submission
> and unpinned once the hardware is done with them. The only context that
> is still pinned regardless is the global default one, so that the HWS can
> still be accessed in the same way (ring->status_page).
>
> v2: In the early version of this patch, we were pinning the context as
> we put it into the ELSP: on the one hand, this is very efficient because
> only a maximum of two contexts are pinned at any given time, but on the other
> hand, we cannot really pin in interrupt time :(
>
> v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> Do not unpin default context in free_request.
>
> v4: Break out pin and unpin into functions. Fix style problems reported
> by checkpatch
>
> v5: Remove unpin_lock as all pinning and unpinning is done with the struct
> mutex already locked. Add WARN_ONs to make sure this is the case in future.
>
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 12 +++++-
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++-------
> drivers/gpu/drm/i915/intel_lrc.c | 69 +++++++++++++++++++++++++++++------
> drivers/gpu/drm/i915/intel_lrc.h | 4 ++
> 5 files changed, 98 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e60d5c2..6eaf813 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
> continue;
>
> if (ctx_obj) {
> - struct page *page = i915_gem_object_get_page(ctx_obj, 1);
> - uint32_t *reg_state = kmap_atomic(page);
> + struct page *page;
> + uint32_t *reg_state;
> int j;
>
> + i915_gem_obj_ggtt_pin(ctx_obj,
> + GEN8_LR_CONTEXT_ALIGN, 0);
> +
> + page = i915_gem_object_get_page(ctx_obj, 1);
> + reg_state = kmap_atomic(page);
> +
> seq_printf(m, "CONTEXT: %s %u\n", ring->name,
> intel_execlists_ctx_id(ctx_obj));
>
> @@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
> }
> kunmap_atomic(reg_state);
>
> + i915_gem_object_ggtt_unpin(ctx_obj);
> +
> seq_putc(m, '\n');
> }
> }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 059330c..3c7299d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -655,6 +655,7 @@ struct intel_context {
> struct {
> struct drm_i915_gem_object *state;
> struct intel_ringbuffer *ringbuf;
> + int unpin_count;
> } engine[I915_NUM_RINGS];
>
> struct list_head link;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 408afe7..2ee6996 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
>
> static void i915_gem_free_request(struct drm_i915_gem_request *request)
> {
> + struct intel_context *ctx = request->ctx;
> +
> list_del(&request->list);
> i915_gem_request_remove_from_client(request);
>
> - if (request->ctx)
> - i915_gem_context_unreference(request->ctx);
> + if (i915.enable_execlists && ctx) {
> + struct intel_engine_cs *ring = request->ring;
>
> + if (ctx != ring->default_context)
> + intel_lr_context_unpin(ring, ctx);
> + i915_gem_context_unreference(ctx);
> + }
> kfree(request);
> }
>
> @@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
> }
>
> /*
> + * Clear the execlists queue up before freeing the requests, as those
> + * are the ones that keep the context and ringbuffer backing objects
> + * pinned in place.
> + */
> + while (!list_empty(&ring->execlist_queue)) {
> + struct intel_ctx_submit_request *submit_req;
> +
> + submit_req = list_first_entry(&ring->execlist_queue,
> + struct intel_ctx_submit_request,
> + execlist_link);
> + list_del(&submit_req->execlist_link);
> + intel_runtime_pm_put(dev_priv);
> + i915_gem_context_unreference(submit_req->ctx);
> + kfree(submit_req);
> + }
> +
> + /*
> * We must free the requests after all the corresponding objects have
> * been moved off active lists. Which is the same order as the normal
> * retire_requests function does. This is important if object hold
> @@ -2570,18 +2593,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
> i915_gem_free_request(request);
> }
>
> - while (!list_empty(&ring->execlist_queue)) {
> - struct intel_ctx_submit_request *submit_req;
> -
> - submit_req = list_first_entry(&ring->execlist_queue,
> - struct intel_ctx_submit_request,
> - execlist_link);
> - list_del(&submit_req->execlist_link);
> - intel_runtime_pm_put(dev_priv);
> - i915_gem_context_unreference(submit_req->ctx);
> - kfree(submit_req);
> - }
> -
> /* These may not have been flush before the reset, do so now */
> kfree(ring->preallocated_lazy_request);
> ring->preallocated_lazy_request = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 906b985..f7fa0f7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -139,8 +139,6 @@
> #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
> #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
>
> -#define GEN8_LR_CONTEXT_ALIGN 4096
> -
> #define RING_EXECLIST_QFULL (1 << 0x2)
> #define RING_EXECLIST1_VALID (1 << 0x3)
> #define RING_EXECLIST0_VALID (1 << 0x4)
> @@ -801,9 +799,40 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
> execlists_context_queue(ring, ctx, ringbuf->tail);
> }
>
> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
> + struct intel_context *ctx)
> +{
> + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> + int ret = 0;
> +
> + WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
With the pin-specific mutex from the previous patch set now removed:
Reviewed-by: Deepak S <deepak.s at linux.intel.com>
> + if (ctx->engine[ring->id].unpin_count++ == 0) {
> + ret = i915_gem_obj_ggtt_pin(ctx_obj,
> + GEN8_LR_CONTEXT_ALIGN, 0);
> + if (ret)
> + ctx->engine[ring->id].unpin_count = 0;
> + }
> +
> + return ret;
> +}
> +
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> + struct intel_context *ctx)
> +{
> + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +
> + if (ctx_obj) {
> + WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> + if (--ctx->engine[ring->id].unpin_count == 0)
> + i915_gem_object_ggtt_unpin(ctx_obj);
> + }
> +}
> +
> static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
> struct intel_context *ctx)
> {
> + int ret;
> +
> if (ring->outstanding_lazy_seqno)
> return 0;
>
> @@ -814,6 +843,14 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
> if (request == NULL)
> return -ENOMEM;
>
> + if (ctx != ring->default_context) {
> + ret = intel_lr_context_pin(ring, ctx);
> + if (ret) {
> + kfree(request);
> + return ret;
> + }
> + }
> +
> /* Hold a reference to the context this request belongs to
> * (we will need it when the time comes to emit/retire the
> * request).
> @@ -1626,12 +1663,16 @@ void intel_lr_context_free(struct intel_context *ctx)
>
> for (i = 0; i < I915_NUM_RINGS; i++) {
> struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> - struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
>
> if (ctx_obj) {
> + struct intel_ringbuffer *ringbuf =
> + ctx->engine[i].ringbuf;
> + struct intel_engine_cs *ring = ringbuf->ring;
> +
> intel_destroy_ringbuffer_obj(ringbuf);
> kfree(ringbuf);
> - i915_gem_object_ggtt_unpin(ctx_obj);
> + if (ctx == ring->default_context)
> + i915_gem_object_ggtt_unpin(ctx_obj);
> drm_gem_object_unreference(&ctx_obj->base);
> }
> }
> @@ -1695,6 +1736,7 @@ static int lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
> int intel_lr_context_deferred_create(struct intel_context *ctx,
> struct intel_engine_cs *ring)
> {
> + const bool is_global_default_ctx = (ctx == ring->default_context);
> struct drm_device *dev = ring->dev;
> struct drm_i915_gem_object *ctx_obj;
> uint32_t context_size;
> @@ -1714,18 +1756,22 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
> return ret;
> }
>
> - ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> - if (ret) {
> - DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
> - drm_gem_object_unreference(&ctx_obj->base);
> - return ret;
> + if (is_global_default_ctx) {
> + ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> + if (ret) {
> + DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
> + ret);
> + drm_gem_object_unreference(&ctx_obj->base);
> + return ret;
> + }
> }
>
> ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
> if (!ringbuf) {
> DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
> ring->name);
> - i915_gem_object_ggtt_unpin(ctx_obj);
> + if (is_global_default_ctx)
> + i915_gem_object_ggtt_unpin(ctx_obj);
> drm_gem_object_unreference(&ctx_obj->base);
> ret = -ENOMEM;
> return ret;
> @@ -1787,7 +1833,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>
> error:
> kfree(ringbuf);
> - i915_gem_object_ggtt_unpin(ctx_obj);
> + if (is_global_default_ctx)
> + i915_gem_object_ggtt_unpin(ctx_obj);
> drm_gem_object_unreference(&ctx_obj->base);
> return ret;
> }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 84bbf19..14b216b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -24,6 +24,8 @@
> #ifndef _INTEL_LRC_H_
> #define _INTEL_LRC_H_
>
> +#define GEN8_LR_CONTEXT_ALIGN 4096
> +
> /* Execlists regs */
> #define RING_ELSP(ring) ((ring)->mmio_base+0x230)
> #define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234)
> @@ -67,6 +69,8 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
> void intel_lr_context_free(struct intel_context *ctx);
> int intel_lr_context_deferred_create(struct intel_context *ctx,
> struct intel_engine_cs *ring);
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> + struct intel_context *ctx);
>
> /* Execlists */
> int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
More information about the Intel-gfx
mailing list