[Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand

Tue Nov 18 07:40:51 CET 2014

On Thursday 13 November 2014 03:58 PM, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.mateo at intel.com>
>
> Up until now, we have pinned every logical ring context backing object
> during creation, and left it pinned until destruction. This made my life
> easier, but it's a harmful thing to do, because we cause fragmentation
> of the GGTT (and, eventually, we would run out of space).
>
> This patch makes the pinning on-demand: the backing objects of the two
> contexts that are written to the ELSP are pinned right before submission
> and unpinned once the hardware is done with them. The only context that
> is still pinned regardless is the global default one, so that the HWS can
> still be accessed in the same way (ring->status_page).
>
> v2: In the early version of this patch, we were pinning the context as
> we put it into the ELSP: on the one hand, this is very efficient because
> only a maximum two contexts are pinned at any given time, but on the other
> hand, we cannot really pin in interrupt time :(
>
> v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
> Do not unpin default context in free_request.
>
> v4: Break out pin and unpin into functions.  Fix style problems reported
> by checkpatch
>
> v5: Remove unpin_lock as all pinning and unpinning is done with the struct
> mutex already locked.  Add WARN_ONs to make sure this is the case in future.
>
> Issue: VIZ-4277
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c |   12 +++++-
>   drivers/gpu/drm/i915/i915_drv.h     |    1 +
>   drivers/gpu/drm/i915/i915_gem.c     |   39 +++++++++++++-------
>   drivers/gpu/drm/i915/intel_lrc.c    |   69 +++++++++++++++++++++++++++++------
>   drivers/gpu/drm/i915/intel_lrc.h    |    4 ++
>   5 files changed, 98 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e60d5c2..6eaf813 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
>   				continue;
>   
>   			if (ctx_obj) {
> -				struct page *page = i915_gem_object_get_page(ctx_obj, 1);
> -				uint32_t *reg_state = kmap_atomic(page);
> +				struct page *page;
> +				uint32_t *reg_state;
>   				int j;
>   
> +				i915_gem_obj_ggtt_pin(ctx_obj,
> +						GEN8_LR_CONTEXT_ALIGN, 0);
> +
> +				page = i915_gem_object_get_page(ctx_obj, 1);
> +				reg_state = kmap_atomic(page);
> +
>   				seq_printf(m, "CONTEXT: %s %u\n", ring->name,
>   						intel_execlists_ctx_id(ctx_obj));
>   
> @@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
>   				}
>   				kunmap_atomic(reg_state);
>   
> +				i915_gem_object_ggtt_unpin(ctx_obj);
> +
>   				seq_putc(m, '\n');
>   			}
>   		}
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 059330c..3c7299d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -655,6 +655,7 @@ struct intel_context {
>   	struct {
>   		struct drm_i915_gem_object *state;
>   		struct intel_ringbuffer *ringbuf;
> +		int unpin_count;
>   	} engine[I915_NUM_RINGS];
>   
>   	struct list_head link;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 408afe7..2ee6996 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
>   
>   static void i915_gem_free_request(struct drm_i915_gem_request *request)
>   {
> +	struct intel_context *ctx = request->ctx;
> +
>   	list_del(&request->list);
>   	i915_gem_request_remove_from_client(request);
>   
> -	if (request->ctx)
> -		i915_gem_context_unreference(request->ctx);
> +	if (i915.enable_execlists && ctx) {
> +		struct intel_engine_cs *ring = request->ring;
>   
> +		if (ctx != ring->default_context)
> +			intel_lr_context_unpin(ring, ctx);
> +		i915_gem_context_unreference(ctx);
> +	}
>   	kfree(request);
>   }
>   
> @@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>   	}
>   
>   	/*
> +	 * Clear the execlists queue up before freeing the requests, as those
> +	 * are the ones that keep the context and ringbuffer backing objects
> +	 * pinned in place.
> +	 */
> +	while (!list_empty(&ring->execlist_queue)) {
> +		struct intel_ctx_submit_request *submit_req;
> +
> +		submit_req = list_first_entry(&ring->execlist_queue,
> +				struct intel_ctx_submit_request,
> +				execlist_link);
> +		list_del(&submit_req->execlist_link);
> +		intel_runtime_pm_put(dev_priv);
> +		i915_gem_context_unreference(submit_req->ctx);
> +		kfree(submit_req);
> +	}
> +
> +	/*
>   	 * We must free the requests after all the corresponding objects have
>   	 * been moved off active lists. Which is the same order as the normal
>   	 * retire_requests function does. This is important if object hold
> @@ -2570,18 +2593,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>   		i915_gem_free_request(request);
>   	}
>   
> -	while (!list_empty(&ring->execlist_queue)) {
> -		struct intel_ctx_submit_request *submit_req;
> -
> -		submit_req = list_first_entry(&ring->execlist_queue,
> -				struct intel_ctx_submit_request,
> -				execlist_link);
> -		list_del(&submit_req->execlist_link);
> -		intel_runtime_pm_put(dev_priv);
> -		i915_gem_context_unreference(submit_req->ctx);
> -		kfree(submit_req);
> -	}
> -
>   	/* These may not have been flush before the reset, do so now */
>   	kfree(ring->preallocated_lazy_request);
>   	ring->preallocated_lazy_request = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 906b985..f7fa0f7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -139,8 +139,6 @@
>   #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
>   #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
>   
> -#define GEN8_LR_CONTEXT_ALIGN 4096
> -
>   #define RING_EXECLIST_QFULL		(1 << 0x2)
>   #define RING_EXECLIST1_VALID		(1 << 0x3)
>   #define RING_EXECLIST0_VALID		(1 << 0x4)
> @@ -801,9 +799,40 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
>   	execlists_context_queue(ring, ctx, ringbuf->tail);
>   }
>   
> +static int intel_lr_context_pin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx)
> +{
> +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +	int ret = 0;
> +
> +	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));

With pin specific mutex from previous patch set removed.
Reviewed-by: Deepak S<deepak.s at linux.intel.com>

> +	if (ctx->engine[ring->id].unpin_count++ == 0) {
> +		ret = i915_gem_obj_ggtt_pin(ctx_obj,
> +				GEN8_LR_CONTEXT_ALIGN, 0);
> +		if (ret)
> +			ctx->engine[ring->id].unpin_count = 0;
> +	}
> +
> +	return ret;
> +}
> +
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx)
> +{
> +	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> +
> +	if (ctx_obj) {
> +		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +		if (--ctx->engine[ring->id].unpin_count == 0)
> +			i915_gem_object_ggtt_unpin(ctx_obj);
> +	}
> +}
> +
>   static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
>   				    struct intel_context *ctx)
>   {
> +	int ret;
> +
>   	if (ring->outstanding_lazy_seqno)
>   		return 0;
>   
> @@ -814,6 +843,14 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
>   		if (request == NULL)
>   			return -ENOMEM;
>   
> +		if (ctx != ring->default_context) {
> +			ret = intel_lr_context_pin(ring, ctx);
> +			if (ret) {
> +				kfree(request);
> +				return ret;
> +			}
> +		}
> +
>   		/* Hold a reference to the context this request belongs to
>   		 * (we will need it when the time comes to emit/retire the
>   		 * request).
> @@ -1626,12 +1663,16 @@ void intel_lr_context_free(struct intel_context *ctx)
>   
>   	for (i = 0; i < I915_NUM_RINGS; i++) {
>   		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
> -		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
>   
>   		if (ctx_obj) {
> +			struct intel_ringbuffer *ringbuf =
> +					ctx->engine[i].ringbuf;
> +			struct intel_engine_cs *ring = ringbuf->ring;
> +
>   			intel_destroy_ringbuffer_obj(ringbuf);
>   			kfree(ringbuf);
> -			i915_gem_object_ggtt_unpin(ctx_obj);
> +			if (ctx == ring->default_context)
> +				i915_gem_object_ggtt_unpin(ctx_obj);
>   			drm_gem_object_unreference(&ctx_obj->base);
>   		}
>   	}
> @@ -1695,6 +1736,7 @@ static int lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
>   int intel_lr_context_deferred_create(struct intel_context *ctx,
>   				     struct intel_engine_cs *ring)
>   {
> +	const bool is_global_default_ctx = (ctx == ring->default_context);
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_gem_object *ctx_obj;
>   	uint32_t context_size;
> @@ -1714,18 +1756,22 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>   		return ret;
>   	}
>   
> -	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> -	if (ret) {
> -		DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
> -		drm_gem_object_unreference(&ctx_obj->base);
> -		return ret;
> +	if (is_global_default_ctx) {
> +		ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> +		if (ret) {
> +			DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
> +					ret);
> +			drm_gem_object_unreference(&ctx_obj->base);
> +			return ret;
> +		}
>   	}
>   
>   	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
>   	if (!ringbuf) {
>   		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
>   				ring->name);
> -		i915_gem_object_ggtt_unpin(ctx_obj);
> +		if (is_global_default_ctx)
> +			i915_gem_object_ggtt_unpin(ctx_obj);
>   		drm_gem_object_unreference(&ctx_obj->base);
>   		ret = -ENOMEM;
>   		return ret;
> @@ -1787,7 +1833,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>   
>   error:
>   	kfree(ringbuf);
> -	i915_gem_object_ggtt_unpin(ctx_obj);
> +	if (is_global_default_ctx)
> +		i915_gem_object_ggtt_unpin(ctx_obj);
>   	drm_gem_object_unreference(&ctx_obj->base);
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 84bbf19..14b216b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -24,6 +24,8 @@
>   #ifndef _INTEL_LRC_H_
>   #define _INTEL_LRC_H_
>   
> +#define GEN8_LR_CONTEXT_ALIGN 4096
> +
>   /* Execlists regs */
>   #define RING_ELSP(ring)			((ring)->mmio_base+0x230)
>   #define RING_EXECLIST_STATUS(ring)	((ring)->mmio_base+0x234)
> @@ -67,6 +69,8 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
>   void intel_lr_context_free(struct intel_context *ctx);
>   int intel_lr_context_deferred_create(struct intel_context *ctx,
>   				     struct intel_engine_cs *ring);
> +void intel_lr_context_unpin(struct intel_engine_cs *ring,
> +		struct intel_context *ctx);
>   
>   /* Execlists */
>   int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);