[Intel-gfx] [PATCH 102/190] drm/i915: Move the "per-ring" default_context to the device

Mon Jan 11 06:40:41 PST 2016

On 11/01/16 10:44, Chris Wilson wrote:
> We have a false notion of a default_context allocated per engine,
> whereas actually it is a singular context reserved for kernel use.
> Remove it from the engines, and rename it thus.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        | 19 ++++++++++++++-----
>   drivers/gpu/drm/i915/i915_drv.h            |  1 +
>   drivers/gpu/drm/i915/i915_gem.c            |  2 +-
>   drivers/gpu/drm/i915/i915_gem_context.c    | 28 +++++++++++-----------------
>   drivers/gpu/drm/i915/i915_gem_evict.c      |  4 ++--
>   drivers/gpu/drm/i915/i915_guc_submission.c |  9 +++++----
>   drivers/gpu/drm/i915/intel_display.c       |  2 +-
>   drivers/gpu/drm/i915/intel_lrc.c           |  6 +++---
>   drivers/gpu/drm/i915/intel_overlay.c       |  8 ++++----
>   drivers/gpu/drm/i915/intel_ringbuffer.h    |  1 -
>   10 files changed, 42 insertions(+), 38 deletions(-)

Well generally, I like this, 'cos it looks just like my patch that 
strangely was rejected, but ...

> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index ea5b9f6d0fc9..dee66807c6bd 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1960,12 +1960,21 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   			continue;
>
>   		seq_puts(m, "HW context ");
> +		if (IS_ERR(ctx->file_priv)) {
> +			seq_puts(m, "(deleted) ");
> +		} else if (ctx->file_priv) {
> +			struct pid *pid = ctx->file_priv->file->pid;
> +			struct task_struct *task;
> +
> +			task = get_pid_task(pid, PIDTYPE_PID);
> +			if (task) {
> +				seq_printf(m, "(%s [%d]) ",
> +					   task->comm, task->pid);
> +				put_task_struct(task);
> +			}
> +		} else
> +			seq_puts(m, "(kernel) ");

Improper formatting, needs {} round else clause. Would look prettier to 
put "if (!ctx->file_priv)" first of all in this if-ladder, so the 
trivial cases (kernel, deleted) are dealt with first.

>   		describe_ctx(m, ctx);
> -		for_each_ring(ring, dev_priv, i) {
> -			if (ring->default_context == ctx)
> -				seq_printf(m, "(default context %s) ",
> -					   ring->name);
> -		}
>
>   		if (i915.enable_execlists) {
>   			seq_putc(m, '\n');
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5711ae3a22a1..4ada625b751e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1703,6 +1703,7 @@ struct drm_i915_private {
>
>   	struct pci_dev *bridge_dev;
>   	struct intel_engine_cs ring[I915_NUM_RINGS];
> +	struct intel_context *kernel_context;
>   	struct drm_i915_gem_object *semaphore_obj;
>   	uint32_t last_seqno, next_seqno;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d705005ca26e..a82a06a61262 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4097,7 +4097,7 @@ i915_gem_init_hw(struct drm_device *dev)
>   	 */
>   	init_unused_rings(dev);
>
> -	BUG_ON(!dev_priv->ring[RCS].default_context);
> +	BUG_ON(!dev_priv->kernel_context);
>
>   	ret = i915_ppgtt_init_hw(dev);
>   	if (ret) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 9f9892525945..593c22a702fa 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -216,6 +216,7 @@ static void context_close(struct intel_context *ctx)
>   	ctx->closed = true;
>   	if (ctx->ppgtt)
>   		i915_ppgtt_close(&ctx->ppgtt->base);
> +	ctx->file_priv = ERR_PTR(-ENOENT);

On the one hand, yes, I agree with zapping ctx->file_priv, if it's going 
to remain at all. I don't care whether it's NULLed or set to any other 
not-a-pointer (I had a version where it was POISONed).

On the other, I can't apply this patch to drm-intel-nightly without 
first taking at least some large subset of the 101 preceding patches; 
which I won't do because I don't have the time or knowledge to review 
every one of them, but if I take one without reviewing it then that 
invalidates reviewing subsequent dependants.

It's surely much easier for reviewers if patchsets are arranged in a 
short broad bush rather than a single long chain? I would probably give 
this my R-B if it were near the base of the tree and I could follow the 
(short) chain from root to tip; but here it's queued behind lots of 
things I don't want to check :(

>   	i915_gem_context_unreference(ctx);
>   }
>
> @@ -358,22 +359,21 @@ void i915_gem_context_reset(struct drm_device *dev)
>   			i915_gem_context_unreference(lctx);
>   			ring->last_context = NULL;
>   		}
> -
> -		/* Force the GPU state to be reinitialised on enabling */
> -		if (ring->default_context)
> -			ring->default_context->legacy_hw_ctx.initialized = false;
>   	}
> +
> +	/* Force the GPU state to be reinitialised on enabling */
> +	if (dev_priv->kernel_context)
> +		dev_priv->kernel_context->legacy_hw_ctx.initialized = false;
>   }
>
>   int i915_gem_context_init(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_context *ctx;
> -	int i;
>
>   	/* Init should only be called once per module load. Eventually the
>   	 * restriction on the context_disabled check can be loosened. */
> -	if (WARN_ON(dev_priv->ring[RCS].default_context))
> +	if (WARN_ON(dev_priv->kernel_context))
>   		return 0;
>
>   	if (intel_vgpu_active(dev) && HAS_LOGICAL_RING_CONTEXTS(dev)) {
> @@ -402,13 +402,7 @@ int i915_gem_context_init(struct drm_device *dev)
>   			  PTR_ERR(ctx));
>   		return PTR_ERR(ctx);
>   	}
> -
> -	for (i = 0; i < I915_NUM_RINGS; i++) {
> -		struct intel_engine_cs *ring = &dev_priv->ring[i];
> -
> -		/* NB: RCS will hold a ref for all rings */
> -		ring->default_context = ctx;
> -	}
> +	dev_priv->kernel_context = ctx;
>
>   	DRM_DEBUG_DRIVER("%s context support initialized\n",
>   			i915.enable_execlists ? "LR" :
> @@ -419,7 +413,7 @@ int i915_gem_context_init(struct drm_device *dev)
>   void i915_gem_context_fini(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_context *dctx = dev_priv->ring[RCS].default_context;
> +	struct intel_context *dctx = dev_priv->kernel_context;
>   	int i;
>
>   	if (dctx->legacy_hw_ctx.rcs_state) {
> @@ -449,10 +443,10 @@ void i915_gem_context_fini(struct drm_device *dev)
>   	for (i = 0; i < I915_NUM_RINGS; i++) {
>   		struct intel_engine_cs *ring = &dev_priv->ring[i];
>
> -		if (ring->last_context)
> -			i915_gem_context_unreference(ring->last_context);
> +		if (ring->last_context == NULL)
> +			continue;
>
> -		ring->default_context = NULL;
> +		i915_gem_context_unreference(ring->last_context);
>   		ring->last_context = NULL;
>   	}
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index b7bcc324a7a7..679b7dd3a312 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -45,10 +45,10 @@ static int switch_to_pinned_context(struct drm_i915_private *dev_priv)
>   	for_each_ring(ring, dev_priv, i) {
>   		struct drm_i915_gem_request *req;
>
> -		if (ring->last_context == ring->default_context)
> +		if (ring->last_context == dev_priv->kernel_context)
>   			continue;
>
> -		req = i915_gem_request_alloc(ring, ring->default_context);
> +		req = i915_gem_request_alloc(ring, dev_priv->kernel_context);
>   		if (IS_ERR(req))
>   			return PTR_ERR(req);
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index f4e09952d52c..63e58253280b 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -937,11 +937,12 @@ int i915_guc_submission_enable(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_guc *guc = &dev_priv->guc;
> -	struct intel_context *ctx = dev_priv->ring[RCS].default_context;
>   	struct i915_guc_client *client;
>
>   	/* client for execbuf submission */
> -	client = guc_client_alloc(dev, GUC_CTX_PRIORITY_KMD_NORMAL, ctx);
> +	client = guc_client_alloc(dev,
> +				  GUC_CTX_PRIORITY_KMD_NORMAL,
> +				  dev_priv->kernel_context);
>   	if (!client) {
>   		DRM_ERROR("Failed to create execbuf guc_client\n");
>   		return -ENOMEM;
> @@ -994,7 +995,7 @@ int intel_guc_suspend(struct drm_device *dev)
>   	if (!i915.enable_guc_submission)
>   		return 0;
>
> -	ctx = dev_priv->ring[RCS].default_context;
> +	ctx = dev_priv->kernel_context;
>
>   	data[0] = HOST2GUC_ACTION_ENTER_S_STATE;
>   	/* any value greater than GUC_POWER_D0 */
> @@ -1020,7 +1021,7 @@ int intel_guc_resume(struct drm_device *dev)
>   	if (!i915.enable_guc_submission)
>   		return 0;
>
> -	ctx = dev_priv->ring[RCS].default_context;
> +	ctx = dev_priv->kernel_context;
>
>   	data[0] = HOST2GUC_ACTION_EXIT_S_STATE;
>   	data[1] = GUC_POWER_D0;
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index f227cdaf38ec..e8f957785a64 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11672,7 +11672,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>   	 * into the display plane and skip any waits.
>   	 */
>   	if (!mmio_flip) {
> -		request = i915_gem_request_alloc(ring, ring->default_context);
> +		request = i915_gem_request_alloc(ring, ring->last_context);

Why is this ring->last_context rather than dev_priv->kernel_context? Is 
it guaranteed that the last context active on this engine is somehow 
connected with the flip activity? Could it not have been an earlier 
workload that prepared the framebuffer associated with the flip, then an 
unrelated batch in a different context ran on this engine, then the flip 
that relates to the earlier batch?

.Dave.

>   		if (IS_ERR(request)) {
>   			ret = PTR_ERR(request);
>   			goto cleanup_pending;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 850cacdf6dda..4d5196547e78 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1058,7 +1058,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring)
>   	struct drm_device *dev = ring->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	lrc_setup_hardware_status_page(ring,
> -				ring->default_context->engine[ring->id].state);
> +			dev_priv->kernel_context->engine[ring->id].state);
>
>   	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
>   	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
> @@ -1424,7 +1424,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
>   		kunmap(sg_page(ring->status_page.obj->pages->sgl));
>   		ring->status_page.obj = NULL;
>   	}
> -	intel_lr_context_unpin(ring->default_context, ring);
> +	intel_lr_context_unpin(ring->i915->kernel_context, ring);
>
>   	lrc_destroy_wa_ctx_obj(ring);
>   	ring->dev = NULL;
> @@ -1458,7 +1458,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
>   	if (ret)
>   		goto error;
>
> -	ctx = ring->default_context;
> +	ctx = ring->i915->kernel_context;
>
>   	ret = execlists_context_deferred_alloc(ctx, ring);
>   	if (ret)
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index df71c01f28f1..094ea87bf6be 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -240,7 +240,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>   	WARN_ON(overlay->active);
>   	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
>
> -	req = i915_gem_request_alloc(ring, ring->default_context);
> +	req = i915_gem_request_alloc(ring, dev_priv->kernel_context);
>   	if (IS_ERR(req))
>   		return PTR_ERR(req);
>
> @@ -283,7 +283,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
>   	if (tmp & (1 << 17))
>   		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
>
> -	req = i915_gem_request_alloc(ring, ring->default_context);
> +	req = i915_gem_request_alloc(ring, dev_priv->kernel_context);
>   	if (IS_ERR(req))
>   		return PTR_ERR(req);
>
> @@ -349,7 +349,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
>   	 * of the hw. Do it in both cases */
>   	flip_addr |= OFC_UPDATE;
>
> -	req = i915_gem_request_alloc(ring, ring->default_context);
> +	req = i915_gem_request_alloc(ring, dev_priv->kernel_context);
>   	if (IS_ERR(req))
>   		return PTR_ERR(req);
>
> @@ -423,7 +423,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>   		/* synchronous slowpath */
>   		struct drm_i915_gem_request *req;
>
> -		req = i915_gem_request_alloc(ring, ring->default_context);
> +		req = i915_gem_request_alloc(ring, dev_priv->kernel_context);
>   		if (req)
>   			return PTR_ERR(req);
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 3d4d5711aea9..868cc8d5abb3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -316,7 +316,6 @@ struct intel_engine_cs {
>   	u32 last_submitted_seqno;
>   	unsigned user_interrupts;
>
> -	struct intel_context *default_context;
>   	struct intel_context *last_context;
>
>   	struct intel_engine_hangcheck hangcheck;
>