[Intel-gfx] [PATCH 09/10] drm/i915/guc: Preemption! With GuC.
Michal Wajdeczko
michal.wajdeczko at intel.com
Thu Oct 5 15:00:31 UTC 2017
On Thu, 05 Oct 2017 11:20:04 +0200, Michał Winiarski
<michal.winiarski at intel.com> wrote:
> Pretty similar to what we have on execlists.
> We're reusing most of the GEM code, however, due to GuC quirks we need a
> couple of extra bits.
> Preemption is implemented as GuC action, and actions can be pretty slow.
> Because of that, we're using a mutex to serialize them. Since we're
> requesting preemption from the tasklet, the task of creating a workitem
> and wrapping it in GuC action is delegated to a worker.
>
> To detect that preemption has finished, we're using an additional
> piece of HWSP, and since we're not getting context switch interrupts,
> we're also emitting a user interrupt.
>
> The fact that our special preempt context has completed unfortunately
> doesn't mean that we're ready to submit new work. We also need to wait
> for GuC to finish its own processing.
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Jeff Mcgee <jeff.mcgee at intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Oscar Mateo <oscar.mateo at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
> ---
<snip>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 0f36bba9fc9e..21412ea3dc0e 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -498,6 +498,88 @@ static void guc_add_request(struct intel_guc *guc,
> spin_unlock(&client->wq_lock);
> }
> +#define GUC_PREEMPT_FINISHED 0x1
> +#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
> +static void inject_preempt_context(struct work_struct *work)
> +{
> + struct intel_engine_cs *engine =
> + container_of(work, typeof(*engine), guc_preempt_work);
> + struct drm_i915_private *dev_priv = engine->i915;
> + struct intel_guc *guc = &engine->i915->guc;
You already have dev_priv above, so:

	guc = &dev_priv->guc;
> + struct i915_guc_client *client = guc->preempt_client;
> + struct intel_context *ce = &client->owner->engine[engine->id];
> + u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner,
> + engine));
> + u32 *cs = ce->ring->vaddr + ce->ring->tail;
> + u32 data[7];
> +
> + if (engine->id == RCS) {
> + cs = gen8_emit_ggtt_write_render(
> + intel_hws_preempt_done_address(engine),
> + GUC_PREEMPT_FINISHED, cs);
> + *cs++ = MI_USER_INTERRUPT;
> + *cs++ = MI_NOOP;
> + } else {
> + cs = gen8_emit_ggtt_write(
> + intel_hws_preempt_done_address(engine),
> + GUC_PREEMPT_FINISHED, cs);
> + *cs++ = MI_USER_INTERRUPT;
> + *cs++ = MI_NOOP;
> + *cs++ = MI_NOOP;
> + *cs++ = MI_NOOP;
> + }
Maybe like this:

	cs = gen8_emit_ggtt_write_render(
			intel_hws_preempt_done_address(engine),
			GUC_PREEMPT_FINISHED, cs);
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;
	if (engine->id != RCS) {
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
	}
> +
> + ce->ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32);
> + ce->ring->tail &= (ce->ring->size - 1);
> +
> + if (i915_vma_is_map_and_fenceable(ce->ring->vma))
> + POSTING_READ_FW(GUC_STATUS);
> +
> + spin_lock_irq(&client->wq_lock);
> + guc_wq_item_append(client, engine->guc_id, ctx_desc,
> + ce->ring->tail / sizeof(u64), 0);
> + spin_unlock_irq(&client->wq_lock);
> +
> + data[0] = INTEL_GUC_ACTION_REQUEST_PREEMPTION;
> + data[1] = client->stage_id;
> + data[2] = INTEL_GUC_PREEMPT_OPTION_IMMEDIATE |
> + INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q |
> + INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q;
> + data[3] = engine->guc_id;
> + data[4] = guc->execbuf_client->priority;
Hmm, this is always GUC_CLIENT_PRIORITY_KMD_NORMAL, right?
> + data[5] = guc->execbuf_client->stage_id;
> + data[6] = guc->shared_data_offset;
> +
Please pull the action code above into a new helper, something like:

static int guc_send_preemption_request(struct intel_guc *guc,
				       u32 engine_guc_id)
{
	u32 data[7] = { ... };

	return intel_guc_send(guc, data, ARRAY_SIZE(data));
}
> + if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
> + WRITE_ONCE(engine->execlists.preempt, false);
> + tasklet_schedule(&engine->execlists.irq_tasklet);
> + }
> +}
> +
<snip>
> @@ -177,6 +177,8 @@ static int guc_enable_communication(struct intel_guc
> *guc)
> guc->shared_data_offset = guc_ggtt_offset(ctx->engine[RCS].state) +
> LRC_GUCSHR_PN * PAGE_SIZE;
> + guc->shared_data_addr = (void*)ctx->engine[RCS].lrc_reg_state -
> + LRC_STATE_PN * PAGE_SIZE;
Please pick a better place (see my comment in 1/10)
> if (HAS_GUC_CT(dev_priv))
> return intel_guc_enable_ct(guc);
> diff --git a/drivers/gpu/drm/i915/intel_uc.h
> b/drivers/gpu/drm/i915/intel_uc.h
> index c6c6f8513bbf..8e2818e5741e 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -124,6 +124,9 @@ struct intel_guc {
> /* Kernel context state ggtt offset, first page is shared with GuC */
> u32 shared_data_offset;
> + void *shared_data_addr;
> +
> + struct workqueue_struct *preempt_wq;
Please pick a better place (see my comment in 1/10)
> /* GuC's FW specific send function */
> int (*send)(struct intel_guc *guc, const u32 *data, u32 len);