[Intel-gfx] drm/i915: Watchdog timeout: Ringbuffer command emission for gen8+

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Jan 7 12:21:08 UTC 2019


On 05/01/2019 02:39, Carlos Santa wrote:
> From: Michel Thierry <michel.thierry at intel.com>
> 
> Emit the required commands into the ring buffer for starting and
> stopping the watchdog timer before/after batch buffer start during
> batch buffer submission.
> 
> v2: Support watchdog threshold per context engine, merge lri commands,
> and move watchdog commands emission to emit_bb_start. Request space of
> combined start_watchdog, bb_start and stop_watchdog to avoid any error
> after emitting bb_start.
> 
> v3: There were too many req->engine in emit_bb_start.
> Use GEM_BUG_ON instead of returning a very late EINVAL in the remote
> case of watchdog misprogramming; set correct LRI cmd size in
> emit_stop_watchdog. (Chris)
> 
> v4: Rebase.
> v5: use to_intel_context instead of ctx->engine.
> v6: Rebase.
> 
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Antonio Argenziano <antonio.argenziano at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> Signed-off-by: Michel Thierry <michel.thierry at intel.com>
> Signed-off-by: Carlos Santa <carlos.santa at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.h |  4 ++
>   drivers/gpu/drm/i915/intel_lrc.c        | 80 ++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  4 ++
>   3 files changed, 86 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index f6d870b1f73e..62f4eb04985b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -169,6 +169,10 @@ struct i915_gem_context {
>   		u32 *lrc_reg_state;
>   		u64 lrc_desc;
>   		int pin_count;
> +		/** watchdog_threshold: hw watchdog threshold value,
> +		 * in clock counts
> +		 */
> +		u32 watchdog_threshold;
>   
>   		const struct intel_context_ops *ops;
>   	} __engine[I915_NUM_ENGINES];
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e1dcdf545bee..0ea5a37c3357 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1872,12 +1872,33 @@ static int gen8_emit_bb_start(struct i915_request *rq,
>   			      u64 offset, u32 len,
>   			      const unsigned int flags)
>   {
> +	struct intel_engine_cs *engine = rq->engine;
>   	u32 *cs;
> +	u32 num_dwords;
> +	bool watchdog_running = false;

Hm it's not really running but should be running, but ok. I'd just find 
something lie 'run_watchdog' or 'enable_watchdog' clearer.

>   
> -	cs = intel_ring_begin(rq, 6);
> +	/* bb_start only */
> +	num_dwords = 6;
> +
> +	/* check if watchdog will be required */
> +	if (to_intel_context(rq->gem_context, engine)->watchdog_threshold != 0) {
> +		GEM_BUG_ON(!engine->emit_start_watchdog ||
> +			   !engine->emit_stop_watchdog);
> +
> +		/* + start_watchdog (6) + stop_watchdog (4) */
> +		num_dwords += 10;
> +		watchdog_running = true;
> +        }
> +
> +	cs = intel_ring_begin(rq, num_dwords);
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> +	if (watchdog_running) {
> +		/* Start watchdog timer */
> +		cs = engine->emit_start_watchdog(rq, cs);
> +	}
> +
>   	/*
>   	 * WaDisableCtxRestoreArbitration:bdw,chv
>   	 *
> @@ -1906,8 +1927,12 @@ static int gen8_emit_bb_start(struct i915_request *rq,
>   	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
>   	*cs++ = MI_NOOP;

It could be neater to avoid to many MI_NOOPs. I think if you'd make emit 
start/stop not emit it, and moved this line above to be last, a single 
MI_MOOP could be emitted conditionally if the total number of emitted 
(based on cs pointer) is odd.

>   
> -	intel_ring_advance(rq, cs);
> +	if (watchdog_running) {
> +		/* Cancel watchdog timer */
> +		cs = engine->emit_stop_watchdog(rq, cs);
> +	}
>   
> +	intel_ring_advance(rq, cs);
>   	return 0;
>   }
>   
> @@ -2091,6 +2116,49 @@ static void gen8_watchdog_irq_handler(unsigned long data)
>   	intel_uncore_forcewake_put(dev_priv, fw_domains);
>   }
>   
> +static u32 *gen8_emit_start_watchdog(struct i915_request *rq, u32 *cs)
> +{
> +	struct intel_engine_cs *engine = rq->engine;
> +	struct i915_gem_context *ctx = rq->gem_context;
> +	struct intel_context *ce = to_intel_context(ctx, engine);
> +
> +	/* XXX: no watchdog support in BCS engine */
> +	GEM_BUG_ON(engine->id == BCS);
> +
> +	/*
> +	 * watchdog register must never be programmed to zero. This would
> +	 * cause the watchdog counter to exceed and not allow the engine to

Waht does exceed mean here?

> +	 * go into IDLE state
> +	 */
> +	GEM_BUG_ON(ce->watchdog_threshold == 0);
> +
> +	/* Set counter period */
> +	*cs++ = MI_LOAD_REGISTER_IMM(2);
> +	*cs++ = i915_mmio_reg_offset(RING_THRESH(engine->mmio_base));
> +	*cs++ = ce->watchdog_threshold;
> +	/* Start counter */
> +	*cs++ = i915_mmio_reg_offset(RING_CNTR(engine->mmio_base));
> +	*cs++ = GEN8_WATCHDOG_ENABLE;
> +	*cs++ = MI_NOOP;
> +
> +	return cs;
> +}
> +
> +static u32 *gen8_emit_stop_watchdog(struct i915_request *rq, u32 *cs)
> +{
> +	struct intel_engine_cs *engine = rq->engine;
> +
> +	/* XXX: no watchdog support in BCS engine */
> +	GEM_BUG_ON(engine->id == BCS);

I think there will be enough of these checks (including the previous 
patch) to warrant engine->flags |= I915_ENGINE_SUPPORTS_WATCHDOG.

> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = i915_mmio_reg_offset(RING_CNTR(engine->mmio_base));
> +	*cs++ = get_watchdog_disable(engine);

Since this helper now potentially runs on every bb end perhaps it is 
worth storing the magic value in the engine instead of running the 
conditionals every time.

> +	*cs++ = MI_NOOP;
> +
> +	return cs;
> +}
> +
>   /*
>    * Reserve space for 2 NOOPs at the end of each request to be
>    * used as a workaround for not being allowed to do lite
> @@ -2366,6 +2434,8 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>   	engine->emit_flush = gen8_emit_flush_render;
>   	engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs;
>   	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz;
> +	engine->emit_start_watchdog = gen8_emit_start_watchdog;
> +	engine->emit_stop_watchdog = gen8_emit_stop_watchdog;

I don't see that these two need to be vfuncs - am I missing something?

>   
>   	ret = logical_ring_init(engine);
>   	if (ret)
> @@ -2392,6 +2462,12 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
>   {
>   	logical_ring_setup(engine);
>   
> +	/* BCS engine does not have a watchdog-expired irq */
> +	if (engine->id != BCS) {
> +		engine->emit_start_watchdog = gen8_emit_start_watchdog;
> +		engine->emit_stop_watchdog = gen8_emit_stop_watchdog;
> +	}
> +
>   	return logical_ring_init(engine);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 6cb8b4280035..c7aa842a2db1 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -469,6 +469,10 @@ struct intel_engine_cs {
>   	int		(*init_context)(struct i915_request *rq);
>   
>   	int		(*emit_flush)(struct i915_request *request, u32 mode);
> +	u32 *		(*emit_start_watchdog)(struct i915_request *rq,
> +					       u32 *cs);
> +	u32 *		(*emit_stop_watchdog)(struct i915_request *rq,
> +					      u32 *cs);
>   #define EMIT_INVALIDATE	BIT(0)
>   #define EMIT_FLUSH	BIT(1)
>   #define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
> 

Regards,

Tvrtko


More information about the Intel-gfx mailing list