[Intel-gfx] [PATCH] drm/i915: Trace GEM steps between submit and wedging

Mika Kuoppala mika.kuoppala at linux.intel.com
Thu Mar 15 13:13:23 UTC 2018


Chris Wilson <chris at chris-wilson.co.uk> writes:

> We still have an odd race with wedging/unwedging as shown by igt/gem_eio
> that defies expectations. Add some more trace_printks to try and
> visualize the flow over the precipice.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c     | 14 ++++++++++++++
>  drivers/gpu/drm/i915/i915_request.c | 18 ++++++++++++++++++
>  2 files changed, 32 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 13d4b0e74641..2fbd622bba30 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3193,6 +3193,9 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
>  
>  static void nop_submit_request(struct i915_request *request)
>  {
> +	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
> +		  request->engine->name,
> +		  request->fence.context, request->fence.seqno);
>  	dma_fence_set_error(&request->fence, -EIO);
>  
>  	i915_request_submit(request);
> @@ -3202,6 +3205,9 @@ static void nop_complete_submit_request(struct i915_request *request)
>  {
>  	unsigned long flags;
>  
> +	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
> +		  request->engine->name,
> +		  request->fence.context, request->fence.seqno);

Looking at some example ftrace outputs, it should be easy to
make a distinction between the two phases.

Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>


>  	dma_fence_set_error(&request->fence, -EIO);
>  
>  	spin_lock_irqsave(&request->engine->timeline->lock, flags);
> @@ -3215,6 +3221,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
>  
> +	GEM_TRACE("start\n");
> +
>  	if (drm_debug & DRM_UT_DRIVER) {
>  		struct drm_printer p = drm_debug_printer(__func__);
>  
> @@ -3279,6 +3287,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
>  		i915_gem_reset_finish_engine(engine);
>  	}
>  
> +	GEM_TRACE("end\n");
> +
>  	wake_up_all(&i915->gpu_error.reset_queue);
>  }
>  
> @@ -3291,6 +3301,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
>  		return true;
>  
> +	GEM_TRACE("start\n");
> +
>  	/*
>  	 * Before unwedging, make sure that all pending operations
>  	 * are flushed and errored out - we may have requests waiting upon
> @@ -3341,6 +3353,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  	intel_engines_reset_default_submission(i915);
>  	i915_gem_contexts_lost(i915);
>  
> +	GEM_TRACE("end\n");
> +
>  	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
>  	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
>  
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 1810fa1b81cb..fac1056422a5 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -381,6 +381,11 @@ static void i915_request_retire(struct i915_request *request)
>  	struct intel_engine_cs *engine = request->engine;
>  	struct i915_gem_active *active, *next;
>  
> +	GEM_TRACE("%s fence %llx:%d, global_seqno %d\n",
> +		  engine->name,
> +		  request->fence.context, request->fence.seqno,
> +		  request->global_seqno);
> +
>  	lockdep_assert_held(&request->i915->drm.struct_mutex);
>  	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
>  	GEM_BUG_ON(!i915_request_completed(request));
> @@ -488,6 +493,11 @@ void __i915_request_submit(struct i915_request *request)
>  	struct intel_timeline *timeline;
>  	u32 seqno;
>  
> +	GEM_TRACE("%s fence %llx:%d -> global_seqno %d\n",
> +		  request->engine->name,
> +		  request->fence.context, request->fence.seqno,
> +		  engine->timeline->seqno);
> +
>  	GEM_BUG_ON(!irqs_disabled());
>  	lockdep_assert_held(&engine->timeline->lock);
>  
> @@ -537,6 +547,11 @@ void __i915_request_unsubmit(struct i915_request *request)
>  	struct intel_engine_cs *engine = request->engine;
>  	struct intel_timeline *timeline;
>  
> +	GEM_TRACE("%s fence %llx:%d <- global_seqno %d\n",
> +		  request->engine->name,
> +		  request->fence.context, request->fence.seqno,
> +		  request->global_seqno);
> +
>  	GEM_BUG_ON(!irqs_disabled());
>  	lockdep_assert_held(&engine->timeline->lock);
>  
> @@ -996,6 +1011,9 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
>  	u32 *cs;
>  	int err;
>  
> +	GEM_TRACE("%s fence %llx:%d\n",
> +		  engine->name, request->fence.context, request->fence.seqno);
> +
>  	lockdep_assert_held(&request->i915->drm.struct_mutex);
>  	trace_i915_request_add(request);
>  
> -- 
> 2.16.2


More information about the Intel-gfx mailing list