[Intel-gfx] [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Apr 23 12:59:47 UTC 2019


On 17/04/2019 08:56, Chris Wilson wrote:
> Currently there is an underlying assumption that i915_request_unsubmit()
> is synchronous wrt the GPU -- that is the request is no longer in flight
> as we remove it. In the near future that may change, and this may upset
> our signaling as we can process an interrupt for that request while it
> is no longer in flight.
> 
> CPU0					CPU1
> intel_engine_breadcrumbs_irq
> (queue request completion)
> 					i915_request_cancel_signaling
> ...					...
> 					i915_request_enable_signaling
> dma_fence_signal
> 
> Hence in the time it took us to drop the lock to signal the request, a
> preemption event may have occurred and re-queued the request. In the
> process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and
> so reused the rq->signal_link that was in use on CPU0, leading to bad
> pointer chasing in intel_engine_breadcrumbs_irq.
> 
> A related issue was that if someone started listening for a signal on a
> completed but no longer in-flight request, we missed the opportunity to
> immediately signal that request.
> 
> Furthermore, as intel_contexts may be immediately released during
> request retirement, in order to be entirely sure that
> intel_engine_breadcrumbs_irq may no longer dereference the intel_context
> (ce->signals and ce->signal_link), we must wait for irq spinlock.
> 
> In order to prevent the race, we use a bit in the fence.flags to signal
> the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
> For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
> quickly signals to any outside observer that the fence is indeed signaled.
> 
> Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/dma-buf/dma-fence.c              |  1 +
>   drivers/gpu/drm/i915/i915_request.c      |  1 +
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 52 ++++++++++++++----------
>   3 files changed, 33 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 3aa8733f832a..9bf06042619a 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -29,6 +29,7 @@
>   
>   EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
>   EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
>   
>   static DEFINE_SPINLOCK(dma_fence_stub_lock);
>   static struct dma_fence dma_fence_stub;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index b836721d3b13..e0efc334463b 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -432,6 +432,7 @@ void __i915_request_submit(struct i915_request *request)
>   	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
>   
>   	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
> +	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
>   	    !i915_request_enable_breadcrumb(request))
>   		intel_engine_queue_breadcrumbs(engine);
>   
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 3cbffd400b1b..e19f84b006cc 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -23,6 +23,7 @@
>    */
>   
>   #include <linux/kthread.h>
> +#include <trace/events/dma_fence.h>
>   #include <uapi/linux/sched/types.h>
>   
>   #include "i915_drv.h"
> @@ -83,6 +84,7 @@ static inline bool __request_completed(const struct i915_request *rq)
>   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   {
>   	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	const ktime_t timestamp = ktime_get();
>   	struct intel_context *ce, *cn;
>   	struct list_head *pos, *next;
>   	LIST_HEAD(signal);
> @@ -104,6 +106,11 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   
>   			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
>   					     &rq->fence.flags));
> +			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +
> +			if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +					     &rq->fence.flags))
> +				continue;

From here down, the patch is intimately coupled with the dma_fence_signal
implementation, because it open-codes it (with some optimizations as well).

I am thinking about the following solution. Here we put:

	if (!__dma_fence_maybe_signal(&rq->fence))
		continue;

Add the low-level helper to dma-fence.c and export it.

And below..

>   
>   			/*
>   			 * Queue for execution after dropping the signaling
> @@ -111,14 +118,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   			 * more signalers to the same context or engine.
>   			 */
>   			i915_request_get(rq);
> -
> -			/*
> -			 * We may race with direct invocation of
> -			 * dma_fence_signal(), e.g. i915_request_retire(),
> -			 * so we need to acquire our reference to the request
> -			 * before we cancel the breadcrumb.
> -			 */
> -			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   			list_add_tail(&rq->signal_link, &signal);
>   		}
>   
> @@ -140,8 +139,21 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   	list_for_each_safe(pos, next, &signal) {
>   		struct i915_request *rq =
>   			list_entry(pos, typeof(*rq), signal_link);
> +		struct dma_fence_cb *cur, *tmp;
> +
> +		trace_dma_fence_signaled(&rq->fence);
> +
> +		rq->fence.timestamp = timestamp;
> +		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &rq->fence.flags);
> +
> +		spin_lock(&rq->lock);
> +		list_for_each_entry_safe(cur, tmp, &rq->fence.cb_list, node) {
> +			INIT_LIST_HEAD(&cur->node);
> +			cur->func(&rq->fence, cur);
> +		}
> +		INIT_LIST_HEAD(&rq->fence.cb_list);
> +		spin_unlock(&rq->lock);

..we do:

   __dma_fence_complete/force/finish_signal(&rq->fence, timestamp);

This helper would also be added to dma-fence.c and exported, and it would
replace the whole block above.

This way it is both self-documenting in our code and we remove the 
intimate coupling with dma-fence implementation details.

No need to export the tracepoint then either.

(You could add a preparatory patch to optimise the list_del_init in
dma_fence_signal.)

Thoughts?

Regards,

Tvrtko

>   
> -		dma_fence_signal(&rq->fence);
>   		i915_request_put(rq);
>   	}
>   }
> @@ -243,19 +255,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
>   
>   bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   {
> -	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
> -
> -	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
> -
> -	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
> -		return true;
> +	lockdep_assert_held(&rq->lock);
> +	lockdep_assert_irqs_disabled();
>   
> -	spin_lock(&b->irq_lock);
> -	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
> -	    !__request_completed(rq)) {
> +	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
> +		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>   		struct intel_context *ce = rq->hw_context;
>   		struct list_head *pos;
>   
> +		spin_lock(&b->irq_lock);
> +		GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
> +
>   		__intel_breadcrumbs_arm_irq(b);
>   
>   		/*
> @@ -284,8 +294,8 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   			list_move_tail(&ce->signal_link, &b->signalers);
>   
>   		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +		spin_unlock(&b->irq_lock);
>   	}
> -	spin_unlock(&b->irq_lock);
>   
>   	return !__request_completed(rq);
>   }
> @@ -294,8 +304,8 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
>   {
>   	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>   
> -	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
> -		return;
> +	lockdep_assert_held(&rq->lock);
> +	lockdep_assert_irqs_disabled();
>   
>   	spin_lock(&b->irq_lock);
>   	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
> 


More information about the Intel-gfx mailing list