[Intel-gfx] [PATCH] drm/i915: Stop propagating fence errors by default
Jason Ekstrand
jason at jlekstrand.net
Mon May 10 01:36:16 UTC 2021
On May 7, 2021 03:35:33 Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com> wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> This is an alternative proposed fix for the below references bug report
> where dma fence error propagation is causing undesirable change in
> behaviour post GPU hang/reset.
>
> Approach in this patch is to simply stop propagating all dma fence errors
> by default since that seems to be the upstream ask.
>
> To handle the case where i915 needs error propagation for security, I add
> a new dma fence flag DMA_FENCE_FLAG_PROPAGATE_ERROR and make use of it in
> the command parsing chain only.
This sounds plausible. I can't review in full without doing a thorough
audit of the surrounding code, though. I'll try to get to that next week if
Daniel doesn't beat me to it. Thanks for working on this!
--Jason
> It sounds a plausible argument that fence propagation could be useful in
> which case a core flag to enable opt-in should be universally useful.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reported-by: Marcin Slusarz <marcin.slusarz at intel.com>
> Reported-by: Miroslav Bendik
> References: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already
> signaled fences")
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/3080
> Cc: Jason Ekstrand <jason.ekstrand at intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 ++
> drivers/gpu/drm/i915/i915_sw_fence.c | 8 ++++----
> drivers/gpu/drm/i915/i915_sw_fence.h | 8 ++++++++
> include/linux/dma-fence.h | 1 +
> 4 files changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 297143511f99..6a516d1261d0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -2522,6 +2522,8 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
> }
>
> dma_fence_work_init(&pw->base, &eb_parse_ops);
> + /* Propagate errors for security. */
> + __set_bit(DMA_FENCE_FLAG_PROPAGATE_ERROR, &pw->base.dma.flags);
>
> pw->engine = eb->engine;
> pw->batch = eb->batch->vma;
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c
> b/drivers/gpu/drm/i915/i915_sw_fence.c
> index 2744558f3050..2ee917932ccf 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -449,7 +449,7 @@ static void dma_i915_sw_fence_wake_timer(struct
> dma_fence *dma,
>
> fence = xchg(&cb->base.fence, NULL);
> if (fence) {
> - i915_sw_fence_set_error_once(fence, dma->error);
> + i915_sw_fence_propagate_dma_fence_error(fence, dma);
> i915_sw_fence_complete(fence);
> }
>
> @@ -480,7 +480,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence
> *fence,
> might_sleep_if(gfpflags_allow_blocking(gfp));
>
> if (dma_fence_is_signaled(dma)) {
> - i915_sw_fence_set_error_once(fence, dma->error);
> + i915_sw_fence_propagate_dma_fence_error(fence, dma);
> return 0;
> }
>
> @@ -496,7 +496,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence
> *fence,
> if (ret)
> return ret;
>
> - i915_sw_fence_set_error_once(fence, dma->error);
> + i915_sw_fence_propagate_dma_fence_error(fence, dma);
> return 0;
> }
>
> @@ -548,7 +548,7 @@ int __i915_sw_fence_await_dma_fence(struct
> i915_sw_fence *fence,
> debug_fence_assert(fence);
>
> if (dma_fence_is_signaled(dma)) {
> - i915_sw_fence_set_error_once(fence, dma->error);
> + i915_sw_fence_propagate_dma_fence_error(fence, dma);
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h
> b/drivers/gpu/drm/i915/i915_sw_fence.h
> index 30a863353ee6..872ef80ebd10 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.h
> @@ -116,4 +116,12 @@ i915_sw_fence_set_error_once(struct i915_sw_fence
> *fence, int error)
> cmpxchg(&fence->error, 0, error);
> }
>
> +static inline void
> +i915_sw_fence_propagate_dma_fence_error(struct i915_sw_fence *fence,
> + struct dma_fence *dma)
> +{
> + if (unlikely(test_bit(DMA_FENCE_FLAG_PROPAGATE_ERROR, &dma->flags)))
> + i915_sw_fence_set_error_once(fence, dma->error);
> +}
> +
> #endif /* _I915_SW_FENCE_H_ */
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index 6ffb4b2c6371..8dabe1650f11 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -99,6 +99,7 @@ enum dma_fence_flag_bits {
> DMA_FENCE_FLAG_SIGNALED_BIT,
> DMA_FENCE_FLAG_TIMESTAMP_BIT,
> DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> + DMA_FENCE_FLAG_PROPAGATE_ERROR,
> DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> };
>
> --
> 2.30.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/intel-gfx/attachments/20210509/6a21fa76/attachment.htm>
More information about the Intel-gfx
mailing list