[PATCH 7/7] dma-buf: rework the enable_signaling handling
Tvrtko Ursulin
tursulin at ursulin.net
Fri Sep 13 09:57:02 UTC 2024
On 11/09/2024 09:59, Christian König wrote:
> The enable_signaling callback is the only function the dma_fence
> object calls with the fence lock held (the signaled callback might be
> called with the fence lock held as well, but that isn't guaranteed).
>
> The background of that decision was to avoid races with other
> CPUs trying to signal the fence at the same time and potentially
> enforce an ordering of fence signaling.
>
> The only problem is that this never worked correctly.
>
> First of all, the enable_signaling call can still race with
> signaling a fence, it's just that informing the installed callbacks
> is blocking for the enable signaling to finish. If that is required
> (radeon is an example of that) then drivers can still grab the fence
> themselves, everybody else doesn't need that.
>
> Then regarding fence ordering it is perfectly possible that fences
> emitted in the order A,B,C call their installed callbacks in the
> order B, C, A. The background is that the optimization to signal
> fences from dma_fence_is_signaled() decouples the fence signaling
> from the interrupt handlers. The result is that fence C can signal
> because somebody queried its state while A and B still wait for their
> interrupt to arrive.
>
> While those two reasons are just unnecessary churn, the documentation
> is simply erroneous and suggests an illegal operation to
> implementations: "This function can be called from atomic context,
> but not from irq context, so normal spinlocks can be used.". Since
> the enable_signaling callback was called with interrupts disabled that
> practice could deadlock.
>
> Fortunately nobody actually ran into problems with that, but
> considering that we should probably re-work the locking to allow
> dma_fence objects to exist after their drivers were unloaded, this
> patch re-works all this to not call the callback with the dma_fence
> spinlock held and rather move the handling into the drivers which
> actually need it.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/dma-buf/dma-fence-array.c | 7 +-
> drivers/dma-buf/dma-fence-chain.c | 13 ++--
> drivers/dma-buf/dma-fence.c | 68 +++++++------------
> drivers/dma-buf/st-dma-fence-chain.c | 4 +-
> drivers/dma-buf/st-dma-fence-unwrap.c | 22 +++---
> drivers/dma-buf/st-dma-fence.c | 16 ++---
> drivers/dma-buf/st-dma-resv.c | 10 +--
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 12 ++--
> drivers/gpu/drm/i915/i915_active.c | 2 +-
> drivers/gpu/drm/i915/i915_request.c | 12 +++-
> drivers/gpu/drm/nouveau/nouveau_fence.c | 9 ++-
> drivers/gpu/drm/radeon/radeon_fence.c | 17 +++--
> drivers/gpu/drm/ttm/ttm_bo.c | 2 +-
> drivers/gpu/drm/xe/xe_bo.c | 2 +-
> drivers/gpu/drm/xe/xe_hw_fence.c | 4 +-
> drivers/gpu/drm/xe/xe_preempt_fence.c | 3 +-
> drivers/gpu/drm/xe/xe_pt.c | 2 +-
> drivers/gpu/drm/xe/xe_sched_job.c | 2 +-
> drivers/gpu/drm/xe/xe_vm.c | 6 +-
> drivers/gpu/host1x/fence.c | 14 ++--
> include/linux/dma-fence.h | 35 +++-------
> 21 files changed, 123 insertions(+), 139 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
> index c74ac197d5fe..1022b08c9b42 100644
> --- a/drivers/dma-buf/dma-fence-array.c
> +++ b/drivers/dma-buf/dma-fence-array.c
> @@ -67,7 +67,7 @@ static void dma_fence_array_cb_func(struct dma_fence *f,
> dma_fence_put(&array->base);
> }
>
> -static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
> +static void dma_fence_array_enable_signaling(struct dma_fence *fence)
> {
> struct dma_fence_array *array = to_dma_fence_array(fence);
> struct dma_fence_array_cb *cb = array->callbacks;
> @@ -92,12 +92,11 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
> dma_fence_put(&array->base);
> if (atomic_dec_and_test(&array->num_pending)) {
> dma_fence_array_clear_pending_error(array);
> - return false;
> + dma_fence_signal(&array->base);
> + return;
> }
> }
> }
> -
> - return true;
> }
>
> static bool dma_fence_array_signaled(struct dma_fence *fence)
> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
> index 9663ba1bb6ac..f56baa214a6c 100644
> --- a/drivers/dma-buf/dma-fence-chain.c
> +++ b/drivers/dma-buf/dma-fence-chain.c
> @@ -9,7 +9,7 @@
>
> #include <linux/dma-fence-chain.h>
>
> -static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
> +static void dma_fence_chain_enable_signaling(struct dma_fence *fence);
>
> /**
> * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence
> @@ -125,10 +125,7 @@ static void dma_fence_chain_irq_work(struct irq_work *work)
>
> chain = container_of(work, typeof(*chain), work);
>
> - /* Try to rearm the callback */
> - if (!dma_fence_chain_enable_signaling(&chain->base))
> - /* Ok, we are done. No more unsignaled fences left */
> - dma_fence_signal(&chain->base);
> + dma_fence_chain_enable_signaling(&chain->base);
> dma_fence_put(&chain->base);
> }
>
> @@ -142,7 +139,7 @@ static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb)
> dma_fence_put(f);
> }
>
> -static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
> +static void dma_fence_chain_enable_signaling(struct dma_fence *fence)
> {
> struct dma_fence_chain *head = to_dma_fence_chain(fence);
>
> @@ -153,12 +150,12 @@ static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
> dma_fence_get(f);
> if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
> dma_fence_put(fence);
> - return true;
> + return;
> }
> dma_fence_put(f);
> }
> dma_fence_put(&head->base);
> - return false;
> + dma_fence_signal(fence);
> }
>
> static bool dma_fence_chain_signaled(struct dma_fence *fence)
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 325a263ac798..f0084a7aeebe 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -509,7 +509,7 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
>
> __dma_fence_might_wait();
>
> - dma_fence_enable_sw_signaling(fence);
> + dma_fence_enable_signaling(fence);
I like the rename since the "sw" in the name always annoyed me. What about
splitting it out into a standalone patch though? If it comes first, this
one gets a bit smaller and easier to review.
>
> trace_dma_fence_wait_start(fence);
> if (fence->ops->wait)
> @@ -576,47 +576,30 @@ void dma_fence_free(struct dma_fence *fence)
> }
> EXPORT_SYMBOL(dma_fence_free);
>
> -static bool __dma_fence_enable_signaling(struct dma_fence *fence)
> +/**
> + * dma_fence_enable_signaling - tell implementation that fence must signal
> + * @fence: the fence to enable
> + *
> + * This will request for signaling to be enabled, to make the fence complete
> + * as soon as possible. This calls &dma_fence_ops.enable_signaling internally.
> + */
> +void dma_fence_enable_signaling(struct dma_fence *fence)
> {
> bool was_set;
>
> - lockdep_assert_held(fence->lock);
> -
> was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> &fence->flags);
>
> if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> - return false;
> + return;
>
> if (!was_set && fence->ops->enable_signaling) {
> trace_dma_fence_enable_signal(fence);
>
> - if (!fence->ops->enable_signaling(fence)) {
> - dma_fence_signal_locked(fence);
> - return false;
> - }
> + fence->ops->enable_signaling(fence);
> }
> -
> - return true;
> -}
> -
> -/**
> - * dma_fence_enable_sw_signaling - enable signaling on fence
> - * @fence: the fence to enable
> - *
> - * This will request for sw signaling to be enabled, to make the fence
> - * complete as soon as possible. This calls &dma_fence_ops.enable_signaling
> - * internally.
> - */
> -void dma_fence_enable_sw_signaling(struct dma_fence *fence)
> -{
> - unsigned long flags;
> -
> - spin_lock_irqsave(fence->lock, flags);
> - __dma_fence_enable_signaling(fence);
> - spin_unlock_irqrestore(fence->lock, flags);
> }
> -EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
> +EXPORT_SYMBOL(dma_fence_enable_signaling);
>
> /**
> * dma_fence_add_callback - add a callback to be called when the fence
> @@ -644,29 +627,30 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
> dma_fence_func_t func)
> {
> unsigned long flags;
> - int ret = 0;
>
> if (WARN_ON(!fence || !func))
> return -EINVAL;
>
> - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
> - INIT_LIST_HEAD(&cb->node);
> - return -ENOENT;
> - }
> + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> + goto out_signaled;
> +
> + dma_fence_enable_signaling(fence);
>
> spin_lock_irqsave(fence->lock, flags);
On a low level, should we get to that after the high-level discussion, this
effectively drops and re-acquires the lock, which raises questions about
correctness and, if not that, whether it could be optimized.
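Roughly what I mean, paraphrasing the new dma_fence_add_callback() flow
(untested, just for illustration):

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		goto out_signaled;

	/* Unlocked - the driver callback may take and drop its own lock. */
	dma_fence_enable_signaling(fence);

	/* Window - a concurrent dma_fence_signal() can land here. */

	spin_lock_irqsave(fence->lock, flags);
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		goto out_unlock;	/* -ENOENT, callback not added */

Where previously a single fence->lock critical section covered both the
enable and the list add.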
> + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> + goto out_unlock;
>
> - if (__dma_fence_enable_signaling(fence)) {
> - cb->func = func;
> - list_add_tail(&cb->node, &fence->cb_list);
> - } else {
> - INIT_LIST_HEAD(&cb->node);
> - ret = -ENOENT;
> - }
> + cb->func = func;
> + list_add_tail(&cb->node, &fence->cb_list);
> + spin_unlock_irqrestore(fence->lock, flags);
> + return 0;
>
> +out_unlock:
> spin_unlock_irqrestore(fence->lock, flags);
>
> - return ret;
> +out_signaled:
> + INIT_LIST_HEAD(&cb->node);
> + return -ENOENT;
> }
> EXPORT_SYMBOL(dma_fence_add_callback);
>
> diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
> index ed4b323886e4..455f6f6c3b90 100644
> --- a/drivers/dma-buf/st-dma-fence-chain.c
> +++ b/drivers/dma-buf/st-dma-fence-chain.c
> @@ -85,7 +85,7 @@ static int sanitycheck(void *arg)
>
> chain = mock_chain(NULL, f, 1);
> if (chain)
> - dma_fence_enable_sw_signaling(chain);
> + dma_fence_enable_signaling(chain);
> else
> err = -ENOMEM;
>
> @@ -146,7 +146,7 @@ static int fence_chains_init(struct fence_chains *fc, unsigned int count,
>
> fc->tail = fc->chains[i];
>
> - dma_fence_enable_sw_signaling(fc->chains[i]);
> + dma_fence_enable_signaling(fc->chains[i]);
> }
>
> fc->chain_length = i;
> diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c
> index f0cee984b6c7..1a6cef01fda9 100644
> --- a/drivers/dma-buf/st-dma-fence-unwrap.c
> +++ b/drivers/dma-buf/st-dma-fence-unwrap.c
> @@ -102,7 +102,7 @@ static int sanitycheck(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> array = mock_array(1, f);
> if (!array)
> @@ -126,7 +126,7 @@ static int unwrap_array(void *arg)
> if (!f1)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f1);
> + dma_fence_enable_signaling(f1);
>
> f2 = mock_fence();
> if (!f2) {
> @@ -134,7 +134,7 @@ static int unwrap_array(void *arg)
> return -ENOMEM;
> }
>
> - dma_fence_enable_sw_signaling(f2);
> + dma_fence_enable_signaling(f2);
>
> array = mock_array(2, f1, f2);
> if (!array)
> @@ -170,7 +170,7 @@ static int unwrap_chain(void *arg)
> if (!f1)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f1);
> + dma_fence_enable_signaling(f1);
>
> f2 = mock_fence();
> if (!f2) {
> @@ -178,7 +178,7 @@ static int unwrap_chain(void *arg)
> return -ENOMEM;
> }
>
> - dma_fence_enable_sw_signaling(f2);
> + dma_fence_enable_signaling(f2);
>
> chain = mock_chain(f1, f2);
> if (!chain)
> @@ -214,7 +214,7 @@ static int unwrap_chain_array(void *arg)
> if (!f1)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f1);
> + dma_fence_enable_signaling(f1);
>
> f2 = mock_fence();
> if (!f2) {
> @@ -222,7 +222,7 @@ static int unwrap_chain_array(void *arg)
> return -ENOMEM;
> }
>
> - dma_fence_enable_sw_signaling(f2);
> + dma_fence_enable_signaling(f2);
>
> array = mock_array(2, f1, f2);
> if (!array)
> @@ -262,7 +262,7 @@ static int unwrap_merge(void *arg)
> if (!f1)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f1);
> + dma_fence_enable_signaling(f1);
>
> f2 = mock_fence();
> if (!f2) {
> @@ -270,7 +270,7 @@ static int unwrap_merge(void *arg)
> goto error_put_f1;
> }
>
> - dma_fence_enable_sw_signaling(f2);
> + dma_fence_enable_signaling(f2);
>
> f3 = dma_fence_unwrap_merge(f1, f2);
> if (!f3) {
> @@ -314,13 +314,13 @@ static int unwrap_merge_complex(void *arg)
> if (!f1)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f1);
> + dma_fence_enable_signaling(f1);
>
> f2 = mock_fence();
> if (!f2)
> goto error_put_f1;
>
> - dma_fence_enable_sw_signaling(f2);
> + dma_fence_enable_signaling(f2);
>
> f3 = dma_fence_unwrap_merge(f1, f2);
> if (!f3)
> diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c
> index 6a1bfcd0cc21..eb740b4fe4fe 100644
> --- a/drivers/dma-buf/st-dma-fence.c
> +++ b/drivers/dma-buf/st-dma-fence.c
> @@ -102,7 +102,7 @@ static int sanitycheck(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> dma_fence_signal(f);
> dma_fence_put(f);
> @@ -119,7 +119,7 @@ static int test_signaling(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> if (dma_fence_is_signaled(f)) {
> pr_err("Fence unexpectedly signaled on creation\n");
> @@ -194,7 +194,7 @@ static int test_late_add_callback(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> dma_fence_signal(f);
>
> @@ -288,7 +288,7 @@ static int test_status(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> if (dma_fence_get_status(f)) {
> pr_err("Fence unexpectedly has signaled status on creation\n");
> @@ -316,7 +316,7 @@ static int test_error(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> dma_fence_set_error(f, -EIO);
>
> @@ -347,7 +347,7 @@ static int test_wait(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> if (dma_fence_wait_timeout(f, false, 0) != -ETIME) {
> pr_err("Wait reported complete before being signaled\n");
> @@ -391,7 +391,7 @@ static int test_wait_timeout(void *arg)
> if (!wt.f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(wt.f);
> + dma_fence_enable_signaling(wt.f);
>
> if (dma_fence_wait_timeout(wt.f, false, 1) != -ETIME) {
> pr_err("Wait reported complete before being signaled\n");
> @@ -472,7 +472,7 @@ static int thread_signal_callback(void *arg)
> break;
> }
>
> - dma_fence_enable_sw_signaling(f1);
> + dma_fence_enable_signaling(f1);
>
> rcu_assign_pointer(t->fences[t->id], f1);
> smp_wmb();
> diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
> index 15dbea1462ed..f030b43ee4da 100644
> --- a/drivers/dma-buf/st-dma-resv.c
> +++ b/drivers/dma-buf/st-dma-resv.c
> @@ -45,7 +45,7 @@ static int sanitycheck(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> dma_fence_signal(f);
> dma_fence_put(f);
> @@ -71,7 +71,7 @@ static int test_signaling(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> dma_resv_init(&resv);
> r = dma_resv_lock(&resv, NULL);
> @@ -118,7 +118,7 @@ static int test_for_each(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> dma_resv_init(&resv);
> r = dma_resv_lock(&resv, NULL);
> @@ -179,7 +179,7 @@ static int test_for_each_unlocked(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> dma_resv_init(&resv);
> r = dma_resv_lock(&resv, NULL);
> @@ -252,7 +252,7 @@ static int test_get_fences(void *arg)
> if (!f)
> return -ENOMEM;
>
> - dma_fence_enable_sw_signaling(f);
> + dma_fence_enable_signaling(f);
>
> dma_resv_init(&resv);
> r = dma_resv_lock(&resv, NULL);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> index 1ef758ac5076..9ec4daa23665 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> @@ -116,24 +116,24 @@ static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
> *
> * @f: dma_fence
> */
> -static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
> +static void amdkfd_fence_enable_signaling(struct dma_fence *f)
> {
> struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>
> if (!fence)
> - return false;
> + return;
>
> if (dma_fence_is_signaled(f))
> - return true;
> + return;
>
> if (!fence->svm_bo) {
> if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
> - return true;
> + return;
> } else {
> if (!svm_range_schedule_evict_svm_bo(fence))
> - return true;
> + return;
> }
> - return false;
> + dma_fence_signal(f);
> }
>
> /**
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 5ec293011d99..3f2425966263 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -562,7 +562,7 @@ static void enable_signaling(struct i915_active_fence *active)
> if (!fence)
> return;
>
> - dma_fence_enable_sw_signaling(fence);
> + dma_fence_enable_signaling(fence);
> dma_fence_put(fence);
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 519e096c607c..db4891c7cc7b 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -90,9 +90,17 @@ static bool i915_fence_signaled(struct dma_fence *fence)
> return i915_request_completed(to_request(fence));
> }
>
> -static bool i915_fence_enable_signaling(struct dma_fence *fence)
> +static void i915_fence_enable_signaling(struct dma_fence *fence)
> {
> - return i915_request_enable_breadcrumb(to_request(fence));
> + struct i915_request *rq = to_request(fence);
> + unsigned long flags;
> + bool ret;
> +
> + spin_lock_irqsave(&rq->lock, flags);
> + ret = i915_request_enable_breadcrumb(to_request(fence));
> + spin_unlock_irqrestore(&rq->lock, flags);
> + if (!ret)
> + dma_fence_signal(fence);
I didn't immediately pick up where you add (document?) the requirement to
signal from enable_signaling?

Also, again on a low level, it would be nicer if the re-lock dance was
avoided here (and in the other callbacks).
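If I read it right the fence lock for i915 requests is &rq->lock, so after
this patch the dma_fence_add_callback() path ends up doing roughly:

	dma_fence_add_callback(&rq->fence, ...)
	    dma_fence_enable_signaling()
	        i915_fence_enable_signaling()
	            spin_lock_irqsave(&rq->lock, flags);	/* == fence->lock */
	            i915_request_enable_breadcrumb(rq);
	            spin_unlock_irqrestore(&rq->lock, flags);
	    spin_lock_irqsave(fence->lock, flags);		/* taken again right away */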
> }
>
> static signed long i915_fence_wait(struct dma_fence *fence,
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 61d193e41f8c..1ad692a033c0 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -503,10 +503,11 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy = {
> .release = nouveau_fence_release
> };
>
> -static bool nouveau_fence_enable_signaling(struct dma_fence *f)
> +static void nouveau_fence_enable_signaling(struct dma_fence *f)
> {
> struct nouveau_fence *fence = from_fence(f);
> struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
> + unsigned long flags;
>
> /*
> * caller should have a reference on the fence,
> @@ -514,6 +515,7 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f)
> */
> WARN_ON(kref_read(&fence->base.refcount) <= 1);
>
> + spin_lock_irqsave(&fctx->lock, flags);
> if (!fctx->notify_ref++)
> nvif_event_allow(&fctx->event);
>
> @@ -523,10 +525,11 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f)
> dma_fence_put(&fence->base);
> if (!--fctx->notify_ref)
> nvif_event_block(&fctx->event);
> - return false;
> + spin_unlock_irqrestore(&fctx->lock, flags);
> + dma_fence_signal(f);
> } else {
> set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags);
> - return true;
> + spin_unlock_irqrestore(&fctx->lock, flags);
> }
> }
>
> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
> index daff61586be5..fd1d617c3331 100644
> --- a/drivers/gpu/drm/radeon/radeon_fence.c
> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
> @@ -378,14 +378,19 @@ static bool radeon_fence_is_signaled(struct dma_fence *f)
> * to fence_queue that checks if this fence is signaled, and if so it
> * signals the fence and removes itself.
> */
> -static bool radeon_fence_enable_signaling(struct dma_fence *f)
> +static void radeon_fence_enable_signaling(struct dma_fence *f)
> {
> struct radeon_fence *fence = to_radeon_fence(f);
> struct radeon_device *rdev = fence->rdev;
> + unsigned long flags;
>
> - if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
> - return false;
> + if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq)
> + >= fence->seq) {
> + dma_fence_signal(f);
> + return;
> + }
>
> + spin_lock_irqsave(&rdev->fence_queue.lock, flags);
> if (down_read_trylock(&rdev->exclusive_lock)) {
> radeon_irq_kms_sw_irq_get(rdev, fence->ring);
>
> @@ -396,7 +401,9 @@ static bool radeon_fence_enable_signaling(struct dma_fence *f)
> if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
> radeon_irq_kms_sw_irq_put(rdev, fence->ring);
> up_read(&rdev->exclusive_lock);
> - return false;
> + spin_unlock_irqrestore(&rdev->fence_queue.lock, flags);
> + dma_fence_signal(f);
> + return;
> }
>
> up_read(&rdev->exclusive_lock);
> @@ -412,7 +419,7 @@ static bool radeon_fence_enable_signaling(struct dma_fence *f)
> fence->fence_wake.func = radeon_fence_check_signaled;
> __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
> dma_fence_get(f);
> - return true;
> + spin_unlock_irqrestore(&rdev->fence_queue.lock, flags);
> }
>
> /**
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 320592435252..9b060a82c6df 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -219,7 +219,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
> dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
> dma_resv_for_each_fence_unlocked(&cursor, fence) {
> if (!fence->ops->signaled)
> - dma_fence_enable_sw_signaling(fence);
> + dma_fence_enable_signaling(fence);
> }
> dma_resv_iter_end(&cursor);
> }
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 25d0c939ba31..3a1840560ef9 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -495,7 +495,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
> dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
> DMA_RESV_USAGE_BOOKKEEP);
> dma_resv_for_each_fence_unlocked(&cursor, fence)
> - dma_fence_enable_sw_signaling(fence);
> + dma_fence_enable_signaling(fence);
> dma_resv_iter_end(&cursor);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
> index 0b4f12be3692..77c5b16f835d 100644
> --- a/drivers/gpu/drm/xe/xe_hw_fence.c
> +++ b/drivers/gpu/drm/xe/xe_hw_fence.c
> @@ -168,7 +168,7 @@ static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
> !__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops);
> }
>
> -static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
> +static void xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
> {
> struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
> struct xe_hw_fence_irq *irq = xe_hw_fence_irq(fence);
> @@ -179,8 +179,6 @@ static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
> /* SW completed (no HW IRQ) so kick handler to signal fence */
> if (xe_hw_fence_signaled(dma_fence))
> xe_hw_fence_irq_run(irq);
> -
> - return true;
> }
>
> static void xe_hw_fence_release(struct dma_fence *dma_fence)
> diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
> index 83fbeea5aa20..963af6825875 100644
> --- a/drivers/gpu/drm/xe/xe_preempt_fence.c
> +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
> @@ -56,7 +56,7 @@ preempt_fence_get_timeline_name(struct dma_fence *fence)
> return "preempt";
> }
>
> -static bool preempt_fence_enable_signaling(struct dma_fence *fence)
> +static void preempt_fence_enable_signaling(struct dma_fence *fence)
> {
> struct xe_preempt_fence *pfence =
> container_of(fence, typeof(*pfence), base);
> @@ -64,7 +64,6 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence)
>
> pfence->error = q->ops->suspend(q);
> queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work);
> - return true;
> }
>
> static const struct dma_fence_ops preempt_fence_ops = {
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 579ed31b46db..7556bc45e592 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1236,7 +1236,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
> dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
> DMA_RESV_USAGE_BOOKKEEP);
> dma_resv_for_each_fence_unlocked(&cursor, fence)
> - dma_fence_enable_sw_signaling(fence);
> + dma_fence_enable_signaling(fence);
> dma_resv_iter_end(&cursor);
>
> err = dma_resv_wait_timeout(xe_vm_resv(vm),
> diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
> index 55d47450b2c6..fe2d5edac92d 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job.c
> +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> @@ -210,7 +210,7 @@ void xe_sched_job_set_error(struct xe_sched_job *job, int error)
>
> trace_xe_sched_job_set_error(job);
>
> - dma_fence_enable_sw_signaling(job->fence);
> + dma_fence_enable_signaling(job->fence);
> xe_hw_fence_irq_run(job->q->fence_irq);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 3eb76d874eb2..b33636976669 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -259,7 +259,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
> */
> wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
> if (wait)
> - dma_fence_enable_sw_signaling(pfence);
> + dma_fence_enable_signaling(pfence);
>
> up_read(&vm->userptr.notifier_lock);
>
> @@ -289,7 +289,7 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
> --vm->preempt.num_exec_queues;
> }
> if (q->lr.pfence) {
> - dma_fence_enable_sw_signaling(q->lr.pfence);
> + dma_fence_enable_signaling(q->lr.pfence);
> dma_fence_put(q->lr.pfence);
> q->lr.pfence = NULL;
> }
> @@ -634,7 +634,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
> dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
> DMA_RESV_USAGE_BOOKKEEP);
> dma_resv_for_each_fence_unlocked(&cursor, fence)
> - dma_fence_enable_sw_signaling(fence);
> + dma_fence_enable_signaling(fence);
> dma_resv_iter_end(&cursor);
>
> err = dma_resv_wait_timeout(xe_vm_resv(vm),
> diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
> index 139ad1afd935..0581aa23cd41 100644
> --- a/drivers/gpu/host1x/fence.c
> +++ b/drivers/gpu/host1x/fence.c
> @@ -30,12 +30,17 @@ static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f)
> return container_of(f, struct host1x_syncpt_fence, base);
> }
>
> -static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
> +static void host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
> {
> struct host1x_syncpt_fence *sf = to_host1x_fence(f);
> + unsigned long flags;
>
> - if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
> - return false;
> + spin_lock_irqsave(f->lock, flags);
> + if (host1x_syncpt_is_expired(sf->sp, sf->threshold)) {
> + spin_unlock_irqrestore(f->lock, flags);
> + dma_fence_signal(f);
> + return;
> + }
>
> /* Reference for interrupt path. */
> dma_fence_get(f);
> @@ -62,8 +67,7 @@ static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
> * so we need to initialize all state used by signalling
> * before it.
> */
> -
> - return true;
> + spin_unlock_irqrestore(f->lock, flags);
> }
>
> static const struct dma_fence_ops host1x_syncpt_fence_ops = {
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index e7ad819962e3..93b6176bbf48 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -158,39 +158,24 @@ struct dma_fence_ops {
> /**
> * @enable_signaling:
> *
> - * Enable software signaling of fence.
> + * Guarantee that installed callbacks are called at some point.
This is one of the big/key changes, right? I would suggest improving the
commit message to talk about its what, why and how directly and explicitly.

I am not sure how it guarantees that when it is left to driver
implementations? That is, dma_fence_signal_locked() is removed from
dma_fence_enable_signaling().

Or do you mean because you added dma_fence_enable_signaling() to
dma_fence_add_callback()?

But then there is a lock re-acquire there, and if a signal raced
"correctly" with dma_fence_add_callback(), the callback may not get called.
> *
> - * For fence implementations that have the capability for hw->hw
> - * signaling, they can implement this op to enable the necessary
> - * interrupts, or insert commands into cmdstream, etc, to avoid these
> - * costly operations for the common case where only hw->hw
> - * synchronization is required. This is called in the first
> - * dma_fence_wait() or dma_fence_add_callback() path to let the fence
> - * implementation know that there is another driver waiting on the
> - * signal (ie. hw->sw case).
> + * Drivers can implement this op to enable the necessary interrupts or
> + * tell the hardware through other means that somebody is requesting
> + * this fence to signal. This is called in the first dma_fence_wait()
> + * or dma_fence_add_callback() path.
> *
> * This function can be called from atomic context, but not
> * from irq context, so normal spinlocks can be used.
I thought the commit message stated this paragraph was wrong. Or does it
become right after this patch?
Regards,
Tvrtko
> *
> - * A return value of false indicates the fence already passed,
> - * or some failure occurred that made it impossible to enable
> - * signaling. True indicates successful enabling.
> - *
> - * &dma_fence.error may be set in enable_signaling, but only when false
> - * is returned.
> - *
> - * Since many implementations can call dma_fence_signal() even when before
> - * @enable_signaling has been called there's a race window, where the
> - * dma_fence_signal() might result in the final fence reference being
> - * released and its memory freed. To avoid this, implementations of this
> - * callback should grab their own reference using dma_fence_get(), to be
> - * released when the fence is signalled (through e.g. the interrupt
> - * handler).
> + * dma_fence_set_error() as well as dma_fence_signal() may be used in
> + * enable_signaling if the implementation detects an error or that the
> + * fence already completed succesfully.
> *
> * This callback is optional. If this callback is not present, then the
> * driver must always have signaling enabled.
> */
> - bool (*enable_signaling)(struct dma_fence *fence);
> + void (*enable_signaling)(struct dma_fence *fence);
>
> /**
> * @signaled:
> @@ -400,7 +385,7 @@ int dma_fence_add_callback(struct dma_fence *fence,
> dma_fence_func_t func);
> bool dma_fence_remove_callback(struct dma_fence *fence,
> struct dma_fence_cb *cb);
> -void dma_fence_enable_sw_signaling(struct dma_fence *fence);
> +void dma_fence_enable_signaling(struct dma_fence *fence);
>
> /**
> * dma_fence_is_signaled_locked - Return an indication if the fence