[PATCH 4/4] dma-buf/fence-chain: Speed up processing of rearmed callbacks

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Mon Aug 18 14:30:54 UTC 2025


Hi Christian,

On Thursday, 14 August 2025 14:24:29 CEST Christian König wrote:
> 
> On 14.08.25 10:16, Janusz Krzysztofik wrote:
> > When the first user starts waiting on a not yet signaled fence of a
> > chain link, a dma_fence_chain callback is added to the user fence of
> > that link.  When that user fence is then signaled, the chain is
> > traversed in search of the first unsignaled link and the callback is
> > rearmed on the user fence of that link.
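> > 
> > (In code terms, roughly: a waiter enabling signaling, e.g. via
> > dma_fence_add_callback(), reaches dma_fence_chain_enable_signaling(),
> > which walks the chain with dma_fence_chain_for_each() and arms
> > dma_fence_chain_cb() on the first user fence it can still add a
> > callback to.)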
> > 
> > Since chain fences may be exposed to user space, e.g. via drm_syncobj
> > IOCTLs, users may start waiting on any link of the chain; many links
> > of a chain may then have signaling enabled, with their callbacks added
> > to their user fences.  Once an arbitrary user fence is signaled, all
> > dma_fence_chain callbacks added to it so far must be rearmed to
> > another user fence of the chain.  In an extreme scenario, when all N
> > links of a chain are awaited and then signaled in reverse order, the
> > dma_fence_chain callback may be called up to N * (N + 1) / 2 times (an
> > arithmetic series).
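> > 
> > For example, with N = 3 links all awaited: signaling the newest user
> > fence fires one callback, which is rearmed on the middle user fence;
> > signaling the middle one then fires two, both rearmed on the oldest;
> > signaling the oldest finally fires three, i.e.
> > 1 + 2 + 3 = 6 = N * (N + 1) / 2 calls in total.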
> > 
> > To avoid that potentially excessive accumulation of dma_fence_chain
> > callbacks, rearm a trimmed-down, signal-only version of the callback
> > on the base fence of the previous link if that link is not yet
> > signaled; otherwise, just signal the base fence of the current link.
> > This replaces traversing the chain in search of the first unsignaled
> > link and moving all callbacks collected so far to the user fence of
> > that link.
> 
> Well, clear NAK to that! You can easily overflow the kernel stack with that!

I'll be happy to propose a better solution, but for that I need to better 
understand your concern.  Could you please point out the exact piece of the 
proposed code and/or describe a scenario where you see the risk of a stack 
overflow?
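
For reference, my reading of the signaling path this patch introduces is 
roughly the following (a simplified sketch, based on the hunks below):

	dma_fence_signal(user fence)
	 -> rearm_or_signal_cb()	/* runs under the user fence lock */
	     -> prev_is_signaled()
	         -> fence_is_signaled__nested(prev)
	             -> dma_fence_signal_locked(prev)	/* nested; may run
							   prev's callbacks */
	     -> irq_work_queue(&chain->work)	/* actual signal or rearm
						   deferred to irq_work */

Is it the nested dma_fence_signal_locked() potentially invoking further 
callbacks that you are concerned about, or some other path?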

> 
> In addition to this, messing with the fence ops outside of the dma_fence code is an absolute no-go.

Could you please explain what piece of code you are referring to when you say 
"messing with the fence ops outside of the dma_fence code"?  If not this 
patch, then which particular patch of this series did you mean?  I assume you 
didn't mean the drm_syncobj code that I mentioned in my commit descriptions.

Thanks,
Janusz

> 
> Regards,
> Christian.
> 
> > 
> > Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12904
> > Suggested-by: Chris Wilson <chris.p.wilson at linux.intel.com>
> > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
> > ---
> >  drivers/dma-buf/dma-fence-chain.c | 101 +++++++++++++++++++++++++-----
> >  1 file changed, 84 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
> > index a8a90acf4f34d..90eff264ee05c 100644
> > --- a/drivers/dma-buf/dma-fence-chain.c
> > +++ b/drivers/dma-buf/dma-fence-chain.c
> > @@ -119,46 +119,113 @@ static const char *dma_fence_chain_get_timeline_name(struct dma_fence *fence)
> >          return "unbound";
> >  }
> >  
> > -static void dma_fence_chain_irq_work(struct irq_work *work)
> > +static void signal_irq_work(struct irq_work *work)
> >  {
> >  	struct dma_fence_chain *chain;
> >  
> >  	chain = container_of(work, typeof(*chain), work);
> >  
> > -	/* Try to rearm the callback */
> > -	if (!dma_fence_chain_enable_signaling(&chain->base))
> > -		/* Ok, we are done. No more unsignaled fences left */
> > -		dma_fence_signal(&chain->base);
> > +	dma_fence_signal(&chain->base);
> >  	dma_fence_put(&chain->base);
> >  }
> >  
> > -static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb)
> > +static void signal_cb(struct dma_fence *f, struct dma_fence_cb *cb)
> > +{
> > +	struct dma_fence_chain *chain;
> > +
> > +	chain = container_of(cb, typeof(*chain), cb);
> > +	init_irq_work(&chain->work, signal_irq_work);
> > +	irq_work_queue(&chain->work);
> > +}
> > +
> > +static void rearm_irq_work(struct irq_work *work)
> > +{
> > +	struct dma_fence_chain *chain;
> > +	struct dma_fence *prev;
> > +
> > +	chain = container_of(work, typeof(*chain), work);
> > +
> > +	rcu_read_lock();
> > +	prev = rcu_dereference(chain->prev);
> > +	if (prev && dma_fence_add_callback(prev, &chain->cb, signal_cb))
> > +		prev = NULL;
> > +	rcu_read_unlock();
> > +	if (prev)
> > +		return;
> > +
> > +	/* Ok, we are done. No more unsignaled fences left */
> > +	signal_irq_work(work);
> > +}
> > +
> > +static inline bool fence_is_signaled__nested(struct dma_fence *fence)
> > +{
> > +	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> > +		return true;
> > +
> > +	if (fence->ops->signaled && fence->ops->signaled(fence)) {
> > +		unsigned long flags;
> > +
> > +		spin_lock_irqsave_nested(fence->lock, flags, SINGLE_DEPTH_NESTING);
> > +		dma_fence_signal_locked(fence);
> > +		spin_unlock_irqrestore(fence->lock, flags);
> > +
> > +		return true;
> > +	}
> > +
> > +	return false;
> > +}
> > +
> > +static bool prev_is_signaled(struct dma_fence_chain *chain)
> > +{
> > +	struct dma_fence *prev;
> > +	bool result;
> > +
> > +	rcu_read_lock();
> > +	prev = rcu_dereference(chain->prev);
> > +	result = !prev || fence_is_signaled__nested(prev);
> > +	rcu_read_unlock();
> > +
> > +	return result;
> > +}
> > +
> > +static void rearm_or_signal_cb(struct dma_fence *f, struct dma_fence_cb *cb)
> >  {
> >  	struct dma_fence_chain *chain;
> >  
> >  	chain = container_of(cb, typeof(*chain), cb);
> > -	init_irq_work(&chain->work, dma_fence_chain_irq_work);
> > +	if (prev_is_signaled(chain)) {
> > +		/* Ok, we are done. No more unsignaled fences left */
> > +		init_irq_work(&chain->work, signal_irq_work);
> > +	} else {
> > +		/* Try to rearm the callback */
> > +		init_irq_work(&chain->work, rearm_irq_work);
> > +	}
> > +
> >  	irq_work_queue(&chain->work);
> > -	dma_fence_put(f);
> >  }
> >  
> >  static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
> >  {
> >  	struct dma_fence_chain *head = to_dma_fence_chain(fence);
> > +	int err = -ENOENT;
> >  
> > -	dma_fence_get(&head->base);
> > -	dma_fence_chain_for_each(fence, &head->base) {
> > -		struct dma_fence *f = dma_fence_chain_contained(fence);
> > +	if (WARN_ON(!head))
> > +		return false;
> >  
> > -		dma_fence_get(f);
> > -		if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
> > +	dma_fence_get(fence);
> > +	if (head->fence)
> > +		err = dma_fence_add_callback(head->fence, &head->cb, rearm_or_signal_cb);
> > +	if (err) {
> > +		if (prev_is_signaled(head)) {
> >  			dma_fence_put(fence);
> > -			return true;
> > +		} else {
> > +			init_irq_work(&head->work, rearm_irq_work);
> > +			irq_work_queue(&head->work);
> > +			err = 0;
> >  		}
> > -		dma_fence_put(f);
> >  	}
> > -	dma_fence_put(&head->base);
> > -	return false;
> > +
> > +	return !err;
> >  }
> >  
> >  static bool dma_fence_chain_signaled(struct dma_fence *fence)
> 
> 