[RFC PATCH 1/2] dma-fence: Avoid establishing a locking order between fence classes

Thomas Hellström thomas.hellstrom at linux.intel.com
Tue Nov 30 12:31:08 UTC 2021


On 11/30/21 13:25, Maarten Lankhorst wrote:
> On 30-11-2021 13:19, Thomas Hellström wrote:
>> The locking order for taking two fence locks is implicitly defined in
>> at least two ways in the code:
>>
>> 1) Fence containers first and other fences next, which is defined by
>> the enable_signaling() callbacks of dma_fence_chain and
>> dma_fence_array.
>> 2) Reverse signal order, which is used by __i915_active_fence_set().
>>
>> Now 1) implies 2), except for the signal_on_any mode of dma_fence_array
>> and 2) does not imply 1), and also 1) makes locking order between
>> different containers confusing.
>>
>> Establish 2) and fix up the signal_on_any mode by calling
>> enable_signaling() on such fences unlocked at creation.
>>
>> Cc: linaro-mm-sig at lists.linaro.org
>> Cc: dri-devel at lists.freedesktop.org
>> Cc: Christian König <christian.koenig at amd.com>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> ---
>>   drivers/dma-buf/dma-fence-array.c | 13 +++--
>>   drivers/dma-buf/dma-fence-chain.c |  3 +-
>>   drivers/dma-buf/dma-fence.c       | 79 +++++++++++++++++++++----------
>>   include/linux/dma-fence.h         |  3 ++
>>   4 files changed, 69 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
>> index 3e07f961e2f3..0322b92909fe 100644
>> --- a/drivers/dma-buf/dma-fence-array.c
>> +++ b/drivers/dma-buf/dma-fence-array.c
>> @@ -84,8 +84,8 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
>>   		 * insufficient).
>>   		 */
>>   		dma_fence_get(&array->base);
>> -		if (dma_fence_add_callback(array->fences[i], &cb[i].cb,
>> -					   dma_fence_array_cb_func)) {
>> +		if (dma_fence_add_callback_nested(array->fences[i], &cb[i].cb,
>> +						  dma_fence_array_cb_func)) {
>>   			int error = array->fences[i]->error;
>>   
>>   			dma_fence_array_set_pending_error(array, error);
>> @@ -158,6 +158,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
>>   {
>>   	struct dma_fence_array *array;
>>   	size_t size = sizeof(*array);
>> +	struct dma_fence *fence;
>>   
>>   	/* Allocate the callback structures behind the array. */
>>   	size += num_fences * sizeof(struct dma_fence_array_cb);
>> @@ -165,8 +166,9 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
>>   	if (!array)
>>   		return NULL;
>>   
>> +	fence = &array->base;
>>   	spin_lock_init(&array->lock);
>> -	dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock,
>> +	dma_fence_init(fence, &dma_fence_array_ops, &array->lock,
>>   		       context, seqno);
>>   	init_irq_work(&array->work, irq_dma_fence_array_work);
>>   
>> @@ -174,7 +176,10 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
>>   	atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
>>   	array->fences = fences;
>>   
>> -	array->base.error = PENDING_ERROR;
>> +	fence->error = PENDING_ERROR;
>> +
>> +	if (signal_on_any)
>> +		dma_fence_enable_sw_signaling(fence);
>>   
>>   	return array;
>>   }
>> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
>> index 1b4cb3e5cec9..0518e53880f6 100644
>> --- a/drivers/dma-buf/dma-fence-chain.c
>> +++ b/drivers/dma-buf/dma-fence-chain.c
>> @@ -152,7 +152,8 @@ static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
>>   		struct dma_fence *f = chain ? chain->fence : fence;
>>   
>>   		dma_fence_get(f);
>> -		if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
>> +		if (!dma_fence_add_callback_nested(f, &head->cb,
>> +						   dma_fence_chain_cb)) {
>>   			dma_fence_put(fence);
>>   			return true;
>>   		}
>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>> index 066400ed8841..90a3d5121746 100644
>> --- a/drivers/dma-buf/dma-fence.c
>> +++ b/drivers/dma-buf/dma-fence.c
>> @@ -610,6 +610,37 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
>>   }
>>   EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
>>   
>> +static int __dma_fence_add_callback(struct dma_fence *fence,
>> +				    struct dma_fence_cb *cb,
>> +				    dma_fence_func_t func,
>> +				    int nest_level)
>> +{
>> +	unsigned long flags;
>> +	int ret = 0;
>> +
>> +	if (WARN_ON(!fence || !func))
>> +		return -EINVAL;
>> +
>> +	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
>> +		INIT_LIST_HEAD(&cb->node);
>> +		return -ENOENT;
>> +	}
>> +
>> +	spin_lock_irqsave_nested(fence->lock, flags, 0);
> Forgot to hook up nest_level here?

Ah, yes :)


>> +
>> +	if (__dma_fence_enable_signaling(fence)) {
>> +		cb->func = func;
>> +		list_add_tail(&cb->node, &fence->cb_list);
>> +	} else {
>> +		INIT_LIST_HEAD(&cb->node);
>> +		ret = -ENOENT;
>> +	}
>> +
>> +	spin_unlock_irqrestore(fence->lock, flags);
>> +
>> +	return ret;
>> +}
>> +
>>   /**
>>    * dma_fence_add_callback - add a callback to be called when the fence
>>    * is signaled
>> @@ -635,33 +666,33 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
>>   int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
>>   			   dma_fence_func_t func)
>>   {
>> -	unsigned long flags;
>> -	int ret = 0;
>> -
>> -	if (WARN_ON(!fence || !func))
>> -		return -EINVAL;
>> -
>> -	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
>> -		INIT_LIST_HEAD(&cb->node);
>> -		return -ENOENT;
>> -	}
>> -
>> -	spin_lock_irqsave(fence->lock, flags);
>> -
>> -	if (__dma_fence_enable_signaling(fence)) {
>> -		cb->func = func;
>> -		list_add_tail(&cb->node, &fence->cb_list);
>> -	} else {
>> -		INIT_LIST_HEAD(&cb->node);
>> -		ret = -ENOENT;
>> -	}
>> -
>> -	spin_unlock_irqrestore(fence->lock, flags);
>> -
>> -	return ret;
>> +	return __dma_fence_add_callback(fence, cb, func, 0);
>>   }
>>   EXPORT_SYMBOL(dma_fence_add_callback);
>>   
> Other than that, I didn't investigate the nesting fails enough to say I can accurately review this. :)

Basically, the problem is that within enable_signaling(), which is called
with the dma_fence lock held, we take the dma_fence lock of another
fence. If that other fence is a dma_fence_array, or a dma_fence_chain
which in turn tries to lock a dma_fence_array, we hit a splat.

But I'll update the commit message with a typical splat.

/Thomas


>
> ~Maarten
>
>


More information about the dri-devel mailing list