[Intel-gfx] [RFC PATCH 1/2] dma-fence: Avoid establishing a locking order between fence classes
Thomas Hellström
thomas.hellstrom at linux.intel.com
Tue Nov 30 12:31:08 UTC 2021
On 11/30/21 13:25, Maarten Lankhorst wrote:
> On 30-11-2021 13:19, Thomas Hellström wrote:
>> The locking order for taking two fence locks is implicitly defined in
>> at least two ways in the code:
>>
>> 1) Fence containers first and other fences next, which is defined by
>> the enable_signaling() callbacks of dma_fence_chain and
>> dma_fence_array.
>> 2) Reverse signal order, which is used by __i915_active_fence_set().
>>
>> Now 1) implies 2), except for the signal_on_any mode of dma_fence_array,
>> whereas 2) does not imply 1). Furthermore, 1) makes the locking order
>> between different containers confusing.
>>
>> Establish 2) and fix up the signal_on_any mode by calling
>> enable_signaling() on such fences unlocked at creation.
>>
>> Cc: linaro-mm-sig at lists.linaro.org
>> Cc: dri-devel at lists.freedesktop.org
>> Cc: Christian König <christian.koenig at amd.com>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> ---
>> drivers/dma-buf/dma-fence-array.c | 13 +++--
>> drivers/dma-buf/dma-fence-chain.c | 3 +-
>> drivers/dma-buf/dma-fence.c | 79 +++++++++++++++++++++----------
>> include/linux/dma-fence.h | 3 ++
>> 4 files changed, 69 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
>> index 3e07f961e2f3..0322b92909fe 100644
>> --- a/drivers/dma-buf/dma-fence-array.c
>> +++ b/drivers/dma-buf/dma-fence-array.c
>> @@ -84,8 +84,8 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
>> * insufficient).
>> */
>> dma_fence_get(&array->base);
>> - if (dma_fence_add_callback(array->fences[i], &cb[i].cb,
>> - dma_fence_array_cb_func)) {
>> + if (dma_fence_add_callback_nested(array->fences[i], &cb[i].cb,
>> + dma_fence_array_cb_func)) {
>> int error = array->fences[i]->error;
>>
>> dma_fence_array_set_pending_error(array, error);
>> @@ -158,6 +158,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
>> {
>> struct dma_fence_array *array;
>> size_t size = sizeof(*array);
>> + struct dma_fence *fence;
>>
>> /* Allocate the callback structures behind the array. */
>> size += num_fences * sizeof(struct dma_fence_array_cb);
>> @@ -165,8 +166,9 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
>> if (!array)
>> return NULL;
>>
>> + fence = &array->base;
>> spin_lock_init(&array->lock);
>> - dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock,
>> + dma_fence_init(fence, &dma_fence_array_ops, &array->lock,
>> context, seqno);
>> init_irq_work(&array->work, irq_dma_fence_array_work);
>>
>> @@ -174,7 +176,10 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
>> atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
>> array->fences = fences;
>>
>> - array->base.error = PENDING_ERROR;
>> + fence->error = PENDING_ERROR;
>> +
>> + if (signal_on_any)
>> + dma_fence_enable_sw_signaling(fence);
>>
>> return array;
>> }
>> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
>> index 1b4cb3e5cec9..0518e53880f6 100644
>> --- a/drivers/dma-buf/dma-fence-chain.c
>> +++ b/drivers/dma-buf/dma-fence-chain.c
>> @@ -152,7 +152,8 @@ static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
>> struct dma_fence *f = chain ? chain->fence : fence;
>>
>> dma_fence_get(f);
>> - if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
>> + if (!dma_fence_add_callback_nested(f, &head->cb,
>> + dma_fence_chain_cb)) {
>> dma_fence_put(fence);
>> return true;
>> }
>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>> index 066400ed8841..90a3d5121746 100644
>> --- a/drivers/dma-buf/dma-fence.c
>> +++ b/drivers/dma-buf/dma-fence.c
>> @@ -610,6 +610,37 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
>> }
>> EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
>>
>> +static int __dma_fence_add_callback(struct dma_fence *fence,
>> + struct dma_fence_cb *cb,
>> + dma_fence_func_t func,
>> + int nest_level)
>> +{
>> + unsigned long flags;
>> + int ret = 0;
>> +
>> + if (WARN_ON(!fence || !func))
>> + return -EINVAL;
>> +
>> + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
>> + INIT_LIST_HEAD(&cb->node);
>> + return -ENOENT;
>> + }
>> +
>> + spin_lock_irqsave_nested(fence->lock, flags, 0);
> Forgot to hook up nest_level here?
Ah Yes :)
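It should of course be

    spin_lock_irqsave_nested(fence->lock, flags, nest_level);

so that the nesting level requested by the callers actually reaches
lockdep. Will fix in the next revision.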
>> +
>> + if (__dma_fence_enable_signaling(fence)) {
>> + cb->func = func;
>> + list_add_tail(&cb->node, &fence->cb_list);
>> + } else {
>> + INIT_LIST_HEAD(&cb->node);
>> + ret = -ENOENT;
>> + }
>> +
>> + spin_unlock_irqrestore(fence->lock, flags);
>> +
>> + return ret;
>> +}
>> +
>> /**
>> * dma_fence_add_callback - add a callback to be called when the fence
>> * is signaled
>> @@ -635,33 +666,33 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
>> int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
>> dma_fence_func_t func)
>> {
>> - unsigned long flags;
>> - int ret = 0;
>> -
>> - if (WARN_ON(!fence || !func))
>> - return -EINVAL;
>> -
>> - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
>> - INIT_LIST_HEAD(&cb->node);
>> - return -ENOENT;
>> - }
>> -
>> - spin_lock_irqsave(fence->lock, flags);
>> -
>> - if (__dma_fence_enable_signaling(fence)) {
>> - cb->func = func;
>> - list_add_tail(&cb->node, &fence->cb_list);
>> - } else {
>> - INIT_LIST_HEAD(&cb->node);
>> - ret = -ENOENT;
>> - }
>> -
>> - spin_unlock_irqrestore(fence->lock, flags);
>> -
>> - return ret;
>> + return __dma_fence_add_callback(fence, cb, func, 0);
>> }
>> EXPORT_SYMBOL(dma_fence_add_callback);
>>
> Other than that, I didn't investigate the nesting fails enough to say I can accurately review this. :)
Basically the problem is that within enable_signaling(), which is called
with the dma_fence lock held, we take the dma_fence lock of another
fence. If that other fence is a dma_fence_array, or a dma_fence_chain
that in turn tries to lock a dma_fence_array, we hit a lockdep splat.
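Schematically, the problematic pattern looks something like this
(a simplified sketch, not actual driver code; first_contained_fence()
and the callback are made up for illustration):

static void child_signaled(struct dma_fence *f, struct dma_fence_cb *cb)
{
	/* Propagate the child's signal to the container here. */
}

static struct dma_fence_cb example_cb; /* illustration only */

static bool container_enable_signaling(struct dma_fence *fence)
{
	/* .enable_signaling() is called with fence->lock already held. */
	struct dma_fence *child = first_contained_fence(fence); /* made up */

	/*
	 * dma_fence_add_callback() takes child->lock. If child is
	 * another fence container, that spinlock is in the same lock
	 * class as the one we already hold, and lockdep reports
	 * possible recursive locking unless the inner acquisition is
	 * annotated as nested.
	 */
	return !dma_fence_add_callback(child, &example_cb, child_signaled);
}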
But I'll update the commit message with a typical splat.
/Thomas
>
> ~Maarten
>
>