[PATCH 02/11] dma-buf: add new dma_fence_chain container v2

Chunming Zhou zhoucm1 at amd.com
Mon Dec 3 13:44:16 UTC 2018



在 2018/12/3 21:28, Christian König 写道:
> Am 03.12.18 um 14:18 schrieb Chunming Zhou:
>>
>> 在 2018/12/3 19:00, Christian König 写道:
>>> Am 03.12.18 um 06:25 schrieb zhoucm1:
>>>>
>>>> On 2018年11月28日 22:50, Christian König wrote:
>>>>> Lockless container implementation similar to a dma_fence_array, but
>>>>> with
>>>>> only two elements per node and automatic garbage collection.
>>>>>
>>>>> v2: properly document dma_fence_chain_for_each, add
>>>>> dma_fence_chain_find_seqno,
>>>>>       drop prev reference during garbage collection if it's not a
>>>>> chain fence.
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>>> ---
>>>>>    drivers/dma-buf/Makefile          |   3 +-
>>>>>    drivers/dma-buf/dma-fence-chain.c | 235
>>>>> ++++++++++++++++++++++++++++++++++++++
>>>>>    include/linux/dma-fence-chain.h   |  79 +++++++++++++
>>>>>    3 files changed, 316 insertions(+), 1 deletion(-)
>>>>>    create mode 100644 drivers/dma-buf/dma-fence-chain.c
>>>>>    create mode 100644 include/linux/dma-fence-chain.h
>>>>>
>>>>> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
>>>>> index 0913a6ccab5a..1f006e083eb9 100644
>>>>> --- a/drivers/dma-buf/Makefile
>>>>> +++ b/drivers/dma-buf/Makefile
>>>>> @@ -1,4 +1,5 @@
>>>>> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o
>>>>> seqno-fence.o
>>>>> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
>>>>> +     reservation.o seqno-fence.o
>>>>>    obj-$(CONFIG_SYNC_FILE)        += sync_file.o
>>>>>    obj-$(CONFIG_SW_SYNC)        += sw_sync.o sync_debug.o
>>>>>    obj-$(CONFIG_UDMABUF)        += udmabuf.o
>>>>> diff --git a/drivers/dma-buf/dma-fence-chain.c
>>>>> b/drivers/dma-buf/dma-fence-chain.c
>>>>> new file mode 100644
>>>>> index 000000000000..de05101fc48d
>>>>> --- /dev/null
>>>>> +++ b/drivers/dma-buf/dma-fence-chain.c
>>>>> @@ -0,0 +1,235 @@
>>>>> +/*
>>>>> + * fence-chain: chain fences together in a timeline
>>>>> + *
>>>>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>>>>> + * Authors:
>>>>> + *    Christian König <christian.koenig at amd.com>
>>>>> + *
>>>>> + * This program is free software; you can redistribute it and/or
>>>>> modify it
>>>>> + * under the terms of the GNU General Public License version 2 as
>>>>> published by
>>>>> + * the Free Software Foundation.
>>>>> + *
>>>>> + * This program is distributed in the hope that it will be useful,
>>>>> but WITHOUT
>>>>> + * ANY WARRANTY; without even the implied warranty of
>>>>> MERCHANTABILITY or
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>>>> License for
>>>>> + * more details.
>>>>> + */
>>>>> +
>>>>> +#include <linux/dma-fence-chain.h>
>>>>> +
>>>>> +static bool dma_fence_chain_enable_signaling(struct dma_fence 
>>>>> *fence);
>>>>> +
>>>>> +/**
>>>>> + * dma_fence_chain_get_prev - use RCU to get a reference to the
>>>>> previous fence
>>>>> + * @chain: chain node to get the previous node from
>>>>> + *
>>>>> + * Use dma_fence_get_rcu_safe to get a reference to the previous
>>>>> fence of the
>>>>> + * chain node.
>>>>> + */
>>>>> +static struct dma_fence *dma_fence_chain_get_prev(struct
>>>>> dma_fence_chain *chain)
>>>>> +{
>>>>> +    struct dma_fence *prev;
>>>>> +
>>>>> +    rcu_read_lock();
>>>>> +    prev = dma_fence_get_rcu_safe(&chain->prev);
>>>>> +    rcu_read_unlock();
>>>>> +    return prev;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * dma_fence_chain_walk - chain walking function
>>>>> + * @fence: current chain node
>>>>> + *
>>>>> + * Walk the chain to the next node. Returns the next fence or NULL
>>>>> if we are at
>>>>> + * the end of the chain. Garbage collects chain nodes which are
>>>>> already
>>>>> + * signaled.
>>>>> + */
>>>>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
>>>>> +{
>>>>> +    struct dma_fence_chain *chain, *prev_chain;
>>>>> +    struct dma_fence *prev, *replacement, *tmp;
>>>>> +
>>>>> +    chain = to_dma_fence_chain(fence);
>>>>> +    if (!chain) {
>>>>> +        dma_fence_put(fence);
>>>>> +        return NULL;
>>>>> +    }
>>>>> +
>>>>> +    while ((prev = dma_fence_chain_get_prev(chain))) {
>>>>> +
>>>>> +        prev_chain = to_dma_fence_chain(prev);
>>>>> +        if (prev_chain) {
>>>>> +            if (!dma_fence_is_signaled(prev_chain->fence))
>>>>> +                break;
>>>>> +
>>>>> +            replacement = dma_fence_chain_get_prev(prev_chain);
>>>>> +        } else {
>>>>> +            if (!dma_fence_is_signaled(prev))
>>>>> +                break;
>>>>> +
>>>>> +            replacement = NULL;
>>>>> +        }
>>>>> +
>>>>> +        tmp = cmpxchg(&chain->prev, prev, replacement);
>>>>> +        if (tmp == prev)
>>>>> +            dma_fence_put(tmp);
>>>>> +        else
>>>>> +            dma_fence_put(replacement);
>>>>> +        dma_fence_put(prev);
>>>>> +    }
>>>>> +
>>>>> +    dma_fence_put(fence);
>>>>> +    return prev;
>>>>> +}
>>>>> +EXPORT_SYMBOL(dma_fence_chain_walk);
>>>>> +
>>>>> +/**
>>>>> + * dma_fence_chain_find_seqno - find fence chain node by seqno
>>>>> + * @pfence: pointer to the chain node where to start
>>>>> + * @seqno: the sequence number to search for
>>>>> + *
>>>>> + * Advance the fence pointer to the chain node which will signal
>>>>> this sequence
>>>>> + * number. If no sequence number is provided then this is a no-op.
>>>>> + *
>>>>> + * Returns EINVAL if the fence is not a chain node or the sequence
>>>>> number has
>>>>> + * not yet advanced far enough.
>>>>> + */
>>>>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
>>>>> seqno)
>>>>> +{
>>>>> +    struct dma_fence_chain *chain;
>>>>> +
>>>>> +    if (!seqno)
>>>>> +        return 0;
>>>>> +
>>>>> +    chain = to_dma_fence_chain(*pfence);
>>>>> +    if (!chain || chain->base.seqno < seqno)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    dma_fence_chain_for_each(*pfence) {
>>>>> +        if ((*pfence)->context != chain->base.context ||
>>>>> +            to_dma_fence_chain(*pfence)->prev_seqno < seqno)
>>>>> +            break;
>>>>> +    }
>>>>> +    dma_fence_put(&chain->base);
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +EXPORT_SYMBOL(dma_fence_chain_find_seqno);
>>>>> +
>>>>> +static const char *dma_fence_chain_get_driver_name(struct dma_fence
>>>>> *fence)
>>>>> +{
>>>>> +        return "dma_fence_chain";
>>>>> +}
>>>>> +
>>>>> +static const char *dma_fence_chain_get_timeline_name(struct
>>>>> dma_fence *fence)
>>>>> +{
>>>>> +        return "unbound";
>>>>> +}
>>>>> +
>>>>> +static void dma_fence_chain_irq_work(struct irq_work *work)
>>>>> +{
>>>>> +    struct dma_fence_chain *chain;
>>>>> +
>>>>> +    chain = container_of(work, typeof(*chain), work);
>>>>> +
>>>>> +    /* Try to rearm the callback */
>>>>> +    if (!dma_fence_chain_enable_signaling(&chain->base))
>>>>> +        /* Ok, we are done. No more unsignaled fences left */
>>>>> +        dma_fence_signal(&chain->base);
>>>>> +}
>>>>> +
>>>>> +static void dma_fence_chain_cb(struct dma_fence *f, struct
>>>>> dma_fence_cb *cb)
>>>>> +{
>>>>> +    struct dma_fence_chain *chain;
>>>>> +
>>>>> +    chain = container_of(cb, typeof(*chain), cb);
>>>>> +    irq_work_queue(&chain->work);
>>>>> +    dma_fence_put(f);
>>>>> +}
>>>>> +
>>>>> +static bool dma_fence_chain_enable_signaling(struct dma_fence 
>>>>> *fence)
>>>>> +{
>>>>> +    struct dma_fence_chain *head = to_dma_fence_chain(fence);
>>>>> +
>>>>> +    dma_fence_chain_for_each(fence) {
>>>>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>>>>> +        struct dma_fence *f = chain ? chain->fence : fence;
>>>>> +
>>>>> +        if (!dma_fence_add_callback(f, &head->cb, 
>>>>> dma_fence_chain_cb))
>>>>> +            return true;
>>>>> +    }
>>>>> +    return false;
>>>>> +}
>>>>> +
>>>>> +static bool dma_fence_chain_signaled(struct dma_fence *fence)
>>>>> +{
>>>>> +    dma_fence_chain_for_each(fence) {
>>>>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>>>>> +        struct dma_fence *f = chain ? chain->fence : fence;
>>>>> +
>>>>> +        if (!dma_fence_is_signaled(f)) {
>>>>> +            dma_fence_put(fence);
>>>>> +            return false;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    return true;
>>>>> +}
>>>>> +
>>>>> +static void dma_fence_chain_release(struct dma_fence *fence)
>>>>> +{
>>>>> +    struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>>>>> +
>>>>> +    dma_fence_put(chain->prev);
>>>>> +    dma_fence_put(chain->fence);
>>>>> +    dma_fence_free(fence);
>>>>> +}
>>>>> +
>>>>> +const struct dma_fence_ops dma_fence_chain_ops = {
>>>>> +    .get_driver_name = dma_fence_chain_get_driver_name,
>>>>> +    .get_timeline_name = dma_fence_chain_get_timeline_name,
>>>>> +    .enable_signaling = dma_fence_chain_enable_signaling,
>>>>> +    .signaled = dma_fence_chain_signaled,
>>>>> +    .release = dma_fence_chain_release,
>>>>> +};
>>>>> +EXPORT_SYMBOL(dma_fence_chain_ops);
>>>>> +
>>>>> +/**
>>>>> + * dma_fence_chain_init - initialize a fence chain
>>>>> + * @chain: the chain node to initialize
>>>>> + * @prev: the previous fence
>>>>> + * @fence: the current fence
>>>>> + *
>>>>> + * Initialize a new chain node and either start a new chain or add
>>>>> the node to
>>>>> + * the existing chain of the previous fence.
>>>>> + */
>>>>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>>>>> +              struct dma_fence *prev,
>>>>> +              struct dma_fence *fence,
>>>>> +              uint64_t seqno)
>>>>> +{
>>>>> +    struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
>>>>> +    uint64_t context;
>>>>> +
>>>>> +    spin_lock_init(&chain->lock);
>>>>> +    chain->prev = prev;
>>>>> +    chain->fence = fence;
>>>>> +    chain->prev_seqno = 0;
>>>>> +    init_irq_work(&chain->work, dma_fence_chain_irq_work);
>>>>> +
>>>>> +    /* Try to reuse the context of the previous chain node. */
>>>>> +    if (prev_chain && seqno > prev->seqno &&
>>>>> +        __dma_fence_is_later(seqno, prev->seqno)) {
>>>> As your patch#1 makes __dma_fence_is_later only be valid for 32bit,
>>>> we cannot use it for 64bit here, we should remove it from here, just
>>>> compare seqno directly.
>>> That is intentional. We must make sure that the number both increments
>>> as 64bit number as well as not wraps around as 32bit number.
>>>
>>> In other words the largest difference between two sequence numbers
>>> userspace is allowed to submit is 1<<31.
>> Why? no one can make sure that, application users would only think it is
>> an uint64 sequence nubmer, and they can signal any advanced point. I
>> already see umd guys writing timeline test use max_uint64-1 as a final
>> signal.
>> We shouldn't add this limitation here.
>
> We need to be backward compatible to hardware which can only do 32bit 
> signaling with the dma-fence implementation.
I see that, you already explained that before.
but can't we just grep low 32bit of seqno only when 32bit hardware try 
to use?

then we can make dma_fence_later use 64bit comparation.

>
> Otherwise dma_fence_later() could return an inconsistent result and 
> break at other places.
>
> So if userspace wants to use more than 1<<31 difference between 
> sequence numbers we need to push back on this.

It's rare case, but I don't think we can convince them add this 
limitation. So we cannot add this limitation here.

-David

>
> Christian.
>
>>
>> -David
>>
>>>>> +        context = prev->context;
>>>>> +        chain->prev_seqno = prev->seqno;
>>>>> +    } else {
>>>>> +        context = dma_fence_context_alloc(1);
>>>>> +        /* Make sure that we always have a valid sequence number. */
>>>>> +        if (prev_chain)
>>>>> +            seqno = max(prev->seqno, seqno);
>>>> I still cannot judge if this case is proper, but I prefer we just
>>>> drop it when seqno is <= last seqno.
>>>> we can image that when we enable export/import, we could mess them.
>>>> So we shouldn't change timeline existing signal point any time.
>>> Yeah, but we can't lose fences either. This is the most defensive
>>> approach I can think of.
>>>
>>> E.g. we still ad the fence, but start a new timeline so all queries
>>> for all previous sequence number will always wait for everything.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> -David
>>>>> +    }
>>>>> +
>>>>> +    dma_fence_init(&chain->base, &dma_fence_chain_ops,
>>>>> +               &chain->lock, context, seqno);
>>>>> +}
>>>>> +EXPORT_SYMBOL(dma_fence_chain_init);
>>>>> diff --git a/include/linux/dma-fence-chain.h
>>>>> b/include/linux/dma-fence-chain.h
>>>>> new file mode 100644
>>>>> index 000000000000..3bb0efd6bc65
>>>>> --- /dev/null
>>>>> +++ b/include/linux/dma-fence-chain.h
>>>>> @@ -0,0 +1,79 @@
>>>>> +/*
>>>>> + * fence-chain: chain fences together in a timeline
>>>>> + *
>>>>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>>>>> + * Authors:
>>>>> + *    Christian König <christian.koenig at amd.com>
>>>>> + *
>>>>> + * This program is free software; you can redistribute it and/or
>>>>> modify it
>>>>> + * under the terms of the GNU General Public License version 2 as
>>>>> published by
>>>>> + * the Free Software Foundation.
>>>>> + *
>>>>> + * This program is distributed in the hope that it will be useful,
>>>>> but WITHOUT
>>>>> + * ANY WARRANTY; without even the implied warranty of
>>>>> MERCHANTABILITY or
>>>>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>>>> License for
>>>>> + * more details.
>>>>> + */
>>>>> +
>>>>> +#ifndef __LINUX_DMA_FENCE_CHAIN_H
>>>>> +#define __LINUX_DMA_FENCE_CHAIN_H
>>>>> +
>>>>> +#include <linux/dma-fence.h>
>>>>> +#include <linux/irq_work.h>
>>>>> +
>>>>> +/**
>>>>> + * struct dma_fence_chain - fence to represent an node of a fence
>>>>> chain
>>>>> + * @base: fence base class
>>>>> + * @lock: spinlock for fence handling
>>>>> + * @prev: previous fence of the chain
>>>>> + * @prev_seqno: original previous seqno before garbage collection
>>>>> + * @fence: encapsulated fence
>>>>> + * @cb: callback structure for signaling
>>>>> + * @work: irq work item for signaling
>>>>> + */
>>>>> +struct dma_fence_chain {
>>>>> +    struct dma_fence base;
>>>>> +    spinlock_t lock;
>>>>> +    struct dma_fence *prev;
>>>>> +    u64 prev_seqno;
>>>>> +    struct dma_fence *fence;
>>>>> +    struct dma_fence_cb cb;
>>>>> +    struct irq_work work;
>>>>> +};
>>>>> +
>>>>> +extern const struct dma_fence_ops dma_fence_chain_ops;
>>>>> +
>>>>> +/**
>>>>> + * to_dma_fence_chain - cast a fence to a dma_fence_chain
>>>>> + * @fence: fence to cast to a dma_fence_array
>>>>> + *
>>>>> + * Returns NULL if the fence is not a dma_fence_chain,
>>>>> + * or the dma_fence_chain otherwise.
>>>>> + */
>>>>> +static inline struct dma_fence_chain *
>>>>> +to_dma_fence_chain(struct dma_fence *fence)
>>>>> +{
>>>>> +    if (!fence || fence->ops != &dma_fence_chain_ops)
>>>>> +        return NULL;
>>>>> +
>>>>> +    return container_of(fence, struct dma_fence_chain, base);
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * dma_fence_chain_for_each - iterate over all fences in chain
>>>>> + * @fence: starting point as well as current fence
>>>>> + *
>>>>> + * Iterate over all fences in the chain. We keep a reference to the
>>>>> current
>>>>> + * fence while inside the loop which must be dropped when breaking
>>>>> out.
>>>>> + */
>>>>> +#define dma_fence_chain_for_each(fence)    \
>>>>> +    for 
>>>>> (dma_fence_get(fence);fence;fence=dma_fence_chain_walk(fence))
>>>>> +
>>>>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
>>>>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
>>>>> seqno);
>>>>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>>>>> +              struct dma_fence *prev,
>>>>> +              struct dma_fence *fence,
>>>>> +              uint64_t seqno);
>>>>> +
>>>>> +#endif /* __LINUX_DMA_FENCE_CHAIN_H */
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>



More information about the amd-gfx mailing list