[RFC PATCH 2/2 with seqcount v3] reservation: add suppport for read-only access using rcu
Thomas Hellstrom
thellstrom at vmware.com
Mon May 19 06:42:53 PDT 2014
Hi, Maarten!
Some nitpicks, and that krealloc within rcu lock still worries me.
Otherwise looks good.
/Thomas
On 04/23/2014 12:15 PM, Maarten Lankhorst wrote:
> This adds 4 more functions to deal with rcu.
>
> reservation_object_get_fences_rcu() will obtain the list of shared
> and exclusive fences without obtaining the ww_mutex.
>
> reservation_object_wait_timeout_rcu() will wait on all fences of the
> reservation_object, without obtaining the ww_mutex.
>
> reservation_object_test_signaled_rcu() will test if all fences of the
> reservation_object are signaled without using the ww_mutex.
>
> reservation_object_get_excl() is added because touching the fence_excl
> member directly will trigger a sparse warning.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
> ---
> Using seqcount and fixing some lockdep bugs.
> Changes since v2:
> - Fix some crashes, remove some unneeded barriers when provided by
> seqcount writes
> - Fix code to work correctly with sparse's RCU annotations.
> - Create a global string for the seqcount lock to make lockdep happy.
>
> Can I get this version reviewed? If it looks correct I'll mail the
> full series
> because it's intertwined with the TTM conversion to use this code.
>
> See
> https://urldefense.proofpoint.com/v1/url?u=http://cgit.freedesktop.org/~mlankhorst/linux/log/?h%3Dvmwgfx_wip&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=l5Ago9ekmVFZ3c4M6eauqrJWGwjf6fTb%2BP3CxbBFkVM%3D%0A&m=SvhtRcVv7pxLHaMBAL4xy2Rr2XDJ%2B95q18FDS13r8FQ%3D%0A&s=07fbe960788dca202856a797e8c915c44f05746a04d899c76459653042ea0112
> ---
> diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
> index d89a98d2c37b..0df673f812eb 100644
> --- a/drivers/base/dma-buf.c
> +++ b/drivers/base/dma-buf.c
> @@ -137,7 +137,7 @@ static unsigned int dma_buf_poll(struct file
> *file, poll_table *poll)
> struct reservation_object_list *fobj;
> struct fence *fence_excl;
> unsigned long events;
> - unsigned shared_count;
> + unsigned shared_count, seq;
>
> dmabuf = file->private_data;
> if (!dmabuf || !dmabuf->resv)
> @@ -151,14 +151,20 @@ static unsigned int dma_buf_poll(struct file
> *file, poll_table *poll)
> if (!events)
> return 0;
>
> - ww_mutex_lock(&resv->lock, NULL);
> +retry:
> + seq = read_seqcount_begin(&resv->seq);
> + rcu_read_lock();
>
> - fobj = resv->fence;
> - if (!fobj)
> - goto out;
> -
> - shared_count = fobj->shared_count;
> - fence_excl = resv->fence_excl;
> + fobj = rcu_dereference(resv->fence);
> + if (fobj)
> + shared_count = fobj->shared_count;
> + else
> + shared_count = 0;
> + fence_excl = rcu_dereference(resv->fence_excl);
> + if (read_seqcount_retry(&resv->seq, seq)) {
> + rcu_read_unlock();
> + goto retry;
> + }
>
> if (fence_excl && (!(events & POLLOUT) || shared_count == 0)) {
> struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
> @@ -176,14 +182,20 @@ static unsigned int dma_buf_poll(struct file
> *file, poll_table *poll)
> spin_unlock_irq(&dmabuf->poll.lock);
>
> if (events & pevents) {
> - if (!fence_add_callback(fence_excl, &dcb->cb,
> + if (!fence_get_rcu(fence_excl)) {
> + /* force a recheck */
> + events &= ~pevents;
> + dma_buf_poll_cb(NULL, &dcb->cb);
> + } else if (!fence_add_callback(fence_excl, &dcb->cb,
> dma_buf_poll_cb)) {
> events &= ~pevents;
> + fence_put(fence_excl);
> } else {
> /*
> * No callback queued, wake up any additional
> * waiters.
> */
> + fence_put(fence_excl);
> dma_buf_poll_cb(NULL, &dcb->cb);
> }
> }
> @@ -205,13 +217,26 @@ static unsigned int dma_buf_poll(struct file
> *file, poll_table *poll)
> goto out;
>
> for (i = 0; i < shared_count; ++i) {
> - struct fence *fence = fobj->shared[i];
> + struct fence *fence = rcu_dereference(fobj->shared[i]);
>
> + if (!fence_get_rcu(fence)) {
> + /*
> + * fence refcount dropped to zero, this means
> + * that fobj has been freed
> + *
> + * call dma_buf_poll_cb and force a recheck!
> + */
> + events &= ~POLLOUT;
> + dma_buf_poll_cb(NULL, &dcb->cb);
> + break;
> + }
> if (!fence_add_callback(fence, &dcb->cb,
> dma_buf_poll_cb)) {
> + fence_put(fence);
> events &= ~POLLOUT;
> break;
> }
> + fence_put(fence);
> }
>
> /* No callback queued, wake up any additional waiters. */
> @@ -220,7 +245,7 @@ static unsigned int dma_buf_poll(struct file
> *file, poll_table *poll)
> }
>
> out:
> - ww_mutex_unlock(&resv->lock);
> + rcu_read_unlock();
> return events;
> }
>
> diff --git a/drivers/base/reservation.c b/drivers/base/reservation.c
> index b82a5b630a8e..901bbf19c868 100644
> --- a/drivers/base/reservation.c
> +++ b/drivers/base/reservation.c
> @@ -38,6 +38,11 @@
> DEFINE_WW_CLASS(reservation_ww_class);
> EXPORT_SYMBOL(reservation_ww_class);
>
> +struct lock_class_key reservation_seqcount_class;
> +EXPORT_SYMBOL(reservation_seqcount_class);
> +
> +const char reservation_seqcount_string[] = "reservation_seqcount";
> +EXPORT_SYMBOL(reservation_seqcount_string);
> /*
> * Reserve space to add a shared fence to a reservation_object,
> * must be called with obj->lock held.
> @@ -55,8 +60,8 @@ int reservation_object_reserve_shared(struct
> reservation_object *obj)
> kfree(obj->staged);
> obj->staged = NULL;
> return 0;
> - }
> - max = old->shared_max * 2;
> + } else
> + max = old->shared_max * 2;
Perhaps as a separate reformatting patch?
> } else
> max = 4;
>
> @@ -82,27 +87,37 @@ reservation_object_add_shared_inplace(struct
> reservation_object *obj,
> {
> u32 i;
>
> + fence_get(fence);
> +
> + preempt_disable();
> + write_seqcount_begin(&obj->seq);
> +
> for (i = 0; i < fobj->shared_count; ++i) {
> - if (fobj->shared[i]->context == fence->context) {
> - struct fence *old_fence = fobj->shared[i];
> + struct fence *old_fence;
>
> - fence_get(fence);
> + old_fence = rcu_dereference_protected(fobj->shared[i],
> + reservation_object_held(obj));
>
> - fobj->shared[i] = fence;
> + if (old_fence->context == fence->context) {
> + /* memory barrier is added by write_seqcount_begin */
> + RCU_INIT_POINTER(fobj->shared[i], fence);
> + write_seqcount_end(&obj->seq);
> + preempt_enable();
>
> fence_put(old_fence);
> return;
> }
> }
>
> - fence_get(fence);
> - fobj->shared[fobj->shared_count] = fence;
> /*
> - * make the new fence visible before incrementing
> - * fobj->shared_count
> + * memory barrier is added by write_seqcount_begin,
> + * fobj->shared_count is protected by this lock too
> */
> - smp_wmb();
> + RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
> fobj->shared_count++;
> +
> + write_seqcount_end(&obj->seq);
> + preempt_enable();
> }
>
> static void
> @@ -112,11 +127,12 @@ reservation_object_add_shared_replace(struct
> reservation_object *obj,
> struct fence *fence)
> {
> unsigned i;
> + struct fence *old_fence = NULL;
>
> fence_get(fence);
>
> if (!old) {
> - fobj->shared[0] = fence;
> + RCU_INIT_POINTER(fobj->shared[0], fence);
> fobj->shared_count = 1;
> goto done;
> }
> @@ -130,19 +146,38 @@ reservation_object_add_shared_replace(struct
> reservation_object *obj,
> fobj->shared_count = old->shared_count;
>
> for (i = 0; i < old->shared_count; ++i) {
> - if (fence && old->shared[i]->context == fence->context) {
> - fence_put(old->shared[i]);
> - fobj->shared[i] = fence;
> - fence = NULL;
> + struct fence *check;
> +
> + check = rcu_dereference_protected(old->shared[i],
> + reservation_object_held(obj));
> +
> + if (!old_fence && check->context == fence->context) {
> + old_fence = check;
> + RCU_INIT_POINTER(fobj->shared[i], fence);
> } else
> - fobj->shared[i] = old->shared[i];
> + RCU_INIT_POINTER(fobj->shared[i], check);
> + }
> + if (!old_fence) {
> + RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
> + fobj->shared_count++;
> }
> - if (fence)
> - fobj->shared[fobj->shared_count++] = fence;
>
> done:
> - obj->fence = fobj;
> - kfree(old);
> + preempt_disable();
> + write_seqcount_begin(&obj->seq);
> + /*
> + * RCU_INIT_POINTER can be used here,
> + * seqcount provides the necessary barriers
> + */
> + RCU_INIT_POINTER(obj->fence, fobj);
> + write_seqcount_end(&obj->seq);
> + preempt_enable();
> +
> + if (old)
> + kfree_rcu(old, rcu);
> +
> + if (old_fence)
> + fence_put(old_fence);
> }
>
> /*
> @@ -158,7 +193,7 @@ void reservation_object_add_shared_fence(struct
> reservation_object *obj,
> obj->staged = NULL;
>
> if (!fobj) {
> - BUG_ON(old->shared_count == old->shared_max);
> + BUG_ON(old->shared_count >= old->shared_max);
> reservation_object_add_shared_inplace(obj, old, fence);
> } else
> reservation_object_add_shared_replace(obj, old, fobj, fence);
> @@ -168,26 +203,266 @@
> EXPORT_SYMBOL(reservation_object_add_shared_fence);
> void reservation_object_add_excl_fence(struct reservation_object *obj,
> struct fence *fence)
> {
> - struct fence *old_fence = obj->fence_excl;
> + struct fence *old_fence = reservation_object_get_excl(obj);
> struct reservation_object_list *old;
> u32 i = 0;
>
> old = reservation_object_get_list(obj);
> - if (old) {
> + if (old)
> i = old->shared_count;
> - old->shared_count = 0;
> - }
>
> if (fence)
> fence_get(fence);
>
> - obj->fence_excl = fence;
> + preempt_disable();
> + write_seqcount_begin(&obj->seq);
> + /* write_seqcount_begin provides the necessary memory barrier */
> + RCU_INIT_POINTER(obj->fence_excl, fence);
> + if (old)
> + old->shared_count = 0;
> + write_seqcount_end(&obj->seq);
> + preempt_enable();
>
> /* inplace update, no shared fences */
> while (i--)
> - fence_put(old->shared[i]);
> + fence_put(rcu_dereference_protected(old->shared[i],
> + reservation_object_held(obj)));
>
> if (old_fence)
> fence_put(old_fence);
> }
> EXPORT_SYMBOL(reservation_object_add_excl_fence);
> +
> +int reservation_object_get_fences_rcu(struct reservation_object *obj,
> + struct fence **pfence_excl,
> + unsigned *pshared_count,
> + struct fence ***pshared)
> +{
> + unsigned shared_count = 0;
> + unsigned retry = 1;
> + struct fence **shared = NULL, *fence_excl = NULL;
> + int ret = 0;
> +
> + while (retry) {
> + struct reservation_object_list *fobj;
> + unsigned seq;
> +
> + seq = read_seqcount_begin(&obj->seq);
> +
> + rcu_read_lock();
> +
> + fobj = rcu_dereference(obj->fence);
> + if (fobj) {
> + struct fence **nshared;
> +
> + shared_count = ACCESS_ONCE(fobj->shared_count);
ACCESS_ONCE() shouldn't be needed inside the seqlock?
>
> + nshared = krealloc(shared, sizeof(*shared) *
> shared_count, GFP_KERNEL);
Again, krealloc should be a sleeping function, and not suitable within a
RCU read lock? I still think this krealloc should be moved to the start
of the retry loop, and we should start with a suitable guess of
shared_count (perhaps 0?) It's not like we're going to waste a lot of
memory....
> + if (!nshared) {
> + ret = -ENOMEM;
> + shared_count = retry = 0;
> + goto unlock;
> + }
> + shared = nshared;
> + memcpy(shared, fobj->shared, sizeof(*shared) *
> shared_count);
> + } else
> + shared_count = 0;
> + fence_excl = rcu_dereference(obj->fence_excl);
> +
> + retry = read_seqcount_retry(&obj->seq, seq);
> + if (retry)
> + goto unlock;
> +
> + if (!fence_excl || fence_get_rcu(fence_excl)) {
> + unsigned i;
> +
> + for (i = 0; i < shared_count; ++i) {
> + if (fence_get_rcu(shared[i]))
> + continue;
> +
> + /* uh oh, refcount failed, abort and retry */
> + while (i--)
> + fence_put(shared[i]);
> +
> + if (fence_excl) {
> + fence_put(fence_excl);
> + fence_excl = NULL;
> + }
> +
> + retry = 1;
> + break;
> + }
> + } else
> + retry = 1;
> +
> +unlock:
> + rcu_read_unlock();
> + }
> + *pshared_count = shared_count;
> + if (shared_count)
> + *pshared = shared;
> + else {
> + *pshared = NULL;
> + kfree(shared);
> + }
> + *pfence_excl = fence_excl;
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu);
> +
> +long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
> + bool wait_all, bool intr,
> + unsigned long timeout)
> +{
> + struct fence *fence;
> + unsigned seq, shared_count, i = 0;
> + long ret = timeout;
> +
> +retry:
> + fence = NULL;
> + shared_count = 0;
> + seq = read_seqcount_begin(&obj->seq);
> + rcu_read_lock();
> +
> + if (wait_all) {
> + struct reservation_object_list *fobj =
> rcu_dereference(obj->fence);
> +
> + if (fobj)
> + shared_count = fobj->shared_count;
> +
> + if (read_seqcount_retry(&obj->seq, seq))
> + goto unlock_retry;
> +
> + for (i = 0; i < shared_count; ++i) {
> + struct fence *lfence = rcu_dereference(fobj->shared[i]);
> +
> + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags))
> + continue;
> +
> + if (!fence_get_rcu(lfence))
> + goto unlock_retry;
> +
> + if (fence_is_signaled(lfence)) {
> + fence_put(lfence);
> + continue;
> + }
> +
> + fence = lfence;
> + break;
> + }
> + }
> +
> + if (!shared_count) {
> + struct fence *fence_excl = rcu_dereference(obj->fence_excl);
> +
> + if (read_seqcount_retry(&obj->seq, seq))
> + goto unlock_retry;
> +
> + if (fence_excl &&
> + !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags)) {
> + if (!fence_get_rcu(fence_excl))
> + goto unlock_retry;
> +
> + if (fence_is_signaled(fence_excl))
> + fence_put(fence_excl);
> + else
> + fence = fence_excl;
> + }
> + }
> +
> + rcu_read_unlock();
> + if (fence) {
> + ret = fence_wait_timeout(fence, intr, ret);
> + fence_put(fence);
> + if (ret > 0 && wait_all && (i + 1 < shared_count))
> + goto retry;
> + }
> + return ret;
> +
> +unlock_retry:
> + rcu_read_unlock();
> + goto retry;
> +}
> +EXPORT_SYMBOL_GPL(reservation_object_wait_timeout_rcu);
> +
> +
> +static inline int
> +reservation_object_test_signaled_single(struct fence *passed_fence)
> +{
> + struct fence *fence, *lfence = passed_fence;
> + int ret = 1;
> +
> + if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) {
> + int ret;
> +
> + fence = fence_get_rcu(lfence);
> + if (!fence)
> + return -1;
> +
> + ret = !!fence_is_signaled(fence);
> + fence_put(fence);
> + }
> + return ret;
> +}
> +
> +bool reservation_object_test_signaled_rcu(struct reservation_object
> *obj,
> + bool test_all)
> +{
> + unsigned seq, shared_count;
> + int ret = true;
> +
> +retry:
> + shared_count = 0;
> + seq = read_seqcount_begin(&obj->seq);
> + rcu_read_lock();
> +
> + if (test_all) {
> + unsigned i;
> +
> + struct reservation_object_list *fobj =
> rcu_dereference(obj->fence);
> +
> + if (fobj)
> + shared_count = fobj->shared_count;
> +
> + if (read_seqcount_retry(&obj->seq, seq))
> + goto unlock_retry;
> +
> + for (i = 0; i < shared_count; ++i) {
> + struct fence *fence = rcu_dereference(fobj->shared[i]);
> +
> + ret = reservation_object_test_signaled_single(fence);
> + if (ret < 0)
> + goto unlock_retry;
> + else if (!ret)
> + break;
> + }
> +
> + /*
> + * There could be a read_seqcount_retry here, but nothing cares
> + * about whether it's the old or newer fence pointers that are
> + * signale. That race could still have happened after checking
Typo.
>
> + * read_seqcount_retry. If you care, use ww_mutex_lock.
> + */
> + }
> +
> + if (!shared_count) {
> + struct fence *fence_excl = rcu_dereference(obj->fence_excl);
> +
> + if (read_seqcount_retry(&obj->seq, seq))
> + goto unlock_retry;
> +
> + if (fence_excl) {
> + ret = reservation_object_test_signaled_single(fence_excl);
> + if (ret < 0)
> + goto unlock_retry;
> + }
> + }
> +
> + rcu_read_unlock();
> + return ret;
> +
> +unlock_retry:
> + rcu_read_unlock();
> + goto retry;
> +}
> +EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu);
> diff --git a/include/linux/fence.h b/include/linux/fence.h
> index d13b5ab61726..4b7002457af0 100644
> --- a/include/linux/fence.h
> +++ b/include/linux/fence.h
> @@ -31,7 +31,7 @@
> #include <linux/kref.h>
> #include <linux/sched.h>
> #include <linux/printk.h>
> -#include <linux/slab.h>
> +#include <linux/rcupdate.h>
>
> struct fence;
> struct fence_ops;
> @@ -41,6 +41,7 @@ struct fence_cb;
> * struct fence - software synchronization primitive
> * @refcount: refcount for this fence
> * @ops: fence_ops associated with this fence
> + * @rcu: used for releasing fence with kfree_rcu
> * @cb_list: list of all callbacks to call
> * @lock: spin_lock_irqsave used for locking
> * @context: execution context this fence belongs to, returned by
> @@ -74,6 +75,7 @@ struct fence_cb;
> struct fence {
> struct kref refcount;
> const struct fence_ops *ops;
> + struct rcu_head rcu;
> struct list_head cb_list;
> spinlock_t *lock;
> unsigned context, seqno;
> @@ -190,11 +192,25 @@ static inline void fence_get(struct fence *fence)
> kref_get(&fence->refcount);
> }
>
> +/**
> + * fence_get_rcu - get a fence from a reservation_object_list with
> rcu read lock
> + * @fence: [in] fence to increase refcount of
> + *
> + * Function returns NULL if no refcount could be obtained, or the fence.
> + */
> +static inline struct fence *fence_get_rcu(struct fence *fence)
> +{
> + if (kref_get_unless_zero(&fence->refcount))
> + return fence;
> + else
> + return NULL;
> +}
> +
> extern void release_fence(struct kref *kref);
>
> static inline void free_fence(struct fence *fence)
> {
> - kfree(fence);
> + kfree_rcu(fence, rcu);
> }
>
> /**
> diff --git a/include/linux/reservation.h b/include/linux/reservation.h
> index 2affe67dea6e..b73d3df5a8e8 100644
> --- a/include/linux/reservation.h
> +++ b/include/linux/reservation.h
> @@ -42,22 +42,29 @@
> #include <linux/ww_mutex.h>
> #include <linux/fence.h>
> #include <linux/slab.h>
> +#include <linux/seqlock.h>
> +#include <linux/rcupdate.h>
>
> extern struct ww_class reservation_ww_class;
> +extern struct lock_class_key reservation_seqcount_class;
> +extern const char reservation_seqcount_string[];
>
> struct reservation_object_list {
> + struct rcu_head rcu;
> u32 shared_count, shared_max;
> - struct fence *shared[];
> + struct fence __rcu *shared[];
> };
>
> struct reservation_object {
> struct ww_mutex lock;
> + seqcount_t seq;
>
> - struct fence *fence_excl;
> - struct reservation_object_list *fence;
> + struct fence __rcu *fence_excl;
> + struct reservation_object_list __rcu *fence;
> struct reservation_object_list *staged;
> };
>
> +#define reservation_object_held(obj) lockdep_is_held(&(obj)->lock.base)
> #define reservation_object_assert_held(obj) \
> lockdep_assert_held(&(obj)->lock.base)
>
> @@ -66,8 +73,9 @@ reservation_object_init(struct reservation_object *obj)
> {
> ww_mutex_init(&obj->lock, &reservation_ww_class);
>
> - obj->fence_excl = NULL;
> - obj->fence = NULL;
> + __seqcount_init(&obj->seq, reservation_seqcount_string,
> &reservation_seqcount_class);
> + RCU_INIT_POINTER(obj->fence, NULL);
> + RCU_INIT_POINTER(obj->fence_excl, NULL);
> obj->staged = NULL;
> }
>
> @@ -76,18 +84,20 @@ reservation_object_fini(struct reservation_object
> *obj)
> {
> int i;
> struct reservation_object_list *fobj;
> + struct fence *excl;
>
> /*
> * This object should be dead and all references must have
> - * been released to it.
> + * been released to it, so no need to be protected with rcu.
> */
> - if (obj->fence_excl)
> - fence_put(obj->fence_excl);
> + excl = rcu_dereference_protected(obj->fence_excl, 1);
> + if (excl)
> + fence_put(excl);
>
> - fobj = obj->fence;
> + fobj = rcu_dereference_protected(obj->fence, 1);
> if (fobj) {
> for (i = 0; i < fobj->shared_count; ++i)
> - fence_put(fobj->shared[i]);
> + fence_put(rcu_dereference_protected(fobj->shared[i], 1));
>
> kfree(fobj);
> }
> @@ -99,17 +109,15 @@ reservation_object_fini(struct reservation_object
> *obj)
> static inline struct reservation_object_list *
> reservation_object_get_list(struct reservation_object *obj)
> {
> - reservation_object_assert_held(obj);
> -
> - return obj->fence;
> + return rcu_dereference_check(obj->fence,
> + reservation_object_held(obj));
> }
>
> static inline struct fence *
> reservation_object_get_excl(struct reservation_object *obj)
> {
> - reservation_object_assert_held(obj);
> -
> - return obj->fence_excl;
> + return rcu_dereference_check(obj->fence_excl,
> + reservation_object_held(obj));
> }
>
> int reservation_object_reserve_shared(struct reservation_object *obj);
> @@ -119,4 +127,16 @@ void reservation_object_add_shared_fence(struct
> reservation_object *obj,
> void reservation_object_add_excl_fence(struct reservation_object *obj,
> struct fence *fence);
>
> +int reservation_object_get_fences_rcu(struct reservation_object *obj,
> + struct fence **pfence_excl,
> + unsigned *pshared_count,
> + struct fence ***pshared);
> +
> +long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
> + bool wait_all, bool intr,
> + unsigned long timeout);
> +
> +bool reservation_object_test_signaled_rcu(struct reservation_object
> *obj,
> + bool test_all);
> +
> #endif /* _LINUX_RESERVATION_H */
More information about the dri-devel
mailing list