[PATCH] dma-buf: fix and rework dma_buf_poll v4

Daniel Vetter daniel.vetter at ffwll.ch
Wed Jun 30 16:29:42 UTC 2021


On Wed, Jun 30, 2021 at 4:22 PM Christian König
<ckoenig.leichtzumerken at gmail.com> wrote:
>
> Am 30.06.21 um 16:07 schrieb Daniel Vetter:
> > On Wed, Jun 30, 2021 at 2:36 PM Christian König
> > <ckoenig.leichtzumerken at gmail.com> wrote:
> >> Daniel pointed me towards this function and there are multiple obvious problems
> >> in the implementation.
> >>
> >> First of all the retry loop is not working as intended. In general the retry
> >> makes only sense if you grab the reference first and then check the sequence
> >> values.
> >>
> >> Then we should always also wait for the exclusive fence.
> >>
> >> It's also good practice to keep the reference around when installing callbacks
> >> to fences you don't own.
> >>
> >> And last the whole implementation was unnecessary complex and rather hard to
> >> understand which could lead to probably unexpected behavior of the IOCTL.
> >>
> >> Fix all this by reworking the implementation from scratch. Dropping the
> >> whole RCU approach and taking the lock instead.
> >>
> >> Only mildly tested and needs a thoughtful review of the code.
> > prime_vgem.c has some basic stuff, but it might actually encoding the
> > broken behaviour. Would be good to extend/fix that I think so we don't
> > entirely rely on review. We can't really build easily on top of the
> > testcase Jason created for import/export, since for implicit sync we
> > need some driver that attaches the fences for us.
>
> My question is if I can just send that to
> intel-gfx at lists.freedesktop.org and the CI will pick it up?

We do run all the prime_vgem tests.

Btw if you do an igt patch, you can tell CI to run that igt patch
series together with your kernel submission. Pretty useful for hackery
like this, documented how it works here:

https://intel-gfx-ci.01.org/test-with.html

>
> >
> > There's also a vc4 one, but I guess that's less useful for us :-)
> >
> >> v2: fix the reference counting as well
> >> v3: keep the excl fence handling as is for stable
> >> v4: back to testing all fences, drop RCU
> >>
> >> Signed-off-by: Christian König <christian.koenig at amd.com>
> >> CC: stable at vger.kernel.org
> >> ---
> >>   drivers/dma-buf/dma-buf.c | 132 +++++++++++++-------------------------
> >>   include/linux/dma-buf.h   |   2 +-
> >>   2 files changed, 46 insertions(+), 88 deletions(-)
> >>
> >> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> >> index eadd1eaa2fb5..192c4d34704b 100644
> >> --- a/drivers/dma-buf/dma-buf.c
> >> +++ b/drivers/dma-buf/dma-buf.c
> >> @@ -72,7 +72,7 @@ static void dma_buf_release(struct dentry *dentry)
> >>           * If you hit this BUG() it means someone dropped their ref to the
> >>           * dma-buf while still having pending operation to the buffer.
> >>           */
> >> -       BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
> >> +       BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
> >>
> >>          dmabuf->ops->release(dmabuf);
> >>
> >> @@ -202,16 +202,19 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
> >>          wake_up_locked_poll(dcb->poll, dcb->active);
> >>          dcb->active = 0;
> >>          spin_unlock_irqrestore(&dcb->poll->lock, flags);
> >> +       dma_fence_put(fence);
> >>   }
> >>
> >>   static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
> >>   {
> >> +       struct dma_buf_poll_cb_t *dcb;
> >>          struct dma_buf *dmabuf;
> >>          struct dma_resv *resv;
> >>          struct dma_resv_list *fobj;
> >> -       struct dma_fence *fence_excl;
> >> +       struct dma_fence *fence;
> >> +       unsigned shared_count;
> >>          __poll_t events;
> >> -       unsigned shared_count, seq;
> >> +       int r, i;
> >>
> >>          dmabuf = file->private_data;
> >>          if (!dmabuf || !dmabuf->resv)
> >> @@ -225,101 +228,56 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
> >>          if (!events)
> >>                  return 0;
> >>
> >> -retry:
> >> -       seq = read_seqcount_begin(&resv->seq);
> >> -       rcu_read_lock();
> >> +       dcb = events & EPOLLOUT ? &dmabuf->cb_out : &dmabuf->cb_in;
> >> +
> >> +       /* Only queue a new one if we are not still waiting for the old one */
> >> +       spin_lock_irq(&dmabuf->poll.lock);
> >> +       if (dcb->active)
> >> +               events = 0;
> >> +       else
> >> +               dcb->active = events;
> >> +       spin_unlock_irq(&dmabuf->poll.lock);
> >> +       if (!events)
> >> +               return 0;
> >> +
> >> +       dma_resv_lock(resv, NULL);
> >>
> >> -       fobj = rcu_dereference(resv->fence);
> >> -       if (fobj)
> >> +       fobj = dma_resv_get_list(resv);
> >> +       if (fobj && events & EPOLLOUT)
> >>                  shared_count = fobj->shared_count;
> >>          else
> >>                  shared_count = 0;
> >> -       fence_excl = rcu_dereference(resv->fence_excl);
> >> -       if (read_seqcount_retry(&resv->seq, seq)) {
> >> -               rcu_read_unlock();
> >> -               goto retry;
> >> -       }
> >>
> >> -       if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) {
> >> -               struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
> >> -               __poll_t pevents = EPOLLIN;
> >> -
> >> -               if (shared_count == 0)
> >> -                       pevents |= EPOLLOUT;
> >> -
> >> -               spin_lock_irq(&dmabuf->poll.lock);
> >> -               if (dcb->active) {
> >> -                       dcb->active |= pevents;
> >> -                       events &= ~pevents;
> >> -               } else
> >> -                       dcb->active = pevents;
> >> -               spin_unlock_irq(&dmabuf->poll.lock);
> >> -
> >> -               if (events & pevents) {
> >> -                       if (!dma_fence_get_rcu(fence_excl)) {
> >> -                               /* force a recheck */
> >> -                               events &= ~pevents;
> >> -                               dma_buf_poll_cb(NULL, &dcb->cb);
> >> -                       } else if (!dma_fence_add_callback(fence_excl, &dcb->cb,
> >> -                                                          dma_buf_poll_cb)) {
> >> -                               events &= ~pevents;
> >> -                               dma_fence_put(fence_excl);
> >> -                       } else {
> >> -                               /*
> >> -                                * No callback queued, wake up any additional
> >> -                                * waiters.
> >> -                                */
> >> -                               dma_fence_put(fence_excl);
> >> -                               dma_buf_poll_cb(NULL, &dcb->cb);
> >> -                       }
> >> +       for (i = 0; i < shared_count; ++i) {
> >> +               fence = rcu_dereference_protected(fobj->shared[i],
> >> +                                                 dma_resv_held(resv));
> >> +               dma_fence_get(fence);
> >> +               r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
> >> +               if (!r) {
> >> +                       /* Callback queued */
> >> +                       events = 0;
> > Why do you clear events here? There's more than EPOLLIN/OUT, and I
> > think you can also set both (and then we should be able to queue
> > both). I think a few more testcases for this would be really good. I
> > think the old code all handled that like I'd expect it to.
>
> Yeah, that's exactly the stuff I was wondering about since I'm not so
> familiar with the poll interface.

tbh I don't have much clue either, I think we need to decide this with
some testcases. The poll support in the kernel is pretty impressive
amounts of magic since it works for all variants of poll. At least I
think it's rather much clever and tricky to understand as an outsider
...
-Daniel

> Going to fix that.
>
> Thanks,
> Christian.
>
> >
> > Cheers, Daniel
> >
> >> +                       goto out;
> >>                  }
> >> +               dma_fence_put(fence);
> >>          }
> >>
> >> -       if ((events & EPOLLOUT) && shared_count > 0) {
> >> -               struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared;
> >> -               int i;
> >> -
> >> -               /* Only queue a new callback if no event has fired yet */
> >> -               spin_lock_irq(&dmabuf->poll.lock);
> >> -               if (dcb->active)
> >> -                       events &= ~EPOLLOUT;
> >> -               else
> >> -                       dcb->active = EPOLLOUT;
> >> -               spin_unlock_irq(&dmabuf->poll.lock);
> >> -
> >> -               if (!(events & EPOLLOUT))
> >> +       fence = dma_resv_get_excl(resv);
> >> +       if (fence) {
> >> +               dma_fence_get(fence);
> >> +               r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
> >> +               if (!r) {
> >> +                       /* Callback queued */
> >> +                       events = 0;
> >>                          goto out;
> >> -
> >> -               for (i = 0; i < shared_count; ++i) {
> >> -                       struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
> >> -
> >> -                       if (!dma_fence_get_rcu(fence)) {
> >> -                               /*
> >> -                                * fence refcount dropped to zero, this means
> >> -                                * that fobj has been freed
> >> -                                *
> >> -                                * call dma_buf_poll_cb and force a recheck!
> >> -                                */
> >> -                               events &= ~EPOLLOUT;
> >> -                               dma_buf_poll_cb(NULL, &dcb->cb);
> >> -                               break;
> >> -                       }
> >> -                       if (!dma_fence_add_callback(fence, &dcb->cb,
> >> -                                                   dma_buf_poll_cb)) {
> >> -                               dma_fence_put(fence);
> >> -                               events &= ~EPOLLOUT;
> >> -                               break;
> >> -                       }
> >> -                       dma_fence_put(fence);
> >>                  }
> >> -
> >> -               /* No callback queued, wake up any additional waiters. */
> >> -               if (i == shared_count)
> >> -                       dma_buf_poll_cb(NULL, &dcb->cb);
> >> +               dma_fence_put(fence);
> >>          }
> >>
> >> +       /* No callback queued, wake up any additional waiters. */
> >> +       dma_buf_poll_cb(NULL, &dcb->cb);
> >> +
> >>   out:
> >> -       rcu_read_unlock();
> >> +       dma_resv_unlock(resv);
> >>          return events;
> >>   }
> >>
> >> @@ -562,8 +520,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
> >>          dmabuf->owner = exp_info->owner;
> >>          spin_lock_init(&dmabuf->name_lock);
> >>          init_waitqueue_head(&dmabuf->poll);
> >> -       dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll;
> >> -       dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0;
> >> +       dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll;
> >> +       dmabuf->cb_in.active = dmabuf->cb_out.active = 0;
> >>
> >>          if (!resv) {
> >>                  resv = (struct dma_resv *)&dmabuf[1];
> >> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> >> index efdc56b9d95f..7e747ad54c81 100644
> >> --- a/include/linux/dma-buf.h
> >> +++ b/include/linux/dma-buf.h
> >> @@ -329,7 +329,7 @@ struct dma_buf {
> >>                  wait_queue_head_t *poll;
> >>
> >>                  __poll_t active;
> >> -       } cb_excl, cb_shared;
> >> +       } cb_in, cb_out;
> >>   };
> >>
> >>   /**
> >> --
> >> 2.25.1
> >>
> >
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the dri-devel mailing list