[Freedreno] [PATCH v2] drm/msm: Add syncobj support.
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Fri Jan 17 18:32:27 UTC 2020
On Fri, Jan 17, 2020 at 7:17 PM Jordan Crouse <jcrouse at codeaurora.org> wrote:
>
> On Fri, Jan 17, 2020 at 12:04:17AM +0100, Bas Nieuwenhuizen wrote:
> > This
> >
> > 1) Enables core DRM syncobj support.
> > 2) Adds options to the submission ioctl to wait/signal syncobjs.
> >
> > Just like the wait fence fd, this does inline waits. Using the
> > scheduler would be nice but I believe it is out of scope for
> > this work.
> >
> > Support for timeline syncobjs is implemented and the interface
> > is ready for it, but I'm not enabling it yet until there is
> > some code for turnip to use it.
> >
> > The reset is mostly in there because in the presence of waiting
> > and signalling the same semaphores, resetting them after
> > signalling can become very annoying.
> >
> > v2:
> > - Fixed style issues
> > - Removed a cleanup issue in a failure case
> > - Moved to a copy_from_user per syncobj
>
> A few nits, but nothing serious. This is looking good, thanks for the quick
> turn around.
>
> > Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> > ---
> > drivers/gpu/drm/msm/msm_drv.c | 6 +-
> > drivers/gpu/drm/msm/msm_gem_submit.c | 236 ++++++++++++++++++++++++++-
> > include/uapi/drm/msm_drm.h | 24 ++-
> > 3 files changed, 262 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > index c84f0a8b3f2c..5246b41798df 100644
> > --- a/drivers/gpu/drm/msm/msm_drv.c
> > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > @@ -37,9 +37,10 @@
> > * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> > * GEM object's debug name
> > * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> > + * - 1.6.0 - Syncobj support
> > */
> > #define MSM_VERSION_MAJOR 1
> > -#define MSM_VERSION_MINOR 5
> > +#define MSM_VERSION_MINOR 6
> > #define MSM_VERSION_PATCHLEVEL 0
> >
> > static const struct drm_mode_config_funcs mode_config_funcs = {
> > @@ -988,7 +989,8 @@ static struct drm_driver msm_driver = {
> > .driver_features = DRIVER_GEM |
> > DRIVER_RENDER |
> > DRIVER_ATOMIC |
> > - DRIVER_MODESET,
> > + DRIVER_MODESET |
> > + DRIVER_SYNCOBJ,
> > .open = msm_open,
> > .postclose = msm_postclose,
> > .lastclose = drm_fb_helper_lastclose,
> > diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> > index be5327af16fa..6c7f95fc5cfd 100644
> > --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> > +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> > @@ -8,7 +8,9 @@
> > #include <linux/sync_file.h>
> > #include <linux/uaccess.h>
> >
> > +#include <drm/drm_drv.h>
> > #include <drm/drm_file.h>
> > +#include <drm/drm_syncobj.h>
> >
> > #include "msm_drv.h"
> > #include "msm_gpu.h"
> > @@ -394,6 +396,191 @@ static void submit_cleanup(struct msm_gem_submit *submit)
> > ww_acquire_fini(&submit->ticket);
> > }
> >
> > +
> > +struct msm_submit_post_dep {
> > + struct drm_syncobj *syncobj;
> > + uint64_t point;
> > + struct dma_fence_chain *chain;
> > +};
> > +
> > +static int msm_wait_deps(struct drm_device *dev,
> > + struct drm_file *file,
> > + uint64_t in_syncobjs_addr,
> > + uint32_t nr_in_syncobjs,
> > + uint32_t syncobj_stride,
> > + struct msm_ringbuffer *ring,
> > + struct drm_syncobj ***syncobjs)
> > +{
> > + struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
> > + int ret = 0;
> > + uint32_t i, j;
> > +
> > + *syncobjs = kcalloc(nr_in_syncobjs, sizeof(**syncobjs),
> > + GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
>
> Perfect - I think this will do everything we want - protect us against OOM death
> but also not introduce artificial constraints on object counts.
>
> > + if (!syncobjs) {
>
> You should be able to just return -ENOMEM here.
>
> > + ret = -ENOMEM;
> > + goto out_syncobjs;
> > + }
>
> > +
> > + for (i = 0; i < nr_in_syncobjs; ++i) {
> > + uint64_t address = in_syncobjs_addr + i * syncobj_stride;
> > + struct dma_fence *fence;
> > +
> > + if (copy_from_user(&syncobj_desc,
> > + u64_to_user_ptr(address),
> > + min(syncobj_stride, sizeof(syncobj_desc)))) {
> > + ret = -EFAULT;
> > + goto out_syncobjs;
>
> You can just use break here.
>
> > + }
> > +
> > + if (syncobj_desc.point &&
> > + !drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) {
> > + ret = -EOPNOTSUPP;
> > + break;
> > + }
> > +
> > + if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) {
> > + ret = -EINVAL;
> > + break;
> > + }
> > +
> > + ret = drm_syncobj_find_fence(file, syncobj_desc.handle,
> > + syncobj_desc.point, 0, &fence);
> > + if (ret)
> > + break;
> > +
> > + if (!dma_fence_match_context(fence, ring->fctx->context))
> > + ret = dma_fence_wait(fence, true);
> > +
> > + dma_fence_put(fence);
> > + if (ret)
> > + break;
> > +
> > + if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) {
> > + (*syncobjs)[i] =
> > + drm_syncobj_find(file, syncobj_desc.handle);
> > + if (!(*syncobjs)[i]) {
> > + ret = -EINVAL;
> > + break;
> > + }
> > + }
> > + }
> > +
> > +out_syncobjs:
> > + if (ret && *syncobjs) {
> > + for (j = 0; j < i; ++j) {
>
> You could also walk backwards from i and save having another iterator:
>
> for( ; i >=0; i--)
I thought about that but the slight annoyance is that from the API
perspective they're all unsigned so a >= 0 doesn't really work.
So we have 3 alternatives:
1) check for INT32_MAX and then use signed
2) keep the iterator
3) do { ... } while (i-- != 0);
I think 1 adds extra complexity for no good reason. (unless we want to
implicitly rely on that failing the kmalloc?) Happy to do 3 if we're
happy with a do while construct.
>
> > + if ((*syncobjs)[j])
> > + drm_syncobj_put((*syncobjs)[j]);
> > + }
> > + kfree(*syncobjs);
> > + *syncobjs = NULL;
> > + }
> > + return ret;
>
> if you wanted to you could return syncobjs or ERR_PTR instead of passing it by
> reference. I would probably chose that option personally but it is up to you.
How does this work wrt returning different error codes?
>
> > +}
> > +
> > +static void msm_reset_syncobjs(struct drm_syncobj **syncobjs,
> > + uint32_t nr_syncobjs)
> > +{
> > + uint32_t i;
> > +
> > + for (i = 0; syncobjs && i < nr_syncobjs; ++i) {
> > + if (syncobjs[i])
> > + drm_syncobj_replace_fence(syncobjs[i], NULL);
> > + }
> > +}
> > +
> > +static int msm_parse_post_deps(struct drm_device *dev,
> > + struct drm_file *file,
> > + uint64_t out_syncobjs_addr,
> > + uint32_t nr_out_syncobjs,
> > + uint32_t syncobj_stride,
> > + struct msm_submit_post_dep **post_deps)
> > +{
> > + int ret = 0;
> > + uint32_t i, j;
> > +
> > + *post_deps = kmalloc_array(nr_out_syncobjs, sizeof(**post_deps),
> > + GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
> > + if (!*post_deps) {
> > + ret = -ENOMEM;
> > + goto out_syncobjs;
>
> return -ENOMEM works fine.
>
> > + }
> > +
> > + for (i = 0; i < nr_out_syncobjs; ++i) {
> > + uint64_t address = out_syncobjs_addr + i * syncobj_stride;
> > +
> > + if (copy_from_user(&syncobj_desc,
> > + u64_to_user_ptr(address),
> > + min(syncobj_stride, sizeof(syncobj_desc)))) {
> > + ret = -EFAULT;
> > + goto out_syncobjs;
>
> You can break here.
>
> > + }
> > +
> > + (*post_deps)[i].point = syncobj_desc.point;
> > + (*post_deps)[i].chain = NULL;
> > +
> > + if (syncobj_desc.flags) {
> > + ret = -EINVAL;
> > + break;
> > + }
> > +
> > + if (syncobj_desc.point) {
> > + if (!drm_core_check_feature(dev,
> > + DRIVER_SYNCOBJ_TIMELINE)) {
> > + ret = -EOPNOTSUPP;
> > + break;
> > + }
> > +
> > + (*post_deps)[i].chain =
> > + kmalloc(sizeof(*(*post_deps)[i].chain),
> > + GFP_KERNEL);
> > + if (!(*post_deps)[i].chain) {
> > + ret = -ENOMEM;
> > + break;
> > + }
> > + }
> > +
> > + (*post_deps)[i].syncobj =
> > + drm_syncobj_find(file, syncobj_desc.handle);
> > + if (!(*post_deps)[i].syncobj) {
> > + ret = -EINVAL;
> > + break;
> > + }
> > + }
> > +
> > + if (ret) {
> > + for (j = 0; j <= i; ++j) {
>
> Same suggest as above, if you would prefer.
>
> > + kfree((*post_deps)[j].chain);
> > + if ((*post_deps)[j].syncobj)
> > + drm_syncobj_put((*post_deps)[j].syncobj);
> > + }
> > +
> > + kfree(*post_deps);
> > + *post_deps = NULL;
> > + }
> > +
> > +out_syncobjs:
> > + return ret;
>
> This might also be a good spot to return post_deps / ERR_PTR.
>
> > +}
> > +
> > +static void msm_process_post_deps(struct msm_submit_post_dep *post_deps,
> > + uint32_t count, struct dma_fence *fence)
> > +{
> > + uint32_t i;
> > +
> > + for (i = 0; post_deps && i < count; ++i) {
> > + if (post_deps[i].chain) {
> > + drm_syncobj_add_point(post_deps[i].syncobj,
> > + post_deps[i].chain,
> > + fence, post_deps[i].point);
> > + post_deps[i].chain = NULL;
> > + } else {
> > + drm_syncobj_replace_fence(post_deps[i].syncobj,
> > + fence);
> > + }
> > + }
> > +}
> > +
> > int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > struct drm_file *file)
> > {
> > @@ -406,6 +593,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > struct sync_file *sync_file = NULL;
> > struct msm_gpu_submitqueue *queue;
> > struct msm_ringbuffer *ring;
> > + struct msm_submit_post_dep *post_deps = NULL;
> > + struct drm_syncobj **syncobjs_to_reset = NULL;
> > int out_fence_fd = -1;
> > struct pid *pid = get_pid(task_pid(current));
> > unsigned i;
> > @@ -413,6 +602,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > if (!gpu)
> > return -ENXIO;
> >
> > + if (args->pad)
> > + return -EINVAL;
> > +
>
> > /* for now, we just have 3d pipe.. eventually this would need to
> > * be more clever to dispatch to appropriate gpu module:
> > */
> > @@ -460,9 +652,28 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > return ret;
> > }
> >
> > + if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) {
> > + ret = msm_wait_deps(dev, file,
> > + args->in_syncobjs, args->nr_in_syncobjs,
> > + args->syncobj_stride, ring,
>
> If you wanted to, you could just pass args directly to the function so you
> didn't have to take it apart.
I think the advantage here is making it really explicit what part of
args is not used, and I don't feel the number of args has quite gone
out of control yet, but happy to change if insist.
(sorry, not seeing if this is a "you could do that" vs. a "I would
prefer if you do that, politely")
>
> Also, Rob - do we want to do the trick where we return
> sizeof(drm_msm_gem_submit_syncobj) if the input stride is zero?
Do you mean making this an in/out field and modifying it to return the
size if a stride of 0 has been given? If so, I don't see the point,
because userspace would not know what to fill any extra size with
(sure they can 0 initialize the extra part, but the kernel already
does that zero initializing the struct to be copied into).
or do you mean interpreting stride as
sizeof(drm_msm_gem_submit_syncobj) if the input stride is 0? In that
case I think that is just an invitation for userspace to set itself up
for incompatibility issues when the size actually changes. Let's
enforce doing it right.
(or did I misunderstand entirely?)
>
> > + &syncobjs_to_reset);
> > + if (ret)
> > + goto out_post_unlock;
> > + }
> > +
> > + if (args->flags & MSM_SUBMIT_SYNCOBJ_OUT) {
> > + ret = msm_parse_post_deps(dev, file,
> > + args->out_syncobjs,
> > + args->nr_out_syncobjs,
> > + args->syncobj_stride,
>
> And same.
>
> > + &post_deps);
> > + if (ret)
> > + goto out_post_unlock;
> > + }
> > +
> > ret = mutex_lock_interruptible(&dev->struct_mutex);
> > if (ret)
> > - return ret;
> > + goto out_post_unlock;
> >
> > if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
> > out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
> > @@ -586,6 +797,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > args->fence_fd = out_fence_fd;
> > }
> >
> > + msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
> > + msm_process_post_deps(post_deps, args->nr_out_syncobjs,
> > + submit->fence);
> > +
> > +
> > out:
> > submit_cleanup(submit);
> > if (ret)
> > @@ -594,5 +810,23 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> > if (ret && (out_fence_fd >= 0))
> > put_unused_fd(out_fence_fd);
> > mutex_unlock(&dev->struct_mutex);
> > +
> > +out_post_unlock:
> > + if (post_deps) {
> > + for (i = 0; i < args->nr_out_syncobjs; ++i) {
> > + kfree(post_deps[i].chain);
> > + drm_syncobj_put(post_deps[i].syncobj);
> > + }
> > + kfree(post_deps);
> > + }
> > +
> > + if (syncobjs_to_reset) {
> > + for (i = 0; i < args->nr_in_syncobjs; ++i) {
> > + if (syncobjs_to_reset[i])
> > + drm_syncobj_put(syncobjs_to_reset[i]);
> > + }
> > + kfree(syncobjs_to_reset);
> > + }
> > +
> > return ret;
> > }
> > diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> > index 0b85ed6a3710..19806eb3a8e8 100644
> > --- a/include/uapi/drm/msm_drm.h
> > +++ b/include/uapi/drm/msm_drm.h
> > @@ -217,13 +217,28 @@ struct drm_msm_gem_submit_bo {
> > #define MSM_SUBMIT_FENCE_FD_IN 0x40000000 /* enable input fence_fd */
> > #define MSM_SUBMIT_FENCE_FD_OUT 0x20000000 /* enable output fence_fd */
> > #define MSM_SUBMIT_SUDO 0x10000000 /* run submitted cmds from RB */
> > +#define MSM_SUBMIT_SYNCOBJ_IN 0x08000000 /* enable input syncobj */
> > +#define MSM_SUBMIT_SYNCOBJ_OUT 0x04000000 /* enable output syncobj */
> > #define MSM_SUBMIT_FLAGS ( \
> > MSM_SUBMIT_NO_IMPLICIT | \
> > MSM_SUBMIT_FENCE_FD_IN | \
> > MSM_SUBMIT_FENCE_FD_OUT | \
> > MSM_SUBMIT_SUDO | \
> > + MSM_SUBMIT_SYNCOBJ_IN | \
> > + MSM_SUBMIT_SYNCOBJ_OUT | \
> > 0)
> >
> > +#define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */
> > +#define MSM_SUBMIT_SYNCOBJ_FLAGS ( \
> > + MSM_SUBMIT_SYNCOBJ_RESET | \
> > + 0)
> > +
> > +struct drm_msm_gem_submit_syncobj {
> > + __u32 handle; /* in, syncobj handle. */
> > + __u32 flags; /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */
> > + __u64 point; /* in, timepoint for timeline syncobjs. */
> > +};
> > +
> > /* Each cmdstream submit consists of a table of buffers involved, and
> > * one or more cmdstream buffers. This allows for conditional execution
> > * (context-restore), and IB buffers needed for per tile/bin draw cmds.
> > @@ -236,7 +251,14 @@ struct drm_msm_gem_submit {
> > __u64 bos; /* in, ptr to array of submit_bo's */
> > __u64 cmds; /* in, ptr to array of submit_cmd's */
> > __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
> > - __u32 queueid; /* in, submitqueue id */
> > + __u32 queueid; /* in, submitqueue id */
> > + __u64 in_syncobjs; /* in, ptr to to array of drm_msm_gem_submit_syncobj */
> > + __u64 out_syncobjs; /* in, ptr to to array of drm_msm_gem_submit_syncobj */
> > + __u32 nr_in_syncobjs; /* in, number of entries in in_syncobj */
> > + __u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. */
> > + __u32 syncobj_stride; /* in, stride of syncobj arrays. */
> > + __u32 pad; /*in, reserved for future use, always 0. */
> > +
> > };
> >
> > /* The normal way to synchronize with the GPU is just to CPU_PREP on
> > --
> > 2.24.1
> >
>
> --
> The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> a Linux Foundation Collaborative Project
More information about the Freedreno
mailing list