[Freedreno] [PATCH v2] drm/msm: Add syncobj support.

Fri Jan 17 18:17:27 UTC 2020

On Fri, Jan 17, 2020 at 12:04:17AM +0100, Bas Nieuwenhuizen wrote:
> This
> 
> 1) Enables core DRM syncobj support.
> 2) Adds options to the submission ioctl to wait/signal syncobjs.
> 
> Just like the wait fence fd, this does inline waits. Using the
> scheduler would be nice but I believe it is out of scope for
> this work.
> 
> Support for timeline syncobjs is implemented and the interface
> is ready for it, but I'm not enabling it yet until there is
> some code for turnip to use it.
> 
> The reset is mostly in there because in the presence of waiting
> and signalling the same semaphores, resetting them after
> signalling can become very annoying.
> 
> v2:
>   - Fixed style issues
>   - Removed a cleanup issue in a failure case
>   - Moved to a copy_from_user per syncobj

A few nits, but nothing serious.  This is looking good, thanks for the quick
turn around.

> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
>  drivers/gpu/drm/msm/msm_drv.c        |   6 +-
>  drivers/gpu/drm/msm/msm_gem_submit.c | 236 ++++++++++++++++++++++++++-
>  include/uapi/drm/msm_drm.h           |  24 ++-
>  3 files changed, 262 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index c84f0a8b3f2c..5246b41798df 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -37,9 +37,10 @@
>   * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
>   *           GEM object's debug name
>   * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> + * - 1.6.0 - Syncobj support
>   */
>  #define MSM_VERSION_MAJOR	1
> -#define MSM_VERSION_MINOR	5
> +#define MSM_VERSION_MINOR	6
>  #define MSM_VERSION_PATCHLEVEL	0
>  
>  static const struct drm_mode_config_funcs mode_config_funcs = {
> @@ -988,7 +989,8 @@ static struct drm_driver msm_driver = {
>  	.driver_features    = DRIVER_GEM |
>  				DRIVER_RENDER |
>  				DRIVER_ATOMIC |
> -				DRIVER_MODESET,
> +				DRIVER_MODESET |
> +				DRIVER_SYNCOBJ,
>  	.open               = msm_open,
>  	.postclose           = msm_postclose,
>  	.lastclose          = drm_fb_helper_lastclose,
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index be5327af16fa..6c7f95fc5cfd 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -8,7 +8,9 @@
>  #include <linux/sync_file.h>
>  #include <linux/uaccess.h>
>  
> +#include <drm/drm_drv.h>
>  #include <drm/drm_file.h>
> +#include <drm/drm_syncobj.h>
>  
>  #include "msm_drv.h"
>  #include "msm_gpu.h"
> @@ -394,6 +396,191 @@ static void submit_cleanup(struct msm_gem_submit *submit)
>  	ww_acquire_fini(&submit->ticket);
>  }
>  
> +
> +struct msm_submit_post_dep {
> +	struct drm_syncobj *syncobj;
> +	uint64_t point;
> +	struct dma_fence_chain *chain;
> +};
> +
> +static int msm_wait_deps(struct drm_device *dev,
> +                         struct drm_file *file,
> +                         uint64_t in_syncobjs_addr,
> +                         uint32_t nr_in_syncobjs,
> +                         uint32_t syncobj_stride,
> +                         struct msm_ringbuffer *ring,
> +                         struct drm_syncobj ***syncobjs)
> +{
> +	struct drm_msm_gem_submit_syncobj syncobj_desc = {0};
> +	int ret = 0;
> +	uint32_t i, j;
> +
> +	*syncobjs = kcalloc(nr_in_syncobjs, sizeof(**syncobjs),
> +	                    GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);

Perfect - I think this will do everything we want - protect us against OOM death
but also not introduce artificial constraints on object counts.

> +	if (!syncobjs) {

You should be able to just return -ENOMEM here.

> +		ret = -ENOMEM;
> +		goto out_syncobjs;
> +	}

> +
> +	for (i = 0; i < nr_in_syncobjs; ++i) {
> +		uint64_t address = in_syncobjs_addr + i * syncobj_stride;
> +		struct dma_fence *fence;
> +
> +		if (copy_from_user(&syncobj_desc,
> +			           u64_to_user_ptr(address),
> +			           min(syncobj_stride, sizeof(syncobj_desc)))) {
> +			ret = -EFAULT;
> +			goto out_syncobjs;

You can just use break here.

> +		}
> +
> +		if (syncobj_desc.point &&
> +		    !drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) {
> +			ret = -EOPNOTSUPP;
> +			break;
> +		}
> +
> +		if (syncobj_desc.flags & ~MSM_SUBMIT_SYNCOBJ_FLAGS) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		ret = drm_syncobj_find_fence(file, syncobj_desc.handle,
> +		                             syncobj_desc.point, 0, &fence);
> +		if (ret)
> +			break;
> +
> +		if (!dma_fence_match_context(fence, ring->fctx->context))
> +			ret = dma_fence_wait(fence, true);
> +
> +		dma_fence_put(fence);
> +		if (ret)
> +			break;
> +
> +		if (syncobj_desc.flags & MSM_SUBMIT_SYNCOBJ_RESET) {
> +			(*syncobjs)[i] =
> +				drm_syncobj_find(file, syncobj_desc.handle);
> +			if (!(*syncobjs)[i]) {
> +				ret = -EINVAL;
> +				break;
> +			}
> +		}
> +	}
> +
> +out_syncobjs:
> +	if (ret && *syncobjs) {
> +		for (j = 0; j < i; ++j) {

You could also walk backwards from i and save having another iterator:

for( ; i >=0; i--)

> +			if ((*syncobjs)[j])
> +				drm_syncobj_put((*syncobjs)[j]);
> +		}
> +		kfree(*syncobjs);
> +		*syncobjs = NULL;
> +	}
> +	return ret;

if you wanted to you could return syncobjs or ERR_PTR instead of passing it by
reference. I would probably chose that option personally but it is up to you.

> +}
> +
> +static void msm_reset_syncobjs(struct drm_syncobj **syncobjs,
> +                               uint32_t nr_syncobjs)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; syncobjs && i < nr_syncobjs; ++i) {
> +		if (syncobjs[i])
> +			drm_syncobj_replace_fence(syncobjs[i], NULL);
> +	}
> +}
> +
> +static int msm_parse_post_deps(struct drm_device *dev,
> +                               struct drm_file *file,
> +                               uint64_t out_syncobjs_addr,
> +                               uint32_t nr_out_syncobjs,
> +			       uint32_t syncobj_stride,
> +                               struct msm_submit_post_dep **post_deps)
> +{
> +	int ret = 0;
> +	uint32_t i, j;
> +
> +	*post_deps = kmalloc_array(nr_out_syncobjs, sizeof(**post_deps),
> +	                           GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
> +	if (!*post_deps) {
> +		ret = -ENOMEM;
> +		goto out_syncobjs;

return -ENOMEM works fine.

> +	}
> +
> +	for (i = 0; i < nr_out_syncobjs; ++i) {
> +		uint64_t address = out_syncobjs_addr + i * syncobj_stride;
> +
> +		if (copy_from_user(&syncobj_desc,
> +			           u64_to_user_ptr(address),
> +			           min(syncobj_stride, sizeof(syncobj_desc)))) {
> +			ret = -EFAULT;
> +			goto out_syncobjs;

You can break here.

> +		}
> +
> +		(*post_deps)[i].point = syncobj_desc.point;
> +		(*post_deps)[i].chain = NULL;
> +
> +		if (syncobj_desc.flags) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		if (syncobj_desc.point) {
> +			if (!drm_core_check_feature(dev,
> +			                            DRIVER_SYNCOBJ_TIMELINE)) {
> +				ret = -EOPNOTSUPP;
> +				break;
> +			}
> +
> +			(*post_deps)[i].chain =
> +				kmalloc(sizeof(*(*post_deps)[i].chain),
> +				        GFP_KERNEL);
> +			if (!(*post_deps)[i].chain) {
> +				ret = -ENOMEM;
> +				break;
> +			}
> +		}
> +
> +		(*post_deps)[i].syncobj =
> +			drm_syncobj_find(file, syncobj_desc.handle);
> +		if (!(*post_deps)[i].syncobj) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +	}
> +
> +	if (ret) {
> +		for (j = 0; j <= i; ++j) {

Same suggest as above, if you would prefer.

> +			kfree((*post_deps)[j].chain);
> +			if ((*post_deps)[j].syncobj)
> +				drm_syncobj_put((*post_deps)[j].syncobj);
> +		}
> +
> +		kfree(*post_deps);
> +		*post_deps = NULL;
> +	}
> +
> +out_syncobjs:
> +	return ret;

This might also be a good spot to return post_deps / ERR_PTR.

> +}
> +
> +static void msm_process_post_deps(struct msm_submit_post_dep *post_deps,
> +                                  uint32_t count, struct dma_fence *fence)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; post_deps && i < count; ++i) {
> +		if (post_deps[i].chain) {
> +			drm_syncobj_add_point(post_deps[i].syncobj,
> +			                      post_deps[i].chain,
> +			                      fence, post_deps[i].point);
> +			post_deps[i].chain = NULL;
> +		} else {
> +			drm_syncobj_replace_fence(post_deps[i].syncobj,
> +			                          fence);
> +		}
> +	}
> +}
> +
>  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  		struct drm_file *file)
>  {
> @@ -406,6 +593,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  	struct sync_file *sync_file = NULL;
>  	struct msm_gpu_submitqueue *queue;
>  	struct msm_ringbuffer *ring;
> +	struct msm_submit_post_dep *post_deps = NULL;
> +	struct drm_syncobj **syncobjs_to_reset = NULL;
>  	int out_fence_fd = -1;
>  	struct pid *pid = get_pid(task_pid(current));
>  	unsigned i;
> @@ -413,6 +602,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  	if (!gpu)
>  		return -ENXIO;
>  
> +	if (args->pad)
> +		return -EINVAL;
> +

>  	/* for now, we just have 3d pipe.. eventually this would need to
>  	 * be more clever to dispatch to appropriate gpu module:
>  	 */
> @@ -460,9 +652,28 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  			return ret;
>  	}
>  
> +	if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) {
> +		ret = msm_wait_deps(dev, file,
> +		                    args->in_syncobjs, args->nr_in_syncobjs,
> +		                    args->syncobj_stride, ring,

If you wanted to, you could just pass args directly to the function so you
didn't have to take it apart.

Also, Rob - do we want to do the trick where we return
sizeof(drm_msm_gem_submit_syncobj) if the input stride is zero?

> +		                    &syncobjs_to_reset);
> +		if (ret)
> +			goto out_post_unlock;
> +	}
> +
> +	if (args->flags & MSM_SUBMIT_SYNCOBJ_OUT) {
> +		ret = msm_parse_post_deps(dev, file,
> +		                          args->out_syncobjs,
> +		                          args->nr_out_syncobjs,
> +		                          args->syncobj_stride,

And same.

> +		                          &post_deps);
> +		if (ret)
> +			goto out_post_unlock;
> +	}
> +
>  	ret = mutex_lock_interruptible(&dev->struct_mutex);
>  	if (ret)
> -		return ret;
> +		goto out_post_unlock;
>  
>  	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
>  		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
> @@ -586,6 +797,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  		args->fence_fd = out_fence_fd;
>  	}
>  
> +	msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
> +	msm_process_post_deps(post_deps, args->nr_out_syncobjs,
> +	                      submit->fence);
> +
> +
>  out:
>  	submit_cleanup(submit);
>  	if (ret)
> @@ -594,5 +810,23 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  	if (ret && (out_fence_fd >= 0))
>  		put_unused_fd(out_fence_fd);
>  	mutex_unlock(&dev->struct_mutex);
> +
> +out_post_unlock:
> +	if (post_deps) {
> +		for (i = 0; i < args->nr_out_syncobjs; ++i) {
> +			kfree(post_deps[i].chain);
> +			drm_syncobj_put(post_deps[i].syncobj);
> +		}
> +		kfree(post_deps);
> +	}
> +
> +	if (syncobjs_to_reset) {
> +		for (i = 0; i < args->nr_in_syncobjs; ++i) {
> +			if (syncobjs_to_reset[i])
> +				drm_syncobj_put(syncobjs_to_reset[i]);
> +		}
> +		kfree(syncobjs_to_reset);
> +	}
> +
>  	return ret;
>  }
> diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> index 0b85ed6a3710..19806eb3a8e8 100644
> --- a/include/uapi/drm/msm_drm.h
> +++ b/include/uapi/drm/msm_drm.h
> @@ -217,13 +217,28 @@ struct drm_msm_gem_submit_bo {
>  #define MSM_SUBMIT_FENCE_FD_IN   0x40000000 /* enable input fence_fd */
>  #define MSM_SUBMIT_FENCE_FD_OUT  0x20000000 /* enable output fence_fd */
>  #define MSM_SUBMIT_SUDO          0x10000000 /* run submitted cmds from RB */
> +#define MSM_SUBMIT_SYNCOBJ_IN    0x08000000 /* enable input syncobj */
> +#define MSM_SUBMIT_SYNCOBJ_OUT   0x04000000 /* enable output syncobj */
>  #define MSM_SUBMIT_FLAGS                ( \
>  		MSM_SUBMIT_NO_IMPLICIT   | \
>  		MSM_SUBMIT_FENCE_FD_IN   | \
>  		MSM_SUBMIT_FENCE_FD_OUT  | \
>  		MSM_SUBMIT_SUDO          | \
> +		MSM_SUBMIT_SYNCOBJ_IN    | \
> +		MSM_SUBMIT_SYNCOBJ_OUT   | \
>  		0)
>  
> +#define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */
> +#define MSM_SUBMIT_SYNCOBJ_FLAGS        ( \
> +		MSM_SUBMIT_SYNCOBJ_RESET | \
> +		0)
> +
> +struct drm_msm_gem_submit_syncobj {
> +	__u32 handle;     /* in, syncobj handle. */
> +	__u32 flags;      /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */
> +	__u64 point;      /* in, timepoint for timeline syncobjs. */
> +};
> +
>  /* Each cmdstream submit consists of a table of buffers involved, and
>   * one or more cmdstream buffers.  This allows for conditional execution
>   * (context-restore), and IB buffers needed for per tile/bin draw cmds.
> @@ -236,7 +251,14 @@ struct drm_msm_gem_submit {
>  	__u64 bos;            /* in, ptr to array of submit_bo's */
>  	__u64 cmds;           /* in, ptr to array of submit_cmd's */
>  	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
> -	__u32 queueid;         /* in, submitqueue id */
> +	__u32 queueid;        /* in, submitqueue id */
> +	__u64 in_syncobjs;    /* in, ptr to to array of drm_msm_gem_submit_syncobj */
> +	__u64 out_syncobjs;   /* in, ptr to to array of drm_msm_gem_submit_syncobj */
> +	__u32 nr_in_syncobjs; /* in, number of entries in in_syncobj */
> +	__u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. */
> +	__u32 syncobj_stride; /* in, stride of syncobj arrays. */
> +	__u32 pad;            /*in, reserved for future use, always 0. */
> +
>  };
>  
>  /* The normal way to synchronize with the GPU is just to CPU_PREP on
> -- 
> 2.24.1
> 

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project