[Intel-gfx] [PATCH v5 06/10] drm/i915: add syncobj timeline support
Chris Wilson
chris at chris-wilson.co.uk
Thu Jun 27 09:15:49 UTC 2019
Quoting Lionel Landwerlin (2019-06-27 09:00:41)
> Introduces a new parameters to execbuf so that we can specify syncobj
> handles as well as timeline points.
>
> v2: Reuse i915_user_extension_fn
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 280 ++++++++++++++----
> drivers/gpu/drm/i915/i915_drv.c | 2 +
> include/uapi/drm/i915_drm.h | 37 +++
> 3 files changed, 263 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 1970dd8cbac3..476fade6fcab 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -213,6 +213,13 @@ enum {
> * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
> */
>
> +struct i915_drm_dma_fences {
Name this i915_execbuffer_fences or eb_fences, as it is private to the impl.
> + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
> + struct dma_fence *dma_fence;
> + u64 value;
> + struct dma_fence_chain *chain_fence;
> +};
> +
> struct i915_execbuffer {
> struct drm_i915_private *i915; /** i915 backpointer */
> struct drm_file *file; /** per-file lookup tables and limits */
> @@ -275,6 +282,7 @@ struct i915_execbuffer {
>
> struct {
> u64 flags; /** Available extensions parameters */
> + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
Another 40 bytes on the stack. Getting closer to those warning bells being set off.
> } extensions;
> };
>
> @@ -2224,67 +2232,193 @@ eb_select_engine(struct i915_execbuffer *eb,
> }
>
> static void
> -__free_fence_array(struct drm_syncobj **fences, unsigned int n)
> +__free_fence_array(struct i915_drm_dma_fences *fences, unsigned int n)
> {
> - while (n--)
> - drm_syncobj_put(ptr_mask_bits(fences[n], 2));
> + while (n--) {
> + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
> + dma_fence_put(fences[n].dma_fence);
> + kfree(fences[n].chain_fence);
> + }
> kvfree(fences);
> }
>
> -static struct drm_syncobj **
> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> - struct drm_file *file)
> +static struct i915_drm_dma_fences *
> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> {
> - const unsigned long nfences = args->num_cliprects;
> + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> + &eb->extensions.timeline_fences;
> + struct drm_i915_gem_exec_fence __user *user_fences;
> + struct i915_drm_dma_fences *fences;
> + u64 __user *user_values;
> + unsigned long n;
> + int err;
> +
> + *out_n_fences = timeline_fences->handle_count;
I'd encourage using a const num_fences = timeline_fences->handle_count
for readability within the function and then assigning
*out_n_fences = num_fences;
right before the successful return fences;
> + /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> + if (*out_n_fences > min_t(unsigned long,
> + ULONG_MAX / sizeof(*user_fences),
> + SIZE_MAX / sizeof(*fences)))
> + return ERR_PTR(-EINVAL);
> +
> + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> + if (!access_ok(user_fences, *out_n_fences * sizeof(*user_fences)))
> + return ERR_PTR(-EFAULT);
> +
> + user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> + if (!access_ok(user_values, *out_n_fences * sizeof(*user_values)))
> + return ERR_PTR(-EFAULT);
> + fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
> + __GFP_NOWARN | GFP_KERNEL);
> + if (!fences)
> + return ERR_PTR(-ENOMEM);
> +
> + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> +
> + for (n = 0; n < *out_n_fences; n++) {
> + struct drm_i915_gem_exec_fence user_fence;
> + struct drm_syncobj *syncobj;
> + struct dma_fence *fence = NULL;
> + u64 point;
> +
> + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> + err = -EFAULT;
> + goto err;
> + }
> +
> + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> + err = -EINVAL;
> + goto err;
> + }
> +
> + if (__get_user(point, user_values++)) {
> + err = -EFAULT;
> + goto err;
> + }
> +
> + syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> + if (!syncobj) {
> + DRM_DEBUG("Invalid syncobj handle provided\n");
> + err = -EINVAL;
> + goto err;
> + }
> +
> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> + fence = drm_syncobj_fence_get(syncobj);
> + if (!fence) {
> + DRM_DEBUG("Syncobj handle has no fence\n");
> + drm_syncobj_put(syncobj);
> + err = -EINVAL;
> + goto err;
> + }
> +
> + err = dma_fence_chain_find_seqno(&fence, point);
Is an imported syncobj always a fence-chain?
drm_syncobj_import_sync_file_fence() says no.
Similarly, we put ordinary fences into the syncobj ourselves if
point==0, or if the user uses the old path.
I think you need something like
if (dma_fence_is_chain() || point) {
err = dma_fence_chain_find_seqno(&fence, point);
if (err) {
}
}
> + if (err) {
> + DRM_DEBUG("Syncobj handle missing requested point\n");
> + goto err;
> + }
> + }
> +
> + /*
> + * For timeline syncobjs we need to create a chain.
"need to preallocate chains for later signaling"
> + */
> + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> + fences[n].chain_fence =
> + kmalloc(sizeof(*fences[n].chain_fence),
> + GFP_KERNEL);
> + if (!fences[n].chain_fence) {
> + dma_fence_put(fence);
> + drm_syncobj_put(syncobj);
> + err = -ENOMEM;
> + DRM_DEBUG("Unable to alloc chain_fence\n");
> + goto err;
> + }
> + } else {
> + fences[n].chain_fence = NULL;
> + }
> +
> + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> + fences[n].dma_fence = fence;
> + fences[n].value = point;
> + }
> +
> + return fences;
> +
> +err:
> + __free_fence_array(fences, n);
> + return ERR_PTR(err);
> +}
> +
> +static struct i915_drm_dma_fences *
> +get_legacy_fence_array(struct i915_execbuffer *eb,
> + int *out_n_fences)
> +{
> + struct drm_i915_gem_execbuffer2 *args = eb->args;
> struct drm_i915_gem_exec_fence __user *user;
> - struct drm_syncobj **fences;
> + struct i915_drm_dma_fences *fences;
> unsigned long n;
> int err;
>
> - if (!(args->flags & I915_EXEC_FENCE_ARRAY))
> - return NULL;
> + *out_n_fences = args->num_cliprects;
Same suggestion as earlier for a local num_fences.
> +static struct i915_drm_dma_fences *
> +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> + if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> + return get_legacy_fence_array(eb, out_n_fences);
Blank line.
> + if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> + return get_timeline_fence_array(eb, out_n_fences);
> +
> + *out_n_fences = 0;
> +
No blank line; my argument is that both are part of the return value for
the function.
> + return NULL;
> +}
> +
> static void
> -put_fence_array(struct drm_i915_gem_execbuffer2 *args,
> - struct drm_syncobj **fences)
> +put_fence_array(struct i915_drm_dma_fences *fences, int nfences)
> {
> if (fences)
> - __free_fence_array(fences, args->num_cliprects);
> + __free_fence_array(fences, nfences);
> }
>
> static int
> await_fence_array(struct i915_execbuffer *eb,
> - struct drm_syncobj **fences)
> + struct i915_drm_dma_fences *fences,
> + int nfences)
> {
> - const unsigned int nfences = eb->args->num_cliprects;
> unsigned int n;
> int err;
>
> for (n = 0; n < nfences; n++) {
> struct drm_syncobj *syncobj;
> - struct dma_fence *fence;
> unsigned int flags;
>
> - syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> if (!(flags & I915_EXEC_FENCE_WAIT))
> continue;
>
> - fence = drm_syncobj_fence_get(syncobj);
> - if (!fence)
> - return -EINVAL;
> -
> - err = i915_request_await_dma_fence(eb->request, fence);
> - dma_fence_put(fence);
> + err = i915_request_await_dma_fence(eb->request,
> + fences[n].dma_fence);
> if (err < 0)
> return err;
> }
> @@ -2334,9 +2475,9 @@ await_fence_array(struct i915_execbuffer *eb,
>
> static void
> signal_fence_array(struct i915_execbuffer *eb,
> - struct drm_syncobj **fences)
> + struct i915_drm_dma_fences *fences,
> + int nfences)
> {
> - const unsigned int nfences = eb->args->num_cliprects;
> struct dma_fence * const fence = &eb->request->fence;
> unsigned int n;
>
> @@ -2344,15 +2485,43 @@ signal_fence_array(struct i915_execbuffer *eb,
> struct drm_syncobj *syncobj;
> unsigned int flags;
>
> - syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> if (!(flags & I915_EXEC_FENCE_SIGNAL))
> continue;
>
> - drm_syncobj_replace_fence(syncobj, fence);
> + if (fences[n].chain_fence) {
> + drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> + fence, fences[n].value);
> + /*
> + * The chain's ownership is transfered to the
> + * timeline.
> + */
> + fences[n].chain_fence = NULL;
> + } else {
> + drm_syncobj_replace_fence(syncobj, fence);
> + }
> }
> }
>
> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> +{
> + struct i915_execbuffer *eb = data;
> +
> + /* Timeline fences are incompatible with the fence array flag. */
> + if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> + return -EINVAL;
Also you only allow one timeline_fence extension struct.
> +
> + if (copy_from_user(&eb->extensions.timeline_fences, ext,
> + sizeof(eb->extensions.timeline_fences)))
> + return -EFAULT;
> +
> + eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
> +
> + return 0;
> +}
> +
> static const i915_user_extension_fn execbuf_extensions[] = {
> + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
> };
>
> static int
> @@ -2372,14 +2541,15 @@ static int
> i915_gem_do_execbuffer(struct drm_device *dev,
> struct drm_file *file,
> struct drm_i915_gem_execbuffer2 *args,
> - struct drm_i915_gem_exec_object2 *exec,
> - struct drm_syncobj **fences)
> + struct drm_i915_gem_exec_object2 *exec)
> {
> struct i915_execbuffer eb;
> struct dma_fence *in_fence = NULL;
> struct dma_fence *exec_fence = NULL;
> struct sync_file *out_fence = NULL;
> + struct i915_drm_dma_fences *fences = NULL;
> int out_fence_fd = -1;
> + int nfences = 0;
> int err;
>
> BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
> @@ -2421,10 +2591,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> return err;
> }
>
> + fences = get_fence_array(&eb, &nfences);
> + if (IS_ERR(fences))
> + return PTR_ERR(fences);
> +
> if (args->flags & I915_EXEC_FENCE_IN) {
> in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> - if (!in_fence)
> - return -EINVAL;
> + if (!in_fence) {
> + err = -EINVAL;
> + goto err_fences;
> + }
> }
>
> if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> @@ -2582,7 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> }
>
> if (fences) {
> - err = await_fence_array(&eb, fences);
> + err = await_fence_array(&eb, fences, nfences);
> if (err)
> goto err_request;
> }
> @@ -2611,7 +2787,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> i915_request_add(eb.request);
>
> if (fences)
> - signal_fence_array(&eb, fences);
> + signal_fence_array(&eb, fences, nfences);
>
> if (out_fence) {
> if (err == 0) {
> @@ -2646,6 +2822,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> dma_fence_put(exec_fence);
> err_in_fence:
> dma_fence_put(in_fence);
> +err_fences:
> + put_fence_array(fences, nfences);
> return err;
> }
>
> @@ -2739,7 +2917,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
> exec2_list[i].flags = 0;
> }
>
> - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
> + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
> if (exec2.flags & __EXEC_HAS_RELOC) {
> struct drm_i915_gem_exec_object __user *user_exec_list =
> u64_to_user_ptr(args->buffers_ptr);
> @@ -2770,7 +2948,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> {
> struct drm_i915_gem_execbuffer2 *args = data;
> struct drm_i915_gem_exec_object2 *exec2_list;
> - struct drm_syncobj **fences = NULL;
> const size_t count = args->buffer_count;
> int err;
>
> @@ -2798,15 +2975,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> return -EFAULT;
> }
>
> - if (args->flags & I915_EXEC_FENCE_ARRAY) {
> - fences = get_fence_array(args, file);
> - if (IS_ERR(fences)) {
> - kvfree(exec2_list);
> - return PTR_ERR(fences);
> - }
> - }
> -
> - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
> + err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
>
> /*
> * Now that we have begun execution of the batchbuffer, we ignore
> @@ -2846,7 +3015,6 @@ end:;
> }
>
> args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
> - put_fence_array(args, fences);
> kvfree(exec2_list);
> return err;
> }
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 7c3c3b8ab339..48f9009a6318 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -456,6 +456,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_EXEC_BATCH_FIRST:
> case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
> case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
> /* For the time being all of these are always true;
> * if some supported hardware does not have one of these
> * features this value needs to be provided from
> @@ -3220,6 +3221,7 @@ static struct drm_driver driver = {
> .driver_features =
> DRIVER_GEM |
> DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> + DRIVER_SYNCOBJ_TIMELINE,
With a comma? That terminates the .driver_features initializer, i.e. this becomes .ioctls = DRIVER_SYNCOBJ_TIMELINE. Presumably you want '|' here.
> .release = i915_driver_release,
> .open = i915_driver_open,
> .lastclose = i915_driver_lastclose,
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index efa195d6994e..b7fe1915e34d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -617,6 +617,12 @@ typedef struct drm_i915_irq_wait {
> */
> #define I915_PARAM_PERF_REVISION 54
>
> +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
> + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
> + * I915_EXEC_EXT.
> + */
> +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
> +
> /* Must be kept compact -- no holes and well documented */
>
> typedef struct drm_i915_getparam {
> @@ -1014,9 +1020,40 @@ struct drm_i915_gem_exec_fence {
> };
>
> enum drm_i915_gem_execbuffer_ext {
> + /**
> + * This identifier is associated with
> + * drm_i915_gem_execbuf_ext_timeline_fences.
Or just "See drm_i915_gem_execbuf_ext_timeline_fences"
> + */
> + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
= 0
don't leave it up to the compiler.
> +
> DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
> };
>
> +/**
> + * This structure describes an array of drm_syncobj and associated points for
> + * timeline variants of drm_syncobj. It is invalid to append this structure to
> + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
> + */
> +struct drm_i915_gem_execbuffer_ext_timeline_fences {
> + struct i915_user_extension base;
> +
> + /**
> + * Number of element in the handles_ptr & value_ptr arrays.
> + */
> + __u64 handle_count;
Since it is used for both, I would use something like fence_count, or
num_fences (although _count is in the majority).
> + /**
> + * Pointer to an array of struct drm_i915_gem_exec_fence of size handle_count.
s/size/length/
I think we typically use size for byte sizes around the uAPI.
> + */
> + __u64 handles_ptr;
> +
> + /**
> + * Pointer to an array of u64 values of size handle_count. Values must
> + * be 0 for a binary drm_syncobj.
> + */
> + __u64 values_ptr;
> +};
> +
> struct drm_i915_gem_execbuffer2 {
> /**
> * List of gem_exec_object2 structs
> --
> 2.21.0.392.gf8f6787159e
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list