[Intel-gfx] [PATCH v5 06/10] drm/i915: add syncobj timeline support

Lionel Landwerlin lionel.g.landwerlin at intel.com
Thu Jun 27 10:49:56 UTC 2019


Thanks a lot for your comments.

On 27/06/2019 12:15, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-06-27 09:00:41)
>> Introduces a new parameters to execbuf so that we can specify syncobj
>> handles as well as timeline points.
>>
>> v2: Reuse i915_user_extension_fn
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 280 ++++++++++++++----
>>   drivers/gpu/drm/i915/i915_drv.c               |   2 +
>>   include/uapi/drm/i915_drm.h                   |  37 +++
>>   3 files changed, 263 insertions(+), 56 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 1970dd8cbac3..476fade6fcab 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -213,6 +213,13 @@ enum {
>>    * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
>>    */
>>   
>> +struct i915_drm_dma_fences {
> i915_execbuffer_fences or eb_fences as it is private to the impl.


Sure.


>
>> +       struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
>> +       struct dma_fence *dma_fence;
>> +       u64 value;
>> +       struct dma_fence_chain *chain_fence;
>> +};
>> +
>>   struct i915_execbuffer {
>>          struct drm_i915_private *i915; /** i915 backpointer */
>>          struct drm_file *file; /** per-file lookup tables and limits */
>> @@ -275,6 +282,7 @@ struct i915_execbuffer {
>>   
>>          struct {
>>                  u64 flags; /** Available extensions parameters */
>> +               struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
> Another 40b on stack. Getting closer to those warning bells being set off.
>
>>          } extensions;
>>   };
>>   
>> @@ -2224,67 +2232,193 @@ eb_select_engine(struct i915_execbuffer *eb,
>>   }
>>   
>>   static void
>> -__free_fence_array(struct drm_syncobj **fences, unsigned int n)
>> +__free_fence_array(struct i915_drm_dma_fences *fences, unsigned int n)
>>   {
>> -       while (n--)
>> -               drm_syncobj_put(ptr_mask_bits(fences[n], 2));
>> +       while (n--) {
>> +               drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
>> +               dma_fence_put(fences[n].dma_fence);
>> +               kfree(fences[n].chain_fence);
>> +       }
>>          kvfree(fences);
>>   }
>>   
>> -static struct drm_syncobj **
>> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>> -               struct drm_file *file)
>> +static struct i915_drm_dma_fences *
>> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
>>   {
>> -       const unsigned long nfences = args->num_cliprects;
>> +       struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
>> +               &eb->extensions.timeline_fences;
>> +       struct drm_i915_gem_exec_fence __user *user_fences;
>> +       struct i915_drm_dma_fences *fences;
>> +       u64 __user *user_values;
>> +       unsigned long n;
>> +       int err;
>> +
>> +       *out_n_fences = timeline_fences->handle_count;
> I'd encourage using a const num_fences = timeline_fences->handle_count
> for readability within the function and then assigning
> 	*out_n_fences = num_fences;
> right before the successful return fences;


Done.


>
>> +       /* Check multiplication overflow for access_ok() and kvmalloc_array() */
>> +       BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
>> +       if (*out_n_fences > min_t(unsigned long,
>> +                                 ULONG_MAX / sizeof(*user_fences),
>> +                                 SIZE_MAX / sizeof(*fences)))
>> +               return ERR_PTR(-EINVAL);
>> +
>> +       user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
>> +       if (!access_ok(user_fences, *out_n_fences * sizeof(*user_fences)))
>> +               return ERR_PTR(-EFAULT);
>> +
>> +       user_values = u64_to_user_ptr(timeline_fences->values_ptr);
>> +       if (!access_ok(user_values, *out_n_fences * sizeof(*user_values)))
>> +               return ERR_PTR(-EFAULT);
>> +       fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
>> +                               __GFP_NOWARN | GFP_KERNEL);
>> +       if (!fences)
>> +               return ERR_PTR(-ENOMEM);
>> +
>> +       BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
>> +                    ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>> +
>> +       for (n = 0; n < *out_n_fences; n++) {
>> +               struct drm_i915_gem_exec_fence user_fence;
>> +               struct drm_syncobj *syncobj;
>> +               struct dma_fence *fence = NULL;
>> +               u64 point;
>> +
>> +               if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
>> +                       err = -EFAULT;
>> +                       goto err;
>> +               }
>> +
>> +               if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
>> +                       err = -EINVAL;
>> +                       goto err;
>> +               }
>> +
>> +               if (__get_user(point, user_values++)) {
>> +                       err = -EFAULT;
>> +                       goto err;
>> +               }
>> +
>> +               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
>> +               if (!syncobj) {
>> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
>> +                       err = -EINVAL;
>> +                       goto err;
>> +               }
>> +
>> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
>> +                       fence = drm_syncobj_fence_get(syncobj);
>> +                       if (!fence) {
>> +                               DRM_DEBUG("Syncobj handle has no fence\n");
>> +                               drm_syncobj_put(syncobj);
>> +                               err = -EINVAL;
>> +                               goto err;
>> +                       }
>> +
>> +                       err = dma_fence_chain_find_seqno(&fence, point);
> Is an imported syncobj always a fence-chain?
>
> drm_syncobj_import_sync_file_fence() says no.


drm_syncobj_import_sync_file_fence() would turn a timeline semaphore 
into a binary semaphore by breaking the chain.

drm_syncobj_transfer_to_timeline() is what you should use if you wish to 
import a fence into the timeline.


>
> Similarly, we put ordinary fences into the syncobj ourselves if
> point==0, or the user uses the old path.
>
> I think you need something like
>
> if (dma_fence_is_chain() || point) {
> 	err = dma_fence_chain_find_seqno(&fence, point);
> 	if (err) {
> 	}
> }


dma_fence_chain_find_seqno() already deals with point == 0.
If the fence is not a dma_fence_chain then the application screwed up somewhere.


>
>> +                       if (err) {
>> +                               DRM_DEBUG("Syncobj handle missing requested point\n");
>> +                               goto err;
>> +                       }
>> +               }
>> +
>> +               /*
>> +                * For timeline syncobjs we need to create a chain.
> "need to preallocate chains for later signaling


Sure.


>
>> +                */
>> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
>> +                       fences[n].chain_fence =
>> +                               kmalloc(sizeof(*fences[n].chain_fence),
>> +                                       GFP_KERNEL);
>> +                       if (!fences[n].chain_fence) {
>> +                               dma_fence_put(fence);
>> +                               drm_syncobj_put(syncobj);
>> +                               err = -ENOMEM;
>> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
>> +                               goto err;
>> +                       }
>> +               } else {
>> +                       fences[n].chain_fence = NULL;
>> +               }
>> +
>> +               fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
>> +               fences[n].dma_fence = fence;
>> +               fences[n].value = point;
>> +       }
>> +
>> +       return fences;
>> +
>> +err:
>> +       __free_fence_array(fences, n);
>> +       return ERR_PTR(err);
>> +}
>> +
>> +static struct i915_drm_dma_fences *
>> +get_legacy_fence_array(struct i915_execbuffer *eb,
>> +                      int *out_n_fences)
>> +{
>> +       struct drm_i915_gem_execbuffer2 *args = eb->args;
>>          struct drm_i915_gem_exec_fence __user *user;
>> -       struct drm_syncobj **fences;
>> +       struct i915_drm_dma_fences *fences;
>>          unsigned long n;
>>          int err;
>>   
>> -       if (!(args->flags & I915_EXEC_FENCE_ARRAY))
>> -               return NULL;
>> +       *out_n_fences = args->num_cliprects;
> Same suggestion as earlier for a local num_fences.


Done.


>
>> +static struct i915_drm_dma_fences *
>> +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
>> +{
>> +       if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
>> +               return get_legacy_fence_array(eb, out_n_fences);
> Blank line.


Oops.


>
>> +       if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
>> +               return get_timeline_fence_array(eb, out_n_fences);
>> +
>> +       *out_n_fences = 0;
>> +
> No blank line, my argument is that both are part of the return value for
> the function.


Okay.


>> +       return NULL;
>> +}
>> +
>>   static void
>> -put_fence_array(struct drm_i915_gem_execbuffer2 *args,
>> -               struct drm_syncobj **fences)
>> +put_fence_array(struct i915_drm_dma_fences *fences, int nfences)
>>   {
>>          if (fences)
>> -               __free_fence_array(fences, args->num_cliprects);
>> +               __free_fence_array(fences, nfences);
>>   }
>>   
>>   static int
>>   await_fence_array(struct i915_execbuffer *eb,
>> -                 struct drm_syncobj **fences)
>> +                 struct i915_drm_dma_fences *fences,
>> +                 int nfences)
>>   {
>> -       const unsigned int nfences = eb->args->num_cliprects;
>>          unsigned int n;
>>          int err;
>>   
>>          for (n = 0; n < nfences; n++) {
>>                  struct drm_syncobj *syncobj;
>> -               struct dma_fence *fence;
>>                  unsigned int flags;
>>   
>> -               syncobj = ptr_unpack_bits(fences[n], &flags, 2);
>> +               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>>                  if (!(flags & I915_EXEC_FENCE_WAIT))
>>                          continue;
>>   
>> -               fence = drm_syncobj_fence_get(syncobj);
>> -               if (!fence)
>> -                       return -EINVAL;
>> -
>> -               err = i915_request_await_dma_fence(eb->request, fence);
>> -               dma_fence_put(fence);
>> +               err = i915_request_await_dma_fence(eb->request,
>> +                                                  fences[n].dma_fence);
>>                  if (err < 0)
>>                          return err;
>>          }
>> @@ -2334,9 +2475,9 @@ await_fence_array(struct i915_execbuffer *eb,
>>   
>>   static void
>>   signal_fence_array(struct i915_execbuffer *eb,
>> -                  struct drm_syncobj **fences)
>> +                  struct i915_drm_dma_fences *fences,
>> +                  int nfences)
>>   {
>> -       const unsigned int nfences = eb->args->num_cliprects;
>>          struct dma_fence * const fence = &eb->request->fence;
>>          unsigned int n;
>>   
>> @@ -2344,15 +2485,43 @@ signal_fence_array(struct i915_execbuffer *eb,
>>                  struct drm_syncobj *syncobj;
>>                  unsigned int flags;
>>   
>> -               syncobj = ptr_unpack_bits(fences[n], &flags, 2);
>> +               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>>                  if (!(flags & I915_EXEC_FENCE_SIGNAL))
>>                          continue;
>>   
>> -               drm_syncobj_replace_fence(syncobj, fence);
>> +               if (fences[n].chain_fence) {
>> +                       drm_syncobj_add_point(syncobj, fences[n].chain_fence,
>> +                                             fence, fences[n].value);
>> +                       /*
>> +                        * The chain's ownership is transfered to the
>> +                        * timeline.
>> +                        */
>> +                       fences[n].chain_fence = NULL;
>> +               } else {
>> +                       drm_syncobj_replace_fence(syncobj, fence);
>> +               }
>>          }
>>   }
>>   
>> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
>> +{
>> +       struct i915_execbuffer *eb = data;
>> +
>> +       /* Timeline fences are incompatible with the fence array flag. */
>> +       if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
>> +               return -EINVAL;
> Also you only allow one timeline_fence extension struct.


Well spotted, thanks.


>
>> +
>> +       if (copy_from_user(&eb->extensions.timeline_fences, ext,
>> +                          sizeof(eb->extensions.timeline_fences)))
>> +               return -EFAULT;
>> +
>> +       eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
>> +
>> +       return 0;
>> +}
>> +
>>   static const i915_user_extension_fn execbuf_extensions[] = {
>> +        [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
>>   };
>>   
>>   static int
>> @@ -2372,14 +2541,15 @@ static int
>>   i915_gem_do_execbuffer(struct drm_device *dev,
>>                         struct drm_file *file,
>>                         struct drm_i915_gem_execbuffer2 *args,
>> -                      struct drm_i915_gem_exec_object2 *exec,
>> -                      struct drm_syncobj **fences)
>> +                      struct drm_i915_gem_exec_object2 *exec)
>>   {
>>          struct i915_execbuffer eb;
>>          struct dma_fence *in_fence = NULL;
>>          struct dma_fence *exec_fence = NULL;
>>          struct sync_file *out_fence = NULL;
>> +       struct i915_drm_dma_fences *fences = NULL;
>>          int out_fence_fd = -1;
>> +       int nfences = 0;
>>          int err;
>>   
>>          BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
>> @@ -2421,10 +2591,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>                          return err;
>>          }
>>   
>> +       fences = get_fence_array(&eb, &nfences);
>> +       if (IS_ERR(fences))
>> +               return PTR_ERR(fences);
>> +
>>          if (args->flags & I915_EXEC_FENCE_IN) {
>>                  in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
>> -               if (!in_fence)
>> -                       return -EINVAL;
>> +               if (!in_fence) {
>> +                       err = -EINVAL;
>> +                       goto err_fences;
>> +               }
>>          }
>>   
>>          if (args->flags & I915_EXEC_FENCE_SUBMIT) {
>> @@ -2582,7 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>          }
>>   
>>          if (fences) {
>> -               err = await_fence_array(&eb, fences);
>> +               err = await_fence_array(&eb, fences, nfences);
>>                  if (err)
>>                          goto err_request;
>>          }
>> @@ -2611,7 +2787,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>          i915_request_add(eb.request);
>>   
>>          if (fences)
>> -               signal_fence_array(&eb, fences);
>> +               signal_fence_array(&eb, fences, nfences);
>>   
>>          if (out_fence) {
>>                  if (err == 0) {
>> @@ -2646,6 +2822,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>          dma_fence_put(exec_fence);
>>   err_in_fence:
>>          dma_fence_put(in_fence);
>> +err_fences:
>> +       put_fence_array(fences, nfences);
>>          return err;
>>   }
>>   
>> @@ -2739,7 +2917,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
>>                          exec2_list[i].flags = 0;
>>          }
>>   
>> -       err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
>> +       err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
>>          if (exec2.flags & __EXEC_HAS_RELOC) {
>>                  struct drm_i915_gem_exec_object __user *user_exec_list =
>>                          u64_to_user_ptr(args->buffers_ptr);
>> @@ -2770,7 +2948,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>>   {
>>          struct drm_i915_gem_execbuffer2 *args = data;
>>          struct drm_i915_gem_exec_object2 *exec2_list;
>> -       struct drm_syncobj **fences = NULL;
>>          const size_t count = args->buffer_count;
>>          int err;
>>   
>> @@ -2798,15 +2975,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>>                  return -EFAULT;
>>          }
>>   
>> -       if (args->flags & I915_EXEC_FENCE_ARRAY) {
>> -               fences = get_fence_array(args, file);
>> -               if (IS_ERR(fences)) {
>> -                       kvfree(exec2_list);
>> -                       return PTR_ERR(fences);
>> -               }
>> -       }
>> -
>> -       err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
>> +       err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
>>   
>>          /*
>>           * Now that we have begun execution of the batchbuffer, we ignore
>> @@ -2846,7 +3015,6 @@ end:;
>>          }
>>   
>>          args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
>> -       put_fence_array(args, fences);
>>          kvfree(exec2_list);
>>          return err;
>>   }
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>> index 7c3c3b8ab339..48f9009a6318 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -456,6 +456,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>>          case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>>          case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
>>          case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
>> +       case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
>>                  /* For the time being all of these are always true;
>>                   * if some supported hardware does not have one of these
>>                   * features this value needs to be provided from
>> @@ -3220,6 +3221,7 @@ static struct drm_driver driver = {
>>          .driver_features =
>>              DRIVER_GEM |
>>              DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
>> +           DRIVER_SYNCOBJ_TIMELINE,
> With a comma ? i.e. .ioctls = DRIVER_SYNCOBJ_TIMELINE


Waaaat. How am I running a this and it works....?


>
>>          .release = i915_driver_release,
>>          .open = i915_driver_open,
>>          .lastclose = i915_driver_lastclose,
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index efa195d6994e..b7fe1915e34d 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -617,6 +617,12 @@ typedef struct drm_i915_irq_wait {
>>    */
>>   #define I915_PARAM_PERF_REVISION       54
>>   
>> +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
>> + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
>> + * I915_EXEC_EXT.
>> + */
>> +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
>> +
>>   /* Must be kept compact -- no holes and well documented */
>>   
>>   typedef struct drm_i915_getparam {
>> @@ -1014,9 +1020,40 @@ struct drm_i915_gem_exec_fence {
>>   };
>>   
>>   enum drm_i915_gem_execbuffer_ext {
>> +       /**
>> +        * This identifier is associated with
>> +        * drm_i915_gem_execbuf_ext_timeline_fences.
> Or just "See drm_i915_gem_execbuf_ext_timeline_fences
>> +        */
>> +       DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
>   = 0
> don't leave it up to the compiler.


Should we set every single item in the enum or just the first one?


>> +
>>          DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
>>   };
>>   
>> +/**
>> + * This structure describes an array of drm_syncobj and associated points for
>> + * timeline variants of drm_syncobj. It is invalid to append this structure to
>> + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
>> + */
>> +struct drm_i915_gem_execbuffer_ext_timeline_fences {
>> +       struct i915_user_extension base;
>> +
>> +       /**
>> +        * Number of element in the handles_ptr & value_ptr arrays.
>> +        */
>> +       __u64 handle_count;
> Since it is used for both, I would use something like fence_count, or
> num_fences (although _count is in the majority).


Done.


>
>> +       /**
>> +        * Pointer to an array of struct drm_i915_gem_exec_fence of size handle_count.
> s/size/length/
>
> I think we typically use size for byte sizes around the uAPI.


Sure.


>
>> +        */
>> +       __u64 handles_ptr;
>> +
>> +       /**
>> +        * Pointer to an array of u64 values of size handle_count. Values must
>> +        * be 0 for a binary drm_syncobj.
>> +        */
>> +       __u64 values_ptr;
>> +};
>> +
>>   struct drm_i915_gem_execbuffer2 {
>>          /**
>>           * List of gem_exec_object2 structs
>> -- 
>> 2.21.0.392.gf8f6787159e
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx




More information about the Intel-gfx mailing list