[Intel-gfx] [PATCH v3 2/2] drm/i915: add syncobj timeline support
Chris Wilson
chris at chris-wilson.co.uk
Thu Aug 1 15:16:58 UTC 2019
Quoting Lionel Landwerlin (2019-08-01 15:29:34)
> On 31/07/2019 23:03, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2019-07-31 15:07:33)
> >> -static struct drm_syncobj **
> >> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> >> - struct drm_file *file)
> >> +static struct i915_eb_fences *
> >> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> >> +{
> >> + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> >> + &eb->extensions.timeline_fences;
> >> + struct drm_i915_gem_exec_fence __user *user_fences;
> >> + struct i915_eb_fences *fences;
> >> + u64 __user *user_values;
> >> + u64 num_fences, num_user_fences = timeline_fences->fence_count;
> >> + unsigned long n;
> >> + int err;
> >> +
> >> + /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> >> + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> >> + if (num_user_fences > min_t(unsigned long,
> >> + ULONG_MAX / sizeof(*user_fences),
> >> + SIZE_MAX / sizeof(*fences)))
> >> + return ERR_PTR(-EINVAL);
> >> +
> >> + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> >> + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> >> + return ERR_PTR(-EFAULT);
> >> +
> >> + user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> >> + if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> >> + return ERR_PTR(-EFAULT);
> >> +
> >> + fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> >> + __GFP_NOWARN | GFP_KERNEL);
> >> + if (!fences)
> >> + return ERR_PTR(-ENOMEM);
> >> +
> >> + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> >> + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> >> +
> >> + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> >> + struct drm_i915_gem_exec_fence user_fence;
> >> + struct drm_syncobj *syncobj;
> >> + struct dma_fence *fence = NULL;
> >> + u64 point;
> >> +
> >> + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> >> + err = -EFAULT;
> >> + goto err;
> >> + }
> >> +
> >> + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> >> + err = -EINVAL;
> >> + goto err;
> >> + }
> >> +
> >> + if (__get_user(point, user_values++)) {
> >> + err = -EFAULT;
> >> + goto err;
> >> + }
> >> +
> >> + syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> >> + if (!syncobj) {
> >> + DRM_DEBUG("Invalid syncobj handle provided\n");
> >> + err = -ENOENT;
> >> + goto err;
> >> + }
> >> +
> >> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> >> + fence = drm_syncobj_fence_get(syncobj);
> >> + if (!fence) {
> >> + DRM_DEBUG("Syncobj handle has no fence\n");
> >> + drm_syncobj_put(syncobj);
> >> + err = -EINVAL;
> >> + goto err;
> >> + }
> >> +
> >> + err = dma_fence_chain_find_seqno(&fence, point);
> >> + if (err) {
> >> + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
> >> + drm_syncobj_put(syncobj);
> >> + goto err;
> >> + }
> >> +
> >> + /* A point might have been signaled already and
> >> + * garbage collected from the timeline. In this case
> >> + * just ignore the point and carry on.
> >> + */
> >> + if (!fence) {
> >> + drm_syncobj_put(syncobj);
> >> + continue;
> >> + }
> >> + }
> >> +
> >> + /*
> >> + * For timeline syncobjs we need to preallocate chains for
> >> + * later signaling.
> >> + */
> >> + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> > if (dma_fence_chain_find_seqno() == 0)
> > return -EINVAL;
> >
> > as an early sanity check?
> >
> >> + fences[num_fences].chain_fence =
> >> + kmalloc(sizeof(*fences[num_fences].chain_fence),
> >> + GFP_KERNEL);
> >> + if (!fences[num_fences].chain_fence) {
> >> + dma_fence_put(fence);
> >> + drm_syncobj_put(syncobj);
> >> + err = -ENOMEM;
> >> + DRM_DEBUG("Unable to alloc chain_fence\n");
> >> + goto err;
> >> + }
> >> + } else {
> >> + fences[num_fences].chain_fence = NULL;
> >> + }
> >> +
> >> + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> >> + fences[num_fences].dma_fence = fence;
> >> + fences[num_fences].value = point;
> >> + num_fences++;
> >> + }
> >> +
> >> + *out_n_fences = num_fences;
> >> +
> >> + return fences;
> >> +
> >> +err:
> >> + __free_fence_array(fences, num_fences);
> >> + return ERR_PTR(err);
> >> +}
> >> @@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb,
> >>
> >> static void
> >> signal_fence_array(struct i915_execbuffer *eb,
> >> - struct drm_syncobj **fences)
> >> + struct i915_eb_fences *fences,
> >> + int nfences)
> >> {
> >> - const unsigned int nfences = eb->args->num_cliprects;
> >> struct dma_fence * const fence = &eb->request->fence;
> >> unsigned int n;
> >>
> >> @@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb,
> >> struct drm_syncobj *syncobj;
> >> unsigned int flags;
> >>
> >> - syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> >> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> >> if (!(flags & I915_EXEC_FENCE_SIGNAL))
> >> continue;
> >>
> >> - drm_syncobj_replace_fence(syncobj, fence);
> >> + if (fences[n].chain_fence) {
> >> + drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> >> + fence, fences[n].value);
> >> + /*
> >> + * The chain's ownership is transfered to the
> >> + * timeline.
> >> + */
> >> + fences[n].chain_fence = NULL;
> >> + } else {
> >> + drm_syncobj_replace_fence(syncobj, fence);
> >> + }
> >> }
> >> }
> > I think I have convinced myself that with the split between wait before,
> > signal after combined with the rule that seqno point along the syncobj
> > are monotonic, you should not be able to generate an AB-BA deadlock
> > between concurrent clients.
>
>
> Can you come up with an example that would deadlock?
Timeline holds 2,1; a wait on 2 will fail with -EINVAL to userspace.
(Though possibly perfectly valid behaviour on the part of the user.)
Timeline holds 2, with 1 being submitted. A wait on 1 waits on 2
instead. If 1 gains a dependency on A (e.g. bad userspace or an
implicit fence, it's a concurrent wait/submit so expect the worst, i.e.
userspace has to be racing with itself to get into this mess), you
now have a deadlock. (The assumption being that the syncpt along the
timeline are themselves not strictly ordered, and considering they are
external syncobj, that seems like a reasonable generalisation.)
-Chris
More information about the Intel-gfx
mailing list