[Intel-gfx] [PATCH v3 2/2] drm/i915: add syncobj timeline support
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu Aug 1 07:43:24 UTC 2019
On 31/07/2019 23:03, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-07-31 15:07:33)
>> -static struct drm_syncobj **
>> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>> -		struct drm_file *file)
>> +static struct i915_eb_fences *
>> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
>> +{
>> +	struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
>> +		&eb->extensions.timeline_fences;
>> +	struct drm_i915_gem_exec_fence __user *user_fences;
>> +	struct i915_eb_fences *fences;
>> +	u64 __user *user_values;
>> +	u64 num_fences, num_user_fences = timeline_fences->fence_count;
>> +	unsigned long n;
>> +	int err;
>> +
>> +	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
>> +	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
>> +	if (num_user_fences > min_t(unsigned long,
>> +				    ULONG_MAX / sizeof(*user_fences),
>> +				    SIZE_MAX / sizeof(*fences)))
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
>> +	if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
>> +		return ERR_PTR(-EFAULT);
>> +
>> +	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
>> +	if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
>> +		return ERR_PTR(-EFAULT);
>> +
>> +	fences = kvmalloc_array(num_user_fences, sizeof(*fences),
>> +				__GFP_NOWARN | GFP_KERNEL);
>> +	if (!fences)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
>> +		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>> +
>> +	for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
>> +		struct drm_i915_gem_exec_fence user_fence;
>> +		struct drm_syncobj *syncobj;
>> +		struct dma_fence *fence = NULL;
>> +		u64 point;
>> +
>> +		if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
>> +			err = -EFAULT;
>> +			goto err;
>> +		}
>> +
>> +		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
>> +			err = -EINVAL;
>> +			goto err;
>> +		}
>> +
>> +		if (__get_user(point, user_values++)) {
>> +			err = -EFAULT;
>> +			goto err;
>> +		}
>> +
>> +		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
>> +		if (!syncobj) {
>> +			DRM_DEBUG("Invalid syncobj handle provided\n");
>> +			err = -ENOENT;
>> +			goto err;
>> +		}
>> +
>> +		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
>> +			fence = drm_syncobj_fence_get(syncobj);
>> +			if (!fence) {
>> +				DRM_DEBUG("Syncobj handle has no fence\n");
>> +				drm_syncobj_put(syncobj);
>> +				err = -EINVAL;
>> +				goto err;
>> +			}
>> +
>> +			err = dma_fence_chain_find_seqno(&fence, point);
>> +			if (err) {
>> +				DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
>> +				drm_syncobj_put(syncobj);
>> +				goto err;
>> +			}
>> +
>> +			/* A point might have been signaled already and
>> +			 * garbage collected from the timeline. In this case
>> +			 * just ignore the point and carry on.
>> +			 */
>> +			if (!fence) {
>> +				drm_syncobj_put(syncobj);
>> +				continue;
>> +			}
>> +		}
>> +
>> +		/*
>> +		 * For timeline syncobjs we need to preallocate chains for
>> +		 * later signaling.
>> +		 */
>> +		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> 	if (dma_fence_chain_find_seqno() == 0)
> 		return -EINVAL;
>
> as an early sanity check?
Makes sense, let me see if that breaks anything.
Also waiting and signaling the same point doesn't work anymore for
timelines.
Will check that too.
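
Roughly what I have in mind for that early check (untested sketch, the helper
name is made up; drm_syncobj_fence_get()/dma_fence_chain_find_seqno() are the
existing DRM helpers), called from get_timeline_fence_array() before
preallocating the chain for a signal point:

/*
 * Sketch only: reject signaling a timeline point that can already be
 * resolved on the syncobj, so we never move the timeline backwards.
 */
static int check_signal_point_unused(struct drm_syncobj *syncobj, u64 point)
{
	struct dma_fence *fence;
	int err;

	if (!point)
		return 0; /* point 0 keeps the binary syncobj behavior */

	fence = drm_syncobj_fence_get(syncobj);
	if (!fence)
		return 0; /* empty timeline, nothing to collide with */

	/*
	 * dma_fence_chain_find_seqno() succeeding means the timeline has
	 * already reached this point, so signaling it again would break
	 * the monotonicity rule.
	 */
	err = dma_fence_chain_find_seqno(&fence, point);
	dma_fence_put(fence);

	return err ? 0 : -EINVAL;
}

If that returns -EINVAL we'd bail out of the fence loop the same way the
other checks do.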
>
>> +			fences[num_fences].chain_fence =
>> +				kmalloc(sizeof(*fences[num_fences].chain_fence),
>> +					GFP_KERNEL);
>> +			if (!fences[num_fences].chain_fence) {
>> +				dma_fence_put(fence);
>> +				drm_syncobj_put(syncobj);
>> +				err = -ENOMEM;
>> +				DRM_DEBUG("Unable to alloc chain_fence\n");
>> +				goto err;
>> +			}
>> +		} else {
>> +			fences[num_fences].chain_fence = NULL;
>> +		}
>> +
>> +		fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
>> +		fences[num_fences].dma_fence = fence;
>> +		fences[num_fences].value = point;
>> +		num_fences++;
>> +	}
>> +
>> +	*out_n_fences = num_fences;
>> +
>> +	return fences;
>> +
>> +err:
>> +	__free_fence_array(fences, num_fences);
>> +	return ERR_PTR(err);
>> +}
>> @@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb,
>>
>>  static void
>>  signal_fence_array(struct i915_execbuffer *eb,
>> -		   struct drm_syncobj **fences)
>> +		   struct i915_eb_fences *fences,
>> +		   int nfences)
>>  {
>> -	const unsigned int nfences = eb->args->num_cliprects;
>>  	struct dma_fence * const fence = &eb->request->fence;
>>  	unsigned int n;
>>
>> @@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb,
>>  		struct drm_syncobj *syncobj;
>>  		unsigned int flags;
>>
>> -		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
>> +		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>>  		if (!(flags & I915_EXEC_FENCE_SIGNAL))
>>  			continue;
>>
>> -		drm_syncobj_replace_fence(syncobj, fence);
>> +		if (fences[n].chain_fence) {
>> +			drm_syncobj_add_point(syncobj, fences[n].chain_fence,
>> +					      fence, fences[n].value);
>> +			/*
>> +			 * The chain's ownership is transferred to the
>> +			 * timeline.
>> +			 */
>> +			fences[n].chain_fence = NULL;
>> +		} else {
>> +			drm_syncobj_replace_fence(syncobj, fence);
>> +		}
>>  	}
>>  }
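
For reference, this is roughly how I expect userspace to fill this in
(illustrative only: syncobj_a/syncobj_b are placeholder handles, and how the
extension struct gets chained into drm_i915_gem_execbuffer2 is left out
because that wiring differs between revisions of this series):

	/* One wait on point 3 of syncobj_a, one signal of point 4 on syncobj_b.
	 * handles_ptr reuses drm_i915_gem_exec_fence, values_ptr carries the
	 * matching timeline points (a value of 0 keeps the binary behavior).
	 */
	struct drm_i915_gem_exec_fence fences[2] = {
		{ .handle = syncobj_a, .flags = I915_EXEC_FENCE_WAIT },
		{ .handle = syncobj_b, .flags = I915_EXEC_FENCE_SIGNAL },
	};
	uint64_t values[2] = { 3, 4 };
	struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
		.fence_count = 2,
		.handles_ptr = (uintptr_t)fences,
		.values_ptr = (uintptr_t)values,
	};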
> I think I have convinced myself that with the split between waiting before
> and signaling after, combined with the rule that seqno points along the
> syncobj are monotonic, you should not be able to generate an AB-BA deadlock
> between concurrent clients.
>
> Except that we need to fix up drm_syncobj_add_point() to actually apply
> that rule. Throwing an error and leaving the client stuck is less than
> ideal, we can't even recover with a GPU reset, and as they are native
> fences we don't employ a failsafe timer.
Unfortunately we're not the only user of add_point(), and amdgpu doesn't
want it to fail.
The best I can come up with is to take the syncobj lock when signaling in
get_timeline_fence_array(), do the check there, and unlock in
__free_fence_array().
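
Something like this for the check itself (just a sketch, helper name made up;
the keep-the-lock-held-until-__free_fence_array() part is the awkward bit and
isn't shown here):

/*
 * Call with syncobj->lock held so nothing can slip a new point in between
 * this check and drm_syncobj_add_point().
 */
static bool timeline_point_is_monotonic(struct drm_syncobj *syncobj, u64 point)
{
	struct dma_fence *head;

	lockdep_assert_held(&syncobj->lock);

	head = rcu_dereference_protected(syncobj->fence,
					 lockdep_is_held(&syncobj->lock));

	/* Only a dma_fence_chain head carries a timeline point. */
	if (head && to_dma_fence_chain(head))
		return point > head->seqno;

	return true;
}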
>
> Doesn't WAIT_FOR_SUBMIT throw a spanner in the works and allow for
> deadlocks?
Hm... I can't see how.
Unless you mean clients could deadlock themselves the same way they would
with 2 pthread_mutexes and 2 threads trying to lock both mutexes in opposite
order (for example, each of two submissions using WAIT_FOR_SUBMIT to wait on
a point the other submission is supposed to signal).
Is that the kernel's problem?
> On the other hand, if they used MI_SEMA rather than scheduler
> fences, give or take some coarseness in our timeslicing granularity (I
> need to enable fast context switches on user semaphores -- first attempt
> was just an interrupt storm!) it will work as well as a round-robin
> scheduler can work. (In other words, we only need coarse fences for the
> scheduler and userspace can implement its own semaphores -- with the
> open discussion on how to share the iova user semaphores between
> processes, cf ugpu.)
> -Chris
>