[Intel-gfx] [PATCH 2/5] drm/i915/gem: Separate reloc validation into an earlier step
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Jun 5 15:27:26 UTC 2020
On 05/06/2020 10:58, Chris Wilson wrote:
> Over the next couple of patches, we will want to lock all the modified
> vma for relocation processing under a single ww_mutex. We neither want
> to have to include the vma that are skipped (due to no modifications
> required) nor do we want those to be marked as written too. So separate
> out the reloc validation into an early step, which we can use both to
> reject the execbuf before committing to making our changes, and to
> filter out the unmodified vma.
>
> This does introduce a second pass through the reloc[], but only if we
> need to emit relocations.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 178 +++++++++++++-----
> 1 file changed, 133 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index cfe6d2cdbef1..7d4464fddca8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -1331,6 +1331,117 @@ static u64
> eb_relocate_entry(struct i915_execbuffer *eb,
> struct eb_vma *ev,
> const struct drm_i915_gem_relocation_entry *reloc)
> +{
> + struct eb_vma *target;
> +
> + /* we've already hold a reference to all valid objects */
> + target = eb_get_vma(eb, reloc->target_handle);
> + if (unlikely(!target))
> + return -ENOENT;
> +
> + /*
> + * If the relocation already has the right value in it, no
> + * more work needs to be done.
> + */
> + if (gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
> + return 0;
These have been filtered out, no?
> +
> + /*
> + * If we write into the object, we need to force the synchronisation
> + * barrier, either with an asynchronous clflush or if we executed the
> + * patching using the GPU (though that should be serialised by the
> + * timeline). To be completely sure, and since we are required to
> + * do relocations we are already stalling, disable the user's opt
> + * out of our synchronisation.
> + */
> + ev->flags &= ~EXEC_OBJECT_ASYNC;
> +
> + /* and update the user's relocation entry */
> + return relocate_entry(eb, ev->vma, reloc, target->vma);
> +}
> +
> +static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
> +{
> +#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
> + struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
> + const struct drm_i915_gem_exec_object2 *entry = ev->exec;
> + struct drm_i915_gem_relocation_entry __user *urelocs =
> + u64_to_user_ptr(entry->relocs_ptr);
> + unsigned long remain = entry->relocation_count;
> +
> + if (unlikely(remain > N_RELOC(ULONG_MAX)))
> + return -EINVAL;
This has been checked already in eb_reloca_vma_validate.
> +
> + /*
> + * We must check that the entire relocation array is safe
> + * to read. However, if the array is not writable the user loses
> + * the updated relocation values.
> + */
> + if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
> + return -EFAULT;
> +
> + do {
> + struct drm_i915_gem_relocation_entry *r = stack;
> + unsigned int count =
> + min_t(unsigned long, remain, ARRAY_SIZE(stack));
> + unsigned int copied;
> +
> + /*
> + * This is the fast path and we cannot handle a pagefault
> + * whilst holding the struct mutex lest the user pass in the
> + * relocations contained within a mmaped bo. For in such a case
> + * we, the page fault handler would call i915_gem_fault() and
> + * we would try to acquire the struct mutex again. Obviously
> + * this is bad and so lockdep complains vehemently.
> + */
> + copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
> + if (unlikely(copied))
> + return -EFAULT;
> +
> + remain -= count;
The above two comments end up duplicated which is kind of ugly. Not sure
how a common "runner/looper" would look with just the per-reloc body
being a passed in function.
> + do {
> + u64 offset = eb_relocate_entry(eb, ev, r);
> +
> + if (likely(offset == 0)) {
> + } else if ((s64)offset < 0) {
> + return (int)offset;
> + } else {
> + /*
> + * Note that reporting an error now
> + * leaves everything in an inconsistent
> + * state as we have *already* changed
> + * the relocation value inside the
> + * object. As we have not changed the
> + * reloc.presumed_offset or will not
> + * change the execobject.offset, on the
> + * call we may not rewrite the value
> + * inside the object, leaving it
> + * dangling and causing a GPU hang. Unless
> + * userspace dynamically rebuilds the
> + * relocations on each execbuf rather than
> + * presume a static tree.
> + *
> + * We did previously check if the relocations
> + * were writable (access_ok), an error now
> + * would be a strange race with mprotect,
> + * having already demonstrated that we
> + * can read from this userspace address.
> + */
> + offset = gen8_canonical_addr(offset & ~UPDATE);
> + __put_user(offset,
> + &urelocs[r - stack].presumed_offset);
> + }
> + } while (r++, --count);
> + urelocs += ARRAY_SIZE(stack);
> + } while (remain);
> +
> + return 0;
> +}
> +
> +static int
> +eb_reloc_valid(struct i915_execbuffer *eb,
> + struct eb_vma *ev,
> + const struct drm_i915_gem_relocation_entry *reloc)
It does a bit more than check for validity so if you agree maybe
eb_reloc_prepare(_entry)?
> {
> struct drm_i915_private *i915 = eb->i915;
> struct eb_vma *target;
> @@ -1408,21 +1519,10 @@ eb_relocate_entry(struct i915_execbuffer *eb,
> return -EINVAL;
> }
>
> - /*
> - * If we write into the object, we need to force the synchronisation
> - * barrier, either with an asynchronous clflush or if we executed the
> - * patching using the GPU (though that should be serialised by the
> - * timeline). To be completely sure, and since we are required to
> - * do relocations we are already stalling, disable the user's opt
> - * out of our synchronisation.
> - */
> - ev->flags &= ~EXEC_OBJECT_ASYNC;
> -
> - /* and update the user's relocation entry */
> - return relocate_entry(eb, ev->vma, reloc, target->vma);
> + return 1;
> }
>
> -static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
> +static long eb_reloc_vma_validate(struct i915_execbuffer *eb, struct eb_vma *ev)
> {
> #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
> struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
> @@ -1430,6 +1530,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
> struct drm_i915_gem_relocation_entry __user *urelocs =
> u64_to_user_ptr(entry->relocs_ptr);
> unsigned long remain = entry->relocation_count;
> + long required = 0;
>
> if (unlikely(remain > N_RELOC(ULONG_MAX)))
> return -EINVAL;
> @@ -1462,42 +1563,18 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
>
> remain -= count;
> do {
> - u64 offset = eb_relocate_entry(eb, ev, r);
> + int ret;
>
> - if (likely(offset == 0)) {
> - } else if ((s64)offset < 0) {
> - return (int)offset;
> - } else {
> - /*
> - * Note that reporting an error now
> - * leaves everything in an inconsistent
> - * state as we have *already* changed
> - * the relocation value inside the
> - * object. As we have not changed the
> - * reloc.presumed_offset or will not
> - * change the execobject.offset, on the
> - * call we may not rewrite the value
> - * inside the object, leaving it
> - * dangling and causing a GPU hang. Unless
> - * userspace dynamically rebuilds the
> - * relocations on each execbuf rather than
> - * presume a static tree.
> - *
> - * We did previously check if the relocations
> - * were writable (access_ok), an error now
> - * would be a strange race with mprotect,
> - * having already demonstrated that we
> - * can read from this userspace address.
> - */
> - offset = gen8_canonical_addr(offset & ~UPDATE);
> - __put_user(offset,
> - &urelocs[r - stack].presumed_offset);
> - }
> + ret = eb_reloc_valid(eb, ev, r);
> + if (ret < 0)
> + return ret;
> +
> + required += ret;
> } while (r++, --count);
> urelocs += ARRAY_SIZE(stack);
> } while (remain);
>
> - return 0;
> + return required;
> }
>
> static int eb_relocate(struct i915_execbuffer *eb)
> @@ -1516,9 +1593,20 @@ static int eb_relocate(struct i915_execbuffer *eb)
>
> /* The objects are in their final locations, apply the relocations. */
> if (eb->args->flags & __EXEC_HAS_RELOC) {
> - struct eb_vma *ev;
> + struct eb_vma *ev, *en;
> int flush;
>
> + list_for_each_entry_safe(ev, en, &eb->relocs, reloc_link) {
> + long count;
> +
> + count = eb_reloc_vma_validate(eb, ev);
> + if (count < 0)
> + return count;
> +
> + if (count == 0)
> + list_del_init(&ev->reloc_link);
> + }
> +
> list_for_each_entry(ev, &eb->relocs, reloc_link) {
> err = eb_relocate_vma(eb, ev);
> if (err)
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list