[Intel-gfx] [PATCH 10/28] drm/i915/gem: Separate reloc validation into an earlier step
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Jun 9 07:47:00 UTC 2020
On 07/06/2020 23:20, Chris Wilson wrote:
> Over the next couple of patches, we will want to lock all the modified
> vma for relocation processing under a single ww_mutex. We neither want
> to have to include the vma that are skipped (due to no modifications
> required) nor do we want those to be marked as written too. So separate
> out the reloc validation into an early step, which we can use both to
> reject the execbuf before committing to making our changes, and to
> filter out the unmodified vma.
>
> This does introduce a second pass through the reloc[], but only if we
> need to emit relocations.
>
> v2: reuse the outer loop, not cut'n'paste.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 145 +++++++++++-------
> 1 file changed, 86 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 23db79b806db..01ab1e15a142 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -911,9 +911,9 @@ static void eb_destroy(const struct i915_execbuffer *eb)
>
> static inline u64
> relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
> - const struct i915_vma *target)
> + u64 target)
> {
> - return gen8_canonical_addr((int)reloc->delta + target->node.start);
> + return gen8_canonical_addr((int)reloc->delta + target);
> }
>
> static void reloc_cache_init(struct reloc_cache *cache,
> @@ -1292,26 +1292,11 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
> return 0;
> }
>
> -static u64
> -relocate_entry(struct i915_execbuffer *eb,
> - struct i915_vma *vma,
> - const struct drm_i915_gem_relocation_entry *reloc,
> - const struct i915_vma *target)
> -{
> - u64 target_addr = relocation_target(reloc, target);
> - int err;
> -
> - err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr);
> - if (err)
> - return err;
> -
> - return target->node.start | UPDATE;
> -}
> -
> -static u64
> -eb_relocate_entry(struct i915_execbuffer *eb,
> - struct eb_vma *ev,
> - const struct drm_i915_gem_relocation_entry *reloc)
> +static int
> +eb_reloc_prepare(struct i915_execbuffer *eb,
> + struct eb_vma *ev,
> + const struct drm_i915_gem_relocation_entry *reloc,
> + struct drm_i915_gem_relocation_entry __user *user)
> {
> struct drm_i915_private *i915 = eb->i915;
> struct eb_vma *target;
> @@ -1389,6 +1374,32 @@ eb_relocate_entry(struct i915_execbuffer *eb,
> return -EINVAL;
> }
>
> + return 1;
> +}
> +
> +static int
> +eb_reloc_entry(struct i915_execbuffer *eb,
> + struct eb_vma *ev,
> + const struct drm_i915_gem_relocation_entry *reloc,
> + struct drm_i915_gem_relocation_entry __user *user)
> +{
> + struct eb_vma *target;
> + u64 offset;
> + int err;
> +
> + /* we've already hold a reference to all valid objects */
> + target = eb_get_vma(eb, reloc->target_handle);
> + if (unlikely(!target))
> + return -ENOENT;
> +
> + /*
> + * If the relocation already has the right value in it, no
> + * more work needs to be done.
> + */
> + offset = gen8_canonical_addr(target->vma->node.start);
> + if (offset == reloc->presumed_offset) > + return 0;
> +
Haven't these reloc entries been removed from the list in the prepare phase?
Regards,
Tvrtko
> /*
> * If we write into the object, we need to force the synchronisation
> * barrier, either with an asynchronous clflush or if we executed the
> @@ -1399,11 +1410,41 @@ eb_relocate_entry(struct i915_execbuffer *eb,
> */
> ev->flags &= ~EXEC_OBJECT_ASYNC;
>
> - /* and update the user's relocation entry */
> - return relocate_entry(eb, ev->vma, reloc, target->vma);
> + err = __reloc_entry_gpu(eb, ev->vma, reloc->offset,
> + relocation_target(reloc, offset));
> + if (err)
> + return err;
> +
> + /*
> + * Note that reporting an error now
> + * leaves everything in an inconsistent
> + * state as we have *already* changed
> + * the relocation value inside the
> + * object. As we have not changed the
> + * reloc.presumed_offset or will not
> + * change the execobject.offset, on the
> + * call we may not rewrite the value
> + * inside the object, leaving it
> + * dangling and causing a GPU hang. Unless
> + * userspace dynamically rebuilds the
> + * relocations on each execbuf rather than
> + * presume a static tree.
> + *
> + * We did previously check if the relocations
> + * were writable (access_ok), an error now
> + * would be a strange race with mprotect,
> + * having already demonstrated that we
> + * can read from this userspace address.
> + */
> + __put_user(offset, &user->presumed_offset);
> + return 0;
> }
>
> -static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
> +static long eb_reloc_vma(struct i915_execbuffer *eb, struct eb_vma *ev,
> + int (*fn)(struct i915_execbuffer *eb,
> + struct eb_vma *ev,
> + const struct drm_i915_gem_relocation_entry *reloc,
> + struct drm_i915_gem_relocation_entry __user *user))
> {
> #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
> struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
> @@ -1411,6 +1452,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
> struct drm_i915_gem_relocation_entry __user *urelocs =
> u64_to_user_ptr(entry->relocs_ptr);
> unsigned long remain = entry->relocation_count;
> + int required = 0;
>
> if (unlikely(remain > N_RELOC(ULONG_MAX)))
> return -EINVAL;
> @@ -1443,42 +1485,18 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
>
> remain -= count;
> do {
> - u64 offset = eb_relocate_entry(eb, ev, r);
> + int ret;
>
> - if (likely(offset == 0)) {
> - } else if ((s64)offset < 0) {
> - return (int)offset;
> - } else {
> - /*
> - * Note that reporting an error now
> - * leaves everything in an inconsistent
> - * state as we have *already* changed
> - * the relocation value inside the
> - * object. As we have not changed the
> - * reloc.presumed_offset or will not
> - * change the execobject.offset, on the
> - * call we may not rewrite the value
> - * inside the object, leaving it
> - * dangling and causing a GPU hang. Unless
> - * userspace dynamically rebuilds the
> - * relocations on each execbuf rather than
> - * presume a static tree.
> - *
> - * We did previously check if the relocations
> - * were writable (access_ok), an error now
> - * would be a strange race with mprotect,
> - * having already demonstrated that we
> - * can read from this userspace address.
> - */
> - offset = gen8_canonical_addr(offset & ~UPDATE);
> - __put_user(offset,
> - &urelocs[r - stack].presumed_offset);
> - }
> + ret = fn(eb, ev, r, &urelocs[r - stack]);
> + if (ret < 0)
> + return ret;
> +
> + required |= ret;
> } while (r++, --count);
> urelocs += ARRAY_SIZE(stack);
> } while (remain);
>
> - return 0;
> + return required;
> }
>
> static int eb_relocate(struct i915_execbuffer *eb)
> @@ -1497,12 +1515,21 @@ static int eb_relocate(struct i915_execbuffer *eb)
>
> /* The objects are in their final locations, apply the relocations. */
> if (eb->args->flags & __EXEC_HAS_RELOC) {
> - struct eb_vma *ev;
> + struct eb_vma *ev, *en;
> int flush;
>
> + list_for_each_entry_safe(ev, en, &eb->relocs, reloc_link) {
> + err = eb_reloc_vma(eb, ev, eb_reloc_prepare);
> + if (err < 0)
> + return err;
> +
> + if (err == 0)
> + list_del_init(&ev->reloc_link);
> + }
> +
> list_for_each_entry(ev, &eb->relocs, reloc_link) {
> - err = eb_relocate_vma(eb, ev);
> - if (err)
> + err = eb_reloc_vma(eb, ev, eb_reloc_entry);
> + if (err < 0)
> break;
> }
>
>
More information about the Intel-gfx
mailing list