[Intel-gfx] [PATCH 28/28] drm/i915: Remove short-term pins from execbuf, v4.

Matthew Auld matthew.william.auld at gmail.com
Mon Oct 25 15:02:00 UTC 2021


On Thu, 21 Oct 2021 at 11:37, Maarten Lankhorst
<maarten.lankhorst at linux.intel.com> wrote:
>
> Add a flag PIN_VALIDATE, to indicate that we don't need to pin and that
> the vma is only protected by the object lock.
>
> This removes the need to unpin; the equivalent of unpinning is now done
> by just releasing the lock.
>
> eb_reserve is slightly reworked for readability, but the same steps
> are still done:
> - First pass pins with NONBLOCK.
> - Second pass unbinds all objects first, then pins.
> - Third pass is only called when not all objects are softpinned, and
>   unbinds all objects, then calls i915_gem_evict_vm(), then pins.
>
> When evicting the entire vm in eb_reserve() we do temporarily pin objects
> that are marked with EXEC_OBJECT_PINNED. This is because they are already
> at their destination, and i915_gem_evict_vm() would otherwise unbind them.
>
> However, we reduce the visibility of those pins by limiting the pin
> to our call to i915_gem_evict_vm() only, and pin with vm->mutex held,
> instead of the entire duration of the execbuf.
>
> Not sure the latter matters, one can hope..
> In theory we could kill the pinning by adding an extra flag to the vma
> to temporarily prevent unbinding for gtt for i915_gem_evict_vm only, but
> I think that might be overkill. We're still holding the object lock, and
> we don't have blocking eviction yet. It's likely sufficient to simply
> enforce EXEC_OBJECT_PINNED for all objects on >= gen12.
>
> Changes since v1:
> - Split out eb_reserve() into separate functions for readability.
> Changes since v2:
> - Make batch buffer mappable on platforms where only GGTT is available,
>   to prevent moving the batch buffer during relocations.
> Changes since v3:
> - Preserve current behavior for batch buffer, instead be cautious when
>   calling i915_gem_object_ggtt_pin_ww, and re-use the current batch vma
>   if it's inside ggtt and map-and-fenceable.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 252 ++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_gtt.h           |   1 +
>  drivers/gpu/drm/i915/i915_vma.c               |  24 +-
>  3 files changed, 161 insertions(+), 116 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index bbf2a10738f7..19f91143cfcf 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -439,7 +439,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
>         else
>                 pin_flags = entry->offset & PIN_OFFSET_MASK;
>
> -       pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
> +       pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
>         if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
>                 pin_flags |= PIN_GLOBAL;
>
> @@ -457,17 +457,15 @@ eb_pin_vma(struct i915_execbuffer *eb,
>                                              entry->pad_to_size,
>                                              entry->alignment,
>                                              eb_pin_flags(entry, ev->flags) |
> -                                            PIN_USER | PIN_NOEVICT);
> +                                            PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
>                 if (unlikely(err))
>                         return err;
>         }
>
>         if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
>                 err = i915_vma_pin_fence(vma);
> -               if (unlikely(err)) {
> -                       i915_vma_unpin(vma);
> +               if (unlikely(err))
>                         return err;
> -               }
>
>                 if (vma->fence)
>                         ev->flags |= __EXEC_OBJECT_HAS_FENCE;
> @@ -483,13 +481,9 @@ eb_pin_vma(struct i915_execbuffer *eb,
>  static inline void
>  eb_unreserve_vma(struct eb_vma *ev)
>  {
> -       if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
> -               return;
> -
>         if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
>                 __i915_vma_unpin_fence(ev->vma);
>
> -       __i915_vma_unpin(ev->vma);
>         ev->flags &= ~__EXEC_OBJECT_RESERVED;
>  }
>
> @@ -682,10 +676,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
>
>         if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
>                 err = i915_vma_pin_fence(vma);
> -               if (unlikely(err)) {
> -                       i915_vma_unpin(vma);
> +               if (unlikely(err))
>                         return err;
> -               }
>
>                 if (vma->fence)
>                         ev->flags |= __EXEC_OBJECT_HAS_FENCE;
> @@ -697,85 +689,129 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
>         return 0;
>  }
>
> -static int eb_reserve(struct i915_execbuffer *eb)
> +static int eb_evict_vm(struct i915_execbuffer *eb)
> +{
> +       const unsigned int count = eb->buffer_count;
> +       unsigned int i;
> +       int err;
> +
> +       err = mutex_lock_interruptible(&eb->context->vm->mutex);
> +       if (err)
> +               return err;
> +
> +       /* pin to protect against i915_gem_evict_vm evicting below */
> +       for (i = 0; i < count; i++) {
> +               struct eb_vma *ev = &eb->vma[i];
> +
> +               if (ev->flags & __EXEC_OBJECT_HAS_PIN)
> +                       __i915_vma_pin(ev->vma);
> +       }
> +
> +       /* Too fragmented, unbind everything and retry */
> +       err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
> +
> +       /* unpin objects.. */
> +       for (i = 0; i < count; i++) {
> +               struct eb_vma *ev = &eb->vma[i];
> +
> +               if (ev->flags & __EXEC_OBJECT_HAS_PIN)
> +                       i915_vma_unpin(ev->vma);
> +       }
> +
> +       mutex_unlock(&eb->context->vm->mutex);
> +
> +       return err;
> +}
> +
> +static bool eb_unbind(struct i915_execbuffer *eb)
>  {
>         const unsigned int count = eb->buffer_count;
> -       unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
> +       unsigned int i;
>         struct list_head last;
> +       bool unpinned = false;
> +
> +       /* Resort *all* the objects into priority order */
> +       INIT_LIST_HEAD(&eb->unbound);
> +       INIT_LIST_HEAD(&last);
> +
> +       for (i = 0; i < count; i++) {
> +               struct eb_vma *ev = &eb->vma[i];
> +               unsigned int flags = ev->flags;
> +
> +               if (flags & EXEC_OBJECT_PINNED &&
> +                   flags & __EXEC_OBJECT_HAS_PIN)
> +                       continue;
> +
> +               unpinned = true;
> +               eb_unreserve_vma(ev);
> +
> +               if (flags & EXEC_OBJECT_PINNED)
> +                       /* Pinned must have their slot */
> +                       list_add(&ev->bind_link, &eb->unbound);
> +               else if (flags & __EXEC_OBJECT_NEEDS_MAP)
> +                       /* Map require the lowest 256MiB (aperture) */
> +                       list_add_tail(&ev->bind_link, &eb->unbound);
> +               else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
> +                       /* Prioritise 4GiB region for restricted bo */
> +                       list_add(&ev->bind_link, &last);
> +               else
> +                       list_add_tail(&ev->bind_link, &last);
> +       }
> +
> +       list_splice_tail(&last, &eb->unbound);
> +       return unpinned;
> +}
> +
> +static int eb_reserve(struct i915_execbuffer *eb)
> +{
>         struct eb_vma *ev;
> -       unsigned int i, pass;
> +       unsigned int pass;
>         int err = 0;
> +       bool unpinned;
>
>         /*
>          * Attempt to pin all of the buffers into the GTT.
> -        * This is done in 3 phases:
> +        * This is done in 2 phases:
>          *
> -        * 1a. Unbind all objects that do not match the GTT constraints for
> -        *     the execbuffer (fenceable, mappable, alignment etc).
> -        * 1b. Increment pin count for already bound objects.
> -        * 2.  Bind new objects.
> -        * 3.  Decrement pin count.
> +        * 1. Unbind all objects that do not match the GTT constraints for
> +        *    the execbuffer (fenceable, mappable, alignment etc).
> +        * 2. Bind new objects.
>          *
>          * This avoid unnecessary unbinding of later objects in order to make
>          * room for the earlier objects *unless* we need to defragment.
> +        *
> +        * Defragmenting is skipped if all objects are pinned at a fixed location.
>          */
> -       pass = 0;
> -       do {
> -               list_for_each_entry(ev, &eb->unbound, bind_link) {
> -                       err = eb_reserve_vma(eb, ev, pin_flags);
> -                       if (err)
> -                               break;
> -               }
> -               if (err != -ENOSPC)
> -                       return err;
> +       for (pass = 0; pass <= 2; pass++) {
> +               int pin_flags = PIN_USER | PIN_VALIDATE;
>
> -               /* Resort *all* the objects into priority order */
> -               INIT_LIST_HEAD(&eb->unbound);
> -               INIT_LIST_HEAD(&last);
> -               for (i = 0; i < count; i++) {
> -                       unsigned int flags;
> +               if (pass == 0)
> +                       pin_flags |= PIN_NONBLOCK;
>
> -                       ev = &eb->vma[i];
> -                       flags = ev->flags;
> -                       if (flags & EXEC_OBJECT_PINNED &&
> -                           flags & __EXEC_OBJECT_HAS_PIN)
> -                               continue;
> +               if (pass >= 1)
> +                       unpinned = eb_unbind(eb);
>
> -                       eb_unreserve_vma(ev);
> -
> -                       if (flags & EXEC_OBJECT_PINNED)
> -                               /* Pinned must have their slot */
> -                               list_add(&ev->bind_link, &eb->unbound);
> -                       else if (flags & __EXEC_OBJECT_NEEDS_MAP)
> -                               /* Map require the lowest 256MiB (aperture) */
> -                               list_add_tail(&ev->bind_link, &eb->unbound);
> -                       else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
> -                               /* Prioritise 4GiB region for restricted bo */
> -                               list_add(&ev->bind_link, &last);
> -                       else
> -                               list_add_tail(&ev->bind_link, &last);
> -               }
> -               list_splice_tail(&last, &eb->unbound);
> -
> -               switch (pass++) {
> -               case 0:
> -                       break;
> +               if (pass == 2) {
> +                       /* No point in defragmenting gtt if all is pinned */
> +                       if (!unpinned)
> +                               return -ENOSPC;

Can this ever happen? If everything is already pinned where it's meant
to be, then how did we get here?

>
> -               case 1:
> -                       /* Too fragmented, unbind everything and retry */
> -                       mutex_lock(&eb->context->vm->mutex);
> -                       err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
> -                       mutex_unlock(&eb->context->vm->mutex);
> +                       err = eb_evict_vm(eb);
>                         if (err)
>                                 return err;
> -                       break;
> +               }
>
> -               default:
> -                       return -ENOSPC;
> +               list_for_each_entry(ev, &eb->unbound, bind_link) {
> +                       err = eb_reserve_vma(eb, ev, pin_flags);
> +                       if (err)
> +                               break;
>                 }
>
> -               pin_flags = PIN_USER;
> -       } while (1);
> +               if (err != -ENOSPC)
> +                       break;
> +       }
> +
> +       return err;
>  }
>
>  static int eb_select_context(struct i915_execbuffer *eb)
> @@ -1184,10 +1220,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
>         return vaddr;
>  }
>
> -static void *reloc_iomap(struct drm_i915_gem_object *obj,
> +static void *reloc_iomap(struct i915_vma *batch,
>                          struct i915_execbuffer *eb,
>                          unsigned long page)
>  {
> +       struct drm_i915_gem_object *obj = batch->obj;
>         struct reloc_cache *cache = &eb->reloc_cache;
>         struct i915_ggtt *ggtt = cache_to_ggtt(cache);
>         unsigned long offset;
> @@ -1197,7 +1234,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>                 intel_gt_flush_ggtt_writes(ggtt->vm.gt);
>                 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
>         } else {
> -               struct i915_vma *vma;
> +               struct i915_vma *vma = ERR_PTR(-ENODEV);
>                 int err;
>
>                 if (i915_gem_object_is_tiled(obj))
> @@ -1210,10 +1247,23 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>                 if (err)
>                         return ERR_PTR(err);
>
> -               vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
> -                                                 PIN_MAPPABLE |
> -                                                 PIN_NONBLOCK /* NOWARN */ |
> -                                                 PIN_NOEVICT);
> +               /*
> +                * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
> +                * VMA from the object list because we no longer pin.
> +                *
> +                * Only attempt to pin the batch buffer to ggtt if the current batch
> +                * is not inside ggtt, or the batch buffer is not misplaced.
> +                */
> +               if (!i915_is_ggtt(batch->vm)) {
> +                       vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
> +                                                         PIN_MAPPABLE |
> +                                                         PIN_NONBLOCK /* NOWARN */ |
> +                                                         PIN_NOEVICT);
> +               } else if (i915_vma_is_map_and_fenceable(batch)) {
> +                       __i915_vma_pin(batch);
> +                       vma = batch;
> +               }
> +
>                 if (vma == ERR_PTR(-EDEADLK))
>                         return vma;
>
> @@ -1251,7 +1301,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
>         return vaddr;
>  }
>
> -static void *reloc_vaddr(struct drm_i915_gem_object *obj,
> +static void *reloc_vaddr(struct i915_vma *vma,
>                          struct i915_execbuffer *eb,
>                          unsigned long page)
>  {
> @@ -1263,9 +1313,9 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
>         } else {
>                 vaddr = NULL;
>                 if ((cache->vaddr & KMAP) == 0)
> -                       vaddr = reloc_iomap(obj, eb, page);
> +                       vaddr = reloc_iomap(vma, eb, page);
>                 if (!vaddr)
> -                       vaddr = reloc_kmap(obj, cache, page);
> +                       vaddr = reloc_kmap(vma->obj, cache, page);
>         }
>
>         return vaddr;
> @@ -1306,7 +1356,7 @@ relocate_entry(struct i915_vma *vma,
>         void *vaddr;
>
>  repeat:
> -       vaddr = reloc_vaddr(vma->obj, eb,
> +       vaddr = reloc_vaddr(vma, eb,
>                             offset >> PAGE_SHIFT);
>         if (IS_ERR(vaddr))
>                 return PTR_ERR(vaddr);
> @@ -2074,7 +2124,7 @@ shadow_batch_pin(struct i915_execbuffer *eb,
>         if (IS_ERR(vma))
>                 return vma;
>
> -       err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
> +       err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
>         if (err)
>                 return ERR_PTR(err);
>
> @@ -2088,7 +2138,7 @@ static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i9
>          * batch" bit. Hence we need to pin secure batches into the global gtt.
>          * hsw should have this fixed, but bdw mucks it up again. */
>         if (eb->batch_flags & I915_DISPATCH_SECURE)
> -               return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
> +               return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
>
>         return NULL;
>  }
> @@ -2139,13 +2189,12 @@ static int eb_parse(struct i915_execbuffer *eb)
>
>         err = i915_gem_object_lock(pool->obj, &eb->ww);
>         if (err)
> -               goto err;
> +               return err;
>
>         shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
> -       if (IS_ERR(shadow)) {
> -               err = PTR_ERR(shadow);
> -               goto err;
> -       }
> +       if (IS_ERR(shadow))
> +               return PTR_ERR(shadow);
> +
>         intel_gt_buffer_pool_mark_used(pool);
>         i915_gem_object_set_readonly(shadow->obj);
>         shadow->private = pool;
> @@ -2157,25 +2206,21 @@ static int eb_parse(struct i915_execbuffer *eb)
>                 shadow = shadow_batch_pin(eb, pool->obj,
>                                           &eb->gt->ggtt->vm,
>                                           PIN_GLOBAL);
> -               if (IS_ERR(shadow)) {
> -                       err = PTR_ERR(shadow);
> -                       shadow = trampoline;
> -                       goto err_shadow;
> -               }
> +               if (IS_ERR(shadow))
> +                       return PTR_ERR(shadow);
> +
>                 shadow->private = pool;
>
>                 eb->batch_flags |= I915_DISPATCH_SECURE;
>         }
>
>         batch = eb_dispatch_secure(eb, shadow);
> -       if (IS_ERR(batch)) {
> -               err = PTR_ERR(batch);
> -               goto err_trampoline;
> -       }
> +       if (IS_ERR(batch))
> +               return PTR_ERR(batch);
>
>         err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
>         if (err)
> -               goto err_trampoline;
> +               return err;
>
>         err = intel_engine_cmd_parser(eb->context->engine,
>                                       eb->batches[0]->vma,
> @@ -2183,7 +2228,7 @@ static int eb_parse(struct i915_execbuffer *eb)
>                                       eb->batch_len[0],
>                                       shadow, trampoline);
>         if (err)
> -               goto err_unpin_batch;
> +               return err;
>
>         eb->batches[0] = &eb->vma[eb->buffer_count++];
>         eb->batches[0]->vma = i915_vma_get(shadow);
> @@ -2202,17 +2247,6 @@ static int eb_parse(struct i915_execbuffer *eb)
>                 eb->batches[0]->vma = i915_vma_get(batch);
>         }
>         return 0;
> -
> -err_unpin_batch:
> -       if (batch)
> -               i915_vma_unpin(batch);
> -err_trampoline:
> -       if (trampoline)
> -               i915_vma_unpin(trampoline);
> -err_shadow:
> -       i915_vma_unpin(shadow);
> -err:
> -       return err;
>  }
>
>  static int eb_request_submit(struct i915_execbuffer *eb,
> @@ -3337,8 +3371,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>
>  err_vma:
>         eb_release_vmas(&eb, true);
> -       if (eb.trampoline)
> -               i915_vma_unpin(eb.trampoline);
>         WARN_ON(err == -EDEADLK);
>         i915_gem_ww_ctx_fini(&eb.ww);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index e4938aba3fe9..8c2f57eb5dda 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -44,6 +44,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>  #define PIN_HIGH               BIT_ULL(5)
>  #define PIN_OFFSET_BIAS                BIT_ULL(6)
>  #define PIN_OFFSET_FIXED       BIT_ULL(7)
> +#define PIN_VALIDATE           BIT_ULL(8) /* validate placement only, no need to call unpin() */
>
>  #define PIN_GLOBAL             BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
>  #define PIN_USER               BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 65168db534f0..0706731b211d 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -751,6 +751,15 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
>         unsigned int bound;
>
>         bound = atomic_read(&vma->flags);
> +
> +       if (flags & PIN_VALIDATE) {
> +               flags &= I915_VMA_BIND_MASK;
> +
> +               return (flags & bound) == flags;
> +       }
> +
> +       /* with the lock mandatory for unbind, we don't race here */
> +       flags &= I915_VMA_BIND_MASK;
>         do {
>                 if (unlikely(flags & ~bound))
>                         return false;
> @@ -1176,7 +1185,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>         GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
>
>         /* First try and grab the pin without rebinding the vma */
> -       if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
> +       if (try_qad_pin(vma, flags))
>                 return 0;
>
>         err = i915_vma_get_pages(vma);
> @@ -1255,7 +1264,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>         }
>
>         if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
> -               __i915_vma_pin(vma);
> +               if (!(flags & PIN_VALIDATE))
> +                       __i915_vma_pin(vma);
>                 goto err_unlock;
>         }
>
> @@ -1284,8 +1294,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>         atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
>         list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>
> -       __i915_vma_pin(vma);
> -       GEM_BUG_ON(!i915_vma_is_pinned(vma));
> +       if (!(flags & PIN_VALIDATE)) {
> +               __i915_vma_pin(vma);
> +               GEM_BUG_ON(!i915_vma_is_pinned(vma));
> +       }
>         GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
>         GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
>
> @@ -1538,8 +1550,6 @@ static int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *
>  {
>         int err;
>
> -       GEM_BUG_ON(!i915_vma_is_pinned(vma));
> -
>         /* Wait for the vma to be bound before we start! */
>         err = __i915_request_await_bind(rq, vma);
>         if (err)
> @@ -1558,6 +1568,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
>
>         assert_object_held(obj);
>
> +       GEM_BUG_ON(!vma->pages);
> +
>         err = __i915_vma_move_to_active(vma, rq);
>         if (unlikely(err))
>                 return err;
> --
> 2.33.0
>


More information about the Intel-gfx mailing list