[Intel-gfx] [PATCH 17/29] drm/i915: plumb VM into bind/unbind code
Daniel Vetter
daniel at ffwll.ch
Tue Aug 6 20:29:47 CEST 2013
On Wed, Jul 31, 2013 at 05:00:10PM -0700, Ben Widawsky wrote:
> As alluded to in several patches, and it will be reiterated later... A
> VMA is an abstraction for a GEM BO bound into an address space.
> Therefore it stands to reason, that the existing bind, and unbind are
> the ones which will be the most impacted. This patch implements this,
> and updates all callers which weren't already updated in the series
> (because it was too messy).
>
> This patch represents the bulk of an earlier, larger patch. I've pulled
> out a bunch of things by the request of Daniel. The history is preserved
> for posterity with the email convention of ">" One big change from the
> original patch aside from a bunch of cropping is I've created an
> i915_vma_unbind() function. That is because we always have the VMA
> anyway, and doing an extra lookup is useful. There is a caveat, we
> retain an i915_gem_object_ggtt_unbind, for the global cases which might
> not talk in VMAs.
>
> > drm/i915: plumb VM into object operations
> >
> > This patch was formerly known as:
> > "drm/i915: Create VMAs (part 3) - plumbing"
> >
> > This patch adds a VM argument, bind/unbind, and the object
> > offset/size/color getters/setters. It preserves the old ggtt helper
> > functions because things still need, and will continue to need them.
> >
> > Some code will still need to be ported over after this.
> >
> > v2: Fix purge to pick an object and unbind all vmas
> > This was doable because of the global bound list change.
> >
> > v3: With the commit to actually pin/unpin pages in place, there is no
> > longer a need to check if unbind succeeded before calling put_pages().
> > Make put_pages only BUG() after checking pin count.
> >
> > v4: Rebased on top of the new hangcheck work by Mika
> > plumbed eb_destroy also
> > Many checkpatch related fixes
> >
> > v5: Very large rebase
> >
> > v6:
> > Change BUG_ON to WARN_ON (Daniel)
> > Rename vm to ggtt in preallocate stolen, since it is always ggtt when
> > dealing with stolen memory. (Daniel)
> > list_for_each will short-circuit already (Daniel)
> > remove superflous space (Daniel)
> > Use per object list of vmas (Daniel)
> > Make obj_bound_any() use obj_bound for each vm (Ben)
> > s/bind_to_gtt/bind_to_vm/ (Ben)
> >
> > Fixed up the inactive shrinker. As Daniel noticed the code could
> > potentially count the same object multiple times. While it's not
> > possible in the current case, since 1 object can only ever be bound into
> > 1 address space thus far - we may as well try to get something more
> > future proof in place now. With a prep patch before this to switch over
> > to using the bound list + inactive check, we're now able to carry that
> > forward for every address space an object is bound into.
>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
A bunch of comments below. Overall that patch is imo really easy to review
and the comments can all be addressed in follow-up patches (if at all).
But I think I've spotted a leak in object_pin.
-Daniel
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
> drivers/gpu/drm/i915/i915_drv.h | 3 +-
> drivers/gpu/drm/i915/i915_gem.c | 134 +++++++++++++++++++----------
> drivers/gpu/drm/i915/i915_gem_evict.c | 4 +-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
> drivers/gpu/drm/i915/i915_gem_tiling.c | 9 +-
> drivers/gpu/drm/i915/i915_trace.h | 37 ++++----
> 7 files changed, 120 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d6154cb..6d5ca85bd 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1796,7 +1796,7 @@ i915_drop_caches_set(void *data, u64 val)
> mm_list) {
> if (obj->pin_count)
> continue;
> - ret = i915_gem_object_unbind(obj);
> + ret = i915_gem_object_ggtt_unbind(obj);
> if (ret)
> goto unlock;
> }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dbfffb2..0610588 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1700,7 +1700,8 @@ int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
> bool map_and_fenceable,
> bool nonblocking);
> void i915_gem_object_unpin(struct drm_i915_gem_object *obj);
> -int __must_check i915_gem_object_unbind(struct drm_i915_gem_object *obj);
> +int __must_check i915_vma_unbind(struct i915_vma *vma);
> +int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj);
> int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
> void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
> void i915_gem_lastclose(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4b669e8..0cb36c2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -38,10 +38,12 @@
>
> static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
> static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
> -static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
> - unsigned alignment,
> - bool map_and_fenceable,
> - bool nonblocking);
> +static __must_check int
> +i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> + struct i915_address_space *vm,
> + unsigned alignment,
> + bool map_and_fenceable,
> + bool nonblocking);
> static int i915_gem_phys_pwrite(struct drm_device *dev,
> struct drm_i915_gem_object *obj,
> struct drm_i915_gem_pwrite *args,
> @@ -1678,7 +1680,6 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
> bool purgeable_only)
> {
> struct drm_i915_gem_object *obj, *next;
> - struct i915_address_space *vm = &dev_priv->gtt.base;
> long count = 0;
>
> list_for_each_entry_safe(obj, next,
> @@ -1692,13 +1693,16 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
> }
> }
>
> - list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list) {
> + list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list,
> + global_list) {
> + struct i915_vma *vma, *v;
>
> if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
> continue;
>
> - if (i915_gem_object_unbind(obj))
> - continue;
> + list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
> + if (i915_vma_unbind(vma))
> + break;
>
> if (!i915_gem_object_put_pages(obj)) {
> count += obj->base.size >> PAGE_SHIFT;
> @@ -2591,17 +2595,13 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
> old_write_domain);
> }
>
> -/**
> - * Unbinds an object from the GTT aperture.
> - */
> -int
> -i915_gem_object_unbind(struct drm_i915_gem_object *obj)
> +int i915_vma_unbind(struct i915_vma *vma)
> {
> + struct drm_i915_gem_object *obj = vma->obj;
> drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
> - struct i915_vma *vma;
> int ret;
>
> - if (!i915_gem_obj_ggtt_bound(obj))
> + if (list_empty(&vma->vma_link))
> return 0;
This smells like something which should never be the case. Add a WARN_ON
in a follow-up patch?
>
> if (obj->pin_count)
> @@ -2624,7 +2624,7 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
> if (ret)
> return ret;
>
> - trace_i915_gem_object_unbind(obj);
> + trace_i915_vma_unbind(vma);
>
> if (obj->has_global_gtt_mapping)
> i915_gem_gtt_unbind_object(obj);
> @@ -2639,7 +2639,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
> /* Avoid an unnecessary call to unbind on rebind. */
> obj->map_and_fenceable = true;
>
> - vma = i915_gem_obj_to_vma(obj, &dev_priv->gtt.base);
> i915_gem_vma_destroy(vma);
>
> /* Since the unbound list is global, only move to that list if
> @@ -2652,6 +2651,26 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
> return 0;
> }
>
> +/**
> + * Unbinds an object from the global GTT aperture.
> + */
> +int
> +i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
> +{
> + struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> + struct i915_address_space *ggtt = &dev_priv->gtt.base;
> +
> + if (!i915_gem_obj_ggtt_bound(obj));
> + return 0;
> +
> + if (obj->pin_count)
> + return -EBUSY;
> +
> + BUG_ON(obj->pages == NULL);
> +
> + return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt));
> +}
> +
> int i915_gpu_idle(struct drm_device *dev)
> {
> drm_i915_private_t *dev_priv = dev->dev_private;
> @@ -3069,18 +3088,18 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
> * Finds free space in the GTT aperture and binds the object there.
> */
> static int
> -i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
> - unsigned alignment,
> - bool map_and_fenceable,
> - bool nonblocking)
> +i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> + struct i915_address_space *vm,
> + unsigned alignment,
> + bool map_and_fenceable,
> + bool nonblocking)
> {
> struct drm_device *dev = obj->base.dev;
> drm_i915_private_t *dev_priv = dev->dev_private;
> - struct i915_address_space *vm = &dev_priv->gtt.base;
> u32 size, fence_size, fence_alignment, unfenced_alignment;
> bool mappable, fenceable;
> - size_t gtt_max = map_and_fenceable ?
> - dev_priv->gtt.mappable_end : dev_priv->gtt.base.total;
> + size_t gtt_max =
> + map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total;
> struct i915_vma *vma;
> int ret;
>
> @@ -3125,15 +3144,18 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
>
> i915_gem_object_pin_pages(obj);
>
> - vma = i915_gem_vma_create(obj, &dev_priv->gtt.base);
> + /* FIXME: For now we only ever use 1 VMA per object */
> + BUG_ON(!i915_is_ggtt(vm));
I'll change this to a WARN_ON for now ... My upfront apologies for the
rebase conflict.
> + WARN_ON(!list_empty(&obj->vma_list));
> +
> + vma = i915_gem_vma_create(obj, vm);
> if (IS_ERR(vma)) {
> i915_gem_object_unpin_pages(obj);
> return PTR_ERR(vma);
> }
>
> search_free:
> - ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
> - &vma->node,
> + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> size, alignment,
> obj->cache_level, 0, gtt_max);
> if (ret) {
> @@ -3158,18 +3180,25 @@ search_free:
>
> list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
> list_add_tail(&obj->mm_list, &vm->inactive_list);
> - list_add(&vma->vma_link, &obj->vma_list);
> +
> + /* Keep GGTT vmas first to make debug easier */
> + if (i915_is_ggtt(vm))
> + list_add(&vma->vma_link, &obj->vma_list);
> + else
> + list_add_tail(&vma->vma_link, &obj->vma_list);
>
> fenceable =
> + i915_is_ggtt(vm) &&
> i915_gem_obj_ggtt_size(obj) == fence_size &&
> (i915_gem_obj_ggtt_offset(obj) & (fence_alignment - 1)) == 0;
>
> - mappable = i915_gem_obj_ggtt_offset(obj) + obj->base.size <=
> - dev_priv->gtt.mappable_end;
> + mappable =
> + i915_is_ggtt(vm) &&
> + vma->node.start + obj->base.size <= dev_priv->gtt.mappable_end;
>
> obj->map_and_fenceable = mappable && fenceable;
>
> - trace_i915_gem_object_bind(obj, map_and_fenceable);
> + trace_i915_vma_bind(vma, map_and_fenceable);
> i915_gem_verify_gtt(dev);
> return 0;
>
> @@ -3335,7 +3364,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>
> list_for_each_entry(vma, &obj->vma_list, vma_link) {
> if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
> - ret = i915_gem_object_unbind(obj);
> + ret = i915_vma_unbind(vma);
> if (ret)
> return ret;
>
> @@ -3643,33 +3672,39 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
> bool map_and_fenceable,
> bool nonblocking)
> {
> + struct i915_vma *vma;
> int ret;
>
> if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
> return -EBUSY;
>
> - if (i915_gem_obj_ggtt_bound(obj)) {
> - if ((alignment && i915_gem_obj_ggtt_offset(obj) & (alignment - 1)) ||
> + WARN_ON(map_and_fenceable && !i915_is_ggtt(vm));
> +
> + vma = i915_gem_obj_to_vma(obj, vm);
> +
> + if (vma) {
> + if ((alignment &&
> + vma->node.start & (alignment - 1)) ||
> (map_and_fenceable && !obj->map_and_fenceable)) {
> WARN(obj->pin_count,
> "bo is already pinned with incorrect alignment:"
> " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
> " obj->map_and_fenceable=%d\n",
> - i915_gem_obj_ggtt_offset(obj), alignment,
> + i915_gem_obj_offset(obj, vm), alignment,
> map_and_fenceable,
> obj->map_and_fenceable);
> - ret = i915_gem_object_unbind(obj);
> + ret = i915_vma_unbind(vma);
If I read this correctly then we wont' call i915_gem_vma_destroy anymore
and so will leak the vma. Is that correct? If so I guess a new slab for
vmas could be handy to easily detect such bugs.
> if (ret)
> return ret;
> }
> }
>
> - if (!i915_gem_obj_ggtt_bound(obj)) {
> + if (!i915_gem_obj_bound(obj, vm)) {
> struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
>
> - ret = i915_gem_object_bind_to_gtt(obj, alignment,
> - map_and_fenceable,
> - nonblocking);
> + ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
> + map_and_fenceable,
> + nonblocking);
> if (ret)
> return ret;
>
> @@ -3961,6 +3996,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
> struct drm_device *dev = obj->base.dev;
> drm_i915_private_t *dev_priv = dev->dev_private;
> + struct i915_vma *vma, *next;
>
> trace_i915_gem_object_destroy(obj);
>
> @@ -3968,15 +4004,21 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> i915_gem_detach_phys_object(dev, obj);
>
> obj->pin_count = 0;
> - if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
> - bool was_interruptible;
> + /* NB: 0 or 1 elements */
> + WARN_ON(!list_empty(&obj->vma_list) &&
> + !list_is_singular(&obj->vma_list));
> + list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
> + int ret = i915_vma_unbind(vma);
> + if (WARN_ON(ret == -ERESTARTSYS)) {
> + bool was_interruptible;
>
> - was_interruptible = dev_priv->mm.interruptible;
> - dev_priv->mm.interruptible = false;
> + was_interruptible = dev_priv->mm.interruptible;
> + dev_priv->mm.interruptible = false;
>
> - WARN_ON(i915_gem_object_unbind(obj));
> + WARN_ON(i915_vma_unbind(vma));
>
> - dev_priv->mm.interruptible = was_interruptible;
> + dev_priv->mm.interruptible = was_interruptible;
> + }
Hm, I think we've released sufficient amounts of kernels and never hit the
above WARN_ON. Can I volunteer you for a follow-up patch to rip out the
code here (but keep the WARN_ON ofc)?
> }
>
> /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 33d85a4..9205a41 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -147,7 +147,7 @@ found:
> struct drm_i915_gem_object,
> exec_list);
> if (ret == 0)
> - ret = i915_gem_object_unbind(obj);
> + ret = i915_gem_object_ggtt_unbind(obj);
>
> list_del_init(&obj->exec_list);
> drm_gem_object_unreference(&obj->base);
> @@ -185,7 +185,7 @@ i915_gem_evict_everything(struct drm_device *dev)
> /* Having flushed everything, unbind() should never raise an error */
> list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list)
> if (obj->pin_count == 0)
> - WARN_ON(i915_gem_object_unbind(obj));
> + WARN_ON(i915_gem_object_ggtt_unbind(obj));
>
> return 0;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index a23b80f..5e68f1e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -556,7 +556,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
> if ((entry->alignment &&
> obj_offset & (entry->alignment - 1)) ||
> (need_mappable && !obj->map_and_fenceable))
> - ret = i915_gem_object_unbind(obj);
> + ret = i915_vma_unbind(i915_gem_obj_to_vma(obj, vm));
> else
> ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs);
> if (ret)
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 92a8d27..032e9ef 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -360,17 +360,18 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>
> obj->map_and_fenceable =
> !i915_gem_obj_ggtt_bound(obj) ||
> - (i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end &&
> + (i915_gem_obj_ggtt_offset(obj) +
> + obj->base.size <= dev_priv->gtt.mappable_end &&
> i915_gem_object_fence_ok(obj, args->tiling_mode));
>
> /* Rebind if we need a change of alignment */
> if (!obj->map_and_fenceable) {
> - u32 unfenced_alignment =
> + u32 unfenced_align =
> i915_gem_get_gtt_alignment(dev, obj->base.size,
> args->tiling_mode,
> false);
> - if (i915_gem_obj_ggtt_offset(obj) & (unfenced_alignment - 1))
> - ret = i915_gem_object_unbind(obj);
> + if (i915_gem_obj_ggtt_offset(obj) & (unfenced_align - 1))
> + ret = i915_gem_object_ggtt_unbind(obj);
> }
>
> if (ret == 0) {
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 7d283b5..931e2c6 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -33,47 +33,52 @@ TRACE_EVENT(i915_gem_object_create,
> TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
> );
>
> -TRACE_EVENT(i915_gem_object_bind,
> - TP_PROTO(struct drm_i915_gem_object *obj, bool mappable),
> - TP_ARGS(obj, mappable),
> +TRACE_EVENT(i915_vma_bind,
> + TP_PROTO(struct i915_vma *vma, bool mappable),
> + TP_ARGS(vma, mappable),
>
> TP_STRUCT__entry(
> __field(struct drm_i915_gem_object *, obj)
> + __field(struct i915_address_space *, vm)
> __field(u32, offset)
> __field(u32, size)
> __field(bool, mappable)
> ),
>
> TP_fast_assign(
> - __entry->obj = obj;
> - __entry->offset = i915_gem_obj_ggtt_offset(obj);
> - __entry->size = i915_gem_obj_ggtt_size(obj);
> + __entry->obj = vma->obj;
> + __entry->vm = vma->vm;
> + __entry->offset = vma->node.start;
> + __entry->size = vma->node.size;
> __entry->mappable = mappable;
> ),
>
> - TP_printk("obj=%p, offset=%08x size=%x%s",
> + TP_printk("obj=%p, offset=%08x size=%x%s vm=%p",
> __entry->obj, __entry->offset, __entry->size,
> - __entry->mappable ? ", mappable" : "")
> + __entry->mappable ? ", mappable" : "",
> + __entry->vm)
> );
>
> -TRACE_EVENT(i915_gem_object_unbind,
> - TP_PROTO(struct drm_i915_gem_object *obj),
> - TP_ARGS(obj),
> +TRACE_EVENT(i915_vma_unbind,
> + TP_PROTO(struct i915_vma *vma),
> + TP_ARGS(vma),
>
> TP_STRUCT__entry(
> __field(struct drm_i915_gem_object *, obj)
> + __field(struct i915_address_space *, vm)
> __field(u32, offset)
> __field(u32, size)
> ),
>
> TP_fast_assign(
> - __entry->obj = obj;
> - __entry->offset = i915_gem_obj_ggtt_offset(obj);
> - __entry->size = i915_gem_obj_ggtt_size(obj);
> + __entry->obj = vma->obj;
> + __entry->vm = vma->vm;
> + __entry->offset = vma->node.start;
> + __entry->size = vma->node.size;
> ),
>
> - TP_printk("obj=%p, offset=%08x size=%x",
> - __entry->obj, __entry->offset, __entry->size)
> + TP_printk("obj=%p, offset=%08x size=%x vm=%p",
> + __entry->obj, __entry->offset, __entry->size, __entry->vm)
> );
>
> TRACE_EVENT(i915_gem_object_change_domain,
> --
> 1.8.3.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
More information about the Intel-gfx
mailing list