[Intel-gfx] [PATCH] drm/i915: Replace obj->pin_global with obj->frontbuffer
Chris Wilson
chris at chris-wilson.co.uk
Fri Aug 23 17:03:44 UTC 2019
Quoting Ville Syrjälä (2019-08-23 17:48:49)
> On Fri, Aug 23, 2019 at 05:20:41PM +0100, Chris Wilson wrote:
> > obj->pin_global was originally used as a means to keep the shrinker off
> > the active scanout, but we use the vma->pin_count itself for that and
> > the obj->frontbuffer to delay shrinking active framebuffers. The other
> > role that obj->pin_global gained was for spotting display objects inside
> > GEM and working harder to keep those coherent; for which we can again
> > simply inspect obj->frontbuffer directly.
> >
> > Coming up next, we will want to manipulate the pin_global counter
> > outside of the principal locks, so would need to make pin_global atomic.
> > However, since obj->frontbuffer is already managed atomically, it makes
> > sense to use that as the primary key for display objects instead of having
> > pin_global.
>
> The difference being that pin_global was only incremented while active
> scanout was happening, but obj->frontbuffer simply indicates that the
> obj is attached to at least one framebuffer regardless of whether it's
> ever scanned out or not.
cpu_write_needs_clflush() is the main culprit there,
i915_gem_pwrite_ioctl
i915_gem_set_domain_ioctl(CPU, CPU)
which I hope we truly do not have to worry about being heavily used
on framebuffer objects.
flush_if_display() is only used on framebuffer objects
intel_user_framebuffer_dirty (DIRTYFB_IOCTL)
i915_gem_sw_finish_ioctl (which should not be used!)
and then they only actually do anything if the CPU cache is dirty from
either of the above ioctls or set-cache-level, or after rendering with
EXEC_OBJECT_WRITE into an LLC object.
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> > ---
> > .../gpu/drm/i915/display/intel_frontbuffer.c | 13 +++++--
> > drivers/gpu/drm/i915/gem/i915_gem_domain.c | 34 ++++++-------------
> > drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 +-
> > .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 --
> > drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 15 +++-----
> > drivers/gpu/drm/i915/i915_debugfs.c | 12 ++-----
> > 6 files changed, 29 insertions(+), 50 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > index 719379774fa5..43047b676b7b 100644
> > --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> > @@ -220,11 +220,18 @@ static void frontbuffer_release(struct kref *ref)
> > {
> > struct intel_frontbuffer *front =
> > container_of(ref, typeof(*front), ref);
> > + struct drm_i915_gem_object *obj = front->obj;
> > + struct i915_vma *vma;
> >
> > - front->obj->frontbuffer = NULL;
> > - spin_unlock(&to_i915(front->obj->base.dev)->fb_tracking.lock);
> > + spin_lock(&obj->vma.lock);
> > + for_each_ggtt_vma(vma, obj)
> > + vma->display_alignment = I915_GTT_PAGE_SIZE;
> > + spin_unlock(&obj->vma.lock);
> >
> > - i915_gem_object_put(front->obj);
> > + obj->frontbuffer = NULL;
> > + spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock);
> > +
> > + i915_gem_object_put(obj);
> > kfree(front);
> > }
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > index 9c58e8fac1d9..0341b3da930a 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > @@ -27,7 +27,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
> >
> > void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
> > {
> > - if (!READ_ONCE(obj->pin_global))
> > + if (!READ_ONCE(obj->frontbuffer))
> > return;
>
> So we maybe get a few more flushes now?
Looking at the call paths, a few, but realistically only on paths that
would already be flushing their framebuffer objects.
> > i915_gem_object_lock(obj);
> > @@ -422,12 +422,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> >
> > assert_object_held(obj);
> >
> > - /* Mark the global pin early so that we account for the
> > - * display coherency whilst setting up the cache domains.
> > - */
> > - obj->pin_global++;
> > -
> > - /* The display engine is not coherent with the LLC cache on gen6. As
> > + /*
> > + * The display engine is not coherent with the LLC cache on gen6. As
> > * a result, we make sure that the pinning that is about to occur is
> > * done with uncached PTEs. This is lowest common denominator for all
> > * chipsets.
> > @@ -439,12 +435,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> > ret = i915_gem_object_set_cache_level(obj,
> > HAS_WT(to_i915(obj->base.dev)) ?
> > I915_CACHE_WT : I915_CACHE_NONE);
> > - if (ret) {
> > - vma = ERR_PTR(ret);
> > - goto err_unpin_global;
> > - }
> > + if (ret)
> > + return ERR_PTR(ret);
> >
> > - /* As the user may map the buffer once pinned in the display plane
> > + /*
> > + * As the user may map the buffer once pinned in the display plane
> > * (e.g. libkms for the bootup splash), we have to ensure that we
> > * always use map_and_fenceable for all scanout buffers. However,
> > * it may simply be too big to fit into mappable, in which case
> > @@ -461,22 +456,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> > if (IS_ERR(vma))
> > vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
> > if (IS_ERR(vma))
> > - goto err_unpin_global;
> > + return vma;
> >
> > vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
> >
> > __i915_gem_object_flush_for_display(obj);
> >
> > - /* It should now be out of any other write domains, and we can update
> > + /*
> > + * It should now be out of any other write domains, and we can update
> > * the domain values for our changes.
> > */
> > obj->read_domains |= I915_GEM_DOMAIN_GTT;
> >
> > return vma;
> > -
> > -err_unpin_global:
> > - obj->pin_global--;
> > - return vma;
> > }
> >
> > static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
> > @@ -514,12 +506,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
> >
> > assert_object_held(obj);
> >
> > - if (WARN_ON(obj->pin_global == 0))
> > - return;
> > -
> > - if (--obj->pin_global == 0)
> > - vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
> > -
> > /* Bump the LRU to try and avoid premature eviction whilst flipping */
> > i915_gem_object_bump_inactive_ggtt(obj);
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > index 5efb9936e05b..d6005cad9d5c 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > @@ -406,7 +406,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
> > if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
> > return true;
> >
> > - return obj->pin_global; /* currently in use by HW, keep flushed */
> > + /* Currently in use by HW (display engine)? Keep flushed. */
> > + return READ_ONCE(obj->frontbuffer);
> > }
> >
> > static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > index ede0eb4218a8..13b9dc0e1a89 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > @@ -152,8 +152,6 @@ struct drm_i915_gem_object {
> >
> > /** Count of VMA actually bound by this object */
> > atomic_t bind_count;
> > - /** Count of how many global VMA are currently pinned for use by HW */
> > - unsigned int pin_global;
> >
> > struct {
> > struct mutex lock; /* protects the pages and their use */
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> > index edd21d14e64f..4e55cfc2b0dc 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
> > @@ -61,7 +61,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
> > if (!i915_gem_object_is_shrinkable(obj))
> > return false;
> >
> > - /* Only report true if by unbinding the object and putting its pages
> > + /*
> > + * Only report true if by unbinding the object and putting its pages
> > * we can actually make forward progress towards freeing physical
> > * pages.
> > *
> > @@ -72,16 +73,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
> > if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
> > return false;
> >
> > - /* If any vma are "permanently" pinned, it will prevent us from
> > - * reclaiming the obj->mm.pages. We only allow scanout objects to claim
> > - * a permanent pin, along with a few others like the context objects.
> > - * To simplify the scan, and to avoid walking the list of vma under the
> > - * object, we just check the count of its permanently pinned.
> > - */
> > - if (READ_ONCE(obj->pin_global))
> > - return false;
>
> Are we giving the shrinker false hope here when it comes to the actual
> frontbuffer?
Only under duress, as we have
if (!(shrink & I915_SHRINK_ACTIVE) &&
i915_gem_object_is_framebuffer(obj))
continue;
and so don't try any framebuffers until kswapd or oom.
> > - /* We can only return physical pages to the system if we can either
> > + /*
> > + * We can only return physical pages to the system if we can either
> > * discard the contents (because the user has marked them as being
> > * purgeable) or if we can move their contents out to swap.
> > */
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index e103fcba6435..b90371844689 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -77,11 +77,6 @@ static int i915_capabilities(struct seq_file *m, void *data)
> > return 0;
> > }
> >
> > -static char get_pin_flag(struct drm_i915_gem_object *obj)
> > -{
> > - return obj->pin_global ? 'p' : ' ';
> > -}
> > -
> > static char get_tiling_flag(struct drm_i915_gem_object *obj)
> > {
> > switch (i915_gem_object_get_tiling(obj)) {
> > @@ -140,9 +135,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> > struct i915_vma *vma;
> > int pin_count = 0;
> >
> > - seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s",
> > + seq_printf(m, "%pK: %c%c%c %8zdKiB %02x %02x %s%s%s",
> > &obj->base,
> > - get_pin_flag(obj),
> > get_tiling_flag(obj),
> > get_global_flag(obj),
> > get_pin_mapped_flag(obj),
> > @@ -221,8 +215,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> > seq_printf(m, " (pinned x %d)", pin_count);
> > if (obj->stolen)
> > seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
> > - if (obj->pin_global)
> > - seq_printf(m, " (global)");
> > + if (READ_ONCE(obj->frontbuffer))
> > + seq_printf(m, " (front)");
>
> A bit confusing to say it's "front" when in fact it can be any random backbuffer.
> Maybe should be just "fb" or something?
Having just spotted we already have i915_gem_object_is_framebuffer(), fb
is consistent.
-Chris
More information about the Intel-gfx
mailing list