[Intel-gfx] [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
Chris Wilson
chris at chris-wilson.co.uk
Wed Jan 24 10:37:48 UTC 2018
Quoting Tvrtko Ursulin (2018-01-24 10:32:16)
>
> On 19/01/2018 15:23, Chris Wilson wrote:
> > When we finally decide the gpu is idle, that is a good time to shrink
> > our kmem_caches.
> >
> > v3: Defer until an rcu grace period after we idle.
> >
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_gem.c | 65 +++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 65 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 7f0684ccc724..6a8fbcae835b 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3341,12 +3341,59 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
> > work_pending(&i915->gt.idle_work.work));
> > }
> >
> > +static void shrink_caches(struct drm_i915_private *i915)
> > +{
> > + /*
> > + * kmem_cache_shrink() discards empty slabs and reorders partially
> > + * filled slabs to prioritise allocating from the mostly full slabs,
> > + * with the aim of reducing fragmentation.
> > + */
> > + kmem_cache_shrink(i915->priorities);
> > + kmem_cache_shrink(i915->dependencies);
> > + kmem_cache_shrink(i915->requests);
> > + kmem_cache_shrink(i915->luts);
> > + kmem_cache_shrink(i915->vmas);
> > + kmem_cache_shrink(i915->objects);
> > +}
> > +
> > +struct sleep_rcu_work {
> > + struct drm_i915_private *i915;
> > + struct rcu_head rcu;
> > + struct work_struct work;
> > + u32 epoch;
> > +};
> > +
> > +static void __sleep_work(struct work_struct *work)
> > +{
> > + struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
> > + struct drm_i915_private *i915 = s->i915;
> > + u32 epoch = s->epoch;
> > +
> > + kfree(s);
> > + if (epoch == READ_ONCE(i915->gt.epoch))
> > + shrink_caches(i915);
> > +}
> > +
> > +static void __sleep_rcu(struct rcu_head *rcu)
> > +{
> > + struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
> > + struct drm_i915_private *i915 = s->i915;
> > +
> > + if (s->epoch == READ_ONCE(i915->gt.epoch)) {
> > + INIT_WORK(&s->work, __sleep_work);
> > + queue_work(i915->wq, &s->work);
> > + } else {
> > + kfree(s);
> > + }
> > +}
> > +
> > static void
> > i915_gem_idle_work_handler(struct work_struct *work)
> > {
> > struct drm_i915_private *dev_priv =
> > container_of(work, typeof(*dev_priv), gt.idle_work.work);
> > bool rearm_hangcheck;
> > + u32 epoch = 0;
> > ktime_t end;
> >
> > if (!READ_ONCE(dev_priv->gt.awake))
> > @@ -3406,6 +3453,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
> > GEM_BUG_ON(!dev_priv->gt.awake);
> > dev_priv->gt.awake = false;
> > rearm_hangcheck = false;
> > + epoch = dev_priv->gt.epoch;
> >
> > if (INTEL_GEN(dev_priv) >= 6)
> > gen6_rps_idle(dev_priv);
> > @@ -3421,6 +3469,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
> > GEM_BUG_ON(!dev_priv->gt.awake);
> > i915_queue_hangcheck(dev_priv);
> > }
> > +
> > + /*
> > + * When we are idle, it is an opportune time to reap our caches.
> > + * However, we have many objects that utilise RCU and the ordered
> > + * i915->wq that this work is executing on. To try and flush any
> > + * pending frees now we are idle, we first wait for an RCU grace
> > + * period, and then queue a task (that will run last on the wq) to
> > + * shrink and re-optimize the caches.
> > + */
> > + if (epoch == READ_ONCE(dev_priv->gt.epoch)) {
>
> Theoretically this can be true on epoch wrap-around, when trylock
> failed. It's one in four billion busy transitions but it could be just
> worth handling it explicitly. Simplest probably to ensure gt.epoch is
> never zero when incrementing?
I was working on the principle that a u32 wraparound takes at least
100ms * 2^32, or about 326 years. :)
-Chris
More information about the Intel-gfx
mailing list