[Intel-gfx] [PATCH 1/3] drm/i915: Move user fault tracking to a separate list
Daniel Vetter
daniel at ffwll.ch
Thu Oct 13 14:40:40 UTC 2016
On Tue, Oct 11, 2016 at 03:37:57PM +0100, Chris Wilson wrote:
> We want to decouple RPM and struct_mutex, but currently RPM has to walk
> the list of bound objects and remove userspace mmappings before we
> suspend (otherwise userspace may continue to access the GTT whilst it is
> powered down). This currently requires the struct_mutex to walk the
> bound_list, but if we move that tracking to a separate list and lock we
> can take the first step towards removing the struct_mutex.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 4 ++--
> drivers/gpu/drm/i915/i915_drv.h | 20 +++++++++++-------
> drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++++++++++--------
> drivers/gpu/drm/i915/i915_gem_evict.c | 2 +-
> 4 files changed, 46 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 358663e833d6..d4ecc5283c2a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -186,11 +186,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> }
> if (obj->stolen)
> seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
> - if (obj->pin_display || obj->fault_mappable) {
> + if (obj->pin_display || !list_empty(&obj->userfault_link)) {
> char s[3], *t = s;
> if (obj->pin_display)
> *t++ = 'p';
> - if (obj->fault_mappable)
> + if (!list_empty(&obj->userfault_link))
> *t++ = 'f';
> *t = '\0';
> seq_printf(m, " (%s mappable)", s);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bf397b643cc0..72b3126c6c74 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1359,6 +1359,14 @@ struct i915_gem_mm {
> */
> struct list_head unbound_list;
>
> + /** Protects access to the userfault_list */
> + spinlock_t userfault_lock;
> +
> + /** List of all objects in gtt_space, currently mmapped by userspace.
> + * All objects within this list must also be on bound_list.
> + */
> + struct list_head userfault_list;
> +
> /** Usable portion of the GTT for GEM */
> unsigned long stolen_base; /* limited to low memory (32-bit) */
>
> @@ -2215,6 +2223,11 @@ struct drm_i915_gem_object {
> struct drm_mm_node *stolen;
> struct list_head global_list;
>
> + /**
> + * Whether the object is currently in the GGTT mmap.
> + */
> + struct list_head userfault_link;
> +
> /** Used in execbuf to temporarily hold a ref */
> struct list_head obj_exec_link;
>
> @@ -2242,13 +2255,6 @@ struct drm_i915_gem_object {
> */
> unsigned int madv:2;
>
> - /**
> - * Whether the current gtt mapping needs to be mappable (and isn't just
> - * mappable by accident). Track pin and fault separate for a more
> - * accurate mappable working set.
> - */
> - unsigned int fault_mappable:1;
> -
> /*
> * Is the object to be mapped as read-only to the GPU
> * Only honoured if hardware has relevant pte bit
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fdd496e6c081..91910ffe0964 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1850,7 +1850,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
> if (ret)
> goto err_unpin;
>
> - obj->fault_mappable = true;
> + spin_lock(&dev_priv->mm.userfault_lock);
> + list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
> + spin_unlock(&dev_priv->mm.userfault_lock);
I think we need to add the object to the list _before_ we start punching
in new PTEs. At least if we want to decouple the locking and RPM
refcounting a lot more (which I think we should; at the moment we get the
ordering for free simply because struct_mutex acts as a BKL). But as the
code stands right now it's ok, so just a bikeshed.
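Roughly what I have in mind (a sketch only; insert_ptes() is a made-up
placeholder for the PTE insertion the fault handler already does):

	/* Publish the object on the userfault list *before* inserting
	 * any PTEs, so a concurrent runtime suspend walking
	 * userfault_list is guaranteed to see this object and revoke
	 * whatever we manage to map.
	 */
	spin_lock(&dev_priv->mm.userfault_lock);
	list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
	spin_unlock(&dev_priv->mm.userfault_lock);

	ret = insert_ptes(area, vma); /* hypothetical placeholder */
	if (ret) {
		/* Unpublish on failure so release_all_mmaps() does
		 * not see a stale entry.
		 */
		spin_lock(&dev_priv->mm.userfault_lock);
		list_del_init(&obj->userfault_link);
		spin_unlock(&dev_priv->mm.userfault_lock);
		goto err_unpin;
	}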
> +
> err_unpin:
> __i915_vma_unpin(vma);
> err_unlock:
> @@ -1918,36 +1921,52 @@ err:
> void
> i915_gem_release_mmap(struct drm_i915_gem_object *obj)
> {
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + bool zap = false;
> +
> /* Serialisation between user GTT access and our code depends upon
> * revoking the CPU's PTE whilst the mutex is held. The next user
> * pagefault then has to wait until we release the mutex.
> + *
> + * Note that RPM complicates somewhat by adding an additional
> + * requirement that operations to the GGTT be made holding the RPM
> + * wakeref.
> */
> lockdep_assert_held(&obj->base.dev->struct_mutex);
>
> - if (!obj->fault_mappable)
> + spin_lock(&i915->mm.userfault_lock);
> + if (!list_empty(&obj->userfault_link)) {
> + list_del_init(&obj->userfault_link);
> + zap = true;
> + }
> + spin_unlock(&i915->mm.userfault_lock);
> + if (!zap)
> return;
>
> drm_vma_node_unmap(&obj->base.vma_node,
> obj->base.dev->anon_inode->i_mapping);
>
> /* Ensure that the CPU's PTE are revoked and there are not outstanding
> - * memory transactions from userspace before we return. The TLB
> - * flushing implied above by changing the PTE above *should* be
> + * memory transactions from userspace before we return. The TLB
> + * flushing implied above by changing the PTE above *should* be
> * sufficient, an extra barrier here just provides us with a bit
> * of paranoid documentation about our requirement to serialise
> - * memory writes before touching registers / GSM.
> + * memory writes before touching registers / GSM.
> */
> wmb();
> -
> - obj->fault_mappable = false;
> }
>
> void
> i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_gem_object *obj;
> + struct drm_i915_gem_object *obj, *on;
Can we have a /* protected by dev_priv->mm.userfault_lock */ comment on
this one? Since I had to think about it for a while ;-)
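I.e. something like this (sketch only, reusing the names from the patch;
just the annotation, no functional change):

	struct list_head userfault_list; /* private once spliced; the
					  * global mm.userfault_list is
					  * protected by mm.userfault_lock
					  */

	spin_lock(&dev_priv->mm.userfault_lock);
	list_replace_init(&dev_priv->mm.userfault_list, &userfault_list);
	spin_unlock(&dev_priv->mm.userfault_lock);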
> + struct list_head userfault_list;
> +
> + spin_lock(&dev_priv->mm.userfault_lock);
> + list_replace_init(&dev_priv->mm.userfault_list, &userfault_list);
> + spin_unlock(&dev_priv->mm.userfault_lock);
>
> - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
> + list_for_each_entry_safe(obj, on, &userfault_list, userfault_link)
> i915_gem_release_mmap(obj);
> }
>
> @@ -4066,6 +4085,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
> int i;
>
> INIT_LIST_HEAD(&obj->global_list);
> + INIT_LIST_HEAD(&obj->userfault_link);
> for (i = 0; i < I915_NUM_ENGINES; i++)
> init_request_active(&obj->last_read[i],
> i915_gem_object_retire__read);
> @@ -4441,6 +4461,7 @@ int i915_gem_init(struct drm_device *dev)
> int ret;
>
> mutex_lock(&dev->struct_mutex);
> + spin_lock_init(&dev_priv->mm.userfault_lock);
>
> if (!i915.enable_execlists) {
> dev_priv->gt.resume = intel_legacy_submission_resume;
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 5b6f81c1dbca..ca175c3063ed 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -55,7 +55,7 @@ mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
> if (WARN_ON(!list_empty(&vma->exec_list)))
> return false;
>
> - if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
> + if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link))
Racy access of list_empty(). Where's our friend READ_ONCE again? Since
it's just an optimization there, there's no issue with the race itself,
except for diverging control flow in case gcc decides to recompute this.
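E.g. (sketch; open-coding the list_empty() check so the single load can
be wrapped):

	struct list_head *link = &vma->obj->userfault_link;

	/* One snapshot of the unlocked list state; READ_ONCE() keeps
	 * gcc from reloading it and branching on two different values.
	 */
	if (flags & PIN_NONFAULT && READ_ONCE(link->next) != link)
		return false;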
I think that's the only one I spotted, with that addressed:
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
> return false;
>
> list_add(&vma->exec_list, unwind);
> --
> 2.9.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch