[Intel-gfx] [PATCH 3/3] drm/i915: Use a task to cancel the userptr on invalidate_range
Chris Wilson
chris at chris-wilson.co.uk
Tue Jun 30 09:55:33 PDT 2015
Whilst discussing possible ways to trigger an invalidate_range on a
userptr with an aliased GGTT mmapping (and so cause a struct_mutex
deadlock), the conclusion is that we can, and we must, prevent any
possible deadlock by avoiding taking the mutex at all during
invalidate_range. This has numerous advantages all of which stem from
avoid the sleeping function from inside the unknown context. In
particular, it simplifies the invalidate_range because we no longer
have to juggle the spinlock/mutex and can just hold the spinlock
for the entire walk. To compensate, we have to make get_pages a bit more
complicated in order to serialise with a pending cancel_userptr worker.
As we hold the struct_mutex, we have no choice but to return EAGAIN and
hope that the worker is then flushed before we retry after reacquiring
the struct_mutex.
The important caveat is that the invalidate_range itself is no longer
synchronous. There exists a small but definite period in time in which
the old PTE remain accessible via the GPU. Note however that the
physical pages themselves are not invalidated by the mmu_notifier, just
the CPU view of the address space. The impact should be limited to a
delay in pages being flushed, rather than a possibility of writing to
the wrong pages.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: MichaĆ Winiarski <michal.winiarski at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/i915_gem_userptr.c | 142 +++++++++++++-------------------
1 file changed, 56 insertions(+), 86 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index d3213fdefafc..9056aa5b00f3 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -50,7 +50,6 @@ struct i915_mmu_notifier {
struct mmu_notifier mn;
struct rb_root objects;
struct list_head linear;
- unsigned long serial;
bool has_linear;
};
@@ -59,14 +58,16 @@ struct i915_mmu_object {
struct interval_tree_node it;
struct list_head link;
struct drm_i915_gem_object *obj;
+ struct work_struct work;
bool active;
bool is_linear;
};
-static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
+static void __cancel_userptr__worker(struct work_struct *work)
{
+ struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
+ struct drm_i915_gem_object *obj = mo->obj;
struct drm_device *dev = obj->base.dev;
- unsigned long end;
mutex_lock(&dev->struct_mutex);
/* Cancel any active worker and force us to re-evaluate gup */
@@ -89,46 +90,26 @@ static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
dev_priv->mm.interruptible = was_interruptible;
}
- end = obj->userptr.ptr + obj->base.size;
-
drm_gem_object_unreference(&obj->base);
mutex_unlock(&dev->struct_mutex);
-
- return end;
}
-static void *invalidate_range__linear(struct i915_mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+static unsigned long cancel_userptr(struct i915_mmu_object *mo)
{
- struct i915_mmu_object *mo;
- unsigned long serial;
-
-restart:
- serial = mn->serial;
- list_for_each_entry(mo, &mn->linear, link) {
- struct drm_i915_gem_object *obj;
-
- if (mo->it.last < start || mo->it.start > end)
- continue;
-
- obj = mo->obj;
-
- if (!mo->active ||
- !kref_get_unless_zero(&obj->base.refcount))
- continue;
-
- spin_unlock(&mn->lock);
-
- cancel_userptr(obj);
-
- spin_lock(&mn->lock);
- if (serial != mn->serial)
- goto restart;
- }
+ unsigned long end = mo->obj->userptr.ptr + mo->obj->base.size;
+
+ /* The mmu_object is released late when destroying the
+ * GEM object so it is entirely possible to gain a
+ * reference on an object in the process of being freed
+ * since our serialisation is via the spinlock and not
+ * the struct_mutex - and consequently use it after it
+ * is freed and then double free it.
+ */
+ if (mo->active &&
+ kref_get_unless_zero(&mo->obj->base.refcount))
+ schedule_work(&mo->work);
- return NULL;
+ return end;
}
static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
@@ -136,45 +117,32 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
unsigned long start,
unsigned long end)
{
- struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
- struct interval_tree_node *it = NULL;
- unsigned long next = start;
- unsigned long serial = 0;
-
- end--; /* interval ranges are inclusive, but invalidate range is exclusive */
- while (next < end) {
- struct drm_i915_gem_object *obj = NULL;
-
- spin_lock(&mn->lock);
- if (mn->has_linear)
- it = invalidate_range__linear(mn, mm, start, end);
- else if (serial == mn->serial)
- it = interval_tree_iter_next(it, next, end);
- else
- it = interval_tree_iter_first(&mn->objects, start, end);
- if (it != NULL) {
- struct i915_mmu_object *mo =
- container_of(it, struct i915_mmu_object, it);
-
- /* The mmu_object is released late when destroying the
- * GEM object so it is entirely possible to gain a
- * reference on an object in the process of being freed
- * since our serialisation is via the spinlock and not
- * the struct_mutex - and consequently use it after it
- * is freed and then double free it.
- */
- if (mo->active &&
- kref_get_unless_zero(&mo->obj->base.refcount))
- obj = mo->obj;
+ struct i915_mmu_notifier *mn =
+ container_of(_mn, struct i915_mmu_notifier, mn);
+ struct i915_mmu_object *mo;
+
+ /* interval ranges are inclusive, but invalidate range is exclusive */
+ end--;
- serial = mn->serial;
+ spin_lock(&mn->lock);
+ if (mn->has_linear) {
+ list_for_each_entry(mo, &mn->linear, link) {
+ if (mo->it.last < start || mo->it.start > end)
+ continue;
+
+ cancel_userptr(mo);
}
- spin_unlock(&mn->lock);
- if (obj == NULL)
- return;
+ } else {
+ struct interval_tree_node *it;
- next = cancel_userptr(obj);
+ it = interval_tree_iter_first(&mn->objects, start, end);
+ while (it) {
+ mo = container_of(it, struct i915_mmu_object, it);
+ start = cancel_userptr(mo);
+ it = interval_tree_iter_next(it, start, end);
+ }
}
+ spin_unlock(&mn->lock);
}
static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
@@ -194,7 +162,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
spin_lock_init(&mn->lock);
mn->mn.ops = &i915_gem_userptr_notifier;
mn->objects = RB_ROOT;
- mn->serial = 1;
INIT_LIST_HEAD(&mn->linear);
mn->has_linear = false;
@@ -208,12 +175,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
return mn;
}
-static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mn)
-{
- if (++mn->serial == 0)
- mn->serial = 1;
-}
-
static int
i915_mmu_notifier_add(struct drm_device *dev,
struct i915_mmu_notifier *mn,
@@ -260,10 +221,9 @@ i915_mmu_notifier_add(struct drm_device *dev,
} else
interval_tree_insert(&mo->it, &mn->objects);
- if (ret == 0) {
+ if (ret == 0)
list_add(&mo->link, &mn->linear);
- __i915_mmu_notifier_update_serial(mn);
- }
+
spin_unlock(&mn->lock);
mutex_unlock(&dev->struct_mutex);
@@ -291,7 +251,6 @@ i915_mmu_notifier_del(struct i915_mmu_notifier *mn,
mn->has_linear = i915_mmu_notifier_has_linear(mn);
else
interval_tree_remove(&mo->it, &mn->objects);
- __i915_mmu_notifier_update_serial(mn);
spin_unlock(&mn->lock);
}
@@ -358,6 +317,7 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
mo->it.start = obj->userptr.ptr;
mo->it.last = mo->it.start + obj->base.size - 1;
mo->obj = obj;
+ INIT_WORK(&mo->work, __cancel_userptr__worker);
ret = i915_mmu_notifier_add(obj->base.dev, mn, mo);
if (ret) {
@@ -546,10 +506,12 @@ err:
return ret;
}
-static void
+static int
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
bool value)
{
+ int ret = 0;
+
/* During mm_invalidate_range we need to cancel any userptr that
* overlaps the range being invalidated. Doing so requires the
* struct_mutex, and that risks recursion. In order to cause
@@ -562,12 +524,20 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
*/
#if defined(CONFIG_MMU_NOTIFIER)
if (obj->userptr.mmu_object == NULL)
- return;
+ return 0;
spin_lock(&obj->userptr.mmu_object->mn->lock);
- obj->userptr.mmu_object->active = value;
+ /* In order to serialise get_pages with an outstanding
+ * cancel_userptr, we must drop the struct_mutex and try again.
+ */
+ if (!value || !work_pending(&obj->userptr.mmu_object->work))
+ obj->userptr.mmu_object->active = value;
+ else
+ ret = -EAGAIN;
spin_unlock(&obj->userptr.mmu_object->mn->lock);
#endif
+
+ return ret;
}
static void
--
2.1.4
More information about the Intel-gfx
mailing list