[Intel-gfx] [PATCH] drm/i915: Clean up associated VMAs on context destruction

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Sep 11 07:31:33 PDT 2015


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Prevent leaking VMAs and PPGTT VMs when objects are imported
via flink.

Scenario is that any VMAs created by the importer will be left
dangling after the importer exits, or destroys the PPGTT context
with which they are associated.

This is caused by object destruction not running when the
importer closes the buffer object handle due the reference held
by the exporter. This also leaks the VM since the VMA has a
reference on it.

In practice these leaks can be observed by stopping and starting
the X server on a kernel with fbcon compiled in. Every time
X server exits another VMA will be leaked against the fbcon's
frame buffer object.

Also on systems where flink buffer sharing is used extensively,
like Android, this leak has even more serious consequences.

This version is takes a general approach from the  earlier work
by Rafael Barabalho (drm/i915: Clean-up PPGTT on context
destruction) and tries to incorporate the subsequent discussion
between Chris Wilson and Daniel Vetter.

On context destruction a VM is marked as closed and a worker
thread scheduled to unbind all inactive VMAs for this VM. At
the same time, active VMAs retired on this closed VM are
unbound immediately.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Testcase: igt/gem_ppgtt.c
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Rafael Barbalho <rafael.barbalho at intel.com>
Cc: Michel Thierry <michel.thierry at intel.com>
---

Ok another attempt guys. Does it have any merrit as a stop gap
solution until that full GEM VMA rewrite by Chris lands?
---
 drivers/gpu/drm/i915/i915_gem.c         |  8 +++--
 drivers/gpu/drm/i915/i915_gem_context.c | 56 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c     |  3 ++
 drivers/gpu/drm/i915/i915_gem_gtt.h     |  5 +++
 4 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 41263cd4170c..1dc005758a96 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2372,7 +2372,7 @@ i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
 static void
 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
 {
-	struct i915_vma *vma;
+	struct i915_vma *vma, *next;
 
 	RQ_BUG_ON(obj->last_read_req[ring] == NULL);
 	RQ_BUG_ON(!(obj->active & (1 << ring)));
@@ -2394,9 +2394,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
 	list_move_tail(&obj->global_list,
 		       &to_i915(obj->base.dev)->mm.bound_list);
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
 		if (!list_empty(&vma->mm_list))
 			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
+
+		/* Unbind VMAs from contexts closed in the interim. */
+		if (vma->vm->closed)
+			WARN_ON(i915_vma_unbind(vma));
 	}
 
 	i915_gem_request_assign(&obj->last_fenced_req, NULL);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 74aa0c9929ba..6628b8fe1976 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -133,6 +133,55 @@ static int get_context_size(struct drm_device *dev)
 	return ret;
 }
 
+static void
+i915_gem_context_cleanup_worker(struct work_struct *work)
+{
+	struct i915_hw_ppgtt *ppgtt = container_of(work, typeof(*ppgtt),
+						   cleanup_work);
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_vma *vma, *next;
+
+	mutex_lock(&dev->struct_mutex);
+
+	list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
+				 mm_list) {
+		bool was_interruptible;
+
+		was_interruptible = dev_priv->mm.interruptible;
+		dev_priv->mm.interruptible = false;
+		WARN_ON(i915_vma_unbind(vma));
+
+		dev_priv->mm.interruptible = was_interruptible;
+	}
+
+	i915_ppgtt_put(ppgtt);
+
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static void i915_gem_context_clean(struct intel_context *ctx)
+{
+	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
+
+	if (!ppgtt)
+		return;
+
+	/*
+	 * Signal that whis context, or better say the underlying VM is
+	 * getting closed. Since the VM is not expected to outlive the context
+	 * this is safe to do.
+	 */
+	ppgtt->base.closed = true;
+
+	/*
+	 * Unbind all inactive VMAs for this VM, but do it asynchronously.
+	 */
+	INIT_WORK(&ppgtt->cleanup_work, i915_gem_context_cleanup_worker);
+	i915_ppgtt_get(ppgtt);
+	schedule_work(&ppgtt->cleanup_work);
+}
+
 void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct intel_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -142,6 +191,13 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	if (i915.enable_execlists)
 		intel_lr_context_free(ctx);
 
+	/*
+	 * This context is going away and we need to remove all VMAs still
+	 * around. This is to handle imported shared objects for which
+	 * destructor did not run when their handles were closed.
+	 */
+	i915_gem_context_clean(ctx);
+
 	i915_ppgtt_put(ctx->ppgtt);
 
 	if (ctx->legacy_hw_ctx.rcs_state)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 87862813cfde..b8e54ce579a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3178,6 +3178,9 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
 		return ERR_PTR(-EINVAL);
 
+	if (WARN_ON(vm->closed))
+		return ERR_PTR(-EINVAL);
+
 	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 82750073d5b3..aa079ee67c16 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -275,6 +275,9 @@ struct i915_address_space {
 	u64 start;		/* Start offset always 0 for dri2 */
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 
+	/** Address space is being destroyed? */
+	bool closed;
+
 	struct i915_page_scratch *scratch_page;
 	struct i915_page_table *scratch_pt;
 	struct i915_page_directory *scratch_pd;
@@ -371,6 +374,8 @@ struct i915_hw_ppgtt {
 
 	struct drm_i915_file_private *file_priv;
 
+	struct work_struct cleanup_work;
+
 	gen6_pte_t __iomem *pd_addr;
 
 	int (*enable)(struct i915_hw_ppgtt *ppgtt);
-- 
2.5.1



More information about the Intel-gfx mailing list