[PATCH v2 2/9] drm/i915: Drop user contexts on driver remove

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Tue Jul 21 15:23:24 UTC 2020


Contexts associated with open device file descriptors, together with
their assigned address spaces, are currently closed on device file close.
When an address space is closed, its underlying DMA mappings are revoked.
If the device is removed while still open, the DMA API may then issue
a warning about DMA mappings still active for the device.  Moreover,
subsequent attempts to revoke those mappings on late close of the
device file descriptor may be treated by intel-iommu code as bugs and
result in a kernel panic.

Since user contexts are useless once the device is no longer
available, drop them unconditionally on device removal.

<4> [36.900985] ------------[ cut here ]------------
<2> [36.901005] kernel BUG at drivers/iommu/intel-iommu.c:3717!
<4> [36.901105] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
<4> [36.901117] CPU: 0 PID: 39 Comm: kworker/u8:1 Tainted: G     U  W         5.7.0-rc5-CI-CI_DRM_8485+ #1
<4> [36.901133] Hardware name: Intel Corporation Elkhart Lake Embedded Platform/ElkhartLake LPDDR4x T3 CRB, BIOS EHLSFWI1.R00.1484.A00.1911290833 11/29/2019
<4> [36.901250] Workqueue: i915 __i915_vm_release [i915]
<4> [36.901264] RIP: 0010:intel_unmap+0x1f5/0x230
<4> [36.901274] Code: 01 e8 9f bc a9 ff 85 c0 74 09 80 3d df 60 09 01 00 74 19 65 ff 0d 13 12 97 7e 0f 85 fc fe ff ff e8 82 b0 95 ff e9 f2 fe ff ff <0f> 0b e8 d4 bd a9 ff 85 c0 75 de 48 c7 c2 10 84 2c 82 be 54 00 00
<4> [36.901302] RSP: 0018:ffffc900001ebdc0 EFLAGS: 00010246
<4> [36.901313] RAX: 0000000000000000 RBX: ffff8882561dd000 RCX: 0000000000000000
<4> [36.901324] RDX: 0000000000001000 RSI: 00000000ffd9c000 RDI: ffff888274c94000
<4> [36.901336] RBP: ffff888274c940b0 R08: 0000000000000000 R09: 0000000000000001
<4> [36.901348] R10: 000000000a25d812 R11: 00000000112af2d4 R12: ffff888252c70200
<4> [36.901360] R13: 00000000ffd9c000 R14: 0000000000001000 R15: ffff8882561dd010
<4> [36.901372] FS:  0000000000000000(0000) GS:ffff888278000000(0000) knlGS:0000000000000000
<4> [36.901386] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4> [36.901396] CR2: 00007f06def54950 CR3: 0000000255844000 CR4: 0000000000340ef0
<4> [36.901408] Call Trace:
<4> [36.901418]  ? process_one_work+0x1de/0x600
<4> [36.901494]  cleanup_page_dma+0x37/0x70 [i915]
<4> [36.901573]  free_pd+0x9/0x20 [i915]
<4> [36.901644]  gen8_ppgtt_cleanup+0x59/0xc0 [i915]
<4> [36.901721]  __i915_vm_release+0x14/0x30 [i915]
<4> [36.901733]  process_one_work+0x268/0x600
<4> [36.901744]  ? __schedule+0x307/0x8d0
<4> [36.901756]  worker_thread+0x37/0x380
<4> [36.901766]  ? process_one_work+0x600/0x600
<4> [36.901775]  kthread+0x140/0x160
<4> [36.901783]  ? kthread_park+0x80/0x80
<4> [36.901792]  ret_from_fork+0x24/0x50
<4> [36.901804] Modules linked in: mei_hdcp i915 x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ax88179_178a usbnet mii mei_me mei prime_numbers intel_lpss_pci
<4> [36.901857] ---[ end trace 52d1b4d81f8d1ea7 ]---

v2: Iterate over user file list maintained by DRM layer, not over our
    list of contexts (Michał),
  - split out explicit address space removal to a separate patch,
  - refactor existing functions for enhanced readability and reduced
    code duplication,
  - move WARN on context list not empty to driver remove code path,
  - take care of adding mock files used by selftest to the DRM list of
    user files so their contexts are also removed,
  - no need to call i915_gem_driver_release__contexts() again on device
    release.

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
Cc: Michał Winiarski <michal.winiarski at intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 35 ++++++++++-----
 drivers/gpu/drm/i915/gem/i915_gem_context.h |  1 +
 drivers/gpu/drm/i915/i915_drv.h             |  3 ++
 drivers/gpu/drm/i915/i915_gem.c             | 47 +++++++++++++++++++--
 4 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d0bdb6d447ed..d5e3ba7c4097 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -988,16 +988,37 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	return err;
 }
 
+static int context_remove(struct drm_i915_file_private *file_priv,
+			  unsigned long idx)
+{
+	struct i915_gem_context *ctx;
+
+	ctx = xa_erase(&file_priv->context_xa, idx);
+	if (!ctx)
+		return -ENOENT;
+
+	context_close(ctx);
+	return 0;
+}
+
+void i915_gem_context_remove(struct drm_i915_file_private *file_priv)
+{
+	unsigned long idx;
+	void *ptr;
+
+	xa_for_each(&file_priv->context_xa, idx, ptr)
+		context_remove(file_priv, idx);
+}
+
 void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_private *i915 = file_priv->dev_priv;
 	struct i915_address_space *vm;
-	struct i915_gem_context *ctx;
 	unsigned long idx;
 
-	xa_for_each(&file_priv->context_xa, idx, ctx)
-		context_close(ctx);
+	i915_gem_context_remove(file_priv);
+
 	xa_destroy(&file_priv->context_xa);
 
 	xa_for_each(&file_priv->vm_xa, idx, vm)
@@ -2383,7 +2404,6 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_context_destroy *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_gem_context *ctx;
 
 	if (args->pad != 0)
 		return -EINVAL;
@@ -2391,12 +2411,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	if (!args->ctx_id)
 		return -ENOENT;
 
-	ctx = xa_erase(&file_priv->context_xa, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
-	context_close(ctx);
-	return 0;
+	return context_remove(file_priv, args->ctx_id);
 }
 
 static int get_sseu(struct i915_gem_context *ctx,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index a133f92bbedb..ef40d548a02d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -114,6 +114,7 @@ void i915_gem_driver_release__contexts(struct drm_i915_private *i915);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file);
+void i915_gem_context_remove(struct drm_i915_file_private *file);
 void i915_gem_context_close(struct drm_file *file);
 
 void i915_gem_context_release(struct kref *ctx_ref);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e3de3f2ad7ae..c44caaa7e2b6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1188,6 +1188,9 @@ struct drm_i915_private {
 	struct intel_gt gt;
 
 	struct {
+		/* for serialization of GEM postclose against GEM remove */
+		struct mutex fpriv_mutex;
+
 		struct i915_gem_contexts {
 			spinlock_t lock; /* locks list */
 			struct list_head list;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a79fcd48d950..ec7e13b41556 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1118,6 +1118,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		mkwrite_device_info(dev_priv)->page_sizes =
 			I915_GTT_PAGE_SIZE_4K;
 
+	mutex_init(&dev_priv->gem.fpriv_mutex);
+
 	ret = i915_gem_init_userptr(dev_priv);
 	if (ret)
 		return ret;
@@ -1196,11 +1198,45 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
 	i915_gem_driver_unregister__shrinker(i915);
 }
 
+static void i915_gem_remove_user_resources(struct drm_i915_private *i915)
+{
+	struct drm_i915_file_private *fpriv;
+
+	mutex_lock(&i915->gem.fpriv_mutex);
+	do {
+		struct i915_gem_context *ctx;
+
+		fpriv = NULL;
+
+		/* find a file from contexts list */
+		spin_lock(&i915->gem.contexts.lock);
+		list_for_each_entry(ctx, &i915->gem.contexts.list, link) {
+			if (i915_gem_context_is_closed(ctx) ||
+			    IS_ERR_OR_NULL(ctx->file_priv))
+				continue;
+
+			fpriv = ctx->file_priv;
+			break;
+		}
+		spin_unlock(&i915->gem.contexts.lock);
+
+		if (fpriv)
+			i915_gem_context_remove(fpriv);
+	} while (fpriv);
+	mutex_unlock(&i915->gem.fpriv_mutex);
+
+	i915_gem_driver_release__contexts(i915);
+	drm_WARN_ON(&i915->drm, !list_empty(&i915->gem.contexts.list));
+}
+
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 {
 	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
 
 	i915_gem_suspend_late(dev_priv);
+
+	i915_gem_remove_user_resources(dev_priv);
+
 	intel_gt_driver_remove(&dev_priv->gt);
 	dev_priv->uabi_engines = RB_ROOT;
 
@@ -1212,8 +1248,6 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 
 void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 {
-	i915_gem_driver_release__contexts(dev_priv);
-
 	intel_gt_driver_release(&dev_priv->gt);
 
 	intel_wa_list_free(&dev_priv->gt_wa_list);
@@ -1221,9 +1255,9 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
 	i915_gem_cleanup_userptr(dev_priv);
 
-	i915_gem_drain_freed_objects(dev_priv);
+	mutex_destroy(&dev_priv->gem.fpriv_mutex);
 
-	drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list));
+	i915_gem_drain_freed_objects(dev_priv);
 }
 
 static void i915_gem_init__mm(struct drm_i915_private *i915)
@@ -1304,8 +1338,11 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
 void i915_gem_postclose(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_request *request;
 
+	mutex_lock(&i915->gem.fpriv_mutex);
+
 	i915_gem_context_close(file);
 
 	/* Clean up our request list when the client is going away, so that
@@ -1318,6 +1355,8 @@ void i915_gem_postclose(struct drm_device *dev, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 
 	kfree_rcu(file_priv, rcu);
+
+	mutex_unlock(&i915->gem.fpriv_mutex);
 }
 
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
-- 
2.21.1



More information about the Intel-gfx-trybot mailing list