[PATCH v2 1/7] drm/i915: Drop user contexts on driver remove

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Mon Jul 6 06:29:05 UTC 2020


Contexts associated with open device file descriptors, together with
their assigned address spaces, are currently closed on device file
close.  When an address space is closed, its underlying DMA mappings are
revoked.  If the device is removed while still open, the DMA API may
then issue a warning about DMA mappings still active on the device.
Moreover, subsequent attempts to revoke those mappings on late closure
of the device file descriptor may be judged by intel-iommu code as bugs
and result in a kernel panic.

Since user contexts become useless after the device is no longer
available, drop them unconditionally on device removal.

<4> [36.900985] ------------[ cut here ]------------
<2> [36.901005] kernel BUG at drivers/iommu/intel-iommu.c:3717!
<4> [36.901105] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
<4> [36.901117] CPU: 0 PID: 39 Comm: kworker/u8:1 Tainted: G     U  W         5.7.0-rc5-CI-CI_DRM_8485+ #1
<4> [36.901133] Hardware name: Intel Corporation Elkhart Lake Embedded Platform/ElkhartLake LPDDR4x T3 CRB, BIOS EHLSFWI1.R00.1484.A00.1911290833 11/29/2019
<4> [36.901250] Workqueue: i915 __i915_vm_release [i915]
<4> [36.901264] RIP: 0010:intel_unmap+0x1f5/0x230
<4> [36.901274] Code: 01 e8 9f bc a9 ff 85 c0 74 09 80 3d df 60 09 01 00 74 19 65 ff 0d 13 12 97 7e 0f 85 fc fe ff ff e8 82 b0 95 ff e9 f2 fe ff ff <0f> 0b e8 d4 bd a9 ff 85 c0 75 de 48 c7 c2 10 84 2c 82 be 54 00 00
<4> [36.901302] RSP: 0018:ffffc900001ebdc0 EFLAGS: 00010246
<4> [36.901313] RAX: 0000000000000000 RBX: ffff8882561dd000 RCX: 0000000000000000
<4> [36.901324] RDX: 0000000000001000 RSI: 00000000ffd9c000 RDI: ffff888274c94000
<4> [36.901336] RBP: ffff888274c940b0 R08: 0000000000000000 R09: 0000000000000001
<4> [36.901348] R10: 000000000a25d812 R11: 00000000112af2d4 R12: ffff888252c70200
<4> [36.901360] R13: 00000000ffd9c000 R14: 0000000000001000 R15: ffff8882561dd010
<4> [36.901372] FS:  0000000000000000(0000) GS:ffff888278000000(0000) knlGS:0000000000000000
<4> [36.901386] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4> [36.901396] CR2: 00007f06def54950 CR3: 0000000255844000 CR4: 0000000000340ef0
<4> [36.901408] Call Trace:
<4> [36.901418]  ? process_one_work+0x1de/0x600
<4> [36.901494]  cleanup_page_dma+0x37/0x70 [i915]
<4> [36.901573]  free_pd+0x9/0x20 [i915]
<4> [36.901644]  gen8_ppgtt_cleanup+0x59/0xc0 [i915]
<4> [36.901721]  __i915_vm_release+0x14/0x30 [i915]
<4> [36.901733]  process_one_work+0x268/0x600
<4> [36.901744]  ? __schedule+0x307/0x8d0
<4> [36.901756]  worker_thread+0x37/0x380
<4> [36.901766]  ? process_one_work+0x600/0x600
<4> [36.901775]  kthread+0x140/0x160
<4> [36.901783]  ? kthread_park+0x80/0x80
<4> [36.901792]  ret_from_fork+0x24/0x50
<4> [36.901804] Modules linked in: mei_hdcp i915 x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ax88179_178a usbnet mii mei_me mei prime_numbers intel_lpss_pci
<4> [36.901857] ---[ end trace 52d1b4d81f8d1ea7 ]---

v2: Iterate over user file list maintained by DRM layer, not over our
    list of contexts (Michał),
  - split out explicit address space removal to a separate patch,
  - refactor existing functions for enhanced readability and reduced
    code duplication,
  - move WARN on context list not empty to driver remove code path,
  - take care of adding mock files used by selftest to the DRM list of
    user files so their contexts are also removed,
  - no need to call i915_gem_driver_release__contexts() again on device
    release.

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
Cc: Michał Winiarski <michal.winiarski at intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 35 +++++++++++++++------
 drivers/gpu/drm/i915/gem/i915_gem_context.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c             | 20 +++++++++---
 drivers/gpu/drm/i915/selftests/mock_drm.h   | 26 ++++++++++++++-
 4 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 41784df51e58..fcfc2cf4a11a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -988,16 +988,37 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	return err;
 }
 
+static int context_remove(struct drm_i915_file_private *file_priv,
+			  unsigned long idx)
+{
+	struct i915_gem_context *ctx;
+
+	ctx = xa_erase(&file_priv->context_xa, idx);
+	if (!ctx)
+		return -ENOENT;
+
+	context_close(ctx);
+	return 0;
+}
+
+void i915_gem_context_remove(struct drm_i915_file_private *file_priv)
+{
+	unsigned long idx;
+	void *ptr;
+
+	xa_for_each(&file_priv->context_xa, idx, ptr)
+		context_remove(file_priv, idx);
+}
+
 void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_private *i915 = file_priv->dev_priv;
 	struct i915_address_space *vm;
-	struct i915_gem_context *ctx;
 	unsigned long idx;
 
-	xa_for_each(&file_priv->context_xa, idx, ctx)
-		context_close(ctx);
+	i915_gem_context_remove(file_priv);
+
 	xa_destroy(&file_priv->context_xa);
 
 	xa_for_each(&file_priv->vm_xa, idx, vm)
@@ -2382,7 +2403,6 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_context_destroy *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_gem_context *ctx;
 
 	if (args->pad != 0)
 		return -EINVAL;
@@ -2390,12 +2410,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	if (!args->ctx_id)
 		return -ENOENT;
 
-	ctx = xa_erase(&file_priv->context_xa, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
-	context_close(ctx);
-	return 0;
+	return context_remove(file_priv, args->ctx_id);
 }
 
 static int get_sseu(struct i915_gem_context *ctx,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 3702b2fb27ab..70111bd61c00 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -114,6 +114,7 @@ void i915_gem_driver_release__contexts(struct drm_i915_private *i915);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file);
+void i915_gem_context_remove(struct drm_i915_file_private *file);
 void i915_gem_context_close(struct drm_file *file);
 
 void i915_gem_context_release(struct kref *ctx_ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9aa3066cb75d..5aab9b4b8395 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1196,11 +1196,27 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
 	i915_gem_driver_unregister__shrinker(i915);
 }
 
+static void i915_gem_remove_user_resources(struct drm_i915_private *i915)
+{
+	struct drm_file *file_priv;
+
+	mutex_lock(&i915->drm.filelist_mutex);
+	list_for_each_entry(file_priv, &i915->drm.filelist, lhead)
+		i915_gem_context_remove(file_priv->driver_priv);
+	mutex_unlock(&i915->drm.filelist_mutex);
+
+	i915_gem_driver_release__contexts(i915);
+	drm_WARN_ON(&i915->drm, !list_empty(&i915->gem.contexts.list));
+}
+
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 {
 	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
 
 	i915_gem_suspend_late(dev_priv);
+
+	i915_gem_remove_user_resources(dev_priv);
+
 	intel_gt_driver_remove(&dev_priv->gt);
 	dev_priv->uabi_engines = RB_ROOT;
 
@@ -1212,8 +1228,6 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 
 void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 {
-	i915_gem_driver_release__contexts(dev_priv);
-
 	intel_gt_driver_release(&dev_priv->gt);
 
 	intel_wa_list_free(&dev_priv->gt_wa_list);
@@ -1222,8 +1236,6 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 	i915_gem_cleanup_userptr(dev_priv);
 
 	i915_gem_drain_freed_objects(dev_priv);
-
-	drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list));
 }
 
 static void i915_gem_init__mm(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.h b/drivers/gpu/drm/i915/selftests/mock_drm.h
index 9916b6f95526..657cb3bb80b1 100644
--- a/drivers/gpu/drm/i915/selftests/mock_drm.h
+++ b/drivers/gpu/drm/i915/selftests/mock_drm.h
@@ -34,7 +34,31 @@ struct file;
 
 static inline struct file *mock_file(struct drm_i915_private *i915)
 {
-	return mock_drm_getfile(i915->drm.primary, O_RDWR);
+	struct file *file = mock_drm_getfile(i915->drm.primary, O_RDWR);
+
+	if (!IS_ERR(file)) {
+		/*
+		 * Our driver .remove() handler must be able to find the file
+		 * on the list of open user files maintained by DRM layer,
+		 * otherwise on driver unbind we may not be able to clean up
+		 * hardware dependent resources associated with the file before
+		 * the underlying hardware related structures are destroyed and
+		 * freed.  Calling fput() on subtest closure is not sufficient
+		 * as the file .release() operation may be postponed until the
+		 * selftest userspace process exits.  As long as
+		 * mock_drm_getfile() doesn't take care of adding files it
+		 * creates to that list, we must do that ourselves.
+		 */
+		struct drm_file *file_priv = file->private_data;
+
+		if (list_empty(&file_priv->lhead)) {
+			mutex_lock(&i915->drm.filelist_mutex);
+			list_add(&file_priv->lhead, &i915->drm.filelist);
+			mutex_unlock(&i915->drm.filelist_mutex);
+		}
+	}
+
+	return file;
 }
 
 static inline struct drm_file *to_drm_file(struct file *f)
-- 
2.21.1



More information about the Intel-gfx-trybot mailing list