[Intel-gfx] [PATCH] drm/i915: Optionally defer context closing

Ben Widawsky benjamin.widawsky at intel.com
Wed Nov 27 07:53:26 CET 2013


This patch doesn't seem to be the panacea that I had hoped, although
I've yet to thoroughly test if it actually improves some of the other
tests which caused trouble.

What occurs is the following (assume just 2 procs)
1. proc A gets to execbuf while out of memory, gets struct_mutex.
2. OOM killer comes in and chooses proc B
3. proc B closes it's fds, which requires struct mutex, blocks
4, OOM killer waits for B to die before killing another process (this
part is speculative)

In order to let the OOM complete, we defer processing the context
destruction parts which are those that require struct_mutex.

This patch has not really been cleaned up, I am only posting it for
posterity. (Several of the hunks are only relevant to full PPGTT patches
which are not merged).

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  6 ++-
 drivers/gpu/drm/i915/i915_drv.h         |  9 +++--
 drivers/gpu/drm/i915/i915_gem.c         |  2 +-
 drivers/gpu/drm/i915/i915_gem_context.c | 68 ++++++++++++++++++++++++---------
 4 files changed, 61 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 835a43e..e6b5f3e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1717,11 +1717,13 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
 		struct drm_i915_file_private *file_priv = file->driver_priv;
 		struct i915_hw_ppgtt *pvt_ppgtt;
 
-		pvt_ppgtt = ctx_to_ppgtt(file_priv->private_default_ctx);
+		pvt_ppgtt =
+			ctx_to_ppgtt(file_priv->ctx_info->private_default_ctx);
 		seq_printf(m, "proc: %s\n",
 			   get_pid_task(file->pid, PIDTYPE_PID)->comm);
 		seq_puts(m, "  default context:\n");
-		idr_for_each(&file_priv->context_idr, per_file_ctx, m);
+		idr_for_each(&file_priv->ctx_info->context_idr, per_file_ctx,
+			     m);
 	}
 	seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6fdab40..931fc42 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1735,15 +1735,18 @@ struct drm_i915_gem_request {
 
 struct drm_i915_file_private {
 	struct drm_i915_private *dev_priv;
-
 	struct {
 		spinlock_t lock;
 		struct list_head request_list;
 		struct delayed_work idle_work;
 	} mm;
-	struct idr context_idr;
+	struct i915_gem_ctx_info {
+		struct drm_i915_private *dev_priv;
+		struct idr context_idr;
+		struct work_struct close_work;
+		struct i915_hw_context *private_default_ctx;
+	} *ctx_info;
 
-	struct i915_hw_context *private_default_ctx;
 	atomic_t rps_wait_boost;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6312d61..a32f6df 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2326,7 +2326,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
 	if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID)
 		hs = &request->ctx->hang_stats;
 	else if (request->file_priv)
-		hs = &request->file_priv->private_default_ctx->hang_stats;
+		hs = &request->file_priv->ctx_info->private_default_ctx->hang_stats;
 
 	if (hs) {
 		if (guilty) {
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f55f0a9..770b394 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -209,8 +209,8 @@ __create_hw_context(struct drm_device *dev,
 	if (file_priv == NULL)
 		return ctx;
 
-	ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID, 0,
-			GFP_KERNEL);
+	ret = idr_alloc(&file_priv->ctx_info->context_idr,
+			ctx, DEFAULT_CONTEXT_ID, 0, GFP_KERNEL);
 	if (ret < 0)
 		goto err_out;
 
@@ -481,29 +481,54 @@ static int context_idr_cleanup(int id, void *p, void *data)
 	return 0;
 }
 
+static void i915_gem_context_close_work_handler(struct work_struct *work)
+{
+	struct i915_gem_ctx_info *ctx_info = container_of(work,
+							  struct i915_gem_ctx_info,
+							  close_work);
+	struct drm_i915_private *dev_priv = ctx_info->dev_priv;
+
+	mutex_lock(&dev_priv->dev->struct_mutex);
+	idr_for_each(&ctx_info->context_idr, context_idr_cleanup, NULL);
+	i915_gem_context_unreference(ctx_info->private_default_ctx);
+	idr_destroy(&ctx_info->context_idr);
+	mutex_unlock(&dev_priv->dev->struct_mutex);
+
+	kfree(ctx_info);
+}
+
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	file_priv->ctx_info = kzalloc(sizeof(*file_priv->ctx_info), GFP_KERNEL);
+	if (!file_priv->ctx_info)
+		return -ENOMEM;
+
+	file_priv->ctx_info->dev_priv = file_priv->dev_priv;
+	INIT_WORK(&file_priv->ctx_info->close_work,
+		  i915_gem_context_close_work_handler);
+
 	if (!HAS_HW_CONTEXTS(dev)) {
 		/* Cheat for hang stats */
-		file_priv->private_default_ctx =
+		file_priv->ctx_info->private_default_ctx =
 			kzalloc(sizeof(struct i915_hw_context), GFP_KERNEL);
-		file_priv->private_default_ctx->vm = &dev_priv->gtt.base;
+		file_priv->ctx_info->private_default_ctx->vm =
+			&dev_priv->gtt.base;
 		return 0;
 	}
 
-	idr_init(&file_priv->context_idr);
+	idr_init(&file_priv->ctx_info->context_idr);
 
 	mutex_lock(&dev->struct_mutex);
-	file_priv->private_default_ctx =
+	file_priv->ctx_info->private_default_ctx =
 		i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
 	mutex_unlock(&dev->struct_mutex);
 
-	if (IS_ERR(file_priv->private_default_ctx)) {
-		idr_destroy(&file_priv->context_idr);
-		return PTR_ERR(file_priv->private_default_ctx);
+	if (IS_ERR(file_priv->ctx_info->private_default_ctx)) {
+		idr_destroy(&file_priv->ctx_info->context_idr);
+		return PTR_ERR(file_priv->ctx_info->private_default_ctx);
 	}
 
 	return 0;
@@ -511,27 +536,34 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
 	if (!HAS_HW_CONTEXTS(dev)) {
-		kfree(file_priv->private_default_ctx);
+		kfree(file_priv->ctx_info->private_default_ctx);
+		kfree(file_priv->ctx_info);
 		return;
 	}
 
-	mutex_lock(&dev->struct_mutex);
-	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
-	i915_gem_context_unreference(file_priv->private_default_ctx);
-	idr_destroy(&file_priv->context_idr);
-	mutex_unlock(&dev->struct_mutex);
+	if (mutex_trylock(&dev->struct_mutex)) {
+		idr_for_each(&file_priv->ctx_info->context_idr,
+			     context_idr_cleanup, NULL);
+		i915_gem_context_unreference(file_priv->ctx_info->private_default_ctx);
+		idr_destroy(&file_priv->ctx_info->context_idr);
+		mutex_unlock(&dev->struct_mutex);
+		kfree(file_priv->ctx_info);
+	} else {
+		queue_work(dev_priv->wq, &file_priv->ctx_info->close_work);
+	}
 }
 
 struct i915_hw_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
 {
 	if (!HAS_HW_CONTEXTS(file_priv->dev_priv->dev))
-		return file_priv->private_default_ctx;
+		return file_priv->ctx_info->private_default_ctx;
 
-	return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id);
+	return (struct i915_hw_context *)idr_find(&file_priv->ctx_info->context_idr, id);
 }
 
 static inline int
@@ -773,7 +805,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 
-	idr_remove(&ctx->file_priv->context_idr, ctx->id);
+	idr_remove(&ctx->file_priv->ctx_info->context_idr, ctx->id);
 	i915_gem_context_unreference(ctx);
 	mutex_unlock(&dev->struct_mutex);
 
-- 
1.8.4.2




More information about the Intel-gfx mailing list