[Intel-gfx] [PATCH 2/4] drm/i915: Add i915_gpu_state debugfs entry

Mika Kuoppala mika.kuoppala at linux.intel.com
Tue Dec 9 08:04:32 PST 2014


The 'i915_gpu_state' debugfs entry can be used to
capture the current gpu state. This is similar to
what one would get from 'i915_error_state' if a gpu
error state had been captured.

The motivation for this was to enhance our toolbox
so that we can direct bug reporters to do things like:

'grep -i suspend /sys/kernel/debug/dri/0/i915_gpu_state'

pre and postmortem to gain insight when triaging, and to
save ourselves from writing new debugfs entries when
the information is already in our error state.

v2: - use symmetrical put/get (Chris)
    - document the api (Daniel)
    - take a mutex when capturing

Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c   | 129 ++++++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_drv.h       |   5 ++
 drivers/gpu/drm/i915/i915_gpu_error.c |  57 +++++++++++++--
 3 files changed, 141 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d0e445e..30f56f3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -888,58 +888,41 @@ static int i915_hws_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-static ssize_t
-i915_error_state_write(struct file *filp,
-		       const char __user *ubuf,
-		       size_t cnt,
-		       loff_t *ppos)
-{
-	struct i915_error_state_file_priv *error_priv = filp->private_data;
-	struct drm_device *dev = error_priv->dev;
-	int ret;
-
-	DRM_DEBUG_DRIVER("Resetting error state\n");
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
-	i915_destroy_error_state(dev);
-	mutex_unlock(&dev->struct_mutex);
-
-	return cnt;
-}
-
-static int i915_error_state_open(struct inode *inode, struct file *file)
+static int i915_gpu_state_open(struct inode *inode, struct file *file)
 {
 	struct drm_device *dev = inode->i_private;
-	struct i915_error_state_file_priv *error_priv;
+	struct i915_error_state_file_priv *state_priv;
+	int ret = 0;
 
-	error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
-	if (!error_priv)
+	state_priv = kzalloc(sizeof(*state_priv), GFP_KERNEL);
+	if (!state_priv)
 		return -ENOMEM;
 
-	error_priv->dev = dev;
-
-	i915_error_state_get(dev, error_priv);
-
-	file->private_data = error_priv;
+	state_priv->dev = dev;
 
-	return 0;
-}
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
 
-static int i915_error_state_release(struct inode *inode, struct file *file)
-{
-	struct i915_error_state_file_priv *error_priv = file->private_data;
+	ret = i915_gpu_idle(dev);
+	if (ret)
+		goto unlock;
 
-	i915_error_state_put(error_priv);
-	kfree(error_priv);
+	state_priv->error = i915_gpu_state_capture(dev);
+	if (state_priv->error == NULL) {
+		kfree(state_priv);
+		ret = -ENOMEM;
+		goto unlock;
+	}
 
-	return 0;
+	file->private_data = state_priv;
+unlock:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
 }
 
-static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
-				     size_t count, loff_t *pos)
+static ssize_t i915_gpu_state_read(struct file *file, char __user *userbuf,
+				   size_t count, loff_t *pos)
 {
 	struct i915_error_state_file_priv *error_priv = file->private_data;
 	struct drm_i915_error_state_buf error_str;
@@ -968,13 +951,72 @@ out:
 	return ret ?: ret_count;
 }
 
+static int i915_gpu_state_release(struct inode *inode, struct file *file)
+{
+	struct i915_error_state_file_priv *state_priv = file->private_data;
+
+	i915_gpu_state_put(state_priv->error);
+	kfree(state_priv);
+
+	return 0;
+}
+
+static const struct file_operations i915_gpu_state_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_gpu_state_open,
+	.read = i915_gpu_state_read,
+	.write = NULL,
+	.llseek = default_llseek,
+	.release = i915_gpu_state_release,
+};
+
+static int i915_error_state_open(struct inode *inode, struct file *file)
+{
+	struct drm_device *dev = inode->i_private;
+	struct i915_error_state_file_priv *error_priv;
+
+	error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
+	if (!error_priv)
+		return -ENOMEM;
+
+	error_priv->dev = dev;
+
+	i915_error_state_get(dev, error_priv);
+
+	file->private_data = error_priv;
+
+	return 0;
+}
+
+static ssize_t
+i915_error_state_write(struct file *filp,
+		       const char __user *ubuf,
+		       size_t cnt,
+		       loff_t *ppos)
+{
+	struct i915_error_state_file_priv *error_priv = filp->private_data;
+	struct drm_device *dev = error_priv->dev;
+	int ret;
+
+	DRM_DEBUG_DRIVER("Resetting error state\n");
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	i915_destroy_error_state(dev);
+	mutex_unlock(&dev->struct_mutex);
+
+	return cnt;
+}
+
 static const struct file_operations i915_error_state_fops = {
 	.owner = THIS_MODULE,
 	.open = i915_error_state_open,
-	.read = i915_error_state_read,
+	.read = i915_gpu_state_read,
 	.write = i915_error_state_write,
 	.llseek = default_llseek,
-	.release = i915_error_state_release,
+	.release = i915_gpu_state_release,
 };
 
 static int
@@ -4367,6 +4409,7 @@ static const struct i915_debugfs_files {
 	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
 	{"i915_error_state", &i915_error_state_fops},
+	{"i915_gpu_state", &i915_gpu_state_fops},
 	{"i915_next_seqno", &i915_next_seqno_fops},
 	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
 	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 11e85cb..ca487b4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2866,6 +2866,11 @@ static inline void i915_error_state_buf_release(
 {
 	kfree(eb->buf);
 }
+
+struct drm_i915_error_state * __must_check
+i915_gpu_state_capture(struct drm_device *dev);
+void i915_gpu_state_put(struct drm_i915_error_state *error);
+
 void i915_capture_error_state(struct drm_device *dev, bool wedge,
 			      const char *error_msg);
 void i915_error_state_get(struct drm_device *dev,
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index dcea1fa..0cefeebc 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -540,7 +540,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
 	kfree(obj);
 }
 
-static void i915_error_state_free(struct kref *error_ref)
+static void i915_gpu_state_free(struct kref *error_ref)
 {
 	struct drm_i915_error_state *error = container_of(error_ref,
 							  typeof(*error), ref);
@@ -1266,7 +1266,7 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
 }
 
 static struct drm_i915_error_state *
-__i915_capture_gpu_state(struct drm_device *dev)
+__i915_gpu_state_capture(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_error_state *error;
@@ -1295,6 +1295,49 @@ __i915_capture_gpu_state(struct drm_device *dev)
 }
 
 /**
+ * i915_gpu_state_capture - capture a gpu hardware state
+ * @dev: drm device
+ *
+ * This will capture the current gpu hw state, identical to what
+ * i915_capture_error_state() does, but without any error condition.
+ * The returned state is reference counted and the caller is responsible
+ * for releasing the state by calling i915_gpu_state_put().
+ */
+struct drm_i915_error_state *
+i915_gpu_state_capture(struct drm_device *dev)
+{
+	struct drm_i915_error_state *gpu_state;
+
+	gpu_state = __i915_gpu_state_capture(dev);
+	if (gpu_state == NULL)
+		return NULL;
+
+	scnprintf(gpu_state->error_msg, sizeof(gpu_state->error_msg),
+		  "GPU hw state snapshot\n");
+
+	return gpu_state;
+}
+
+static void i915_gpu_state_get(struct drm_i915_error_state *state)
+{
+	WARN_ON(!state);
+	kref_get(&state->ref);
+}
+
+/**
+ * i915_gpu_state_put - release a drm_i915_error_state
+ * @state: gpu state
+ *
+ * This will release the reference to the drm_i915_error_state,
+ * possibly freeing the state if the reference count reaches zero.
+ */
+void i915_gpu_state_put(struct drm_i915_error_state *state)
+{
+	if (state)
+		kref_put(&state->ref, i915_gpu_state_free);
+}
+
+/**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
  *
@@ -1311,7 +1354,7 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
 	struct drm_i915_error_state *error;
 	unsigned long flags;
 
-	error = __i915_capture_gpu_state(dev);
+	error = __i915_gpu_state_capture(dev);
 	if (error == NULL)
 		return;
 
@@ -1326,7 +1369,7 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
 	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
 
 	if (error) {
-		i915_error_state_free(&error->ref);
+		i915_gpu_state_free(&error->ref);
 		return;
 	}
 
@@ -1348,7 +1391,7 @@ void i915_error_state_get(struct drm_device *dev,
 	spin_lock_irq(&dev_priv->gpu_error.lock);
 	error_priv->error = dev_priv->gpu_error.first_error;
 	if (error_priv->error)
-		kref_get(&error_priv->error->ref);
+		i915_gpu_state_get(error_priv->error);
 	spin_unlock_irq(&dev_priv->gpu_error.lock);
 
 }
@@ -1356,7 +1399,7 @@ void i915_error_state_get(struct drm_device *dev,
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
 {
 	if (error_priv->error)
-		kref_put(&error_priv->error->ref, i915_error_state_free);
+		i915_gpu_state_put(error_priv->error);
 }
 
 void i915_destroy_error_state(struct drm_device *dev)
@@ -1370,7 +1413,7 @@ void i915_destroy_error_state(struct drm_device *dev)
 	spin_unlock_irq(&dev_priv->gpu_error.lock);
 
 	if (error)
-		kref_put(&error->ref, i915_error_state_free);
+		i915_gpu_state_put(error);
 }
 
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
-- 
1.9.1



More information about the Intel-gfx mailing list