[Intel-gfx] [PATCH v3 6/6] drm/i915: The return of i915_gpu_info to debugfs
Chris Wilson
chris at chris-wilson.co.uk
Mon Feb 6 09:51:46 UTC 2017
Once upon a time before we had automated GPU state capture upon hangs,
we had intel_gpu_dump. Now we come almost full circle and reinstate that
view of the current GPU queues and registers by using the error capture
facility to snapshot the GPU state when debugfs/.../i915_gpu_info is
opened - which should provided useful debugging to both the error
capture routines (without having to cause a hang and avoid the error
state being eaten by igt) and generally.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 123 +++++++++++++++++++---------------
drivers/gpu/drm/i915/i915_drv.h | 31 ++++++---
drivers/gpu/drm/i915/i915_gpu_error.c | 117 ++++++++++++++++----------------
drivers/gpu/drm/i915/i915_sysfs.c | 27 ++++----
drivers/gpu/drm/i915/intel_display.c | 2 +-
5 files changed, 160 insertions(+), 140 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0e6d950bf7e9..76c453b98f6a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -978,89 +978,103 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
}
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-
-static ssize_t
-i915_error_state_write(struct file *filp,
- const char __user *ubuf,
- size_t cnt,
- loff_t *ppos)
+static ssize_t error_state_read(struct file *file, char __user *ubuf,
+ size_t count, loff_t *pos)
{
- struct i915_error_state_file_priv *error_priv = filp->private_data;
-
- DRM_DEBUG_DRIVER("Resetting error state\n");
- i915_destroy_error_state(error_priv->i915);
-
- return cnt;
-}
+ struct drm_i915_error_state *error = file->private_data;
+ struct drm_i915_error_state_buf str;
+ ssize_t ret;
+ loff_t tmp;
-static int i915_error_state_open(struct inode *inode, struct file *file)
-{
- struct drm_i915_private *dev_priv = inode->i_private;
- struct i915_error_state_file_priv *error_priv;
+ if (!error)
+ return 0;
- error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
- if (!error_priv)
- return -ENOMEM;
+ ret = i915_error_state_buf_init(&str, error->i915, count, *pos);
+ if (ret)
+ return ret;
- error_priv->i915 = dev_priv;
+ ret = i915_error_state_to_str(&str, error);
+ if (ret)
+ goto out;
- i915_error_state_get(&dev_priv->drm, error_priv);
+ tmp = 0;
+ ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes);
+ if (ret < 0)
+ goto out;
- file->private_data = error_priv;
+ *pos = str.start + ret;
+out:
+ i915_error_state_buf_release(&str);
+ return ret;
+}
+static int error_state_release(struct inode *inode, struct file *file)
+{
+ i915_error_state_put(file->private_data);
return 0;
}
-static int i915_error_state_release(struct inode *inode, struct file *file)
+static int i915_gpu_info_open(struct inode *inode, struct file *file)
{
- struct i915_error_state_file_priv *error_priv = file->private_data;
+ struct drm_i915_error_state *error;
- i915_error_state_put(error_priv);
- kfree(error_priv);
+ error = i915_error_state(inode->i_private);
+ if (!error)
+ return -ENOMEM;
+ file->private_data = error;
return 0;
}
-static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *pos)
+static const struct file_operations i915_gpu_info_fops = {
+ .owner = THIS_MODULE,
+ .open = i915_gpu_info_open,
+ .read = error_state_read,
+ .llseek = default_llseek,
+ .release = error_state_release,
+};
+
+static ssize_t
+i915_error_state_write(struct file *filp,
+ const char __user *ubuf,
+ size_t cnt,
+ loff_t *ppos)
{
- struct i915_error_state_file_priv *error_priv = file->private_data;
- struct drm_i915_error_state_buf error_str;
- loff_t tmp_pos = 0;
- ssize_t ret_count = 0;
- int ret;
+ struct drm_i915_error_state *error = filp->private_data;
- ret = i915_error_state_buf_init(&error_str, error_priv->i915,
- count, *pos);
- if (ret)
- return ret;
+ if (!error)
+ return 0;
- ret = i915_error_state_to_str(&error_str, error_priv);
- if (ret)
- goto out;
+ DRM_DEBUG_DRIVER("Resetting error state\n");
+ i915_destroy_error_state(error->i915);
- ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos,
- error_str.buf,
- error_str.bytes);
+ return cnt;
+}
- if (ret_count < 0)
- ret = ret_count;
- else
- *pos = error_str.start + ret_count;
-out:
- i915_error_state_buf_release(&error_str);
- return ret ?: ret_count;
+static int i915_error_state_open(struct inode *inode, struct file *file)
+{
+ struct drm_i915_private *i915 = inode->i_private;
+ struct drm_i915_error_state *error;
+
+ spin_lock_irq(&i915->gpu_error.lock);
+ error = i915->gpu_error.first_error;
+ if (error)
+ i915_error_state_get(error);
+ spin_unlock_irq(&i915->gpu_error.lock);
+
+ file->private_data = error;
+
+ return 0;
}
static const struct file_operations i915_error_state_fops = {
.owner = THIS_MODULE,
.open = i915_error_state_open,
- .read = i915_error_state_read,
+ .read = error_state_read,
.write = i915_error_state_write,
.llseek = default_llseek,
- .release = i915_error_state_release,
+ .release = error_state_release,
};
-
#endif
static int
@@ -4731,6 +4745,7 @@ static const struct i915_debugfs_files {
{"i915_gem_drop_caches", &i915_drop_caches_fops},
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
{"i915_error_state", &i915_error_state_fops},
+ {"i915_gpu_info", &i915_gpu_info_fops},
#endif
{"i915_next_seqno", &i915_next_seqno_fops},
{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0cbd289da4ba..263fabd1581c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -910,7 +910,7 @@ struct drm_i915_error_state {
u32 eir;
u32 pgtbl_er;
u32 ier;
- u32 gtier[4];
+ u32 gtier[4], ngtier;
u32 ccid;
u32 derrmr;
u32 forcewake;
@@ -924,6 +924,7 @@ struct drm_i915_error_state {
u32 gab_ctl;
u32 gfx_mode;
+ u32 nfence;
u64 fence[I915_MAX_NUM_FENCES];
struct intel_overlay_error_state *overlay;
struct intel_display_error_state *display;
@@ -1505,11 +1506,6 @@ struct drm_i915_error_state_buf {
loff_t pos;
};
-struct i915_error_state_file_priv {
- struct drm_i915_private *i915;
- struct drm_i915_error_state *error;
-};
-
#define I915_RESET_TIMEOUT (10 * HZ) /* 10s */
#define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */
@@ -3558,7 +3554,7 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
- const struct i915_error_state_file_priv *error);
+ const struct drm_i915_error_state *error);
int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
struct drm_i915_private *i915,
size_t count, loff_t pos);
@@ -3567,12 +3563,26 @@ static inline void i915_error_state_buf_release(
{
kfree(eb->buf);
}
+
+struct drm_i915_error_state *i915_error_state(struct drm_i915_private *i915);
void i915_capture_error_state(struct drm_i915_private *dev_priv,
u32 engine_mask,
const char *error_msg);
-void i915_error_state_get(struct drm_device *dev,
- struct i915_error_state_file_priv *error_priv);
-void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
+
+static inline struct drm_i915_error_state *
+i915_error_state_get(struct drm_i915_error_state *error)
+{
+ kref_get(&error->ref);
+ return error;
+}
+
+void __i915_error_state_free(struct kref *error_ref);
+static inline void i915_error_state_put(struct drm_i915_error_state *error)
+{
+ if (error)
+ kref_put(&error->ref, __i915_error_state_free);
+}
+
void i915_destroy_error_state(struct drm_i915_private *dev_priv);
#else
@@ -3739,7 +3749,6 @@ extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
extern struct intel_display_error_state *
intel_display_capture_error_state(struct drm_i915_private *dev_priv);
extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
- struct drm_i915_private *dev_priv,
struct intel_display_error_state *error);
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 824b6c0383c2..dac844270998 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -342,7 +342,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
}
static void error_print_instdone(struct drm_i915_error_state_buf *m,
- struct drm_i915_error_engine *ee)
+ const struct drm_i915_error_engine *ee)
{
int slice;
int subslice;
@@ -372,7 +372,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
static void error_print_request(struct drm_i915_error_state_buf *m,
const char *prefix,
- struct drm_i915_error_request *erq)
+ const struct drm_i915_error_request *erq)
{
if (!erq->seqno)
return;
@@ -386,7 +386,7 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
- struct drm_i915_error_context *ctx)
+ const struct drm_i915_error_context *ctx)
{
err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n",
header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
@@ -394,7 +394,7 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
}
static void error_print_engine(struct drm_i915_error_state_buf *m,
- struct drm_i915_error_engine *ee)
+ const struct drm_i915_error_engine *ee)
{
err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
err_printf(m, " START: 0x%08x\n", ee->start);
@@ -571,21 +571,32 @@ static void err_print_params(struct drm_i915_error_state_buf *m,
#undef PRINT
}
+static void err_print_pciid(struct drm_i915_error_state_buf *m,
+ struct drm_i915_private *i915)
+{
+ struct pci_dev *pdev = i915->drm.pdev;
+
+ err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
+ err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
+ err_printf(m, "PCI Subsystem: %04x:%04x\n",
+ pdev->subsystem_vendor,
+ pdev->subsystem_device);
+}
+
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
- const struct i915_error_state_file_priv *error_priv)
+ const struct drm_i915_error_state *error)
{
- struct drm_i915_private *dev_priv = error_priv->i915;
- struct pci_dev *pdev = dev_priv->drm.pdev;
- struct drm_i915_error_state *error = error_priv->error;
+ struct drm_i915_private *dev_priv = m->i915;
struct drm_i915_error_object *obj;
int i, j;
if (!error) {
- err_printf(m, "no error state collected\n");
- goto out;
+ err_printf(m, "No error state collected\n");
+ return 0;
}
- err_printf(m, "%s\n", error->error_msg);
+ if (*error->error_msg)
+ err_printf(m, "%s\n", error->error_msg);
err_printf(m, "Kernel: " UTS_RELEASE "\n");
err_printf(m, "Time: %ld s %ld us\n",
error->time.tv_sec, error->time.tv_usec);
@@ -607,11 +618,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_printf(m, "Reset count: %u\n", error->reset_count);
err_printf(m, "Suspend count: %u\n", error->suspend_count);
err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
- err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
- err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
- err_printf(m, "PCI Subsystem: %04x:%04x\n",
- pdev->subsystem_vendor,
- pdev->subsystem_device);
+ err_print_pciid(m, error->i915);
err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
@@ -627,19 +634,15 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_printf(m, "EIR: 0x%08x\n", error->eir);
err_printf(m, "IER: 0x%08x\n", error->ier);
- if (INTEL_GEN(dev_priv) >= 8) {
- for (i = 0; i < 4; i++)
- err_printf(m, "GTIER gt %d: 0x%08x\n", i,
- error->gtier[i]);
- } else if (HAS_PCH_SPLIT(dev_priv) || IS_VALLEYVIEW(dev_priv))
- err_printf(m, "GTIER: 0x%08x\n", error->gtier[0]);
+ for (i = 0; i < error->ngtier; i++)
+ err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
err_printf(m, "CCID: 0x%08x\n", error->ccid);
err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);
- for (i = 0; i < dev_priv->num_fence_regs; i++)
+ for (i = 0; i < error->nfence; i++)
err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]);
if (INTEL_GEN(dev_priv) >= 6) {
@@ -688,7 +691,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
error->pinned_bo_count);
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
- struct drm_i915_error_engine *ee = &error->engine[i];
+ const struct drm_i915_error_engine *ee = &error->engine[i];
obj = ee->batchbuffer;
if (obj) {
@@ -753,12 +756,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
intel_overlay_print_error_state(m, error->overlay);
if (error->display)
- intel_display_print_error_state(m, dev_priv, error->display);
+ intel_display_print_error_state(m, error->display);
err_print_capabilities(m, &error->device_info);
err_print_params(m, &error->params);
-out:
if (m->bytes == 0 && m->err)
return m->err;
@@ -816,7 +818,7 @@ static inline void free_param(const char *type, void *x)
kfree(*(void **)x);
}
-static void i915_error_state_free(struct kref *error_ref)
+void __i915_error_state_free(struct kref *error_ref)
{
struct drm_i915_error_state *error = container_of(error_ref,
typeof(*error), ref);
@@ -1018,16 +1020,17 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
{
int i;
- if (IS_GEN3(dev_priv) || IS_GEN2(dev_priv)) {
+ if (INTEL_GEN(dev_priv) >= 6) {
for (i = 0; i < dev_priv->num_fence_regs; i++)
- error->fence[i] = I915_READ(FENCE_REG(i));
- } else if (IS_GEN5(dev_priv) || IS_GEN4(dev_priv)) {
+ error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
+ } else if (INTEL_GEN(dev_priv) >= 4) {
for (i = 0; i < dev_priv->num_fence_regs; i++)
error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
- } else if (INTEL_GEN(dev_priv) >= 6) {
+ } else {
for (i = 0; i < dev_priv->num_fence_regs; i++)
- error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
+ error->fence[i] = I915_READ(FENCE_REG(i));
}
+ error->nfence = i;
}
static inline u32
@@ -1568,9 +1571,11 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
error->ier = I915_READ(GEN8_DE_MISC_IER);
for (i = 0; i < 4; i++)
error->gtier[i] = I915_READ(GEN8_GT_IER(i));
+ error->ngtier = 4;
} else if (HAS_PCH_SPLIT(dev_priv)) {
error->ier = I915_READ(DEIER);
error->gtier[0] = I915_READ(GTIER);
+ error->ngtier = 1;
} else if (IS_GEN2(dev_priv)) {
error->ier = I915_READ16(IER);
} else if (!IS_VALLEYVIEW(dev_priv)) {
@@ -1659,6 +1664,23 @@ static int capture(void *data)
#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
+struct drm_i915_error_state *
+i915_error_state(struct drm_i915_private *i915)
+{
+ struct drm_i915_error_state *error;
+
+ error = kzalloc(sizeof(*error), GFP_ATOMIC);
+ if (!error)
+ return NULL;
+
+ kref_init(&error->ref);
+ error->i915 = i915;
+
+ stop_machine(capture, error, NULL);
+
+ return error;
+}
+
/**
* i915_capture_error_state - capture an error record for later analysis
* @dev: drm device
@@ -1682,18 +1704,12 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
if (READ_ONCE(dev_priv->gpu_error.first_error))
return;
- /* Account for pipe specific data like PIPE*STAT */
- error = kzalloc(sizeof(*error), GFP_ATOMIC);
+ error = i915_error_state(dev_priv);
if (!error) {
DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
return;
}
- kref_init(&error->ref);
- error->i915 = dev_priv;
-
- stop_machine(capture, error, NULL);
-
i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
DRM_INFO("%s\n", error->error_msg);
@@ -1707,7 +1723,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
}
if (error) {
- i915_error_state_free(&error->ref);
+ __i915_error_state_free(&error->ref);
return;
}
@@ -1723,24 +1739,6 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
}
}
-void i915_error_state_get(struct drm_device *dev,
- struct i915_error_state_file_priv *error_priv)
-{
- struct drm_i915_private *dev_priv = to_i915(dev);
-
- spin_lock_irq(&dev_priv->gpu_error.lock);
- error_priv->error = dev_priv->gpu_error.first_error;
- if (error_priv->error)
- kref_get(&error_priv->error->ref);
- spin_unlock_irq(&dev_priv->gpu_error.lock);
-}
-
-void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
-{
- if (error_priv->error)
- kref_put(&error_priv->error->ref, i915_error_state_free);
-}
-
void i915_destroy_error_state(struct drm_i915_private *dev_priv)
{
struct drm_i915_error_state *error;
@@ -1750,6 +1748,5 @@ void i915_destroy_error_state(struct drm_i915_private *dev_priv)
dev_priv->gpu_error.first_error = NULL;
spin_unlock_irq(&dev_priv->gpu_error.lock);
- if (error)
- kref_put(&error->ref, i915_error_state_free);
+ i915_error_state_put(error);
}
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index a721ff116101..85f34191d29f 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -522,33 +522,32 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
struct device *kdev = kobj_to_dev(kobj);
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct drm_device *dev = &dev_priv->drm;
- struct i915_error_state_file_priv error_priv;
struct drm_i915_error_state_buf error_str;
- ssize_t ret_count = 0;
- int ret;
-
- memset(&error_priv, 0, sizeof(error_priv));
+ struct drm_i915_error_state *error;
+ ssize_t ret;
- ret = i915_error_state_buf_init(&error_str, to_i915(dev), count, off);
+ ret = i915_error_state_buf_init(&error_str, dev_priv, count, off);
if (ret)
return ret;
- error_priv.i915 = dev_priv;
- i915_error_state_get(dev, &error_priv);
+ spin_lock_irq(&dev_priv->gpu_error.lock);
+ error = dev_priv->gpu_error.first_error;
+ if (error)
+ i915_error_state_get(error);
+ spin_unlock_irq(&dev_priv->gpu_error.lock);
- ret = i915_error_state_to_str(&error_str, &error_priv);
+ ret = i915_error_state_to_str(&error_str, error);
if (ret)
goto out;
- ret_count = count < error_str.bytes ? count : error_str.bytes;
+ ret = count < error_str.bytes ? count : error_str.bytes;
+ memcpy(buf, error_str.buf, ret);
- memcpy(buf, error_str.buf, ret_count);
out:
- i915_error_state_put(&error_priv);
+ i915_error_state_put(error);
i915_error_state_buf_release(&error_str);
- return ret ?: ret_count;
+ return ret;
}
static ssize_t error_state_write(struct file *file, struct kobject *kobj,
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 45e587496886..a4f70e14177f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -17515,9 +17515,9 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv)
void
intel_display_print_error_state(struct drm_i915_error_state_buf *m,
- struct drm_i915_private *dev_priv,
struct intel_display_error_state *error)
{
+ struct drm_i915_private *dev_priv = m->i915;
int i;
if (!error)
--
2.11.0
More information about the Intel-gfx
mailing list