[Intel-gfx] [PATCH v5] drm/i915/guc: capture GuC logs if FW fails to load

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Tue May 16 21:52:53 UTC 2017


We're currently deleting the GuC logs if the FW fails to load, but those
are still useful to understand why the loading failed. Keeping the
object around allows us to access them after driver load is completed.

v2: keep the object around instead of using kernel memory (chris)
    don't store the logs in the gpu_error struct (Chris)
    add a check on guc_log_level to avoid snapshotting empty logs

v3: use separate debugfs for error log (Chris)

v4: rebased

v5: clean up obj selection, move err_load inside guc_log, move err_load
    cleanup, rename functions (Michal)

Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Oscar Mateo <oscar.mateo at intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 38 +++++++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_guc_log.c | 17 ++++++++++++++++
 drivers/gpu/drm/i915/intel_uc.c      | 14 +++++++++++--
 drivers/gpu/drm/i915/intel_uc.h      |  5 +++++
 4 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index bd9abef..740395c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2607,27 +2607,36 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
 
 static int i915_guc_log_dump(struct seq_file *m, void *data)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_i915_gem_object *obj;
-	int i = 0, pg;
-
-	if (!dev_priv->guc.log.vma)
-		return 0;
+	struct drm_info_node *node = m->private;
+	struct drm_i915_private *dev_priv = node_to_i915(node);
+	bool dump_load_err = !!node->info_ent->data;
+	struct drm_i915_gem_object *obj = NULL;
+	u32 *log;
+	int i = 0;
 
-	obj = dev_priv->guc.log.vma->obj;
-	for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
-		u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
+	if (dump_load_err)
+		obj = dev_priv->guc.log.load_err;
+	else if (dev_priv->guc.log.vma)
+		obj = dev_priv->guc.log.vma->obj;
 
-		for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
-			seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
-				   *(log + i), *(log + i + 1),
-				   *(log + i + 2), *(log + i + 3));
+	if (!obj)
+		return 0;
 
-		kunmap_atomic(log);
+	log = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(log)) {
+		seq_puts(m, "Failed to pin object\n");
+		return PTR_ERR(log);
 	}
 
+	for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
+		seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+			   *(log + i), *(log + i + 1),
+			   *(log + i + 2), *(log + i + 3));
+
 	seq_putc(m, '\n');
 
+	i915_gem_object_unpin_map(obj);
+
 	return 0;
 }
 
@@ -4811,6 +4820,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file)
 	{"i915_guc_info", i915_guc_info, 0},
 	{"i915_guc_load_status", i915_guc_load_status_info, 0},
 	{"i915_guc_log_dump", i915_guc_log_dump, 0},
+	{"i915_guc_load_err_log_dump", i915_guc_log_dump, 0, (void *)1},
 	{"i915_guc_stage_pool", i915_guc_stage_pool, 0},
 	{"i915_huc_load_status", i915_huc_load_status_info, 0},
 	{"i915_frequency_info", i915_frequency_info, 0},
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 16d3b87..31e7bec 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
 	guc_log_runtime_destroy(&dev_priv->guc);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
+
+void intel_guc_log_capture_load_err(struct intel_guc *guc)
+{
+	if (!guc->log.vma || i915.guc_log_level < 0)
+		return;
+
+	if (!guc->log.load_err)
+		guc->log.load_err = i915_gem_object_get(guc->log.vma->obj);
+
+	return;
+}
+
+void intel_guc_log_free_load_err(struct intel_guc *guc)
+{
+	if (guc->log.load_err)
+		i915_gem_object_put(guc->log.load_err);
+}
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 07c5658..dfea7d0 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -309,6 +309,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 
 	guc_disable_communication(guc);
 	gen9_reset_guc_interrupts(dev_priv);
+	intel_guc_log_free_load_err(guc);
 
 	/* We need to notify the guc whenever we change the GGTT */
 	i915_ggtt_enable_guc(dev_priv);
@@ -355,11 +356,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 
 	/* Did we succeded or run out of retries? */
 	if (ret)
-		goto err_submission;
+		goto err_log_capture;
 
 	ret = guc_enable_communication(guc);
 	if (ret)
-		goto err_submission;
+		goto err_log_capture;
 
 	intel_guc_auth_huc(dev_priv);
 	if (i915.enable_guc_submission) {
@@ -385,6 +386,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 err_interrupts:
 	guc_disable_communication(guc);
 	gen9_disable_guc_interrupts(dev_priv);
+err_log_capture:
+	intel_guc_log_capture_load_err(guc);
 err_submission:
 	if (i915.enable_guc_submission)
 		i915_guc_submission_fini(dev_priv);
@@ -407,6 +410,13 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
 {
+	/*
+	 * The err load log exists if guc failed to load, so we need to check
+	 * for it before checking the module params as we might have updated
+	 * those to reflect the load failure.
+	 */
+	intel_guc_log_free_load_err(&dev_priv->guc);
+
 	if (!i915.enable_guc_loading)
 		return;
 
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 7618b71..05fa751 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -176,6 +176,9 @@ struct intel_guc_log {
 	u32 prev_overflow_count[GUC_MAX_LOG_BUFFER];
 	u32 total_overflow_count[GUC_MAX_LOG_BUFFER];
 	u32 flush_count[GUC_MAX_LOG_BUFFER];
+
+	/* Log snapshot if GuC errors during load */
+	struct drm_i915_gem_object *load_err;
 };
 
 struct intel_guc {
@@ -272,6 +275,8 @@ static inline void intel_guc_notify(struct intel_guc *guc)
 int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
 void i915_guc_log_register(struct drm_i915_private *dev_priv);
 void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
+void intel_guc_log_capture_load_err(struct intel_guc *guc);
+void intel_guc_log_free_load_err(struct intel_guc *guc);
 
 static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 {
-- 
1.9.1



More information about the Intel-gfx mailing list