[Intel-gfx] [PATCH v3] drm/i915/guc: capture GuC logs if FW fails to load

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Wed May 10 23:28:35 UTC 2017



On 05/05/17 16:23, Daniele Ceraolo Spurio wrote:
> We're currently deleting the GuC logs if the FW fails to load, but those
> are still useful to understand why the loading failed. Keeping the
> object around allows us to access them after driver load is completed.
>
> v2: keep the object around instead of using kernel memory (Chris)
>     don't store the logs in the gpu_error struct (Chris)
>     add a check on guc_log_level to avoid snapshotting empty logs
>
> v3: use separate debugfs for error log (Chris)
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Oscar Mateo <oscar.mateo at intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---

Chris gave his Reviewed-by for this patch on IRC. I'm going to wait for
Oscar's GuC stage pool dump patch to be merged first, to avoid conflicts
in i915_debugfs.c, and will then rebase and re-send.

Daniele

>  drivers/gpu/drm/i915/i915_debugfs.c  | 35 ++++++++++++++++++++++-------------
>  drivers/gpu/drm/i915/i915_drv.c      |  3 +++
>  drivers/gpu/drm/i915/intel_guc_log.c | 17 +++++++++++++++++
>  drivers/gpu/drm/i915/intel_uc.c      |  7 +++++--
>  drivers/gpu/drm/i915/intel_uc.h      |  5 +++++
>  5 files changed, 52 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 870c470..4d39e08d3 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2542,27 +2542,35 @@ static int i915_guc_info(struct seq_file *m, void *data)
>
>  static int i915_guc_log_dump(struct seq_file *m, void *data)
>  {
> -	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	struct drm_info_node *node = m->private;
> +	struct drm_i915_private *dev_priv = node_to_i915(node);
> +	bool dump_err_log = !!node->info_ent->data;
>  	struct drm_i915_gem_object *obj;
> -	int i = 0, pg;
> +	u32 *log;
> +	int i = 0;
>
> -	if (!dev_priv->guc.log.vma)
> +	if (!dump_err_log && dev_priv->guc.log.vma)
> +		obj = dev_priv->guc.log.vma->obj;
> +	else if (dump_err_log && dev_priv->guc.err_load_log)
> +		obj = dev_priv->guc.err_load_log;
> +	else
>  		return 0;
>
> -	obj = dev_priv->guc.log.vma->obj;
> -	for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
> -		u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
> -
> -		for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
> -			seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
> -				   *(log + i), *(log + i + 1),
> -				   *(log + i + 2), *(log + i + 3));
> -
> -		kunmap_atomic(log);
> +	log = i915_gem_object_pin_map(obj, I915_MAP_WC);
> +	if (IS_ERR(log)) {
> +		DRM_ERROR("Failed to pin guc_log object\n");
> +		return PTR_ERR(log);
>  	}
>
> +	for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
> +		seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
> +			   *(log + i), *(log + i + 1),
> +			   *(log + i + 2), *(log + i + 3));
> +
>  	seq_putc(m, '\n');
>
> +	i915_gem_object_unpin_map(obj);
> +
>  	return 0;
>  }
>
> @@ -4774,6 +4782,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file)
>  	{"i915_guc_info", i915_guc_info, 0},
>  	{"i915_guc_load_status", i915_guc_load_status_info, 0},
>  	{"i915_guc_log_dump", i915_guc_log_dump, 0},
> +	{"i915_guc_err_load_log_dump", i915_guc_log_dump, 0, (void *)1},
>  	{"i915_huc_load_status", i915_huc_load_status_info, 0},
>  	{"i915_frequency_info", i915_frequency_info, 0},
>  	{"i915_hangcheck_info", i915_hangcheck_info, 0},
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 452c265..d8c82ac 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1354,6 +1354,9 @@ void i915_driver_unload(struct drm_device *dev)
>  	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
>  	i915_reset_error_state(dev_priv);
>
> +	/* release GuC error log (if any) */
> +	i915_guc_load_error_log_free(&dev_priv->guc);
> +
>  	/* Flush any outstanding unpin_work. */
>  	drain_workqueue(dev_priv->wq);
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
> index 16d3b87..691da42 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
>  	guc_log_runtime_destroy(&dev_priv->guc);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  }
> +
> +void i915_guc_load_error_log_capture(struct intel_guc *guc)
> +{
> +	if (!guc->log.vma || i915.guc_log_level < 0)
> +		return;
> +
> +	if (!guc->err_load_log)
> +		guc->err_load_log = i915_gem_object_get(guc->log.vma->obj);
> +
> +	return;
> +}
> +
> +void i915_guc_load_error_log_free(struct intel_guc *guc)
> +{
> +	if (guc->err_load_log)
> +		i915_gem_object_put(guc->err_load_log);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 7fd75ca..d66ffab 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -274,6 +274,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>
>  	guc_disable_communication(guc);
>  	gen9_reset_guc_interrupts(dev_priv);
> +	i915_guc_load_error_log_free(guc);
>
>  	/* We need to notify the guc whenever we change the GGTT */
>  	i915_ggtt_enable_guc(dev_priv);
> @@ -320,11 +321,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>
>  	/* Did we succeded or run out of retries? */
>  	if (ret)
> -		goto err_submission;
> +		goto err_log_capture;
>
>  	ret = guc_enable_communication(guc);
>  	if (ret)
> -		goto err_submission;
> +		goto err_log_capture;
>
>  	intel_guc_auth_huc(dev_priv);
>  	if (i915.enable_guc_submission) {
> @@ -350,6 +351,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>  err_interrupts:
>  	guc_disable_communication(guc);
>  	gen9_disable_guc_interrupts(dev_priv);
> +err_log_capture:
> +	i915_guc_load_error_log_capture(guc);
>  err_submission:
>  	if (i915.enable_guc_submission)
>  		i915_guc_submission_fini(dev_priv);
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index 1e0eecd..c46e1ea 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -210,6 +210,9 @@ struct intel_guc {
>
>  	/* GuC's FW specific send function */
>  	int (*send)(struct intel_guc *guc, const u32 *data, u32 len);
> +
> +	/* Log snapshot if GuC errors during load */
> +	struct drm_i915_gem_object *err_load_log;
>  };
>
>  struct intel_huc {
> @@ -256,6 +259,8 @@ static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 l
>  int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
>  void i915_guc_log_register(struct drm_i915_private *dev_priv);
>  void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
> +void i915_guc_load_error_log_capture(struct intel_guc *guc);
> +void i915_guc_load_error_log_free(struct intel_guc *guc);
>
>  static inline u32 guc_ggtt_offset(struct i915_vma *vma)
>  {
>


More information about the Intel-gfx mailing list