[Intel-gfx] [PATCH v2 1/4] drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex

Michal Wajdeczko michal.wajdeczko at intel.com
Tue Jan 23 16:44:33 UTC 2018


On Tue, 23 Jan 2018 16:42:17 +0100, Sagar Arun Kamble  
<sagar.a.kamble at intel.com> wrote:

> This patch fixes lockdep issue due to circular locking dependency of
> struct_mutex, i_mutex_key, mmap_sem, relay_channels_mutex.
> For GuC log relay channel we create debugfs file that requires  
> i_mutex_key
> lock and we are doing that under struct_mutex. So we introduced newer
> dependency as:
>     &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
> However, there is dependency from mmap_sem to struct_mutex. Hence we
> separate the relay create/destroy operation from under struct_mutex.
>

... <snip>

> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c  
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 80dc679..b45be0d 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2467,7 +2467,6 @@ static int i915_guc_log_control_get(void *data,  
> u64 *val)
>  static int i915_guc_log_control_set(void *data, u64 val)
>  {
>  	struct drm_i915_private *dev_priv = data;
> -	int ret;
> 	if (!HAS_GUC(dev_priv))
>  		return -ENODEV;
> @@ -2475,16 +2474,7 @@ static int i915_guc_log_control_set(void *data,  
> u64 val)
>  	if (!dev_priv->guc.log.vma)
>  		return -EINVAL;
> -	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
> -	if (ret)
> -		return ret;
> -
> -	intel_runtime_pm_get(dev_priv);
> -	ret = i915_guc_log_control(dev_priv, val);
> -	intel_runtime_pm_put(dev_priv);
> -
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
> -	return ret;
> +	return i915_guc_log_control(dev_priv, val);

I hope that one day we change signature of this function to

int intel_guc_log_control(struct intel_guc *guc, u64 control);

... <snip>

> diff --git a/drivers/gpu/drm/i915/intel_guc.c  
> b/drivers/gpu/drm/i915/intel_guc.c
> index ea30e7c..cab3c98 100644
> --- a/drivers/gpu/drm/i915/intel_guc.c
> +++ b/drivers/gpu/drm/i915/intel_guc.c
> @@ -66,6 +66,7 @@ void intel_guc_init_early(struct intel_guc *guc)
>  	intel_guc_ct_init_early(&guc->ct);
> 	mutex_init(&guc->send_mutex);
> +	mutex_init(&guc->log.runtime.relay_lock);

Maybe this can be done in intel_guc_loc.c (or .h) as

	void intel_guc_log_init_early() { }

>  	guc->send = intel_guc_send_nop;
>  	guc->notify = gen8_guc_raise_irq;
>  }
> @@ -87,8 +88,10 @@ int intel_guc_init_wq(struct intel_guc *guc)
>  	 */
>  	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
>  						WQ_HIGHPRI | WQ_FREEZABLE);
> -	if (!guc->log.runtime.flush_wq)
> +	if (!guc->log.runtime.flush_wq) {
> +		DRM_ERROR("Couldn't allocate workqueue for GuC log\n");
>  		return -ENOMEM;
> +	}
> 	/*
>  	 * Even though both sending GuC action, and adding a new workitem to
> @@ -109,6 +112,8 @@ int intel_guc_init_wq(struct intel_guc *guc)
>  							  WQ_HIGHPRI);
>  		if (!guc->preempt_wq) {
>  			destroy_workqueue(guc->log.runtime.flush_wq);
> +			DRM_ERROR("Couldn't allocate workqueue for GuC "
> +				  "preemption\n");
>  			return -ENOMEM;
>  		}
>  	}

... <snip>


> static void capture_logs_work(struct work_struct *work)
> @@ -363,12 +377,12 @@ static int guc_log_runtime_create(struct intel_guc  
> *guc)
>  {
>  	struct drm_i915_private *dev_priv = guc_to_i915(guc);
>  	void *vaddr;
> -	struct rchan *guc_log_relay_chan;
> -	size_t n_subbufs, subbuf_size;
>  	int ret;
> 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +	GEM_BUG_ON(!guc_log_has_relay(guc));
> +

Do we need this line?

>  	GEM_BUG_ON(guc_log_has_runtime(guc));
> 	ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true);
> @@ -387,8 +401,40 @@ static int guc_log_runtime_create(struct intel_guc  
> *guc)
> 	guc->log.runtime.buf_addr = vaddr;
> +	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);

I'm not sure about other BKMs, but I prefer to not delay such  
initialization
and perform them in functions like init_early

> +
> +	return 0;
> +}
> +
> +static void guc_log_runtime_destroy(struct intel_guc *guc)
> +{
> +	/*
> +	 * It's possible that the runtime stuff was never allocated because
> +	 * GuC log was disabled at the boot time.
> +	 **/

Is this correct comment style ?

> +	if (!guc_log_has_runtime(guc))
> +		return;
> +
> +	i915_gem_object_unpin_map(guc->log.vma->obj);
> +	guc->log.runtime.buf_addr = NULL;
> +}
> +

... <snip>

> @@ -605,7 +681,11 @@ int i915_guc_log_control(struct drm_i915_private  
> *dev_priv, u64 control_val)
>  		}
> 		/* GuC logging is currently the only user of Guc2Host interrupts */
> +		mutex_lock(&dev_priv->drm.struct_mutex);
> +		intel_runtime_pm_get(dev_priv);
>  		gen9_enable_guc_interrupts(dev_priv);
> +		intel_runtime_pm_put(dev_priv);
> +		mutex_unlock(&dev_priv->drm.struct_mutex);

Maybe pm_get/lock/xxx/unlock/pm_put would be better order ?

... <snip>

> diff --git a/drivers/gpu/drm/i915/intel_uc.c  
> b/drivers/gpu/drm/i915/intel_uc.c
> index f78a17b..b119e94 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -236,28 +236,49 @@ static void guc_disable_communication(struct  
> intel_guc *guc)
>  	guc->send = intel_guc_send_nop;
>  }
> -int intel_uc_init_wq(struct drm_i915_private *dev_priv)
> +int intel_uc_init_misc(struct drm_i915_private *dev_priv)
>  {
> +	struct intel_guc *guc = &dev_priv->guc;
>  	int ret;
> 	if (!USES_GUC(dev_priv))
>  		return 0;
> -	ret = intel_guc_init_wq(&dev_priv->guc);
> +	ret = intel_guc_init_wq(guc);
>  	if (ret) {
>  		DRM_ERROR("Couldn't allocate workqueues for GuC\n");
> -		return ret;
> +		goto err;
> +	}
> +

Hmm, maybe below code

> +	mutex_lock(&guc->log.runtime.relay_lock);
> +	ret = intel_guc_log_relay_create(guc);
> +	mutex_unlock(&guc->log.runtime.relay_lock);
> +
> +	if (ret) {
> +		DRM_ERROR("Couldn't allocate relay for GuC log\n");
> +		goto err_relay;

can be done in intel_guc_log.c as kind of init function ?

>  	}
> 	return 0;
> +
> +err_relay:
> +	intel_guc_fini_wq(guc);
> +err:
> +	return ret;
>  }
> -void intel_uc_fini_wq(struct drm_i915_private *dev_priv)
> +void intel_uc_fini_misc(struct drm_i915_private *dev_priv)
>  {
> +	struct intel_guc *guc = &dev_priv->guc;
> +
>  	if (!USES_GUC(dev_priv))
>  		return;
> -	intel_guc_fini_wq(&dev_priv->guc);
> +	intel_guc_fini_wq(guc);
> +
> +	mutex_lock(&guc->log.runtime.relay_lock);
> +	intel_guc_log_relay_destroy(guc);
> +	mutex_unlock(&guc->log.runtime.relay_lock);

Maybe lock/unlock can be done inside intel_guc_log_relay_destroy ?
same with intel_guc_log_relay_create.

>  }
> int intel_uc_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_uc.h  
> b/drivers/gpu/drm/i915/intel_uc.h
> index 8a72497..f2984e0 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -33,8 +33,8 @@
>  void intel_uc_init_mmio(struct drm_i915_private *dev_priv);
>  void intel_uc_init_fw(struct drm_i915_private *dev_priv);
>  void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
> -int intel_uc_init_wq(struct drm_i915_private *dev_priv);
> -void intel_uc_fini_wq(struct drm_i915_private *dev_priv);
> +int intel_uc_init_misc(struct drm_i915_private *dev_priv);
> +void intel_uc_fini_misc(struct drm_i915_private *dev_priv);
>  int intel_uc_init_hw(struct drm_i915_private *dev_priv);
>  void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
>  int intel_uc_init(struct drm_i915_private *dev_priv);


More information about the Intel-gfx mailing list