[Intel-gfx] [PATCH v2 1/4] drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex
Michal Wajdeczko
michal.wajdeczko at intel.com
Tue Jan 23 16:44:33 UTC 2018
On Tue, 23 Jan 2018 16:42:17 +0100, Sagar Arun Kamble
<sagar.a.kamble at intel.com> wrote:
> This patch fixes lockdep issue due to circular locking dependency of
> struct_mutex, i_mutex_key, mmap_sem, relay_channels_mutex.
> For GuC log relay channel we create debugfs file that requires
> i_mutex_key
> lock and we are doing that under struct_mutex. So we introduced newer
> dependency as:
> &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
> However, there is dependency from mmap_sem to struct_mutex. Hence we
> separate the relay create/destroy operation from under struct_mutex.
>
... <snip>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 80dc679..b45be0d 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2467,7 +2467,6 @@ static int i915_guc_log_control_get(void *data,
> u64 *val)
> static int i915_guc_log_control_set(void *data, u64 val)
> {
> struct drm_i915_private *dev_priv = data;
> - int ret;
> if (!HAS_GUC(dev_priv))
> return -ENODEV;
> @@ -2475,16 +2474,7 @@ static int i915_guc_log_control_set(void *data,
> u64 val)
> if (!dev_priv->guc.log.vma)
> return -EINVAL;
> - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
> - if (ret)
> - return ret;
> -
> - intel_runtime_pm_get(dev_priv);
> - ret = i915_guc_log_control(dev_priv, val);
> - intel_runtime_pm_put(dev_priv);
> -
> - mutex_unlock(&dev_priv->drm.struct_mutex);
> - return ret;
> + return i915_guc_log_control(dev_priv, val);
I hope that one day we change signature of this function to
int intel_guc_log_control(struct intel_guc *guc, u64 control);
... <snip>
> diff --git a/drivers/gpu/drm/i915/intel_guc.c
> b/drivers/gpu/drm/i915/intel_guc.c
> index ea30e7c..cab3c98 100644
> --- a/drivers/gpu/drm/i915/intel_guc.c
> +++ b/drivers/gpu/drm/i915/intel_guc.c
> @@ -66,6 +66,7 @@ void intel_guc_init_early(struct intel_guc *guc)
> intel_guc_ct_init_early(&guc->ct);
> mutex_init(&guc->send_mutex);
> + mutex_init(&guc->log.runtime.relay_lock);
Maybe this can be done in intel_guc_loc.c (or .h) as
void intel_guc_log_init_early() { }
> guc->send = intel_guc_send_nop;
> guc->notify = gen8_guc_raise_irq;
> }
> @@ -87,8 +88,10 @@ int intel_guc_init_wq(struct intel_guc *guc)
> */
> guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
> WQ_HIGHPRI | WQ_FREEZABLE);
> - if (!guc->log.runtime.flush_wq)
> + if (!guc->log.runtime.flush_wq) {
> + DRM_ERROR("Couldn't allocate workqueue for GuC log\n");
> return -ENOMEM;
> + }
> /*
> * Even though both sending GuC action, and adding a new workitem to
> @@ -109,6 +112,8 @@ int intel_guc_init_wq(struct intel_guc *guc)
> WQ_HIGHPRI);
> if (!guc->preempt_wq) {
> destroy_workqueue(guc->log.runtime.flush_wq);
> + DRM_ERROR("Couldn't allocate workqueue for GuC "
> + "preemption\n");
> return -ENOMEM;
> }
> }
... <snip>
> static void capture_logs_work(struct work_struct *work)
> @@ -363,12 +377,12 @@ static int guc_log_runtime_create(struct intel_guc
> *guc)
> {
> struct drm_i915_private *dev_priv = guc_to_i915(guc);
> void *vaddr;
> - struct rchan *guc_log_relay_chan;
> - size_t n_subbufs, subbuf_size;
> int ret;
> lockdep_assert_held(&dev_priv->drm.struct_mutex);
> + GEM_BUG_ON(!guc_log_has_relay(guc));
> +
Do we need this line?
> GEM_BUG_ON(guc_log_has_runtime(guc));
> ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true);
> @@ -387,8 +401,40 @@ static int guc_log_runtime_create(struct intel_guc
> *guc)
> guc->log.runtime.buf_addr = vaddr;
> + INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
I'm not sure about other BKMs, but I prefer to not delay such
initialization
and perform them in functions like init_early
> +
> + return 0;
> +}
> +
> +static void guc_log_runtime_destroy(struct intel_guc *guc)
> +{
> + /*
> + * It's possible that the runtime stuff was never allocated because
> + * GuC log was disabled at the boot time.
> + **/
Is this correct comment style ?
> + if (!guc_log_has_runtime(guc))
> + return;
> +
> + i915_gem_object_unpin_map(guc->log.vma->obj);
> + guc->log.runtime.buf_addr = NULL;
> +}
> +
... <snip>
> @@ -605,7 +681,11 @@ int i915_guc_log_control(struct drm_i915_private
> *dev_priv, u64 control_val)
> }
> /* GuC logging is currently the only user of Guc2Host interrupts */
> + mutex_lock(&dev_priv->drm.struct_mutex);
> + intel_runtime_pm_get(dev_priv);
> gen9_enable_guc_interrupts(dev_priv);
> + intel_runtime_pm_put(dev_priv);
> + mutex_unlock(&dev_priv->drm.struct_mutex);
Maybe pm_get/lock/xxx/unlock/pm_put would be better order ?
... <snip>
> diff --git a/drivers/gpu/drm/i915/intel_uc.c
> b/drivers/gpu/drm/i915/intel_uc.c
> index f78a17b..b119e94 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -236,28 +236,49 @@ static void guc_disable_communication(struct
> intel_guc *guc)
> guc->send = intel_guc_send_nop;
> }
> -int intel_uc_init_wq(struct drm_i915_private *dev_priv)
> +int intel_uc_init_misc(struct drm_i915_private *dev_priv)
> {
> + struct intel_guc *guc = &dev_priv->guc;
> int ret;
> if (!USES_GUC(dev_priv))
> return 0;
> - ret = intel_guc_init_wq(&dev_priv->guc);
> + ret = intel_guc_init_wq(guc);
> if (ret) {
> DRM_ERROR("Couldn't allocate workqueues for GuC\n");
> - return ret;
> + goto err;
> + }
> +
Hmm, maybe below code
> + mutex_lock(&guc->log.runtime.relay_lock);
> + ret = intel_guc_log_relay_create(guc);
> + mutex_unlock(&guc->log.runtime.relay_lock);
> +
> + if (ret) {
> + DRM_ERROR("Couldn't allocate relay for GuC log\n");
> + goto err_relay;
can be done in intel_guc_log.c as kind of init function ?
> }
> return 0;
> +
> +err_relay:
> + intel_guc_fini_wq(guc);
> +err:
> + return ret;
> }
> -void intel_uc_fini_wq(struct drm_i915_private *dev_priv)
> +void intel_uc_fini_misc(struct drm_i915_private *dev_priv)
> {
> + struct intel_guc *guc = &dev_priv->guc;
> +
> if (!USES_GUC(dev_priv))
> return;
> - intel_guc_fini_wq(&dev_priv->guc);
> + intel_guc_fini_wq(guc);
> +
> + mutex_lock(&guc->log.runtime.relay_lock);
> + intel_guc_log_relay_destroy(guc);
> + mutex_unlock(&guc->log.runtime.relay_lock);
Maybe lock/unlock can be done inside intel_guc_log_relay_destroy ?
same with intel_guc_log_relay_create.
> }
> int intel_uc_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_uc.h
> b/drivers/gpu/drm/i915/intel_uc.h
> index 8a72497..f2984e0 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -33,8 +33,8 @@
> void intel_uc_init_mmio(struct drm_i915_private *dev_priv);
> void intel_uc_init_fw(struct drm_i915_private *dev_priv);
> void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
> -int intel_uc_init_wq(struct drm_i915_private *dev_priv);
> -void intel_uc_fini_wq(struct drm_i915_private *dev_priv);
> +int intel_uc_init_misc(struct drm_i915_private *dev_priv);
> +void intel_uc_fini_misc(struct drm_i915_private *dev_priv);
> int intel_uc_init_hw(struct drm_i915_private *dev_priv);
> void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
> int intel_uc_init(struct drm_i915_private *dev_priv);
More information about the Intel-gfx
mailing list