[Intel-gfx] [PATCH v3 3/6] drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex

Sagar Arun Kamble sagar.a.kamble at intel.com
Wed Jan 24 10:52:28 UTC 2018



On 1/24/2018 3:48 PM, Chris Wilson wrote:
> Quoting Sagar Arun Kamble (2018-01-24 04:09:09)
>> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
>> index 8f2da30..35de889 100644
>> --- a/drivers/gpu/drm/i915/intel_guc_log.c
>> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
>> @@ -153,6 +153,8 @@ static int guc_log_relay_file_create(struct intel_guc *guc)
>>          if (!i915_modparams.guc_log_level)
>>                  return 0;
>>   
>> +       mutex_lock(&guc->log.runtime.relay_lock);
>> +
>>          /* For now create the log file in /sys/kernel/debug/dri/0 dir */
>>          log_dir = dev_priv->drm.primary->debugfs_root;
>>   
>> @@ -169,16 +171,26 @@ static int guc_log_relay_file_create(struct intel_guc *guc)
>>           */
>>          if (!log_dir) {
>>                  DRM_ERROR("Debugfs dir not available yet for GuC log file\n");
>> -               return -ENODEV;
>> +               ret = -ENODEV;
>> +               goto out_unlock;
>>          }
>>   
>>          ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir);
>>          if (ret < 0 && ret != -EEXIST) {
>>                  DRM_ERROR("Couldn't associate relay chan with file %d\n", ret);
>> -               return ret;
>> +               goto out_unlock;
>>          }
>>   
>> -       return 0;
>> +out_unlock:
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>> +       return ret;
>> +}
>> +
>> +static bool guc_log_has_relay(struct intel_guc *guc)
>> +{
>> +       lockdep_assert_held(&guc->log.runtime.relay_lock);
>> +
>> +       return guc->log.runtime.relay_chan != NULL;
>>   }
>>   
>>   static void guc_move_to_next_buf(struct intel_guc *guc)
>> @@ -188,6 +200,9 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
>>           */
>>          smp_wmb();
>>   
>> +       if (!guc_log_has_relay(guc))
>> +               return;
>> +
>>          /* All data has been written, so now move the offset of sub buffer. */
>>          relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size);
>>   
>> @@ -197,7 +212,7 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
>>   
>>   static void *guc_get_write_buffer(struct intel_guc *guc)
>>   {
>> -       if (!guc->log.runtime.relay_chan)
>> +       if (!guc_log_has_relay(guc))
>>                  return NULL;
>>   
>>          /* Just get the base address of a new sub buffer and copy data into it
>> @@ -265,6 +280,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
>>          /* Get the pointer to shared GuC log buffer */
>>          log_buf_state = src_data = guc->log.runtime.buf_addr;
>>   
>> +       mutex_lock(&guc->log.runtime.relay_lock);
>> +
>>          /* Get the pointer to local buffer to store the logs */
>>          log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc);
> Hmm. The locking here tells me that we are being careful in case the
> relay_chan disappears, but we don't handle the NULL pointer here.
>   
There is a check for log_buf_snapshot_state below in the for loop. But yes, we 
should return from here.
Will update.
>> @@ -344,6 +361,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
>>                  DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
>>                  guc->log.capture_miss_count++;
>>          }
>> +
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>>   }
>>   
>>   static void capture_logs_work(struct work_struct *work)
>> @@ -363,8 +382,6 @@ static int guc_log_runtime_create(struct intel_guc *guc)
>>   {
>>          struct drm_i915_private *dev_priv = guc_to_i915(guc);
>>          void *vaddr;
>> -       struct rchan *guc_log_relay_chan;
>> -       size_t n_subbufs, subbuf_size;
>>          int ret;
>>   
>>          lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> @@ -387,8 +404,44 @@ static int guc_log_runtime_create(struct intel_guc *guc)
>>   
>>          guc->log.runtime.buf_addr = vaddr;
>>   
>> +       return 0;
>> +}
>> +
>> +static void guc_log_runtime_destroy(struct intel_guc *guc)
>> +{
>> +       /*
>> +        * It's possible that the runtime stuff was never allocated because
>> +        * GuC log was disabled at the boot time.
>> +        **/
>> +       if (!guc_log_has_runtime(guc))
>> +               return;
>> +
>> +       i915_gem_object_unpin_map(guc->log.vma->obj);
>> +       guc->log.runtime.buf_addr = NULL;
>> +}
>> +
>> +void intel_guc_log_init_early(struct intel_guc *guc)
>> +{
>> +       mutex_init(&guc->log.runtime.relay_lock);
>> +       INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
>> +}
>> +
>> +int intel_guc_log_relay_create(struct intel_guc *guc)
>> +{
>> +       struct drm_i915_private *dev_priv = guc_to_i915(guc);
>> +       struct rchan *guc_log_relay_chan;
>> +       size_t n_subbufs, subbuf_size;
>> +       int ret;
>> +
>> +       if (!i915_modparams.guc_log_level)
>> +               return 0;
>> +
>> +       mutex_lock(&guc->log.runtime.relay_lock);
>> +
>> +       GEM_BUG_ON(guc_log_has_relay(guc));
>> +
>>           /* Keep the size of sub buffers same as shared log buffer */
>> -       subbuf_size = guc->log.vma->obj->base.size;
>> +       subbuf_size = GUC_LOG_SIZE;
>>   
>>          /* Store up to 8 snapshots, which is large enough to buffer sufficient
>>           * boot time logs and provides enough leeway to User, in terms of
>> @@ -407,33 +460,39 @@ static int guc_log_runtime_create(struct intel_guc *guc)
>>                  DRM_ERROR("Couldn't create relay chan for GuC logging\n");
>>   
>>                  ret = -ENOMEM;
>> -               goto err_vaddr;
>> +               goto err;
>>          }
>>   
>>          GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
>>          guc->log.runtime.relay_chan = guc_log_relay_chan;
>>   
>> -       INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>> +
>>          return 0;
>>   
>> -err_vaddr:
>> -       i915_gem_object_unpin_map(guc->log.vma->obj);
>> -       guc->log.runtime.buf_addr = NULL;
>> +err:
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>> +       /* logging will be off */
>> +       i915_modparams.guc_log_level = 0;
>>          return ret;
>>   }
>>   
>> -static void guc_log_runtime_destroy(struct intel_guc *guc)
>> +void intel_guc_log_relay_destroy(struct intel_guc *guc)
>>   {
>> +       mutex_lock(&guc->log.runtime.relay_lock);
>> +
>>          /*
>> -        * It's possible that the runtime stuff was never allocated because
>> +        * It's possible that the relay was never allocated because
>>           * GuC log was disabled at the boot time.
>>           */
>> -       if (!guc_log_has_runtime(guc))
>> -               return;
>> +       if (!guc_log_has_relay(guc))
>> +               goto out_unlock;
>>   
>>          relay_close(guc->log.runtime.relay_chan);
>> -       i915_gem_object_unpin_map(guc->log.vma->obj);
>> -       guc->log.runtime.buf_addr = NULL;
>> +       guc->log.runtime.relay_chan = NULL;
>> +
>> +out_unlock:
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>>   }
>>   
>>   static int guc_log_late_setup(struct intel_guc *guc)
>> @@ -441,17 +500,24 @@ static int guc_log_late_setup(struct intel_guc *guc)
>>          struct drm_i915_private *dev_priv = guc_to_i915(guc);
>>          int ret;
>>   
>> -       lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> -
>>          if (!guc_log_has_runtime(guc)) {
>>                  /*
>>                   * If log was disabled at boot time, then setup needed to handle
>>                   * log buffer flush interrupts would not have been done yet, so
>>                   * do that now.
>>                   */
>> -               ret = guc_log_runtime_create(guc);
>> +               ret = intel_guc_log_relay_create(guc);
>>                  if (ret)
>>                          goto err;
>> +
>> +               mutex_lock(&dev_priv->drm.struct_mutex);
>> +               intel_runtime_pm_get(dev_priv);
>> +               ret = guc_log_runtime_create(guc);
>> +               intel_runtime_pm_put(dev_priv);
>> +               mutex_unlock(&dev_priv->drm.struct_mutex);
>> +
>> +               if (ret)
>> +                       goto err_relay;
>>          }
>>   
>>          ret = guc_log_relay_file_create(guc);
>> @@ -461,7 +527,11 @@ static int guc_log_late_setup(struct intel_guc *guc)
>>          return 0;
>>   
>>   err_runtime:
>> +       mutex_lock(&dev_priv->drm.struct_mutex);
>>          guc_log_runtime_destroy(guc);
>> +       mutex_unlock(&dev_priv->drm.struct_mutex);
>> +err_relay:
>> +       intel_guc_log_relay_destroy(guc);
>>   err:
>>          /* logging will remain off */
>>          i915_modparams.guc_log_level = 0;
>> @@ -490,7 +560,11 @@ static void guc_flush_logs(struct intel_guc *guc)
>>                  return;
>>   
>>          /* First disable the interrupts, will be renabled afterwards */
>> +       mutex_lock(&dev_priv->drm.struct_mutex);
>> +       intel_runtime_pm_get(dev_priv);
>>          gen9_disable_guc_interrupts(dev_priv);
>> +       intel_runtime_pm_put(dev_priv);
>> +       mutex_unlock(&dev_priv->drm.struct_mutex);
>>   
>>          /* Before initiating the forceful flush, wait for any pending/ongoing
>>           * flush to complete otherwise forceful flush may not actually happen.
>> @@ -498,7 +572,9 @@ static void guc_flush_logs(struct intel_guc *guc)
>>          flush_work(&guc->log.runtime.flush_work);
>>   
>>          /* Ask GuC to update the log buffer state */
>> +       intel_runtime_pm_get(dev_priv);
>>          guc_log_flush(guc);
>> +       intel_runtime_pm_put(dev_priv);
>>   
>>          /* GuC would have updated log buffer by now, so capture it */
>>          guc_log_capture_logs(guc);
>> @@ -509,17 +585,10 @@ int intel_guc_log_create(struct intel_guc *guc)
>>          struct i915_vma *vma;
>>          unsigned long offset;
>>          u32 flags;
>> -       u32 size;
>>          int ret;
>>   
>>          GEM_BUG_ON(guc->log.vma);
>>   
>> -       /* The first page is to save log buffer state. Allocate one
>> -        * extra page for others in case for overlap */
>> -       size = (1 + GUC_LOG_DPC_PAGES + 1 +
>> -               GUC_LOG_ISR_PAGES + 1 +
>> -               GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
>> -
>>          /* We require SSE 4.1 for fast reads from the GuC log buffer and
>>           * it should be present on the chipsets supporting GuC based
>>           * submisssions.
>> @@ -529,7 +598,7 @@ int intel_guc_log_create(struct intel_guc *guc)
>>                  goto err;
>>          }
>>   
>> -       vma = intel_guc_allocate_vma(guc, size);
>> +       vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE);
>>          if (IS_ERR(vma)) {
>>                  ret = PTR_ERR(vma);
>>                  goto err;
>> @@ -584,7 +653,15 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
>>                  return 0;
>>   
>>          verbosity = enable_logging ? control_val - 1 : 0;
>> +
>> +       ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
>> +       if (ret)
>> +               return ret;
>> +       intel_runtime_pm_get(dev_priv);
>>          ret = guc_log_control(guc, enable_logging, verbosity);
>> +       intel_runtime_pm_put(dev_priv);
>> +       mutex_unlock(&dev_priv->drm.struct_mutex);
>> +
>>          if (ret < 0) {
>>                  DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret);
>>                  return ret;
>> @@ -605,7 +682,11 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
>>                  }
>>   
>>                  /* GuC logging is currently the only user of Guc2Host interrupts */
>> +               mutex_lock(&dev_priv->drm.struct_mutex);
>> +               intel_runtime_pm_get(dev_priv);
>>                  gen9_enable_guc_interrupts(dev_priv);
>> +               intel_runtime_pm_put(dev_priv);
>> +               mutex_unlock(&dev_priv->drm.struct_mutex);
>>          } else {
>>                  /*
>>                   * Once logging is disabled, GuC won't generate logs & send an
>> @@ -627,13 +708,13 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv)
>>          if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level)
>>                  return;
>>   
>> -       mutex_lock(&dev_priv->drm.struct_mutex);
>>          guc_log_late_setup(&dev_priv->guc);
>> -       mutex_unlock(&dev_priv->drm.struct_mutex);
>>   }
>>   
>>   void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
>>   {
>> +       struct intel_guc *guc = &dev_priv->guc;
>> +
>>          if (!USES_GUC_SUBMISSION(dev_priv))
>>                  return;
>>   
>> @@ -643,6 +724,8 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
>>          gen9_disable_guc_interrupts(dev_priv);
>>          intel_runtime_pm_put(dev_priv);
>>   
>> -       guc_log_runtime_destroy(&dev_priv->guc);
>> +       guc_log_runtime_destroy(guc);
>>          mutex_unlock(&dev_priv->drm.struct_mutex);
>> +
>> +       intel_guc_log_relay_destroy(guc);
>>   }
> This looks all reasonably well described by the addition of the
> relay_lock and the interactions look fine. The only mistake I could see,
> in the story told by this patch, was the runtime checking.
Could you please elaborate on this?
> -Chris

-- 
Thanks,
Sagar



More information about the Intel-gfx mailing list