[Intel-gfx] [PATCH] drm/i915/perf: fix perf stream opening lock

Lionel Landwerlin lionel.g.landwerlin at intel.com
Thu Mar 1 10:30:30 UTC 2018


On 28/02/18 18:10, Matthew Auld wrote:
> On 28 February 2018 at 11:45, Lionel Landwerlin
> <lionel.g.landwerlin at intel.com> wrote:
>> We're seeing on CI that some contexts don't have the programmed OA
>> period timer that directs the OA unit on how often to write reports.
>>
>> The issue is that we're not holding the drm lock from when we edit the
>> context images down to when we set the exclusive_stream variable. This
>> leaves a window for the deferred context allocation to call
>> i915_oa_init_reg_state() that will not program the expected OA timer
>> value, because we haven't set the exclusive_stream yet.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> Fixes: 701f8231a2f ("drm/i915/perf: prune OA configs")
>> Cc: <stable at vger.kernel.org> # v4.14+
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102254
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103715
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103755
>> ---
>>   drivers/gpu/drm/i915/i915_perf.c | 41 +++++++++++++++++++---------------------
>>   1 file changed, 19 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 2741b1bc7095..7016abfc8ba9 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -1757,21 +1757,16 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
>>    */
>>   static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>>                                         const struct i915_oa_config *oa_config,
>> -                                      bool interruptible)
>> +                                      bool need_lock)
>>   {
>>          struct i915_gem_context *ctx;
>>          int ret;
>>          unsigned int wait_flags = I915_WAIT_LOCKED;
>>
>> -       if (interruptible) {
>> -               ret = i915_mutex_lock_interruptible(&dev_priv->drm);
>> -               if (ret)
>> -                       return ret;
>> -
>> -               wait_flags |= I915_WAIT_INTERRUPTIBLE;
>> -       } else {
>> +       if (need_lock)
>>                  mutex_lock(&dev_priv->drm.struct_mutex);
>> -       }
>> +
>> +       lockdep_assert_held(&dev_priv->drm.struct_mutex);
>>
>>          /* Switch away from any user context. */
>>          ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
>> @@ -1819,7 +1814,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>>          }
>>
>>    out:
>> -       mutex_unlock(&dev_priv->drm.struct_mutex);
>> +       if (need_lock)
>> +               mutex_unlock(&dev_priv->drm.struct_mutex);
>>
>>          return ret;
>>   }
>> @@ -1863,7 +1859,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>>           * to make sure all slices/subslices are ON before writing to NOA
>>           * registers.
>>           */
>> -       ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
>> +       ret = gen8_configure_all_contexts(dev_priv, oa_config, false);
>>          if (ret)
>>                  return ret;
>>
>> @@ -1878,7 +1874,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>>   static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
>>   {
>>          /* Reset all contexts' slices/subslices configurations. */
>> -       gen8_configure_all_contexts(dev_priv, NULL, false);
>> +       gen8_configure_all_contexts(dev_priv, NULL, true);
> Maybe move the ops.disable_metric_set() to also be within the lock, so
> need_lock=false, then we can get rid of need_lock?

Yeah, sounds like a nice cleanup.

>
>>          I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
>>                                        ~GT_NOA_ENABLE));
>> @@ -1888,7 +1884,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
>>   static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
>>   {
>>          /* Reset all contexts' slices/subslices configurations. */
>> -       gen8_configure_all_contexts(dev_priv, NULL, false);
>> +       gen8_configure_all_contexts(dev_priv, NULL, true);
>>
>>          /* Make sure we disable noa to save power. */
>>          I915_WRITE(RPM_CONFIG1,
>> @@ -2138,13 +2134,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>          if (ret)
>>                  goto err_oa_buf_alloc;
>>
>> -       ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
>> -                                                     stream->oa_config);
>> -       if (ret)
>> -               goto err_enable;
>> -
>> -       stream->ops = &i915_oa_stream_ops;
>> -
>>          /* Lock device for exclusive_stream access late because
>>           * enable_metric_set() might lock as well on gen8+.
>>           */
> Ok, we can get rid of this comment now.

Thanks, removing.

>
>> @@ -2152,16 +2141,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>          if (ret)
>>                  goto err_lock;
>>
>> +       ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
>> +                                                     stream->oa_config);
>> +       if (ret)
>> +               goto err_enable;
>> +
>> +       stream->ops = &i915_oa_stream_ops;
>> +
>>          dev_priv->perf.oa.exclusive_stream = stream;
>>
>>          mutex_unlock(&dev_priv->drm.struct_mutex);
>>
>>          return 0;
>>
>> -err_lock:
>> +err_enable:
>> +       mutex_unlock(&dev_priv->drm.struct_mutex);
>>          dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
> We can drop the disable_metric_set here; no need to disable it if we
> never enabled it.

Well in this case we did, we edited all the context images.
Better cleanup out changes.

>
> Nice catch,
> Reviewed-by: Matthew Auld <matthew.auld at intel.com>
>



More information about the Intel-gfx mailing list