[Intel-gfx] [PATCH] drm/i915/perf: fix perf stream opening lock
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu Mar 1 10:30:30 UTC 2018
On 28/02/18 18:10, Matthew Auld wrote:
> On 28 February 2018 at 11:45, Lionel Landwerlin
> <lionel.g.landwerlin at intel.com> wrote:
>> We're seeing on CI that some contexts don't have the programmed OA
>> period timer that directs the OA unit on how often to write reports.
>>
>> The issue is that we're not holding the drm lock from when we edit the
>> context images down to when we set the exclusive_stream variable. This
>> leaves a window for the deferred context allocation to call
>> i915_oa_init_reg_state() that will not program the expected OA timer
>> value, because we haven't set the exclusive_stream yet.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> Fixes: 701f8231a2f ("drm/i915/perf: prune OA configs")
>> Cc: <stable at vger.kernel.org> # v4.14+
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102254
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103715
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103755
>> ---
>> drivers/gpu/drm/i915/i915_perf.c | 41 +++++++++++++++++++---------------------
>> 1 file changed, 19 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 2741b1bc7095..7016abfc8ba9 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -1757,21 +1757,16 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
>> */
>> static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>> const struct i915_oa_config *oa_config,
>> - bool interruptible)
>> + bool need_lock)
>> {
>> struct i915_gem_context *ctx;
>> int ret;
>> unsigned int wait_flags = I915_WAIT_LOCKED;
>>
>> - if (interruptible) {
>> - ret = i915_mutex_lock_interruptible(&dev_priv->drm);
>> - if (ret)
>> - return ret;
>> -
>> - wait_flags |= I915_WAIT_INTERRUPTIBLE;
>> - } else {
>> + if (need_lock)
>> mutex_lock(&dev_priv->drm.struct_mutex);
>> - }
>> +
>> + lockdep_assert_held(&dev_priv->drm.struct_mutex);
>>
>> /* Switch away from any user context. */
>> ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
>> @@ -1819,7 +1814,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>> }
>>
>> out:
>> - mutex_unlock(&dev_priv->drm.struct_mutex);
>> + if (need_lock)
>> + mutex_unlock(&dev_priv->drm.struct_mutex);
>>
>> return ret;
>> }
>> @@ -1863,7 +1859,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>> * to make sure all slices/subslices are ON before writing to NOA
>> * registers.
>> */
>> - ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
>> + ret = gen8_configure_all_contexts(dev_priv, oa_config, false);
>> if (ret)
>> return ret;
>>
>> @@ -1878,7 +1874,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>> static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
>> {
>> /* Reset all contexts' slices/subslices configurations. */
>> - gen8_configure_all_contexts(dev_priv, NULL, false);
>> + gen8_configure_all_contexts(dev_priv, NULL, true);
> Maybe move the ops.disable_metric_set() to also be within the lock, so
> need_lock=false, then we can get rid of need_lock?
Yeah, sounds like a nice cleanup.
>
>> I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
>> ~GT_NOA_ENABLE));
>> @@ -1888,7 +1884,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
>> static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
>> {
>> /* Reset all contexts' slices/subslices configurations. */
>> - gen8_configure_all_contexts(dev_priv, NULL, false);
>> + gen8_configure_all_contexts(dev_priv, NULL, true);
>>
>> /* Make sure we disable noa to save power. */
>> I915_WRITE(RPM_CONFIG1,
>> @@ -2138,13 +2134,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>> if (ret)
>> goto err_oa_buf_alloc;
>>
>> - ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
>> - stream->oa_config);
>> - if (ret)
>> - goto err_enable;
>> -
>> - stream->ops = &i915_oa_stream_ops;
>> -
>> /* Lock device for exclusive_stream access late because
>> * enable_metric_set() might lock as well on gen8+.
>> */
> Ok, we can get rid of this comment now.
Thanks, removing.
>
>> @@ -2152,16 +2141,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>> if (ret)
>> goto err_lock;
>>
>> + ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
>> + stream->oa_config);
>> + if (ret)
>> + goto err_enable;
>> +
>> + stream->ops = &i915_oa_stream_ops;
>> +
>> dev_priv->perf.oa.exclusive_stream = stream;
>>
>> mutex_unlock(&dev_priv->drm.struct_mutex);
>>
>> return 0;
>>
>> -err_lock:
>> +err_enable:
>> + mutex_unlock(&dev_priv->drm.struct_mutex);
>> dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
> We can drop the disable_metric_set here; no need to disable it if we
> never enabled it.
Well in this case we did, we edited all the context images.
Better cleanup out changes.
>
> Nice catch,
> Reviewed-by: Matthew Auld <matthew.auld at intel.com>
>
More information about the Intel-gfx
mailing list