[Intel-gfx] [PATCH] drm/i915/perf: fix perf stream opening lock
Matthew Auld
matthew.william.auld at gmail.com
Wed Feb 28 18:10:50 UTC 2018
On 28 February 2018 at 11:45, Lionel Landwerlin
<lionel.g.landwerlin at intel.com> wrote:
> We're seeing on CI that some contexts don't have the programmed OA
> period timer that directs the OA unit on how often to write reports.
>
> The issue is that we're not holding the drm lock from when we edit the
> context images down to when we set the exclusive_stream variable. This
> leaves a window for the deferred context allocation to call
> i915_oa_init_reg_state() that will not program the expected OA timer
> value, because we haven't set the exclusive_stream yet.
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Fixes: 701f8231a2f ("drm/i915/perf: prune OA configs")
> Cc: <stable at vger.kernel.org> # v4.14+
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102254
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103715
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103755
> ---
> drivers/gpu/drm/i915/i915_perf.c | 41 +++++++++++++++++++---------------------
> 1 file changed, 19 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 2741b1bc7095..7016abfc8ba9 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1757,21 +1757,16 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
> */
> static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> const struct i915_oa_config *oa_config,
> - bool interruptible)
> + bool need_lock)
> {
> struct i915_gem_context *ctx;
> int ret;
> unsigned int wait_flags = I915_WAIT_LOCKED;
>
> - if (interruptible) {
> - ret = i915_mutex_lock_interruptible(&dev_priv->drm);
> - if (ret)
> - return ret;
> -
> - wait_flags |= I915_WAIT_INTERRUPTIBLE;
> - } else {
> + if (need_lock)
> mutex_lock(&dev_priv->drm.struct_mutex);
> - }
> +
> + lockdep_assert_held(&dev_priv->drm.struct_mutex);
>
> /* Switch away from any user context. */
> ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
> @@ -1819,7 +1814,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> }
>
> out:
> - mutex_unlock(&dev_priv->drm.struct_mutex);
> + if (need_lock)
> + mutex_unlock(&dev_priv->drm.struct_mutex);
>
> return ret;
> }
> @@ -1863,7 +1859,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
> * to make sure all slices/subslices are ON before writing to NOA
> * registers.
> */
> - ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
> + ret = gen8_configure_all_contexts(dev_priv, oa_config, false);
> if (ret)
> return ret;
>
> @@ -1878,7 +1874,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
> static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
> {
> /* Reset all contexts' slices/subslices configurations. */
> - gen8_configure_all_contexts(dev_priv, NULL, false);
> + gen8_configure_all_contexts(dev_priv, NULL, true);
Maybe move the ops.disable_metric_set() to also be within the lock, so
need_lock=false, then we can get rid of need_lock?
>
> I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
> ~GT_NOA_ENABLE));
> @@ -1888,7 +1884,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
> static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
> {
> /* Reset all contexts' slices/subslices configurations. */
> - gen8_configure_all_contexts(dev_priv, NULL, false);
> + gen8_configure_all_contexts(dev_priv, NULL, true);
>
> /* Make sure we disable noa to save power. */
> I915_WRITE(RPM_CONFIG1,
> @@ -2138,13 +2134,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> if (ret)
> goto err_oa_buf_alloc;
>
> - ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
> - stream->oa_config);
> - if (ret)
> - goto err_enable;
> -
> - stream->ops = &i915_oa_stream_ops;
> -
> /* Lock device for exclusive_stream access late because
> * enable_metric_set() might lock as well on gen8+.
> */
Ok, we can get rid of this comment now.
> @@ -2152,16 +2141,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
> if (ret)
> goto err_lock;
>
> + ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
> + stream->oa_config);
> + if (ret)
> + goto err_enable;
> +
> + stream->ops = &i915_oa_stream_ops;
> +
> dev_priv->perf.oa.exclusive_stream = stream;
>
> mutex_unlock(&dev_priv->drm.struct_mutex);
>
> return 0;
>
> -err_lock:
> +err_enable:
> + mutex_unlock(&dev_priv->drm.struct_mutex);
> dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
We can drop the disable_metric_set here; no need to disable it if we
never enabled it.
Nice catch,
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
More information about the Intel-gfx
mailing list