[Intel-gfx] [v4, 1/6] drm/i915/skl: Add support for the SAGV, fix underrun hangs

Tue Aug 2 11:16:09 UTC 2016

Hi,

On 26-07-16 19:34, cpaul at redhat.com wrote:
> Since the watermark calculations for Skylake are still broken, we're apt
> to hitting underruns very easily under multi-monitor configurations.
> While it would be lovely if this was fixed, it's not. Another problem
> that's been coming from this however, is the mysterious issue of
> underruns causing full system hangs. An easy way to reproduce this with
> a skylake system:
>
> - Get a laptop with a skylake GPU, and hook up two external monitors to
>   it
> - Move the cursor from the built-in LCD to one of the external displays
>   as quickly as you can
> - You'll get a few pipe underruns, and eventually the entire system will
>   just freeze.
>
> After doing a lot of investigation and reading through the bspec, I
> found the existence of the SAGV, which is responsible for adjusting the
> system agent voltage and clock frequencies depending on how much power
> we need. According to the bspec:
>
> "The display engine access to system memory is blocked during the
>  adjustment time. SAGV defaults to enabled. Software must use the
>  GT-driver pcode mailbox to disable SAGV when the display engine is not
>  able to tolerate the blocking time."
>
> The rest of the bspec goes on to explain that software can simply leave
> the SAGV enabled, and disable it when we use interlaced pipes/have more
> then one pipe active.
>
> Sure enough, with this patchset the system hangs resulting from pipe
> underruns on Skylake have completely vanished on my T460s. Additionally,
> the bspec mentions turning off the SAGV	with more then one pipe enabled
> as a workaround for display underruns. While this patch doesn't entirely
> fix that, it looks like it does improve the situation a little bit so
> it's likely this is going to be required to make watermarks on Skylake
> fully functional.
>
> Changes since v4:
>  - Use is_power_of_2 against active_crtcs to check whether we have > 1
>    pipe enabled
>  - Fix skl_sagv_get_hw_state(): (temp & 0x1) indicates disabled, 0x0
>    enabled
>  - Call skl_sagv_enable/disable() from pre/post-plane updates

This seems to not do what you want it to do, if I'm reading your changes
and the original code correct:

<snip>

> @@ -4589,6 +4592,11 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
>  		     !old_primary_state->visible))
>  			intel_post_enable_primary(&crtc->base);
>  	}
> +
> +	if (old_intel_state->modeset &&
> +	    (old_intel_state->active_crtcs == 0 ||
> +	     is_power_of_2(old_intel_state->active_crtcs)))
> +		skl_enable_sagv(dev_priv);
>  }
>

Here you are enabling the sagv if the *old* state allows it
(0 or 1 pipes active).

But judging from previous patches / the commit msg the
intent is to enable the sag if the *new* state allows
it, not the old one.

See e.g. the checks for calling intel_post_enable_primary()
which use both primary_state and old_primary_state

Also if you're going to respin you may want to switch
to using hweight as mentioned before, then you can simply
do something like (hamming_weight(active_crtcs) <= 1) as
condition to check for 0 or 1 active crtcs instead of
having 2 checks. Note please double check my logic here.

<snip>

> @@ -4649,6 +4659,15 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state)
>  	}
>
>  	/*
> +	 * SKL workaround: bspec recommends we disable the SAGV when we have
> +	 * more then one pipe enabled
> +	 */
> +	if (old_intel_state->modeset &&
> +	    !is_power_of_2(old_intel_state->active_crtcs) &&
> +	    old_intel_state->active_crtcs != 0)
> +		skl_disable_sagv(dev_priv);
> +
> +	/*

Same thing, you're disabling the sagv if the old state
disallows it, but I believe you should be looking at the new
state instead.

Regards,

Hans

>  	 * If we're doing a modeset, we're done.  No need to do any pre-vblank
>  	 * watermark programming here.
>  	 */
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index e74d851..113bf48 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1709,6 +1709,8 @@ void ilk_wm_get_hw_state(struct drm_device *dev);
>  void skl_wm_get_hw_state(struct drm_device *dev);
>  void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
>  			  struct skl_ddb_allocation *ddb /* out */);
> +int skl_enable_sagv(struct drm_i915_private *dev_priv);
> +int skl_disable_sagv(struct drm_i915_private *dev_priv);
>  uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config);
>  bool ilk_disable_lp_wm(struct drm_device *dev);
>  int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 64d628c..55a9694 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2876,6 +2876,109 @@ skl_wm_plane_id(const struct intel_plane *plane)
>  }
>
>  static void
> +skl_sagv_get_hw_state(struct drm_i915_private *dev_priv)
> +{
> +	u32 temp;
> +	int ret;
> +
> +	if (IS_BROXTON(dev_priv))
> +		return;
> +
> +	mutex_lock(&dev_priv->rps.hw_lock);
> +	ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL, &temp);
> +	mutex_unlock(&dev_priv->rps.hw_lock);
> +
> +	if (!ret) {
> +		dev_priv->skl_sagv_enabled = !(temp & 0x1);
> +	} else {
> +		/*
> +		 * If for some reason we can't access the SAGV state, follow
> +		 * the bspec and assume it's enabled
> +		 */
> +		DRM_ERROR("Failed to get SAGV state, assuming enabled\n");
> +		dev_priv->skl_sagv_enabled = true;
> +	}
> +}
> +
> +/*
> + * SAGV dynamically adjusts the system agent voltage and clock frequencies
> + * depending on power and performance requirements. The display engine access
> + * to system memory is blocked during the adjustment time. Having this enabled
> + * in multi-pipe configurations can cause issues (such as underruns causing
> + * full system hangs), and the bspec also suggests that software disable it
> + * when more then one pipe is enabled.
> + */
> +int
> +skl_enable_sagv(struct drm_i915_private *dev_priv)
> +{
> +	int ret;
> +
> +	if (IS_BROXTON(dev_priv))
> +		return 0;
> +	if (dev_priv->skl_sagv_enabled)
> +		return 0;
> +
> +	mutex_lock(&dev_priv->rps.hw_lock);
> +	DRM_DEBUG_KMS("Enabling the SAGV\n");
> +
> +	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
> +				      GEN9_SAGV_DYNAMIC_FREQ);
> +	if (!ret)
> +		dev_priv->skl_sagv_enabled = true;
> +	else
> +		DRM_ERROR("Failed to enable the SAGV\n");
> +
> +	/* We don't need to wait for SAGV when enabling */
> +	mutex_unlock(&dev_priv->rps.hw_lock);
> +	return ret;
> +}
> +
> +int
> +skl_disable_sagv(struct drm_i915_private *dev_priv)
> +{
> +	int ret = 0;
> +	unsigned long timeout;
> +	u32 temp;
> +
> +	if (IS_BROXTON(dev_priv))
> +		return 0;
> +	if (!dev_priv->skl_sagv_enabled)
> +		return 0;
> +
> +	mutex_lock(&dev_priv->rps.hw_lock);
> +	DRM_DEBUG_KMS("Disabling the SAGV\n");
> +
> +	/* bspec says to keep retrying for at least 1 ms */
> +	timeout = jiffies + msecs_to_jiffies(1);
> +	do {
> +		ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
> +					      GEN9_SAGV_DISABLE);
> +		if (ret) {
> +			DRM_ERROR("Failed to disable the SAGV\n");
> +			goto out;
> +		}
> +
> +		ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL,
> +					     &temp);
> +		if (ret) {
> +			DRM_ERROR("Failed to check the status of the SAGV\n");
> +			goto out;
> +		}
> +	} while (!(temp & 0x1) && time_before(jiffies, timeout));
> +
> +	if (temp & 0x1) {
> +		dev_priv->skl_sagv_enabled = false;
> +	} else {
> +		ret = -1;
> +		DRM_ERROR("Request to disable SAGV timed out\n");
> +	}
> +
> +out:
> +	mutex_unlock(&dev_priv->rps.hw_lock);
> +	return ret;
> +}
> +
> +static void
>  skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
>  				   const struct intel_crtc_state *cstate,
>  				   struct skl_ddb_entry *alloc, /* out */
> @@ -4228,6 +4331,8 @@ void skl_wm_get_hw_state(struct drm_device *dev)
>  		/* Easy/common case; just sanitize DDB now if everything off */
>  		memset(ddb, 0, sizeof(*ddb));
>  	}
> +
> +	skl_sagv_get_hw_state(dev_priv);
>  }
>
>  static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
>