[Intel-gfx] [PATCH v3] drm/i915: Restrict qgv points which don't have enough bandwidth.

Fri Oct 11 23:49:54 UTC 2019

On Wed, Sep 25, 2019 at 03:17:37PM +0300, Stanislav Lisovskiy wrote:
> According to BSpec 53998, we should try to
> restrict qgv points, which can't provide
> enough bandwidth for desired display configuration.
> 
> Currently we are just comparing against all of
> those and take minimum(worst case).
> 
> v2: Fixed wrong PCode reply mask, removed hardcoded
>     values.
> 
> v3: Forbid simultaneous legacy SAGV PCode requests and
>     restricting qgv points. Put the actual restriction
>     to commit function, added serialization(thanks to Ville)
>     to prevent commit being applied out of order in case of
>     nonblocking and/or nomodeset commits.
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy at intel.com>
> Cc: Ville Syrjälä <ville.syrjala at intel.com>
> Cc: James Ausmus <james.ausmus at intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_atomic.c   | 16 ++++
>  drivers/gpu/drm/i915/display/intel_atomic.h   |  3 +
>  drivers/gpu/drm/i915/display/intel_bw.c       | 79 +++++++++++++------
>  drivers/gpu/drm/i915/display/intel_bw.h       |  2 +
>  drivers/gpu/drm/i915/display/intel_display.c  | 78 +++++++++++++++++-
>  .../drm/i915/display/intel_display_types.h    |  3 +
>  drivers/gpu/drm/i915/i915_drv.h               |  2 +
>  drivers/gpu/drm/i915/i915_reg.h               |  3 +
>  8 files changed, 160 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
> index 698802da07b7..903537c4fb0f 100644
> --- a/drivers/gpu/drm/i915/display/intel_atomic.c
> +++ b/drivers/gpu/drm/i915/display/intel_atomic.c
> @@ -208,6 +208,22 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc)
>  	return &crtc_state->base;
>  }
>  
> +int intel_atomic_serialize_global_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc *crtc;
> +
> +	for_each_intel_crtc(&dev_priv->drm, crtc) {
> +		struct intel_crtc_state *crtc_state;
> +
> +		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
> +		if (IS_ERR(crtc_state))
> +			return PTR_ERR(crtc_state);
> +	}
> +
> +	return 0;
> +}
> +

I'll leave reviewing the atomic serialization bits to Ville and I'll
just review the rest, as I don't have a good head for atomic at this
point...

>  /**
>   * intel_crtc_destroy_state - destroy crtc state
>   * @crtc: drm crtc
> diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h
> index 58065d3161a3..fd17b3ca257f 100644
> --- a/drivers/gpu/drm/i915/display/intel_atomic.h
> +++ b/drivers/gpu/drm/i915/display/intel_atomic.h
> @@ -7,6 +7,7 @@
>  #define __INTEL_ATOMIC_H__
>  
>  #include <linux/types.h>
> +#include "intel_display_types.h"
>  
>  struct drm_atomic_state;
>  struct drm_connector;
> @@ -38,6 +39,8 @@ void intel_crtc_destroy_state(struct drm_crtc *crtc,
>  struct drm_atomic_state *intel_atomic_state_alloc(struct drm_device *dev);
>  void intel_atomic_state_clear(struct drm_atomic_state *state);
>  
> +int intel_atomic_serialize_global_state(struct intel_atomic_state *state);
> +
>  struct intel_crtc_state *
>  intel_atomic_get_crtc_state(struct drm_atomic_state *state,
>  			    struct intel_crtc *crtc);
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
> index cd58e47ab7b2..81e3468306fa 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.c
> +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> @@ -8,6 +8,7 @@
>  #include "intel_bw.h"
>  #include "intel_display_types.h"
>  #include "intel_sideband.h"
> +#include "intel_atomic.h"
>  
>  /* Parameters for Qclk Geyserville (QGV) */
>  struct intel_qgv_point {
> @@ -90,6 +91,27 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
>  	return 0;
>  }
>  
> +int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
> +				  u32 points_mask)
> +{
> +	int ret;
> +
> +	/* bspec says to keep retrying for at least 1 ms */
> +	ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG,
> +				points_mask,
> +				GEN11_PCODE_POINTS_RESTRICTED_MASK,
> +				GEN11_PCODE_POINTS_RESTRICTED,
> +				1);
> +
> +	if (ret < 0) {
> +		DRM_ERROR("Failed to disable qgv points (%d)\n", ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +
>  static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
>  			      struct intel_qgv_info *qi)
>  {
> @@ -247,22 +269,6 @@ void intel_bw_init_hw(struct drm_i915_private *dev_priv)
>  		icl_get_bw_info(dev_priv, &icl_sa_info);
>  }
>  
> -static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> -					int num_planes)
> -{
> -	if (INTEL_GEN(dev_priv) >= 11)
> -		/*
> -		 * FIXME with SAGV disabled maybe we can assume
> -		 * point 1 will always be used? Seems to match
> -		 * the behaviour observed in the wild.
> -		 */
> -		return min3(icl_max_bw(dev_priv, num_planes, 0),
> -			    icl_max_bw(dev_priv, num_planes, 1),
> -			    icl_max_bw(dev_priv, num_planes, 2));
> -	else
> -		return UINT_MAX;
> -}
> -
>  static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
>  {
>  	/*
> @@ -354,7 +360,9 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
>  	unsigned int data_rate, max_data_rate;
>  	unsigned int num_active_planes;
>  	struct intel_crtc *crtc;
> -	int i;
> +	int i, ret;
> +	struct intel_qgv_info qi = {};
> +	u32 allowed_points = 0;
>  
>  	/* FIXME earlier gens need some checks too */
>  	if (INTEL_GEN(dev_priv) < 11)
> @@ -398,16 +406,43 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
>  	data_rate = intel_bw_data_rate(dev_priv, bw_state);
>  	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
>  
> -	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> -
>  	data_rate = DIV_ROUND_UP(data_rate, 1000);
>  
> -	if (data_rate > max_data_rate) {
> -		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> -			      data_rate, max_data_rate, num_active_planes);
> +	ret = icl_get_qgv_points(dev_priv, &qi);
> +	if (ret < 0)
> +		return 0;
> +
> +	for (i = 0; i < qi.num_points; i++) {
> +		max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
> +		if (max_data_rate < data_rate)
> +			DRM_DEBUG_KMS("QGV point %d: max bw %d required %d\n",
> +				      i, max_data_rate, data_rate);
> +		else {
> +			DRM_DEBUG_KMS("QGV point %d: max bw %d required %d\n",
> +				      i, max_data_rate, data_rate);

You're doing the same debug print either way - move it out of the if, and then
that simplifies your blocks:

if (max_data_rate >= data_rate)
	allowed_points |= 1 << i;
DRM_DEBUG_KMS...

> +			allowed_points |= 1 << i;
> +		}

According to the BSpec page, we also need to save off the QGV point that has
the most available bandwidth:

"At least one GV point must always remain unmasked. The point providing the
highest bandwidth for display must always remain unmasked."

We should stash that point separately, and ensure it always remains unmasked.

> +	}
> +
> +	if (allowed_points == 0) {
> +		DRM_DEBUG_KMS("Could not find any suitable QGV points\n");
>  		return -EINVAL;
>  	}
>  
> +	state->qgv_points_mask = (~allowed_points) & ((1 << qi.num_points) - 1);
> +
> +	/*
> +	 * If the actual mask had changed we need to make sure that
> +	 * the commits are serialized(in case this is a nomodeset, nonblocking)
> +	 */
> +	if (state->qgv_points_mask != dev_priv->qgv_points_mask) {
> +		ret = intel_atomic_serialize_global_state(state);
> +		if (ret) {
> +			DRM_DEBUG_KMS("Could not serialize global state\n");
> +			return ret;
> +		}
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
> index 9db10af012f4..66bf9bc10b73 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.h
> +++ b/drivers/gpu/drm/i915/display/intel_bw.h
> @@ -28,5 +28,7 @@ int intel_bw_init(struct drm_i915_private *dev_priv);
>  int intel_bw_atomic_check(struct intel_atomic_state *state);
>  void intel_bw_crtc_update(struct intel_bw_state *bw_state,
>  			  const struct intel_crtc_state *crtc_state);
> +int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
> +				  u32 points_mask);
>  
>  #endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index 5ecf54270181..c3196d0e4be3 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -13960,6 +13960,68 @@ static void intel_atomic_cleanup_work(struct work_struct *work)
>  	intel_atomic_helper_free_state(i915);
>  }
>  
> +static void intel_qgv_point_pre_update(struct intel_atomic_state *state)

It would be nice to name this either "mask" or "unmask", so it's easier
at first glance to see which function is turning on masked bits vs
toggling them off.

> +{
> +	struct drm_device *dev = state->base.dev;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	int i, ret;
> +
> +	/*
> +	 * Restrict required qgv points before updating the configuration.
> +	 * According to BPsec we can't mask and unmask qgv points at the same

s/BPsec/BSpec/

> +	 * time. Also masking should be done before updating the configuration
> +	 * and unmasking afterwards.
> +	 */
> +	u32 new_qgv_points_mask = dev_priv->qgv_points_mask;
> +	int num_points = dev_priv->max_bw[0].num_qgv_points;
> +
> +	for (i = num_points; i > 0; i--) {
> +		int new_mask_bit = state->qgv_points_mask & (1 << num_points);
> +		int old_mask_bit = new_qgv_points_mask & (1 << num_points);

The naming here is spinning my head a bit, as we're getting the
"old_mask_bit" from the "new_qgv_points_mask".

> +
> +		if (old_mask_bit != new_mask_bit)
> +			if (new_mask_bit != 0)

Can't this just be

if (new_mask_bit)
        new_qgv_points_mask |= new_mask_bit;

?

Since the only way that (old_mask_bit != new_mask_bit) is when we're
going from a 0 to a 1, and it's ok to go from a 1 to a 1, so the only
thing that matters here is new_mask_bit, right? If that's the case,
can't you just drop the old_mask_bit parts entirely? Or am I confusing
myself with the naming? :)

Actually, wouldn't the whole function at that point just be:

ret = icl_pcode_restrict_qgv_points(dev_priv, dev_priv->qgv_points_mask | state->qgv_points_mask);

Since you're just wanting to "turn on" masking points in the _pre, and
leave the "turn off" of masking points to the _post?

> +				new_qgv_points_mask |= new_mask_bit;
> +	}
> +
> +	ret = icl_pcode_restrict_qgv_points(dev_priv, new_qgv_points_mask);
> +	if (ret < 0)
> +		DRM_DEBUG_KMS("Could not restrict required gqv points(%d)\n", ret);

s/gqv/qgv/

Also, if we fail masking off the qgv points that can't support our BW
req, shouldn't we handle that failure somehow - maybe just disable SAGV
entirely?  Better we lose power than have flickering screens...

> +	else
> +		dev_priv->qgv_points_mask = new_qgv_points_mask;
> +}
> +
> +static void intel_qgv_point_post_update(struct intel_atomic_state *state)

Same comment on the naming

> +{
> +	struct drm_device *dev = state->base.dev;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	int i, ret;
> +
> +	/*
> +	 * Restrict required qgv points before updating the configuration.
> +	 * According to BPsec we can't mask and unmask qgv points at the same

s/BPsec/BSpec/

> +	 * time. Also masking should be done before updating the configuration
> +	 * and unmasking afterwards.
> +	 */
> +	u32 new_qgv_points_mask = dev_priv->qgv_points_mask;
> +	int num_points = dev_priv->max_bw[0].num_qgv_points;
> +
> +	for (i = num_points; i > 0; i--) {
> +		int new_mask_bit = state->qgv_points_mask & (1 << num_points);
> +		int old_mask_bit = new_qgv_points_mask & (1 << num_points);
> +
> +		if (old_mask_bit != new_mask_bit)
> +			if (new_mask_bit == 0)
> +				new_qgv_points_mask &= ~old_mask_bit;
> +	}

Same comment here - can't this really be simplified to:

ret = icl_pcode_restrict_qgv_points(dev_priv, dev_priv->qgv_points_mask & state->qgv_points_mask);

Since here we're just wnating to "turn off" the mask for points that the
new state allows, and we should have already "turned on" all the points
in _pre?

> +
> +	ret = icl_pcode_restrict_qgv_points(dev_priv, new_qgv_points_mask);
> +	if (ret < 0)
> +		DRM_DEBUG_KMS("Could not restrict required gqv points(%d)\n", ret);

Maybe change the error message to something like "Could not enable
required qgv points", so it's more easily differentiated?

Same here about error handling - if we fail to enable qgv points that
may be required, we might just want to entirely disable SAGV, as we
might not have a point that works for our BW reqs, and it's better to
lose power than flicker.

> +	else
> +		dev_priv->qgv_points_mask = new_qgv_points_mask;
> +}
> +
>  static void intel_atomic_commit_tail(struct intel_atomic_state *state)
>  {
>  	struct drm_device *dev = state->base.dev;
> @@ -13987,6 +14049,9 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
>  		}
>  	}
>  
> +	if ((INTEL_GEN(dev_priv) >= 11))
> +		intel_qgv_point_pre_update(state);
> +
>  	intel_commit_modeset_disables(state);
>  
>  	/* FIXME: Eventually get rid of our crtc->config pointer */
> @@ -14005,8 +14070,9 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
>  		 * SKL workaround: bspec recommends we disable the SAGV when we
>  		 * have more then one pipe enabled
>  		 */
> -		if (!intel_can_enable_sagv(state))
> -			intel_disable_sagv(dev_priv);
> +		if (INTEL_GEN(dev_priv) < 11)
> +			if (!intel_can_enable_sagv(state))
> +				intel_disable_sagv(dev_priv);
>  
>  		intel_modeset_verify_disabled(dev_priv, state);
>  	}
> @@ -14084,8 +14150,12 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
>  	if (state->modeset)
>  		intel_verify_planes(state);
>  
> -	if (state->modeset && intel_can_enable_sagv(state))
> -		intel_enable_sagv(dev_priv);
> +	if (INTEL_GEN(dev_priv) < 11)
> +		if (state->modeset && intel_can_enable_sagv(state))
> +			intel_enable_sagv(dev_priv);
> +
> +	if ((INTEL_GEN(dev_priv) >= 11) && intel_can_enable_sagv(state))
> +		intel_qgv_point_post_update(state);

I keep going back and forth in my mind about the above block - what do you
think of doing it this way?

if (intel_can_enable_sagv(state)) {
        if (INTEL_GEN(dev_priv) >= 11)
                intel_qgv_point_post_update(state);
        else if (state->modeset)
                intel_enable_sagv(dev_priv);
}

Feels a little cleaner, I think, and lets us keep our standard New Gen -> Old
Gen if ladder style - but I'm not 100% sold on it myself :)

Thanks!

-James

>  
>  	drm_atomic_helper_commit_hw_done(&state->base);
>  
> diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
> index 6b0a646f0170..82f8df65347e 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_types.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
> @@ -512,6 +512,9 @@ struct intel_atomic_state {
>  	struct i915_sw_fence commit_ready;
>  
>  	struct llist_node freed;
> +
> +	/* Gen11+ only */
> +	u32 qgv_points_mask;
>  };
>  
>  struct intel_plane_state {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fcf7423075ef..383de77a7b73 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1652,6 +1652,8 @@ struct drm_i915_private {
>  		u8 num_planes;
>  	} max_bw[6];
>  
> +	u32 qgv_points_mask;
> +
>  	struct drm_private_obj bw_obj;
>  
>  	struct intel_runtime_pm runtime_pm;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e752de9470bd..c78ee180c1aa 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8854,6 +8854,7 @@ enum {
>  #define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
>  #define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
>  #define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
> +#define   ICL_PCODE_SAGV_DE_MEM_SS_CONFIG	0xe
>  #define   GEN6_PCODE_READ_D_COMP		0x10
>  #define   GEN6_PCODE_WRITE_D_COMP		0x11
>  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> @@ -8865,6 +8866,8 @@ enum {
>  #define     GEN9_SAGV_DISABLE			0x0
>  #define     GEN9_SAGV_IS_DISABLED		0x1
>  #define     GEN9_SAGV_ENABLE			0x3
> +#define GEN11_PCODE_POINTS_RESTRICTED		0x0
> +#define GEN11_PCODE_POINTS_RESTRICTED_MASK	0x1
>  #define GEN6_PCODE_DATA				_MMIO(0x138128)
>  #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
>  #define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT	16
> -- 
> 2.17.1
>