[Intel-gfx] [PATCH 3/3] drm/i915/gen9: WM memory bandwidth related workaround

Wed Feb 1 09:55:20 UTC 2017

Hey,

Op 31-01-17 om 15:57 schreef Mahesh Kumar:
> This patch enables workarounds related to display arbitrated memory
> bandwidth only if it's necessary. WA's are applicable for all GEN9
> based platforms.
>
> Changes since v1:
>  - Rebase on top of Paulo's patch series
> Changes since v2:
>  - Address review comments
>  - Rebase/rework as per other patch changes in series
> Changes since v3:
>  - Rebase the patch
>  - introduce ww_mutex to protect WM operations
>  - Protect system memory bandwidth calculation with ww_mutex
>
> Signed-off-by: Mahesh Kumar <mahesh1.kumar at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c      |   1 +
>  drivers/gpu/drm/i915/i915_drv.h      |  15 ++++
>  drivers/gpu/drm/i915/intel_display.c |  34 ++++++++
>  drivers/gpu/drm/i915/intel_pm.c      | 163 +++++++++++++++++++++++++++++++----
>  4 files changed, 194 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index b8d0dd2811a9..a14b2a9d2e6a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -814,6 +814,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
>  	mutex_init(&dev_priv->modeset_restore_lock);
>  	mutex_init(&dev_priv->av_mutex);
>  	mutex_init(&dev_priv->wm.wm_mutex);
> +	drm_modeset_lock_init(&dev_priv->wm.wm_ww_mutex);
>  	mutex_init(&dev_priv->pps_mutex);
This is not the right approach with respect to serialization. In case of a nonblocking update, you need to grab the other crtc states so that the update is serialized correctly.

Otherwise you will get races with nonblocking modesets.
Please just grab all other crtc mutexes in order to update. In fact, when changing the workaround you probably need to grab the crtc state as well, to make sure all previous updates have completed. I think I told you what worked in the last iteration, but I can't remember for sure.

Lastly, this won't work as intended; we need to clean up all callers of lock_all_ctx first.
>  	intel_uc_init_early(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index df24de2b65f2..6e5cdd6c9dfd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1174,6 +1174,13 @@ enum intel_sbi_destination {
>  	SBI_MPHY,
>  };
>  
> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> +enum watermark_memory_wa {
> +	WATERMARK_WA_NONE,
> +	WATERMARK_WA_X_TILED,
> +	WATERMARK_WA_Y_TILED,
> +};
> +
>  #define QUIRK_PIPEA_FORCE (1<<0)
>  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> @@ -1744,8 +1751,15 @@ struct skl_ddb_allocation {
>  	struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES];
>  };
>  
> +struct skl_mem_bw_attr {
> +	enum watermark_memory_wa mem_wa;
> +	uint32_t pipe_bw_kbps[I915_MAX_PIPES];
> +	bool pipe_y_tiled[I915_MAX_PIPES];
> +};
> +
>  struct skl_wm_values {
>  	unsigned dirty_pipes;
> +	struct skl_mem_bw_attr mem_attr;
>  	struct skl_ddb_allocation ddb;
>  };
>  
> @@ -2348,6 +2362,7 @@ struct drm_i915_private {
>  		 * cstate->wm.need_postvbl_update.
>  		 */
>  		struct mutex wm_mutex;
> +		struct drm_modeset_lock wm_ww_mutex; /* protect DDB/WM redistribution among pipes */
>  
>  		/*
>  		 * Set during HW readout of watermarks/DDB.  Some platforms
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 1aa708b6f55e..de512bd8136b 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -14589,6 +14589,38 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state)
>  				  to_intel_plane(plane)->frontbuffer_bit);
>  }
>  
> +static void
> +intel_update_wm_bw_wa(struct drm_atomic_state *state)
> +{
> +	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
> +	struct drm_i915_private *dev_priv = to_i915(state->dev);
> +	const struct drm_crtc *crtc;
> +	const struct drm_crtc_state *cstate;
> +	const struct memdev_info *memdev_info = &dev_priv->memdev_info;
> +	struct skl_mem_bw_attr *results = &intel_state->wm_results.mem_attr;
> +	struct skl_mem_bw_attr *hw_vals = &dev_priv->wm.skl_hw.mem_attr;
> +	int i;
> +
> +	if (!IS_GEN9(dev_priv))
> +		return;
> +
> +	if (!memdev_info->valid)
> +		return;
> +
> +	for_each_crtc_in_state(state, crtc, cstate, i) {
> +		hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i];
> +		hw_vals->pipe_y_tiled[i] = results->pipe_y_tiled[i];
> +	}
> +
> +	hw_vals->mem_wa = results->mem_wa;
> +
> +	/*
> +	 * As we already updated state variables no need to hold the lock,
> +	 * other commits can proceed now with their calculation
> +	 */
> +	drm_modeset_unlock(&dev_priv->wm.wm_ww_mutex);
> +}
Ugh, please don't do this. Ever. Locking is the caller's responsibility.
>  /**
>   * intel_atomic_commit - commit validated state object
>   * @dev: DRM device
> @@ -14629,6 +14661,8 @@ static int intel_atomic_commit(struct drm_device *dev,
>  	intel_shared_dpll_swap_state(state);
>  	intel_atomic_track_fbs(state);
>  
> +	intel_update_wm_bw_wa(state);
> +
>  	if (intel_state->modeset) {
>  		memcpy(dev_priv->min_pixclk, intel_state->min_pixclk,
>  		       sizeof(intel_state->min_pixclk));
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 16e83efa1118..ae43df5cdfd8 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2887,21 +2887,6 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
>  
>  #define SKL_SAGV_BLOCK_TIME	30 /* µs */
>  
> -/*
> - * FIXME: We still don't have the proper code detect if we need to apply the WA,
> - * so assume we'll always need it in order to avoid underruns.
> - */
> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
> -{
> -	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> -
> -	if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) ||
> -	    IS_KABYLAKE(dev_priv))
> -		return true;
> -
> -	return false;
> -}
> -
>  static bool
>  intel_has_sagv(struct drm_i915_private *dev_priv)
>  {
> @@ -3049,7 +3034,8 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)
>  
>  		latency = dev_priv->wm.skl_latency[level];
>  
> -		if (skl_needs_memory_bw_wa(intel_state) &&
> +		if (intel_state->wm_results.mem_attr.mem_wa !=
> +		    WATERMARK_WA_NONE &&
>  		    plane->base.state->fb->modifier ==
>  		    I915_FORMAT_MOD_X_TILED)
>  			latency += 15;
> @@ -3581,7 +3567,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  	uint32_t y_min_scanlines;
>  	struct intel_atomic_state *state =
>  		to_intel_atomic_state(cstate->base.state);
> -	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
> +	enum watermark_memory_wa mem_wa;
>  	bool y_tiled, x_tiled;
>  
>  	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) {
> @@ -3597,7 +3583,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  	if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
>  		latency += 4;
>  
> -	if (apply_memory_bw_wa && x_tiled)
> +	mem_wa = state->wm_results.mem_attr.mem_wa;
> +	if (mem_wa != WATERMARK_WA_NONE && x_tiled)
>  		latency += 15;
>  
>  	width = drm_rect_width(&intel_pstate->base.src) >> 16;
> @@ -3632,7 +3619,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  		y_min_scanlines = 4;
>  	}
>  
> -	if (apply_memory_bw_wa)
> +	if (mem_wa == WATERMARK_WA_Y_TILED)
>  		y_min_scanlines *= 2;
>  
>  	plane_bytes_per_line = width * cpp;
> @@ -4061,6 +4048,16 @@ skl_compute_ddb(struct drm_atomic_state *state)
>  	}
>  
>  	/*
> +	 * If Watermark workaround is changed we need to recalculate
> +	 * watermark values for all active pipes
> +	 */
> +	if (intel_state->wm_results.mem_attr.mem_wa !=
> +				dev_priv->wm.skl_hw.mem_attr.mem_wa) {
> +		realloc_pipes = ~0;
> +		intel_state->wm_results.dirty_pipes = ~0;
> +	}
> +
> +	/*
>  	 * We're not recomputing for the pipes not included in the commit, so
>  	 * make sure we start with the current state.
>  	 */
> @@ -4085,6 +4082,130 @@ skl_compute_ddb(struct drm_atomic_state *state)
>  	return 0;
>  }
>  
> +static int
> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
> +{
> +	struct drm_device *dev = state->dev;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
> +	struct memdev_info *memdev_info = &dev_priv->memdev_info;
> +	struct skl_mem_bw_attr *mem_attr = &intel_state->wm_results.mem_attr;
> +	struct drm_crtc *crtc;
> +	struct drm_crtc_state *cstate;
> +	int active_pipes = 0;
> +	uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps;
> +	int display_bw_percentage;
> +	bool y_tile_enabled = false;
> +	int ret, i;
> +
> +	if (!IS_GEN9(dev_priv)) {
> +		mem_attr->mem_wa = WATERMARK_WA_NONE;
> +		return 0;
> +	}
> +
> +	if (!memdev_info->valid)
> +		goto exit;
> +
> +	/*
> +	 * We hold wm_mutex lock, so any other flip can't proceed beyond WM
> +	 * calculation step until this flip writes modified WM values.
> +	 * So it's safe to read plane_state of other pipes without taking CRTC lock
> +	 */
> +	ret = drm_modeset_lock(&dev_priv->wm.wm_ww_mutex, state->acquire_ctx);
> +	if (ret)
> +		return ret;
> +
> +	memcpy(mem_attr, &dev_priv->wm.skl_hw.mem_attr, sizeof(mem_attr));
> +
> +	for_each_crtc_in_state(state, crtc, cstate, i) {
> +		struct drm_plane *plane;
> +		const struct drm_plane_state *pstate;
> +		int active_planes = 0;
> +		uint32_t max_plane_bw_kbps = 0;
> +
> +		mem_attr->pipe_y_tiled[i] = false;
> +
> +		if (!cstate->active) {
> +			mem_attr->pipe_bw_kbps[i] = 0;
> +			continue;
> +		}
> +
> +		drm_atomic_crtc_state_for_each_plane_state(plane, pstate,
> +								cstate) {
> +			struct drm_framebuffer *fb;
> +			uint32_t plane_bw_kbps;
> +			enum plane_id id = to_intel_plane(plane)->id;
> +
> +			if (!pstate->visible)
> +				continue;
> +
> +			if (WARN_ON(!pstate->fb))
> +				continue;
> +
> +			if (id == PLANE_CURSOR)
> +				continue;
> +
> +			fb = pstate->fb;
> +			if ((fb->modifier == I915_FORMAT_MOD_Y_TILED ||
> +				fb->modifier == I915_FORMAT_MOD_Yf_TILED))
> +				mem_attr->pipe_y_tiled[i] = true;
> +
> +			plane_bw_kbps = skl_adjusted_plane_pixel_rate(
> +						to_intel_crtc_state(cstate),
> +						to_intel_plane_state(pstate));
> +			max_plane_bw_kbps = max(plane_bw_kbps,
> +							max_plane_bw_kbps);
> +			active_planes++;
> +		}
> +		mem_attr->pipe_bw_kbps[i] = max_plane_bw_kbps * active_planes;
> +	}
> +
> +	for_each_pipe(dev_priv, i) {
> +		if (mem_attr->pipe_bw_kbps[i]) {
> +			max_pipe_bw_kbps = max(max_pipe_bw_kbps,
> +					mem_attr->pipe_bw_kbps[i]);
> +			active_pipes++;
> +		}
> +		if (mem_attr->pipe_y_tiled[i])
> +			y_tile_enabled = true;
> +	}
> +
> +	total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes;
> +	display_bw_percentage = DIV_ROUND_UP_ULL((uint64_t)total_pipe_bw_kbps *
> +					100, memdev_info->bandwidth_kbps);
> +
> +	/*
> +	 * If there is any Ytile plane enabled and arbitrated display
> +	 * bandwidth > 20% of raw system memory bandwidth
> +	 * Enale Y-tile related WA
> +	 *
> +	 * If memory is dual channel single rank, Xtile limit = 35%, else Xtile
> +	 * limit = 60%
> +	 * If there is no Ytile plane enabled and
> +	 * arbitrated display bandwidth > Xtile limit
> +	 * Enable X-tile realated WA
> +	 */
> +	if (y_tile_enabled && (display_bw_percentage > 20))
> +		mem_attr->mem_wa = WATERMARK_WA_Y_TILED;
> +	else {
> +		int x_tile_percentage = 60;
> +		enum rank rank = memdev_info->rank;
> +
> +		if ((rank == I915_DRAM_RANK_SINGLE) &&
> +					(memdev_info->num_channels == 2))
> +			x_tile_percentage = 35;
> +
> +		if (display_bw_percentage > x_tile_percentage)
> +			mem_attr->mem_wa = WATERMARK_WA_X_TILED;
> +	}
> +
> +	return 0;
> +
> +exit:
> +	mem_attr->mem_wa = WATERMARK_WA_Y_TILED;
> +	return 0;
> +}
> +
>  static void
>  skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>  		     struct skl_wm_values *src,
> @@ -4160,6 +4281,10 @@ skl_compute_wm(struct drm_atomic_state *state)
>  	/* Clear all dirty flags */
>  	results->dirty_pipes = 0;
>  
> +	ret = skl_compute_memory_bandwidth_wm_wa(state);
> +	if (ret)
> +		return ret;
> +
>  	ret = skl_compute_ddb(state);
>  	if (ret)
>  		return ret;