[Intel-gfx] [PATCH 04/10] drm/i915: CHV DDR DVFS support and another watermark rewrite

Clint Taylor clinton.a.taylor at intel.com
Fri Jun 26 10:56:33 PDT 2015


On 06/24/2015 12:00 PM, ville.syrjala at linux.intel.com wrote:
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
>
> Turns out the VLV/CHV system agent doesn't understand memory
> latencies, so trying to rely on the PND deadline mechanism is not
> going to fly especially when DDR DVFS is enabled. Currently we try to
> avoid the problems by lying to the system agent about the deadlines
> and setting the FIFO watermarks to 8 cachelines. This however leads to
> bad memory self refresh residency.
>
> So in order to satosfy everyone we'll just give up on the deadline
> scheme and program the watermarks old school based on the worst case
> memory latency.
>
> I've modelled this a bit on the ILK+ approach where we compute multiple
> sets of watermarks for each pipe (PM2,PM5,DDR DVFS) and when merge thet
> appropriate one later with the watermarks from other pipes. There isn't
> too much to merge actually since each pipe has a totally independent
> FIFO (well apart from the mess with the partially shared DSPARB
> registers), but still decopuling the pipes from each other seems like a
> good idea.
>
> Eventually we'll want to perform the watermark update in two phases
> around the plane update to avoid underruns due to the single buffered
> watermark registers. But that's still in limbo for ILK+ too, so I've not
> gone that far yet for VLV/CHV either.
>
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h      |  28 +--
>   drivers/gpu/drm/i915/intel_display.c |   6 +-
>   drivers/gpu/drm/i915/intel_drv.h     |  11 ++
>   drivers/gpu/drm/i915/intel_pm.c      | 318 ++++++++++++++++++++++++++++++++++-
>   4 files changed, 345 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 514adcf..37cc653 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -276,6 +276,12 @@ struct i915_hotplug {
>   			    &dev->mode_config.plane_list,	\
>   			    base.head)
>
> +#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane)	\
> +	list_for_each_entry(intel_plane,				\
> +			    &(dev)->mode_config.plane_list,		\
> +			    base.head)					\
> +		if ((intel_plane)->pipe == (intel_crtc)->pipe)
> +
>   #define for_each_intel_crtc(dev, intel_crtc) \
>   	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head)
>
> @@ -1498,18 +1504,20 @@ struct ilk_wm_values {
>   	enum intel_ddb_partitioning partitioning;
>   };
>
> -struct vlv_wm_values {
> -	struct {
> -		uint16_t primary;
> -		uint16_t sprite[2];
> -		uint8_t cursor;
> -	} pipe[3];
> +struct vlv_pipe_wm {
> +	uint16_t primary;
> +	uint16_t sprite[2];
> +	uint8_t cursor;
> +};
>
> -	struct {
> -		uint16_t plane;
> -		uint8_t cursor;
> -	} sr;
> +struct vlv_sr_wm {
> +	uint16_t plane;
> +	uint8_t cursor;
> +};
>
> +struct vlv_wm_values {
> +	struct vlv_pipe_wm pipe[3];
> +	struct vlv_sr_wm sr;
>   	struct {
>   		uint8_t cursor;
>   		uint8_t sprite[2];
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index b15d57f..1424320 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -4690,8 +4690,11 @@ intel_pre_disable_primary(struct drm_crtc *crtc)
>   	 * event which is after the vblank start event, so we need to have a
>   	 * wait-for-vblank between disabling the plane and the pipe.
>   	 */
> -	if (HAS_GMCH_DISPLAY(dev))
> +	if (HAS_GMCH_DISPLAY(dev)) {
>   		intel_set_memory_cxsr(dev_priv, false);
> +		dev_priv->wm.vlv.cxsr = false;
> +		intel_wait_for_vblank(dev, pipe);
> +	}
>
>   	/*
>   	 * FIXME IPS should be fine as long as one plane is
> @@ -6005,7 +6008,6 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc)
>
>   	intel_crtc_load_lut(crtc);
>
> -	intel_update_watermarks(crtc);
>   	intel_enable_pipe(intel_crtc);
>
>   	assert_vblank_disabled(crtc);
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 3673a71..f26a680 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -462,6 +462,15 @@ struct intel_crtc_state {
>   	enum pipe hsw_workaround_pipe;
>   };
>
> +struct vlv_wm_state {
> +	struct vlv_pipe_wm wm[3];
> +	struct vlv_sr_wm sr[3];
> +	uint8_t num_active_planes;
> +	uint8_t num_levels;
> +	uint8_t level;
> +	bool cxsr;
> +};
> +
>   struct intel_pipe_wm {
>   	struct intel_wm_level wm[5];
>   	uint32_t linetime;
> @@ -564,6 +573,8 @@ struct intel_crtc {
>
>   	/* scalers available on this crtc */
>   	int num_scalers;
> +
> +	struct vlv_wm_state wm_state;
>   };
>
>   struct intel_plane_wm_parameters {
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index e67548d..d046e5f 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -335,8 +335,6 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
>   	if (IS_VALLEYVIEW(dev)) {
>   		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
>   		POSTING_READ(FW_BLC_SELF_VLV);
> -		if (IS_CHERRYVIEW(dev))
> -			chv_set_memory_pm5(dev_priv, enable);
>   	} else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
>   		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
>   		POSTING_READ(FW_BLC_SELF);
> @@ -929,8 +927,6 @@ static void vlv_write_wm_values(struct intel_crtc *crtc,
>   	}
>
>   	POSTING_READ(DSPFW1);
> -
> -	dev_priv->wm.vlv = *wm;
>   }
>
>   #undef FW_WM_VLV
> @@ -1014,6 +1010,72 @@ enum vlv_wm_level {
>   	VLV_WM_NUM_LEVELS = 1,
>   };
>
> +/* latency must be in 0.1us units. */
> +static unsigned int vlv_wm_method2(unsigned int pixel_rate,
> +				   unsigned int pipe_htotal,
> +				   unsigned int horiz_pixels,
> +				   unsigned int bytes_per_pixel,
> +				   unsigned int latency)
> +{
> +	unsigned int ret;
> +
> +	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
> +	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
> +	ret = DIV_ROUND_UP(ret, 64);
> +
> +	return ret;
> +}
> +
> +static void vlv_setup_wm_latency(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	/* all latencies in usec */
> +	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
> +
> +	if (IS_CHERRYVIEW(dev_priv)) {
> +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
> +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

nit #defines for these magic values please

> +	}
> +}
> +
> +static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
> +				     struct intel_crtc *crtc,
> +				     const struct intel_plane_state *state,
> +				     int level)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
> +	int clock, htotal, pixel_size, width, wm;
> +
> +	if (dev_priv->wm.pri_latency[level] == 0)
> +		return USHRT_MAX;
> +
> +	if (!state->visible)
> +		return 0;
> +
> +	pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
> +	clock = crtc->config->base.adjusted_mode.crtc_clock;
> +	htotal = crtc->config->base.adjusted_mode.crtc_htotal;
> +	width = crtc->config->pipe_src_w;
> +	if (WARN_ON(htotal == 0))
> +		htotal = 1;
> +
> +	if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
> +		/*
> +		 * FIXME the formula gives values that are
> +		 * too big for the cursor FIFO, and hence we
> +		 * would never be able to use cursors. For
> +		 * now just hardcode the watermark.
> +		 */
> +		wm = 63;

Hard coding to maximum value of 63. Should probably be programmed to 
worst case instead of maximum.

> +	} else {
> +		wm = vlv_wm_method2(clock, htotal, width, pixel_size,
> +				    dev_priv->wm.pri_latency[level] * 10);
> +	}
> +
> +	return min_t(int, wm, USHRT_MAX);
> +}
> +
>   static bool vlv_compute_sr_wm(struct drm_device *dev,
>   			      struct vlv_wm_values *wm)
>   {
> @@ -1105,6 +1167,249 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
>
>   	if (cxsr_enabled)
>   		intel_set_memory_cxsr(dev_priv, true);
> +
> +	dev_priv->wm.vlv = wm;
> +}
> +
> +static void vlv_invert_wms(struct intel_crtc *crtc)
> +{
> +	struct vlv_wm_state *wm_state = &crtc->wm_state;
> +	int level;
> +
> +	for (level = 0; level < wm_state->num_levels; level++) {
> +		struct drm_device *dev = crtc->base.dev;
> +		const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
> +		struct intel_plane *plane;
> +
> +		wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
> +		wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
> +
> +		for_each_intel_plane_on_crtc(dev, crtc, plane) {
> +			switch (plane->base.type) {
> +				int sprite;
> +			case DRM_PLANE_TYPE_CURSOR:
> +				wm_state->wm[level].cursor = plane->wm.fifo_size -
> +					wm_state->wm[level].cursor;
> +				break;
> +			case DRM_PLANE_TYPE_PRIMARY:
> +				wm_state->wm[level].primary = plane->wm.fifo_size -
> +					wm_state->wm[level].primary;
> +				break;
> +			case DRM_PLANE_TYPE_OVERLAY:
> +				sprite = plane->plane;
> +				wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
> +					wm_state->wm[level].sprite[sprite];
> +				break;
> +			}
> +		}
> +	}
> +}
> +
> +static void _vlv_compute_wm(struct intel_crtc *crtc)
> +{
> +	struct drm_device *dev = crtc->base.dev;
> +	struct vlv_wm_state *wm_state = &crtc->wm_state;
> +	struct intel_plane *plane;
> +	int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
> +	int level;
> +
> +	memset(wm_state, 0, sizeof(*wm_state));
> +
> +	wm_state->cxsr = crtc->pipe != PIPE_C;
> +	if (IS_CHERRYVIEW(dev))
> +		wm_state->num_levels = CHV_WM_NUM_LEVELS;
> +	else
> +		wm_state->num_levels = VLV_WM_NUM_LEVELS;
> +
> +	wm_state->num_active_planes = 0;
> +	for_each_intel_plane_on_crtc(dev, crtc, plane) {
> +		struct intel_plane_state *state =
> +			to_intel_plane_state(plane->base.state);
> +
> +		if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
> +			continue;
> +
> +		if (state->visible)
> +			wm_state->num_active_planes++;
> +	}
> +
> +	if (wm_state->num_active_planes != 1)
> +		wm_state->cxsr = false;
> +
> +	if (wm_state->cxsr) {
> +		for (level = 0; level < wm_state->num_levels; level++) {
> +			wm_state->sr[level].plane = sr_fifo_size;
> +			wm_state->sr[level].cursor = 63;
> +		}
> +	}
> +
> +	for_each_intel_plane_on_crtc(dev, crtc, plane) {
> +		struct intel_plane_state *state =
> +			to_intel_plane_state(plane->base.state);
> +
> +		if (!state->visible)
> +			continue;
> +
> +		/* normal watermarks */
> +		for (level = 0; level < wm_state->num_levels; level++) {
> +			int wm = vlv_compute_wm_level(plane, crtc, state, level);
> +			int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
> +
> +			/* hack */
> +			if (WARN_ON(level == 0 && wm > max_wm))
> +				wm = max_wm;
> +
> +			if (wm > plane->wm.fifo_size)
> +				break;
> +
> +			switch (plane->base.type) {
> +				int sprite;
> +			case DRM_PLANE_TYPE_CURSOR:
> +				wm_state->wm[level].cursor = wm;
> +				break;
> +			case DRM_PLANE_TYPE_PRIMARY:
> +				wm_state->wm[level].primary = wm;
> +				break;
> +			case DRM_PLANE_TYPE_OVERLAY:
> +				sprite = plane->plane;
> +				wm_state->wm[level].sprite[sprite] = wm;
> +				break;
> +			}
> +		}
> +
> +		wm_state->num_levels = level;
> +
> +		if (!wm_state->cxsr)
> +			continue;
> +
> +		/* maxfifo watermarks */
> +		switch (plane->base.type) {
> +			int sprite, level;
> +		case DRM_PLANE_TYPE_CURSOR:
> +			for (level = 0; level < wm_state->num_levels; level++)
> +				wm_state->sr[level].cursor =
> +					wm_state->sr[level].cursor;
> +			break;
> +		case DRM_PLANE_TYPE_PRIMARY:
> +			for (level = 0; level < wm_state->num_levels; level++)
> +				wm_state->sr[level].plane =
> +					min(wm_state->sr[level].plane,
> +					    wm_state->wm[level].primary);
> +			break;
> +		case DRM_PLANE_TYPE_OVERLAY:
> +			sprite = plane->plane;
> +			for (level = 0; level < wm_state->num_levels; level++)
> +				wm_state->sr[level].plane =
> +					min(wm_state->sr[level].plane,
> +					    wm_state->wm[level].sprite[sprite]);
> +			break;
> +		}
> +	}
> +
> +	/* clear any (partially) filled invalid levels */
> +	for (level = wm_state->num_levels; level < CHV_WM_NUM_LEVELS; level++) {
> +		memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
> +		memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
> +	}
> +
> +	vlv_invert_wms(crtc);
> +}
> +
> +static void vlv_merge_wm(struct drm_device *dev,
> +			 struct vlv_wm_values *wm)
> +{
> +	struct intel_crtc *crtc;
> +	int num_active_crtcs = 0;
> +
> +	if (IS_CHERRYVIEW(dev))
> +		wm->level = VLV_WM_LEVEL_DDR_DVFS;
> +	else
> +		wm->level = VLV_WM_LEVEL_PM2;
> +	wm->cxsr = true;
> +
> +	for_each_intel_crtc(dev, crtc) {
> +		const struct vlv_wm_state *wm_state = &crtc->wm_state;
> +
> +		if (!crtc->active)
> +			continue;
> +
> +		if (!wm_state->cxsr)
> +			wm->cxsr = false;
> +
> +		num_active_crtcs++;
> +		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
> +	}
> +
> +	if (num_active_crtcs != 1)
> +		wm->cxsr = false;
> +
> +	for_each_intel_crtc(dev, crtc) {
> +		struct vlv_wm_state *wm_state = &crtc->wm_state;
> +		enum pipe pipe = crtc->pipe;
> +
> +		if (!crtc->active)
> +			continue;
> +
> +		wm->pipe[pipe] = wm_state->wm[wm->level];
> +		if (wm->cxsr)
> +			wm->sr = wm_state->sr[wm->level];
> +
> +		wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
> +		wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
> +		wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
> +		wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;

Did we really decide that 0x2 was the final correct value for the DL 
registers? I figured we would be running 0x7 for CHV.

> +	}
> +}
> +
> +static void vlv_update_wm(struct drm_crtc *crtc)
> +{
> +	struct drm_device *dev = crtc->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	enum pipe pipe = intel_crtc->pipe;
> +	struct vlv_wm_values wm = {};
> +
> +	_vlv_compute_wm(intel_crtc);
> +	vlv_merge_wm(dev, &wm);
> +
> +	if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0)
> +		return;
> +
> +	if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
> +	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
> +		chv_set_memory_dvfs(dev_priv, false);
> +
> +	if (wm.level < VLV_WM_LEVEL_PM5 &&
> +	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
> +		chv_set_memory_pm5(dev_priv, false);
> +
> +	if (!wm.cxsr && dev_priv->wm.vlv.cxsr) {
> +		intel_set_memory_cxsr(dev_priv, false);
> +		intel_wait_for_vblank(dev, pipe);
> +	}
> +
> +	vlv_write_wm_values(intel_crtc, &wm);
> +
> +	DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
> +		      "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
> +		      pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
> +		      wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
> +		      wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
> +

Love the detailed DRM_DEBUG_KMS - of course its showing the only real 
issue I have found in the series that the cursor watermark is being 
programmed inverted. 63 when the cursor is disabled and 0 when its 
enabled. This leads to an SRR% increase of ~10% when the cursor is 
enabled.

// Cursor disabled
[ 2662.355218] [drm:vlv_update_wm] Setting FIFO watermarks - A: 
plane=340, cursor=63, sprite0=0, sprite1=0, SR: plane=1364, cursor=0 
level=2 cxsr=1

// Cursor Enabled
[ 2667.531049] [drm:vlv_update_wm] Setting FIFO watermarks - A: 
plane=340, cursor=0, sprite0=0, sprite1=0, SR: plane=1364, cursor=0 
level=2 cxsr=1

I haven't found the line of code that actually inverts this programming. 
I will continue to investigate during testing of this series.


> +	if (wm.cxsr && !dev_priv->wm.vlv.cxsr) {
> +		intel_wait_for_vblank(dev, pipe);
> +		intel_set_memory_cxsr(dev_priv, true);
> +	}
> +
> +	if (wm.level >= VLV_WM_LEVEL_PM5 &&
> +	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
> +		chv_set_memory_pm5(dev_priv, true);
> +
> +	if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
> +	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
> +		chv_set_memory_dvfs(dev_priv, true);
> +
> +	dev_priv->wm.vlv = wm;
>   }
>
>   static void valleyview_update_sprite_wm(struct drm_plane *plane,
> @@ -6823,8 +7128,9 @@ void intel_init_pm(struct drm_device *dev)
>   		else if (INTEL_INFO(dev)->gen == 8)
>   			dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
>   	} else if (IS_CHERRYVIEW(dev)) {
> -		dev_priv->display.update_wm = valleyview_update_wm;
> -		dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
> +		vlv_setup_wm_latency(dev);
> +
> +		dev_priv->display.update_wm = vlv_update_wm;
>   		dev_priv->display.init_clock_gating =
>   			cherryview_init_clock_gating;
>   	} else if (IS_VALLEYVIEW(dev)) {
>



More information about the Intel-gfx mailing list