[Intel-gfx] [PATCH 04/10] drm/i915: CHV DDR DVFS support and another watermark rewrite
Clint Taylor
clinton.a.taylor at intel.com
Fri Jun 26 10:56:33 PDT 2015
On 06/24/2015 12:00 PM, ville.syrjala at linux.intel.com wrote:
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
>
> Turns out the VLV/CHV system agent doesn't understand memory
> latencies, so trying to rely on the PND deadline mechanism is not
> going to fly especially when DDR DVFS is enabled. Currently we try to
> avoid the problems by lying to the system agent about the deadlines
> and setting the FIFO watermarks to 8 cachelines. This however leads to
> bad memory self refresh residency.
>
> So in order to satosfy everyone we'll just give up on the deadline
> scheme and program the watermarks old school based on the worst case
> memory latency.
>
> I've modelled this a bit on the ILK+ approach where we compute multiple
> sets of watermarks for each pipe (PM2,PM5,DDR DVFS) and when merge thet
> appropriate one later with the watermarks from other pipes. There isn't
> too much to merge actually since each pipe has a totally independent
> FIFO (well apart from the mess with the partially shared DSPARB
> registers), but still decopuling the pipes from each other seems like a
> good idea.
>
> Eventually we'll want to perform the watermark update in two phases
> around the plane update to avoid underruns due to the single buffered
> watermark registers. But that's still in limbo for ILK+ too, so I've not
> gone that far yet for VLV/CHV either.
>
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 28 +--
> drivers/gpu/drm/i915/intel_display.c | 6 +-
> drivers/gpu/drm/i915/intel_drv.h | 11 ++
> drivers/gpu/drm/i915/intel_pm.c | 318 ++++++++++++++++++++++++++++++++++-
> 4 files changed, 345 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 514adcf..37cc653 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -276,6 +276,12 @@ struct i915_hotplug {
> &dev->mode_config.plane_list, \
> base.head)
>
> +#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \
> + list_for_each_entry(intel_plane, \
> + &(dev)->mode_config.plane_list, \
> + base.head) \
> + if ((intel_plane)->pipe == (intel_crtc)->pipe)
> +
> #define for_each_intel_crtc(dev, intel_crtc) \
> list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head)
>
> @@ -1498,18 +1504,20 @@ struct ilk_wm_values {
> enum intel_ddb_partitioning partitioning;
> };
>
> -struct vlv_wm_values {
> - struct {
> - uint16_t primary;
> - uint16_t sprite[2];
> - uint8_t cursor;
> - } pipe[3];
> +struct vlv_pipe_wm {
> + uint16_t primary;
> + uint16_t sprite[2];
> + uint8_t cursor;
> +};
>
> - struct {
> - uint16_t plane;
> - uint8_t cursor;
> - } sr;
> +struct vlv_sr_wm {
> + uint16_t plane;
> + uint8_t cursor;
> +};
>
> +struct vlv_wm_values {
> + struct vlv_pipe_wm pipe[3];
> + struct vlv_sr_wm sr;
> struct {
> uint8_t cursor;
> uint8_t sprite[2];
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index b15d57f..1424320 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -4690,8 +4690,11 @@ intel_pre_disable_primary(struct drm_crtc *crtc)
> * event which is after the vblank start event, so we need to have a
> * wait-for-vblank between disabling the plane and the pipe.
> */
> - if (HAS_GMCH_DISPLAY(dev))
> + if (HAS_GMCH_DISPLAY(dev)) {
> intel_set_memory_cxsr(dev_priv, false);
> + dev_priv->wm.vlv.cxsr = false;
> + intel_wait_for_vblank(dev, pipe);
> + }
>
> /*
> * FIXME IPS should be fine as long as one plane is
> @@ -6005,7 +6008,6 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc)
>
> intel_crtc_load_lut(crtc);
>
> - intel_update_watermarks(crtc);
> intel_enable_pipe(intel_crtc);
>
> assert_vblank_disabled(crtc);
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 3673a71..f26a680 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -462,6 +462,15 @@ struct intel_crtc_state {
> enum pipe hsw_workaround_pipe;
> };
>
> +struct vlv_wm_state {
> + struct vlv_pipe_wm wm[3];
> + struct vlv_sr_wm sr[3];
> + uint8_t num_active_planes;
> + uint8_t num_levels;
> + uint8_t level;
> + bool cxsr;
> +};
> +
> struct intel_pipe_wm {
> struct intel_wm_level wm[5];
> uint32_t linetime;
> @@ -564,6 +573,8 @@ struct intel_crtc {
>
> /* scalers available on this crtc */
> int num_scalers;
> +
> + struct vlv_wm_state wm_state;
> };
>
> struct intel_plane_wm_parameters {
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index e67548d..d046e5f 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -335,8 +335,6 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
> if (IS_VALLEYVIEW(dev)) {
> I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
> POSTING_READ(FW_BLC_SELF_VLV);
> - if (IS_CHERRYVIEW(dev))
> - chv_set_memory_pm5(dev_priv, enable);
> } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
> I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
> POSTING_READ(FW_BLC_SELF);
> @@ -929,8 +927,6 @@ static void vlv_write_wm_values(struct intel_crtc *crtc,
> }
>
> POSTING_READ(DSPFW1);
> -
> - dev_priv->wm.vlv = *wm;
> }
>
> #undef FW_WM_VLV
> @@ -1014,6 +1010,72 @@ enum vlv_wm_level {
> VLV_WM_NUM_LEVELS = 1,
> };
>
> +/* latency must be in 0.1us units. */
> +static unsigned int vlv_wm_method2(unsigned int pixel_rate,
> + unsigned int pipe_htotal,
> + unsigned int horiz_pixels,
> + unsigned int bytes_per_pixel,
> + unsigned int latency)
> +{
> + unsigned int ret;
> +
> + ret = (latency * pixel_rate) / (pipe_htotal * 10000);
> + ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
> + ret = DIV_ROUND_UP(ret, 64);
> +
> + return ret;
> +}
> +
> +static void vlv_setup_wm_latency(struct drm_device *dev)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> +
> + /* all latencies in usec */
> + dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
> +
> + if (IS_CHERRYVIEW(dev_priv)) {
> + dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
> + dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
nit #defines for these magic values please
> + }
> +}
> +
> +static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
> + struct intel_crtc *crtc,
> + const struct intel_plane_state *state,
> + int level)
> +{
> + struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
> + int clock, htotal, pixel_size, width, wm;
> +
> + if (dev_priv->wm.pri_latency[level] == 0)
> + return USHRT_MAX;
> +
> + if (!state->visible)
> + return 0;
> +
> + pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
> + clock = crtc->config->base.adjusted_mode.crtc_clock;
> + htotal = crtc->config->base.adjusted_mode.crtc_htotal;
> + width = crtc->config->pipe_src_w;
> + if (WARN_ON(htotal == 0))
> + htotal = 1;
> +
> + if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
> + /*
> + * FIXME the formula gives values that are
> + * too big for the cursor FIFO, and hence we
> + * would never be able to use cursors. For
> + * now just hardcode the watermark.
> + */
> + wm = 63;
Hard coding to maximum value of 63. Should probably be programmed to
worst case instead of maximum.
> + } else {
> + wm = vlv_wm_method2(clock, htotal, width, pixel_size,
> + dev_priv->wm.pri_latency[level] * 10);
> + }
> +
> + return min_t(int, wm, USHRT_MAX);
> +}
> +
> static bool vlv_compute_sr_wm(struct drm_device *dev,
> struct vlv_wm_values *wm)
> {
> @@ -1105,6 +1167,249 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
>
> if (cxsr_enabled)
> intel_set_memory_cxsr(dev_priv, true);
> +
> + dev_priv->wm.vlv = wm;
> +}
> +
> +static void vlv_invert_wms(struct intel_crtc *crtc)
> +{
> + struct vlv_wm_state *wm_state = &crtc->wm_state;
> + int level;
> +
> + for (level = 0; level < wm_state->num_levels; level++) {
> + struct drm_device *dev = crtc->base.dev;
> + const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
> + struct intel_plane *plane;
> +
> + wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
> + wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
> +
> + for_each_intel_plane_on_crtc(dev, crtc, plane) {
> + switch (plane->base.type) {
> + int sprite;
> + case DRM_PLANE_TYPE_CURSOR:
> + wm_state->wm[level].cursor = plane->wm.fifo_size -
> + wm_state->wm[level].cursor;
> + break;
> + case DRM_PLANE_TYPE_PRIMARY:
> + wm_state->wm[level].primary = plane->wm.fifo_size -
> + wm_state->wm[level].primary;
> + break;
> + case DRM_PLANE_TYPE_OVERLAY:
> + sprite = plane->plane;
> + wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
> + wm_state->wm[level].sprite[sprite];
> + break;
> + }
> + }
> + }
> +}
> +
> +static void _vlv_compute_wm(struct intel_crtc *crtc)
> +{
> + struct drm_device *dev = crtc->base.dev;
> + struct vlv_wm_state *wm_state = &crtc->wm_state;
> + struct intel_plane *plane;
> + int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
> + int level;
> +
> + memset(wm_state, 0, sizeof(*wm_state));
> +
> + wm_state->cxsr = crtc->pipe != PIPE_C;
> + if (IS_CHERRYVIEW(dev))
> + wm_state->num_levels = CHV_WM_NUM_LEVELS;
> + else
> + wm_state->num_levels = VLV_WM_NUM_LEVELS;
> +
> + wm_state->num_active_planes = 0;
> + for_each_intel_plane_on_crtc(dev, crtc, plane) {
> + struct intel_plane_state *state =
> + to_intel_plane_state(plane->base.state);
> +
> + if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
> + continue;
> +
> + if (state->visible)
> + wm_state->num_active_planes++;
> + }
> +
> + if (wm_state->num_active_planes != 1)
> + wm_state->cxsr = false;
> +
> + if (wm_state->cxsr) {
> + for (level = 0; level < wm_state->num_levels; level++) {
> + wm_state->sr[level].plane = sr_fifo_size;
> + wm_state->sr[level].cursor = 63;
> + }
> + }
> +
> + for_each_intel_plane_on_crtc(dev, crtc, plane) {
> + struct intel_plane_state *state =
> + to_intel_plane_state(plane->base.state);
> +
> + if (!state->visible)
> + continue;
> +
> + /* normal watermarks */
> + for (level = 0; level < wm_state->num_levels; level++) {
> + int wm = vlv_compute_wm_level(plane, crtc, state, level);
> + int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
> +
> + /* hack */
> + if (WARN_ON(level == 0 && wm > max_wm))
> + wm = max_wm;
> +
> + if (wm > plane->wm.fifo_size)
> + break;
> +
> + switch (plane->base.type) {
> + int sprite;
> + case DRM_PLANE_TYPE_CURSOR:
> + wm_state->wm[level].cursor = wm;
> + break;
> + case DRM_PLANE_TYPE_PRIMARY:
> + wm_state->wm[level].primary = wm;
> + break;
> + case DRM_PLANE_TYPE_OVERLAY:
> + sprite = plane->plane;
> + wm_state->wm[level].sprite[sprite] = wm;
> + break;
> + }
> + }
> +
> + wm_state->num_levels = level;
> +
> + if (!wm_state->cxsr)
> + continue;
> +
> + /* maxfifo watermarks */
> + switch (plane->base.type) {
> + int sprite, level;
> + case DRM_PLANE_TYPE_CURSOR:
> + for (level = 0; level < wm_state->num_levels; level++)
> + wm_state->sr[level].cursor =
> + wm_state->sr[level].cursor;
> + break;
> + case DRM_PLANE_TYPE_PRIMARY:
> + for (level = 0; level < wm_state->num_levels; level++)
> + wm_state->sr[level].plane =
> + min(wm_state->sr[level].plane,
> + wm_state->wm[level].primary);
> + break;
> + case DRM_PLANE_TYPE_OVERLAY:
> + sprite = plane->plane;
> + for (level = 0; level < wm_state->num_levels; level++)
> + wm_state->sr[level].plane =
> + min(wm_state->sr[level].plane,
> + wm_state->wm[level].sprite[sprite]);
> + break;
> + }
> + }
> +
> + /* clear any (partially) filled invalid levels */
> + for (level = wm_state->num_levels; level < CHV_WM_NUM_LEVELS; level++) {
> + memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
> + memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
> + }
> +
> + vlv_invert_wms(crtc);
> +}
> +
> +static void vlv_merge_wm(struct drm_device *dev,
> + struct vlv_wm_values *wm)
> +{
> + struct intel_crtc *crtc;
> + int num_active_crtcs = 0;
> +
> + if (IS_CHERRYVIEW(dev))
> + wm->level = VLV_WM_LEVEL_DDR_DVFS;
> + else
> + wm->level = VLV_WM_LEVEL_PM2;
> + wm->cxsr = true;
> +
> + for_each_intel_crtc(dev, crtc) {
> + const struct vlv_wm_state *wm_state = &crtc->wm_state;
> +
> + if (!crtc->active)
> + continue;
> +
> + if (!wm_state->cxsr)
> + wm->cxsr = false;
> +
> + num_active_crtcs++;
> + wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
> + }
> +
> + if (num_active_crtcs != 1)
> + wm->cxsr = false;
> +
> + for_each_intel_crtc(dev, crtc) {
> + struct vlv_wm_state *wm_state = &crtc->wm_state;
> + enum pipe pipe = crtc->pipe;
> +
> + if (!crtc->active)
> + continue;
> +
> + wm->pipe[pipe] = wm_state->wm[wm->level];
> + if (wm->cxsr)
> + wm->sr = wm_state->sr[wm->level];
> +
> + wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
> + wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
> + wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
> + wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
Did we really decide that 0x2 was the final correct value for the DL
registers? I figured we would be running 0x7 for CHV.
> + }
> +}
> +
> +static void vlv_update_wm(struct drm_crtc *crtc)
> +{
> + struct drm_device *dev = crtc->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> + enum pipe pipe = intel_crtc->pipe;
> + struct vlv_wm_values wm = {};
> +
> + _vlv_compute_wm(intel_crtc);
> + vlv_merge_wm(dev, &wm);
> +
> + if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0)
> + return;
> +
> + if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
> + dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
> + chv_set_memory_dvfs(dev_priv, false);
> +
> + if (wm.level < VLV_WM_LEVEL_PM5 &&
> + dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
> + chv_set_memory_pm5(dev_priv, false);
> +
> + if (!wm.cxsr && dev_priv->wm.vlv.cxsr) {
> + intel_set_memory_cxsr(dev_priv, false);
> + intel_wait_for_vblank(dev, pipe);
> + }
> +
> + vlv_write_wm_values(intel_crtc, &wm);
> +
> + DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
> + "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
> + pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
> + wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
> + wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
> +
Love the detailed DRM_DEBUG_KMS - of course its showing the only real
issue I have found in the series that the cursor watermark is being
programmed inverted. 63 when the cursor is disabled and 0 when its
enabled. This leads to an SRR% increase of ~10% when the cursor is
enabled.
// Cursor disabled
[ 2662.355218] [drm:vlv_update_wm] Setting FIFO watermarks - A:
plane=340, cursor=63, sprite0=0, sprite1=0, SR: plane=1364, cursor=0
level=2 cxsr=1
// Cursor Enabled
[ 2667.531049] [drm:vlv_update_wm] Setting FIFO watermarks - A:
plane=340, cursor=0, sprite0=0, sprite1=0, SR: plane=1364, cursor=0
level=2 cxsr=1
I haven't found the line of code that actually inverts this programming.
I will continue to investigate during testing of this series.
> + if (wm.cxsr && !dev_priv->wm.vlv.cxsr) {
> + intel_wait_for_vblank(dev, pipe);
> + intel_set_memory_cxsr(dev_priv, true);
> + }
> +
> + if (wm.level >= VLV_WM_LEVEL_PM5 &&
> + dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
> + chv_set_memory_pm5(dev_priv, true);
> +
> + if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
> + dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
> + chv_set_memory_dvfs(dev_priv, true);
> +
> + dev_priv->wm.vlv = wm;
> }
>
> static void valleyview_update_sprite_wm(struct drm_plane *plane,
> @@ -6823,8 +7128,9 @@ void intel_init_pm(struct drm_device *dev)
> else if (INTEL_INFO(dev)->gen == 8)
> dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
> } else if (IS_CHERRYVIEW(dev)) {
> - dev_priv->display.update_wm = valleyview_update_wm;
> - dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
> + vlv_setup_wm_latency(dev);
> +
> + dev_priv->display.update_wm = vlv_update_wm;
> dev_priv->display.init_clock_gating =
> cherryview_init_clock_gating;
> } else if (IS_VALLEYVIEW(dev)) {
>
More information about the Intel-gfx
mailing list