[Intel-gfx] [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW

Paulo Zanoni przanoni at gmail.com
Mon May 6 22:38:16 CEST 2013


From: Paulo Zanoni <paulo.r.zanoni at intel.com>

We were previously calling sandybridge_update_wm on HSW, but the SNB
function didn't really match the HSW specification, so we were just
writing the wrong values. For example, I was not seeing any LP
watermark ever enabled.

So this patch implements the haswell_update_wm as described in our
specification. The values match the "Haswell Watermark Calculator"
tool. There are only 2 pieces missing for this code:
 1 - Sprite watermarks. We currently set the maximum possible value
 for the WM_PIPE register and we never enable LP watermarks if any
 sprite is enabled, so we should be safe from bugs. A future patch
 should add support for correctly setting the sprite WMs.
 2 - Support for 5/6 data buffer partitioning. This is only useful if
 we have sprite watermarks enabled and working, and should be simple
 to implement with the infrastructure provided by this patch.

With this patch I can finally see us reaching PC7 state with screen
sizes <= 1920x1080.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h |    3 +
 drivers/gpu/drm/i915/intel_pm.c |  411 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 401 insertions(+), 13 deletions(-)

This patch applies on top of dinq + all the 15 patches I previously sent to
the intel-gfx mailing list.

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c05bd4a..1aed704 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4924,6 +4924,9 @@
 #define  SFUSE_STRAP_DDIC_DETECTED	(1<<1)
 #define  SFUSE_STRAP_DDID_DETECTED	(1<<0)
 
+#define WM_MISC				0x45260
+#define  WM_MISC_DATA_PARTITION_5_6	(1 << 0)
+
 #define WM_DBG				0x45280
 #define  WM_DBG_DISALLOW_MULTIPLE_LP	(1<<0)
 #define  WM_DBG_DISALLOW_MAXFIFO	(1<<1)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e58f47c..707d0a4 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2014,20 +2014,189 @@ static void ivybridge_update_wm(struct drm_device *dev)
 		   cursor_wm);
 }
 
-static void
-haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
+static int hsw_wm_get_pixel_rate(struct drm_device *dev,
+				 struct drm_crtc *crtc)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pixel_rate, pfit_size;
+
+	if (intel_crtc->config.pixel_target_clock)
+		pixel_rate = intel_crtc->config.pixel_target_clock;
+	else
+		pixel_rate = intel_crtc->config.adjusted_mode.clock;
+
+	/* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
+	 * adjust the pixel_rate here. */
+
+	pfit_size = intel_crtc->config.pch_pfit.size;
+	if (pfit_size) {
+		int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
+
+		x = (pfit_size >> 16) & 0xFFFF;
+		y = pfit_size & 0xFFFF;
+		crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
+		crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
+
+		hscale = crtc_x * 1000 / x;
+		vscale = crtc_y * 1000 / y;
+		hscale = (hscale < 1000) ? 1000 : hscale;
+		vscale = (vscale < 1000) ? 1000 : vscale;
+		totscale = hscale * vscale / 1000;
+		pixel_rate = pixel_rate * totscale / 1000;
+	}
+
+	return pixel_rate;
+}
+
+static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
+{
+	int ret;
+
+	ret = pixel_rate * bytes_per_pixel * latency;
+	ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
+	return ret;
+}
+
+static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
+			  int bytes_per_pixel, int latency)
+{
+	int ret;
+
+	ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
+	ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
+	ret = DIV_ROUND_UP(ret, 64) + 2;
+	return ret;
+}
+
+static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
+{
+	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
+}
+
+struct hsw_pipe_wm_parameters {
+	bool active;
+	bool sprite_enabled;
+	int pipe_htotal;
+	int pixel_rate;
+	int pri_bytes_per_pixel;
+	int cur_bytes_per_pixel;
+	int pri_horiz_pixels;
+	int cur_horiz_pixels;
+};
+
+struct hsw_lp_wm_result {
+	bool enable;
+	bool fbc_enable;
+	int pri_val;
+	int cur_val;
+	int fbc_val;
+};
+
+static void hsw_compute_lp_wm(int latency, int pri_max, int cur_max,
+			      int fbc_max,
+			      struct hsw_pipe_wm_parameters *params,
+			      struct hsw_lp_wm_result *result)
+{
+	enum pipe pipe;
+	int pri_val[3], cur_val[3], fbc_val[3];
+	int method1, method2;
+
+	for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
+		struct hsw_pipe_wm_parameters *p = &params[pipe];
+
+		if (p->active) {
+			method1 = hsw_wm_method1(p->pixel_rate,
+						 p->pri_bytes_per_pixel,
+						 latency);
+			method2 = hsw_wm_method2(p->pixel_rate,
+						 p->pipe_htotal,
+						 p->pri_horiz_pixels,
+						 p->cur_bytes_per_pixel,
+						 latency);
+			pri_val[pipe] = min(method1, method2);
+
+			cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
+						       p->pipe_htotal,
+						       p->cur_horiz_pixels,
+						       p->cur_bytes_per_pixel,
+						       latency);
+			fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
+						   p->pri_horiz_pixels,
+						   p->pri_bytes_per_pixel);
+		} else {
+			pri_val[pipe] = 0;
+			cur_val[pipe] = 0;
+			fbc_val[pipe] = 0;
+		}
+	}
+
+	result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
+	result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
+	result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
+
+	if (result->fbc_val > fbc_max) {
+		result->fbc_enable = false;
+		result->fbc_val = 0;
+	} else {
+		result->fbc_enable = true;
+	}
+
+	result->enable = result->pri_val <= pri_max &&
+			 result->cur_val <= cur_max;
+}
+
+static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
+				    int latency, enum pipe pipe,
+				    struct hsw_pipe_wm_parameters *params)
+{
+	int pri_val, cur_val, spr_val;
+
+	if (params->active) {
+		pri_val = hsw_wm_method1(params->pixel_rate,
+					 params->pri_bytes_per_pixel,
+					 latency);
+		cur_val = hsw_wm_method2(params->pixel_rate,
+					 params->pipe_htotal,
+					 params->cur_horiz_pixels,
+					 params->cur_bytes_per_pixel,
+					 latency);
+
+		/* TODO: Actually compute the real value. For now we're just
+		 * setting the maximum possible value, which should be safe for
+		 * the biggest possible supported sprite. */
+		spr_val = params->sprite_enabled ? 127 : 0;
+	} else {
+		pri_val = 0;
+		cur_val = 0;
+		spr_val = 0;
+	}
+
+	WARN(pri_val > 127,
+	     "Primary WM error, mode not supported for pipe %c\n",
+	     pipe_name(pipe));
+	WARN(spr_val > 127,
+	     "Sprite WM error, mode not supported for pipe %c\n",
+	     pipe_name(pipe));
+	WARN(cur_val > 63,
+	     "Cursor WM error, mode not supported for pipe %c\n",
+	     pipe_name(pipe));
+
+	return (pri_val << WM0_PIPE_PLANE_SHIFT) |
+	       (spr_val << WM0_PIPE_SPRITE_SHIFT) |
+	       cur_val;
+}
+
+static uint32_t
+hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum pipe pipe = intel_crtc->pipe;
 	struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
 	int target_clock;
 	u32 linetime, ips_linetime;
 
-	if (!intel_crtc_active(crtc)) {
-		I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
-		return;
-	}
+	if (!intel_crtc_active(crtc))
+		return 0;
 
 	if (intel_crtc->config.pixel_target_clock)
 		target_clock = intel_crtc->config.pixel_target_clock;
@@ -2041,23 +2210,231 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
 	ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
 					 intel_ddi_get_cdclk_freq(dev_priv));
 
-	I915_WRITE(PIPE_WM_LINETIME(pipe),
-		   PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
-		   PIPE_WM_LINETIME_TIME(linetime));
+	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
+	       PIPE_WM_LINETIME_TIME(linetime);
+}
+
+/*
+ * The spec says we shouldn't write when we don't need, because every write
+ * causes WMs to be re-evaluated, expending some power.
+ */
+static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
+				uint32_t *pipe_after, uint32_t *lp_after,
+				uint32_t *lp_spr_after,
+				uint32_t *linetime_after)
+{
+	uint32_t pipe_before[3], lp_before[3], lp_spr_before[3];
+	uint32_t linetime_before[3];
+
+	pipe_before[0] = I915_READ(WM0_PIPEA_ILK);
+	pipe_before[1] = I915_READ(WM0_PIPEB_ILK);
+	pipe_before[2] = I915_READ(WM0_PIPEC_IVB);
+	lp_before[0] = I915_READ(WM1_LP_ILK);
+	lp_before[1] = I915_READ(WM2_LP_ILK);
+	lp_before[2] = I915_READ(WM3_LP_ILK);
+	lp_spr_before[0] = I915_READ(WM1S_LP_ILK);
+	lp_spr_before[1] = I915_READ(WM2S_LP_IVB);
+	lp_spr_before[2] = I915_READ(WM3S_LP_IVB);
+	linetime_before[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
+	linetime_before[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
+	linetime_before[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
+
+	if (memcmp(pipe_after, pipe_before, 3) == 0 &&
+	    memcmp(lp_after, lp_before, 3) == 0 &&
+	    memcmp(lp_spr_after, lp_spr_before, 3) == 0 &&
+	    memcmp(linetime_after, linetime_before, 3) == 0)
+		return;
+
+	if (lp_before[2] != 0)
+		I915_WRITE(WM3_LP_ILK, 0);
+	if (lp_before[1] != 0)
+		I915_WRITE(WM2_LP_ILK, 0);
+	if (lp_before[0] != 0)
+		I915_WRITE(WM1_LP_ILK, 0);
+
+	if (pipe_before[0] != pipe_after[0])
+		I915_WRITE(WM0_PIPEA_ILK, pipe_after[0]);
+	if (pipe_before[1] != pipe_after[1])
+		I915_WRITE(WM0_PIPEB_ILK, pipe_after[1]);
+	if (pipe_before[2] != pipe_after[2])
+		I915_WRITE(WM0_PIPEC_IVB, pipe_after[2]);
+
+	if (linetime_before[0] != linetime_after[0])
+		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), linetime_after[0]);
+	if (linetime_before[1] != linetime_after[1])
+		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), linetime_after[1]);
+	if (linetime_before[2] != linetime_after[2])
+		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), linetime_after[2]);
+
+	if (lp_spr_before[0] != lp_spr_after[0])
+		I915_WRITE(WM1S_LP_ILK, lp_spr_after[0]);
+	if (lp_spr_before[1] != lp_spr_after[1])
+		I915_WRITE(WM2S_LP_IVB, lp_spr_after[1]);
+	if (lp_spr_before[2] != lp_spr_after[2])
+		I915_WRITE(WM3S_LP_IVB, lp_spr_after[2]);
+
+	if (lp_after[0] != 0)
+		I915_WRITE(WM1_LP_ILK, lp_after[0]);
+	if (lp_after[1] != 0)
+		I915_WRITE(WM2_LP_ILK, lp_after[1]);
+	if (lp_after[2] != 0)
+		I915_WRITE(WM3_LP_ILK, lp_after[2]);
 }
 
 static void haswell_update_wm(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
+	struct drm_plane *plane;
+	uint64_t sskpd = I915_READ64(MCH_SSKPD);
+	uint32_t wm[5], tmp, lp_wm_val[3], lp_spr_wm_val[3];
 	enum pipe pipe;
+	int lp_pri_max, lp_cur_max, lp_fbc_max, pipes_active, i;
+	struct hsw_pipe_wm_parameters params[3];
+	struct hsw_lp_wm_result lp_results[4];
+	uint32_t pipe_results[3], linetime_results[3];
+	int sprites_enabled = 0;
+
+	if ((sskpd >> 56) & 0xFF)
+		wm[0] = (sskpd >> 56) & 0xFF;
+	else
+		wm[0] = sskpd & 0xF;
+	wm[1] = ((sskpd >> 4) & 0xFF) * 5;
+	wm[2] = ((sskpd >> 12) & 0xFF) * 5;
+	wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
+	wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
 
+	pipes_active = 0;
 	for_each_pipe(pipe) {
+		struct intel_crtc *intel_crtc;
+
 		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
-		haswell_update_linetime_wm(dev, crtc);
+		intel_crtc = to_intel_crtc(crtc);
+
+		params[pipe].active = intel_crtc_active(crtc);
+		if (!params[pipe].active)
+			continue;
+
+		pipes_active++;
+
+		params[pipe].pipe_htotal =
+			intel_crtc->config.adjusted_mode.htotal;
+		params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
+		params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
+		params[pipe].cur_bytes_per_pixel = 4;
+		params[pipe].pri_horiz_pixels =
+			intel_crtc->config.adjusted_mode.hdisplay;
+		params[pipe].cur_horiz_pixels = 64;
 	}
 
-	sandybridge_update_wm(dev);
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+		struct intel_plane *intel_plane = to_intel_plane(plane);
+		params[intel_plane->pipe].sprite_enabled = plane->enabled;
+		if (plane->enabled)
+			sprites_enabled++;
+	}
+
+	/* TODO: compute values with the 2 possible parititionings and choose
+	 * the best. */
+	if (pipes_active > 1) {
+		lp_pri_max = sprites_enabled ? 128 : 256;
+		lp_cur_max = 64;
+	} else {
+		lp_pri_max = sprites_enabled ? 384 : 768;
+		lp_cur_max = 255;
+	}
+	lp_fbc_max = 15;
+
+	hsw_compute_lp_wm(wm[1], lp_pri_max, lp_cur_max, lp_fbc_max,
+			  params, &lp_results[0]);
+	hsw_compute_lp_wm(wm[2], lp_pri_max, lp_cur_max, lp_fbc_max,
+			  params, &lp_results[1]);
+	hsw_compute_lp_wm(wm[3], lp_pri_max, lp_cur_max, lp_fbc_max,
+			  params, &lp_results[2]);
+	hsw_compute_lp_wm(wm[4], lp_pri_max, lp_cur_max, lp_fbc_max,
+			  params, &lp_results[3]);
+
+	/* The spec says it is preferred to disable FBC WMs instead of disabling
+	 * a WM level. */
+	tmp = I915_READ(DISP_ARB_CTL);
+	tmp &= ~DISP_FBC_WM_DIS;
+	for (i = 0; i < 4; i++) {
+		if (lp_results[i].enable && !lp_results[i].fbc_enable) {
+			tmp |= DISP_FBC_WM_DIS;
+			break;
+		}
+	}
+	I915_WRITE(DISP_ARB_CTL, tmp);
+
+	if (lp_results[3].enable)
+		tmp = WM3_LP_EN | (8 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[3].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[3].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[3].cur_val;
+	else if (lp_results[2].enable)
+		tmp = WM3_LP_EN | (6 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[2].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[2].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[2].cur_val;
+	else
+		tmp = 0;
+	lp_wm_val[2] = tmp;
+
+	if (lp_results[3].enable && lp_results[2].enable)
+		tmp = WM2_LP_EN | (6 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[2].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[2].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[2].cur_val;
+	else if (!lp_results[3].enable && lp_results[1].enable)
+		tmp = WM2_LP_EN | (4 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[1].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[1].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[1].cur_val;
+	else
+		tmp = 0;
+	lp_wm_val[1] = tmp;
+
+	if (lp_results[0].enable)
+		tmp = WM1_LP_SR_EN | (2 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[0].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[0].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[0].cur_val;
+	else
+		tmp = 0;
+	lp_wm_val[0] = tmp;
+
+	/* TODO: for now we're completely ignoring the sprite values on the LP
+	 * calculations, so if we detect any sprite is enabled, just disable all
+	 * the LP watermarks. */
+	 if (sprites_enabled) {
+		lp_wm_val[0] = 0;
+		lp_wm_val[1] = 0;
+		lp_wm_val[2] = 0;
+	}
+	lp_spr_wm_val[0] = 0;
+	lp_spr_wm_val[1] = 0;
+	lp_spr_wm_val[2] = 0;
+
+	for_each_pipe(pipe)
+		pipe_results[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0], pipe,
+							 &params[pipe]);
+
+	for_each_pipe(pipe) {
+		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+		linetime_results[pipe] = hsw_compute_linetime_wm(dev, crtc);
+	}
+
+	hsw_write_wm_values(dev_priv, pipe_results, lp_wm_val, lp_spr_wm_val,
+			    linetime_results);
+}
+
+static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
+				     uint32_t sprite_width, int pixel_size)
+{
+	/* TODO: We don't really support this yet, so for now just update the
+	 * normal watermarks. This will make sure we have safe values for the
+	 * sprite watermarks and disable all the LP WMs. */
+	haswell_update_wm(dev);
 }
 
 static bool
@@ -4008,11 +4385,18 @@ static void haswell_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
+	uint32_t val;
 
 	I915_WRITE(WM3_LP_ILK, 0);
 	I915_WRITE(WM2_LP_ILK, 0);
 	I915_WRITE(WM1_LP_ILK, 0);
 
+	/* For now our watermarks code assumes we're always using 1/2 data
+	 * buffer parititioning. */
+	val = I915_READ(WM_MISC);
+	val &= ~WM_MISC_DATA_PARTITION_5_6;
+	I915_WRITE(WM_MISC, val);
+
 	/* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
 	 * This implements the WaDisableRCZUnitClockGating workaround.
 	 */
@@ -4498,7 +4882,8 @@ void intel_init_pm(struct drm_device *dev)
 		} else if (IS_HASWELL(dev)) {
 			if (I915_READ64(MCH_SSKPD)) {
 				dev_priv->display.update_wm = haswell_update_wm;
-				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
+				dev_priv->display.update_sprite_wm =
+					haswell_update_sprite_wm;
 			} else {
 				DRM_DEBUG_KMS("Failed to read display plane latency. "
 					      "Disable CxSR\n");
-- 
1.7.10.4




More information about the Intel-gfx mailing list