[Intel-gfx] [PATCH 4/5] drm/i915: properly set HSW WM_LP watermarks

Paulo Zanoni przanoni at gmail.com
Fri May 24 16:59:20 CEST 2013


From: Paulo Zanoni <paulo.r.zanoni at intel.com>

We were previously only setting the WM_PIPE registers, now we are
setting the LP watermark registers. This should allow deeper PC
states, resulting in power savings.

We're only using 1/2 data buffer partitioning for now.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h |   4 +
 drivers/gpu/drm/i915/intel_pm.c | 194 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 187 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e86606c..58230ea 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3057,6 +3057,10 @@
 #define WM3S_LP_IVB		0x45128
 #define  WM1S_LP_EN		(1<<31)
 
+#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
+	(WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
+	 ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
+
 /* Memory latency timer register */
 #define MLTR_ILK		0x11222
 #define  MLTR_WM1_SHIFT		0
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 2ee1d01..9f9eb48 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2132,6 +2132,12 @@ static uint32_t hsw_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
 	return ret;
 }
 
+static uint32_t hsw_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
+			   uint8_t bytes_per_pixel)
+{
+	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
+}
+
 struct hsw_pipe_wm_parameters {
 	bool active;
 	bool sprite_enabled;
@@ -2145,11 +2151,28 @@ struct hsw_pipe_wm_parameters {
 	uint32_t pixel_rate;
 };
 
+struct hsw_wm_maximums {
+	uint16_t pri;
+	uint16_t spr;
+	uint16_t cur;
+	uint16_t fbc;
+};
+
+struct hsw_lp_wm_result {
+	bool enable;
+	bool fbc_enable;
+	uint32_t pri_val;
+	uint32_t spr_val;
+	uint32_t cur_val;
+	uint32_t fbc_val;
+};
+
 struct hsw_wm_values {
 	uint32_t wm_pipe[3];
 	uint32_t wm_lp[3];
 	uint32_t wm_lp_spr[3];
 	uint32_t wm_linetime[3];
+	bool enable_fbc_wm;
 };
 
 enum hsw_data_buf_partitioning {
@@ -2170,6 +2193,27 @@ static uint32_t hsw_compute_pri_wm_pipe(struct hsw_pipe_wm_parameters *params,
 			      mem_value);
 }
 
+/* Only for WM_LP. */
+static uint32_t hsw_compute_pri_wm_lp(struct hsw_pipe_wm_parameters *params,
+				      uint32_t mem_value)
+{
+	uint32_t method1, method2;
+
+	/* TODO: for now, assume the primary plane is always enabled. */
+	if (!params->active)
+		return 0;
+
+	method1 = hsw_wm_method1(params->pixel_rate,
+				 params->pri_bytes_per_pixel,
+				 mem_value);
+	method2 = hsw_wm_method2(params->pixel_rate,
+				 params->pipe_htotal,
+				 params->pri_horiz_pixels,
+				 params->pri_bytes_per_pixel,
+				 mem_value);
+	return min(method1, method2);
+}
+
 /* For both WM_PIPE and WM_LP. */
 static uint32_t hsw_compute_spr_wm(struct hsw_pipe_wm_parameters *params,
 				   uint32_t mem_value)
@@ -2204,6 +2248,52 @@ static uint32_t hsw_compute_cur_wm(struct hsw_pipe_wm_parameters *params,
 			      mem_value);
 }
 
+/* Only for WM_LP. */
+static uint32_t hsw_compute_fbc_wm(struct hsw_pipe_wm_parameters *params,
+				   uint32_t pri_val,
+				   uint32_t mem_value)
+{
+	if (!params->active)
+		return 0;
+
+	return hsw_wm_fbc(pri_val,
+			  params->pri_horiz_pixels,
+			  params->pri_bytes_per_pixel);
+}
+
+static void hsw_compute_lp_wm(uint32_t mem_value, struct hsw_wm_maximums *max,
+			      struct hsw_pipe_wm_parameters *params,
+			      struct hsw_lp_wm_result *result)
+{
+	enum pipe pipe;
+	uint32_t pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
+
+	for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
+		struct hsw_pipe_wm_parameters *p = &params[pipe];
+
+		pri_val[pipe] = hsw_compute_pri_wm_lp(p, mem_value);
+		spr_val[pipe] = hsw_compute_spr_wm(p, mem_value);
+		cur_val[pipe] = hsw_compute_cur_wm(p, mem_value);
+		fbc_val[pipe] = hsw_compute_fbc_wm(p, pri_val[pipe], mem_value);
+	}
+
+	result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
+	result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
+	result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
+	result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
+
+	if (result->fbc_val > max->fbc) {
+		result->fbc_enable = false;
+		result->fbc_val = 0;
+	} else {
+		result->fbc_enable = true;
+	}
+
+	result->enable = result->pri_val <= max->pri &&
+			 result->spr_val <= max->spr &&
+			 result->cur_val <= max->cur;
+}
+
 static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
 				    uint32_t mem_value, enum pipe pipe,
 				    struct hsw_pipe_wm_parameters *params)
@@ -2253,13 +2343,15 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
 
 static void hsw_compute_wm_parameters(struct drm_device *dev,
 				      struct hsw_pipe_wm_parameters *params,
-				      uint32_t *wm)
+				      uint32_t *wm,
+				      struct hsw_wm_maximums *lp_max_1_2)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
 	struct drm_plane *plane;
 	uint64_t sskpd = I915_READ64(MCH_SSKPD);
 	enum pipe pipe;
+	int pipes_active = 0, sprites_enabled = 0;
 
 	if ((sskpd >> 56) & 0xFF)
 		wm[0] = (sskpd >> 56) & 0xFF;
@@ -2281,6 +2373,8 @@ static void hsw_compute_wm_parameters(struct drm_device *dev,
 		if (!p->active)
 			continue;
 
+		pipes_active++;
+
 		p->pipe_htotal = intel_crtc->config.adjusted_mode.htotal;
 		p->pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
 		p->pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
@@ -2299,25 +2393,89 @@ static void hsw_compute_wm_parameters(struct drm_device *dev,
 		p->sprite_enabled = intel_plane->wm.enable;
 		p->spr_bytes_per_pixel = intel_plane->wm.bytes_per_pixel;
 		p->spr_horiz_pixels = intel_plane->wm.horiz_pixels;
+
+		if (p->sprite_enabled)
+			sprites_enabled++;
+	}
+
+	if (pipes_active > 1) {
+		lp_max_1_2->pri = sprites_enabled ? 128 : 256;
+		lp_max_1_2->spr = 128;
+		lp_max_1_2->cur = 64;
+	} else {
+		lp_max_1_2->pri = sprites_enabled ? 384 : 768;
+		lp_max_1_2->spr = 384;
+		lp_max_1_2->cur = 255;
 	}
+	lp_max_1_2->fbc = 15;
 }
 
 static void hsw_compute_wm_results(struct drm_device *dev,
 				   struct hsw_pipe_wm_parameters *params,
 				   uint32_t *wm,
+				   struct hsw_wm_maximums *lp_maximums,
 				   struct hsw_wm_values *results)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
+	struct hsw_lp_wm_result lp_results[4];
 	enum pipe pipe;
+	int i;
+
+	hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
+	hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
+	hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
+	hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
+
+	/* The spec says it is preferred to disable FBC WMs instead of disabling
+	 * a WM level. */
+	results->enable_fbc_wm = true;
+	for (i = 0; i < 4; i++) {
+		if (lp_results[i].enable && !lp_results[i].fbc_enable) {
+			results->enable_fbc_wm = false;
+			break;
+		}
+	}
+
+	if (lp_results[3].enable) {
+		results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
+						  lp_results[3].pri_val,
+						  lp_results[3].cur_val);
+		results->wm_lp_spr[2] = lp_results[3].spr_val;
+	} else if (lp_results[2].enable) {
+		results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
+						  lp_results[2].pri_val,
+						  lp_results[2].cur_val);
+		results->wm_lp_spr[2] = lp_results[2].spr_val;
+	} else {
+		results->wm_lp[2] = 0;
+		results->wm_lp_spr[2] = 0;
+	}
+
+	if (lp_results[3].enable && lp_results[2].enable) {
+		results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
+						  lp_results[2].pri_val,
+						  lp_results[2].cur_val);
+		results->wm_lp_spr[1] = lp_results[2].spr_val;
+	} else if (!lp_results[3].enable && lp_results[1].enable) {
+		results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
+						  lp_results[1].pri_val,
+						  lp_results[1].cur_val);
+		results->wm_lp_spr[1] = lp_results[1].spr_val;
+	} else {
+		results->wm_lp[1] = 0;
+		results->wm_lp_spr[1] = 0;
+	}
 
-	/* No support for LP WMs yet. */
-	results->wm_lp[2] = 0;
-	results->wm_lp[1] = 0;
-	results->wm_lp[0] = 0;
-	results->wm_lp_spr[2] = 0;
-	results->wm_lp_spr[1] = 0;
-	results->wm_lp_spr[0] = 0;
+	if (lp_results[0].enable) {
+		results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
+						  lp_results[0].pri_val,
+						  lp_results[0].cur_val);
+		results->wm_lp_spr[0] = lp_results[0].spr_val;
+	} else {
+		results->wm_lp[0] = 0;
+		results->wm_lp_spr[0] = 0;
+	}
 
 	for_each_pipe(pipe)
 		results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
@@ -2341,6 +2499,7 @@ static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
 	struct hsw_wm_values previous;
 	uint32_t val;
 	enum hsw_data_buf_partitioning prev_partitioning;
+	bool prev_enable_fbc_wm;
 
 	previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
 	previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
@@ -2358,11 +2517,14 @@ static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
 	prev_partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
 			    HSW_DATA_BUF_PART_5_6 : HSW_DATA_BUF_PART_1_2;
 
+	prev_enable_fbc_wm = !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
+
 	if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
 	    memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
 	    memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
 	    memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0 &&
-	    partitioning == prev_partitioning)
+	    partitioning == prev_partitioning &&
+	    results->enable_fbc_wm == prev_enable_fbc_wm)
 		return;
 
 	if (previous.wm_lp[2] != 0)
@@ -2395,6 +2557,15 @@ static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
 		I915_WRITE(WM_MISC, val);
 	}
 
+	if (prev_enable_fbc_wm != results->enable_fbc_wm) {
+		val = I915_READ(DISP_ARB_CTL);
+		if (results->enable_fbc_wm)
+			val &= ~DISP_FBC_WM_DIS;
+		else
+			val |= DISP_FBC_WM_DIS;
+		I915_WRITE(DISP_ARB_CTL, val);
+	}
+
 	if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
 		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
 	if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
@@ -2413,12 +2584,13 @@ static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
 static void haswell_update_wm(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct hsw_wm_maximums lp_max_1_2;
 	struct hsw_pipe_wm_parameters params[3];
 	struct hsw_wm_values results;
 	uint32_t wm[5];
 
-	hsw_compute_wm_parameters(dev, params, wm);
-	hsw_compute_wm_results(dev, params, wm, &results);
+	hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2);
+	hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results);
 	hsw_write_wm_values(dev_priv, &results, HSW_DATA_BUF_PART_1_2);
 }
 
-- 
1.8.1.2




More information about the Intel-gfx mailing list