[Intel-gfx] [PATCH 2/8] drm/i915/skl+: Optimize WM calculation
Mahesh Kumar
mahesh1.kumar at intel.com
Tue Jul 18 12:49:44 UTC 2017
From: "Kumar, Mahesh" <mahesh1.kumar at intel.com>
Plane configuration parameters doesn't change for each WM-level
calculation. Currently we compute same parameters 8 times for each
wm-level.
This patch optimizes it by calculating these parameters in beginning
& reuse during each level-wm calculation.
Signed-off-by: Mahesh Kumar <mahesh1.kumar at intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 13 +++
drivers/gpu/drm/i915/intel_pm.c | 179 ++++++++++++++++++++++------------------
2 files changed, 111 insertions(+), 81 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 96edced67e10..62ba18d6717f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1790,6 +1790,19 @@ struct skl_wm_level {
uint8_t plane_res_l;
};
+/* Stores plane specific WM parameters */
+struct skl_wm_params{
+ bool x_tiled, y_tiled;
+ uint32_t width;
+ uint8_t cpp;
+ uint32_t plane_pixel_rate;
+ uint32_t y_min_scanlines;
+ uint32_t plane_bytes_per_line;
+ uint_fixed_16_16_t plane_blocks_per_line;
+ uint_fixed_16_16_t y_tile_minimum;
+ uint32_t linetime_us;
+};
+
/*
* This struct helps tracking the state needed for runtime PM, which puts the
* device in PCI D3 state. Notice that when this happens, nothing on the
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ee2a349cfe68..b2bd65847d9b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4341,128 +4341,135 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
downscale_amount);
}
-static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
- struct intel_crtc_state *cstate,
- const struct intel_plane_state *intel_pstate,
- uint16_t ddb_allocation,
- int level,
- uint16_t *out_blocks, /* out */
- uint8_t *out_lines, /* out */
- bool *enabled /* out */)
+static int
+skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
+ struct intel_crtc_state *cstate,
+ const struct intel_plane_state *intel_pstate,
+ struct skl_wm_params *wp)
{
struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
const struct drm_plane_state *pstate = &intel_pstate->base;
const struct drm_framebuffer *fb = pstate->fb;
- uint32_t latency = dev_priv->wm.skl_latency[level];
- uint_fixed_16_16_t method1, method2;
- uint_fixed_16_16_t plane_blocks_per_line;
- uint_fixed_16_16_t selected_result;
uint32_t interm_pbpl;
- uint32_t plane_bytes_per_line;
- uint32_t res_blocks, res_lines;
- uint8_t cpp;
- uint32_t width = 0;
- uint32_t plane_pixel_rate;
- uint_fixed_16_16_t y_tile_minimum;
- uint32_t y_min_scanlines;
struct intel_atomic_state *state =
to_intel_atomic_state(cstate->base.state);
bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
- bool y_tiled, x_tiled;
- if (latency == 0 ||
- !intel_wm_plane_visible(cstate, intel_pstate)) {
- *enabled = false;
+ if (!intel_wm_plane_visible(cstate, intel_pstate))
return 0;
- }
- y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
- fb->modifier == I915_FORMAT_MOD_Yf_TILED;
- x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
-
- /* Display WA #1141: kbl,cfl */
- if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
- dev_priv->ipc_enabled)
- latency += 4;
-
- if (apply_memory_bw_wa && x_tiled)
- latency += 15;
+ wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
+ fb->modifier == I915_FORMAT_MOD_Yf_TILED;
+ wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
if (plane->id == PLANE_CURSOR) {
- width = intel_pstate->base.crtc_w;
+ wp->width = intel_pstate->base.crtc_w;
} else {
/*
* Src coordinates are already rotated by 270 degrees for
* the 90/270 degree plane rotation cases (to match the
* GTT mapping), hence no need to account for rotation here.
*/
- width = drm_rect_width(&intel_pstate->base.src) >> 16;
+ wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
}
- cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
- fb->format->cpp[0];
- plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
+ wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
+ fb->format->cpp[0];
+ wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
+ intel_pstate);
if (drm_rotation_90_or_270(pstate->rotation)) {
-
- switch (cpp) {
+ switch (wp->cpp) {
case 1:
- y_min_scanlines = 16;
+ wp->y_min_scanlines = 16;
break;
case 2:
- y_min_scanlines = 8;
+ wp->y_min_scanlines = 8;
break;
case 4:
- y_min_scanlines = 4;
+ wp->y_min_scanlines = 4;
break;
default:
- MISSING_CASE(cpp);
+ MISSING_CASE(wp->cpp);
return -EINVAL;
}
} else {
- y_min_scanlines = 4;
+ wp->y_min_scanlines = 4;
}
if (apply_memory_bw_wa)
- y_min_scanlines *= 2;
-
- plane_bytes_per_line = width * cpp;
- if (y_tiled) {
- interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
- y_min_scanlines, 512);
- plane_blocks_per_line = div_fixed16(interm_pbpl,
- y_min_scanlines);
- } else if (x_tiled) {
- interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
- plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
+ wp->y_min_scanlines *= 2;
+
+ wp->plane_bytes_per_line = wp->width * wp->cpp;
+ if (wp->y_tiled) {
+ interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
+ wp->y_min_scanlines, 512);
+ wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
+ wp->y_min_scanlines);
+ } else if (wp->x_tiled) {
+ interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
+ wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
} else {
- interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
- plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
+ interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
+ wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
}
- method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
- method2 = skl_wm_method2(plane_pixel_rate,
+ wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
+ wp->plane_blocks_per_line);
+ wp->linetime_us = fixed16_to_u32_round_up(
+ intel_get_linetime_us(cstate));
+ return 0;
+}
+
+static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
+ struct intel_crtc_state *cstate,
+ const struct intel_plane_state *intel_pstate,
+ uint16_t ddb_allocation,
+ int level,
+ const struct skl_wm_params *wp,
+ uint16_t *out_blocks, /* out */
+ uint8_t *out_lines, /* out */
+ bool *enabled /* out */)
+{
+ const struct drm_plane_state *pstate = &intel_pstate->base;
+ uint32_t latency = dev_priv->wm.skl_latency[level];
+ uint_fixed_16_16_t method1, method2;
+ uint_fixed_16_16_t selected_result;
+ uint32_t res_blocks, res_lines;
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(cstate->base.state);
+ bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
+
+ if (latency == 0 ||
+ !intel_wm_plane_visible(cstate, intel_pstate)) {
+ *enabled = false;
+ return 0;
+ }
+
+ /* Display WA #1141: kbl,cfl */
+ if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
+ dev_priv->ipc_enabled)
+ latency += 4;
+
+ if (apply_memory_bw_wa && wp->x_tiled)
+ latency += 15;
+
+ method1 = skl_wm_method1(wp->plane_pixel_rate, wp->cpp, latency);
+ method2 = skl_wm_method2(wp->plane_pixel_rate,
cstate->base.adjusted_mode.crtc_htotal,
latency,
- plane_blocks_per_line);
-
- y_tile_minimum = mul_u32_fixed16(y_min_scanlines,
- plane_blocks_per_line);
+ wp->plane_blocks_per_line);
- if (y_tiled) {
- selected_result = max_fixed16(method2, y_tile_minimum);
+ if (wp->y_tiled) {
+ selected_result = max_fixed16(method2, wp->y_tile_minimum);
} else {
- uint32_t linetime_us;
-
- linetime_us = fixed16_to_u32_round_up(
- intel_get_linetime_us(cstate));
- if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
- (plane_bytes_per_line / 512 < 1))
+ if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1)
+ && (wp->plane_bytes_per_line / 512 < 1))
selected_result = method2;
else if ((ddb_allocation && ddb_allocation /
- fixed16_to_u32_round_up(plane_blocks_per_line)) >= 1)
+ fixed16_to_u32_round_up(wp->plane_blocks_per_line)) >= 1)
selected_result = min_fixed16(method1, method2);
- else if (latency >= linetime_us)
+ else if (latency >= wp->linetime_us)
selected_result = min_fixed16(method1, method2);
else
selected_result = method1;
@@ -4470,12 +4477,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
res_lines = div_round_up_fixed16(selected_result,
- plane_blocks_per_line);
+ wp->plane_blocks_per_line);
if (level >= 1 && level <= 7) {
- if (y_tiled) {
- res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
- res_lines += y_min_scanlines;
+ if (wp->y_tiled) {
+ res_blocks += fixed16_to_u32_round_up(
+ wp->y_tile_minimum);
+ res_lines += wp->y_min_scanlines;
} else {
res_blocks++;
}
@@ -4513,6 +4521,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
struct skl_ddb_allocation *ddb,
struct intel_crtc_state *cstate,
const struct intel_plane_state *intel_pstate,
+ const struct skl_wm_params *wm_params,
struct skl_plane_wm *wm)
{
struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
@@ -4536,6 +4545,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
intel_pstate,
ddb_blocks,
level,
+ wm_params,
&result->plane_res_b,
&result->plane_res_l,
&result->plane_en);
@@ -4600,11 +4610,18 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
const struct intel_plane_state *intel_pstate =
to_intel_plane_state(pstate);
enum plane_id plane_id = to_intel_plane(plane)->id;
+ struct skl_wm_params wm_params;
wm = &pipe_wm->planes[plane_id];
+ memset(&wm_params, 0, sizeof(struct skl_wm_params));
+
+ ret = skl_compute_plane_wm_params(dev_priv, cstate,
+ intel_pstate, &wm_params);
+ if (ret)
+ return ret;
ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
- intel_pstate, wm);
+ intel_pstate, &wm_params, wm);
if (ret)
return ret;
skl_compute_transition_wm(cstate, &wm->trans_wm);
--
2.13.0
More information about the Intel-gfx
mailing list