[Intel-gfx] [PATCH v4 4/8] drm/i915/gen9: WM memory bandwidth related workaround
Paulo Zanoni
paulo.r.zanoni at intel.com
Fri Nov 4 17:09:04 UTC 2016
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> This patch implemnets Workariunds related to display arbitrated
> memory
> bandwidth. These WA are applicabe for all gen-9 based platforms.
>
> Changes since v1:
> - Rebase on top of Paulo's patch series
> Changes since v2:
> - Rebase/rework after addressing Paulo's comments in previous patch
A lot of this code has changed since then, so this will need a
significant rebase. In the meantime, I added skl_needs_memory_bw_wa()
and we're now applying the WA by default: we just won't apply the WA
when we're pretty sure we don't need to. This helps avoiding underruns
by default.
See more below.
>
> Signed-off-by: "Kumar, Mahesh" <mahesh1.kumar at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 9 +++
> drivers/gpu/drm/i915/intel_drv.h | 11 +++
> drivers/gpu/drm/i915/intel_pm.c | 146
> +++++++++++++++++++++++++++++++++++++++
> 3 files changed, 166 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index adbd9aa..c169360 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1092,6 +1092,13 @@ enum intel_sbi_destination {
> SBI_MPHY,
> };
>
> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> +enum watermark_memory_wa {
> + WATERMARK_WA_NONE,
> + WATERMARK_WA_X_TILED,
> + WATERMARK_WA_Y_TILED,
> +};
> +
> #define QUIRK_PIPEA_FORCE (1<<0)
> #define QUIRK_LVDS_SSC_DISABLE (1<<1)
> #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> @@ -1644,6 +1651,8 @@ struct skl_ddb_allocation {
>
> struct skl_wm_values {
> unsigned dirty_pipes;
> + /* any WaterMark memory workaround Required */
We can remove this comment since it doesn't say anything the variable
name doesn't.
> + enum watermark_memory_wa mem_wa;
Now that we have a proper variable in the state struct, it probably
makes sense to just kill skl_needs_memory_bw_wa() and read this
variable when we need to.
> struct skl_ddb_allocation ddb;
> uint32_t wm_linetime[I915_MAX_PIPES];
> uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index f48e79a..2c1897b 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct
> drm_atomic_state *state,
> return to_intel_crtc_state(crtc_state);
> }
>
> +static inline struct intel_crtc_state *
> +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
> + struct intel_crtc *crtc)
> +{
> + struct drm_crtc_state *crtc_state;
> +
> + crtc_state = drm_atomic_get_existing_crtc_state(state,
> &crtc->base);
> +
> + return to_intel_crtc_state(crtc_state);
I really don't like the idea of calling to_intel_crtc_state() on a
potentially NULL pointer so the caller of this function will also check
for NULL. Even though it works today, I still think it's unsafe
practice. Please check crtc_state for NULL directly and then return
NULL.
Also, I think this function should be extracted to its own commit, and
we'd probably be able to find some callers in the existing i915 code.
> +}
> +
> static inline struct intel_plane_state *
> intel_atomic_get_existing_plane_state(struct drm_atomic_state
> *state,
> struct intel_plane *plane)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 84ec6b1..5b8f715 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
> {
> struct drm_plane_state *pstate = &intel_pstate->base;
> struct drm_framebuffer *fb = pstate->fb;
> + struct intel_atomic_state *intel_state =
> + to_intel_atomic_state(cstate->base.state);
> uint32_t latency = dev_priv->wm.skl_latency[level];
> uint32_t method1, method2;
> uint32_t plane_bytes_per_line, plane_blocks_per_line;
> @@ -3598,10 +3600,17 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
> uint32_t width = 0, height = 0;
> uint32_t plane_pixel_rate;
> uint32_t y_tile_minimum, y_min_scanlines;
> + enum watermark_memory_wa mem_wa;
>
> if (latency == 0 || !cstate->base.active || !intel_pstate-
> >base.visible)
> return 0;
>
> + mem_wa = intel_state ? intel_state->wm_results.mem_wa :
> WATERMARK_WA_NONE;
> + if (mem_wa != WATERMARK_WA_NONE) {
> + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
> + latency += 15;
> + }
> +
> width = drm_rect_width(&intel_pstate->base.src) >> 16;
> height = drm_rect_height(&intel_pstate->base.src) >> 16;
>
> @@ -3634,6 +3643,9 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
> y_min_scanlines = 4;
> }
>
> + if (mem_wa == WATERMARK_WA_Y_TILED)
> + y_min_scanlines *= 2;
The changes to this function will need to be rebased. But it's
interesting that my interpretation regarding this *= 2 was different.
After an email to the spec authors it seems your interpretation is the
right one...
> +
> plane_bytes_per_line = width * cpp;
> if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
> fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
> @@ -4075,6 +4087,15 @@ skl_include_affected_pipes(struct
> drm_atomic_state *state)
> intel_state->wm_results.dirty_pipes = ~0;
> }
>
> + /*
> + * If Watermark workaround is changed we need to recalculate
> + * watermark values for all active pipes
> + */
> + if (intel_state->wm_results.mem_wa != dev_priv-
> >wm.skl_hw.mem_wa) {
> + realloc_pipes = ~0;
> + intel_state->wm_results.dirty_pipes = ~0;
> + }
> +
Things have changed since this patch was written. I'd recommend moving
this to skl_set_memory_bandwidth_wa().
> for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
> struct intel_crtc_state *cstate;
>
> @@ -4087,6 +4108,129 @@ skl_include_affected_pipes(struct
> drm_atomic_state *state)
> }
>
> static void
> +skl_set_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
We're not really setting anything here: we're computing. Rename to
skl_compute_wm_memory_bw_wa?
> +{
> + struct drm_device *dev = state->dev;
> + struct drm_i915_private *dev_priv = to_i915(dev);
> + struct intel_crtc *intel_crtc;
> + struct intel_plane_state *intel_pstate;
> + struct intel_atomic_state *intel_state =
> to_intel_atomic_state(state);
> + struct memdev_info *memdev_info = &dev_priv->memdev_info;
> + int num_active_plane, num_active_pipe;
I like to use plural in the number-of variables, since we're counting
the number of active planeS, not the number of the active plane.
> + uint32_t plane_bw, max_plane_bw, pipe_bw, max_pipe_bw;
> + uint32_t total_pipe_bw;
> + uint32_t system_bw = 0;
> + uint32_t rank;
> + int x_tile_per;
> + int display_bw_per;
I read this and kept thinking "x tiles per what?", took me a while to
realize it's percentage. It's probably better to just use a long self-
documenting name here than to use an ambiguous abbreviation.
Also, I'd just go with uint32_t in the percentages too to avoid any
possible problems with the uin32_t calculations.
And perhaps some declarations could be moved to smaller inner scopes
below.
> + bool y_tile_enabled = false;
> +
if (!platforms_that_require_the_wa) {
wa = WATERMARK_WA_NONE;
return;
}
> + if (!memdev_info->valid)
> + goto exit;
Our default behavior should be to apply the WA then in doubt, not
to avoid it, so the return value here and in the other error cases
should be WATERAMARK_WA_Y_TILED.
Also, you can avoid the gotos by just setting mem_wa at the beginning
of the function, then you can just "return" later instead of goto.
> +
> + system_bw = memdev_info->mem_speed_khz * memdev_info-
> >num_channels *
> + memdev_info->channel_width_bytes;
> +
> + if (!system_bw)
> + goto exit;
This shouldn't be possible. Otherwise, the previous patch needs to be
fixed in a way to not allow system_bw to end up as zero. Please either
remove the check or change to "if (WARN_ON(!system_bw))".
> +
> + max_pipe_bw = 0;
> + for_each_intel_crtc(dev, intel_crtc) {
> + struct intel_crtc_state *cstate;
> + struct intel_plane *plane;
> +
> + /*
> + * If CRTC is part of current atomic commit, get
> crtc state from
> + * existing CRTC state. else take the cached CRTC
> state
> + */
> + cstate = NULL;
> + if (state)
If state is NULL we'll segfault way before this point, so there's no
need for this check.
> + cstate =
> intel_atomic_get_existing_crtc_state(state,
> + intel_crtc);
> + if (!cstate)
> + cstate = to_intel_crtc_state(intel_crtc-
> >base.state);
> +
> + if (!cstate->base.active)
> + continue;
> +
> + num_active_plane = 0;
> + max_plane_bw = 0;
> + for_each_intel_plane_mask(dev, plane, cstate-
> >base.plane_mask) {
> + struct drm_framebuffer *fb = NULL;
> +
> + intel_pstate = NULL;
> + if (state)
Same here: no need to check for state.
> + intel_pstate =
> + intel_atomic_get_existing_plane_stat
> e(state,
> +
> plane);
> + if (!intel_pstate)
> + intel_pstate =
> + to_intel_plane_state(plane-
> >base.state);
> +
> + WARN_ON(!intel_pstate->base.fb);
if (WARN_ON(!intel_pstate->base.fb))
return;
Then we can just forget about checking for fb again below.
> +
> + if (!intel_pstate->base.visible)
> + continue;
Don't we also need to exclude the cursor here?
> +
> + fb = intel_pstate->base.fb;
> + if (fb && (fb->modifier[0] ==
> I915_FORMAT_MOD_Y_TILED ||
> + fb->modifier[0] ==
> I915_FORMAT_MOD_Yf_TILED))
> + y_tile_enabled = true;
> +
> + plane_bw =
> skl_adjusted_plane_pixel_rate(cstate,
> + inte
> l_pstate);
> + max_plane_bw = max(plane_bw, max_plane_bw);
> + num_active_plane++;
> + }
> + pipe_bw = max_plane_bw * num_active_plane;
> + max_pipe_bw = max(pipe_bw, max_pipe_bw);
> + }
> +
> + if (intel_state->active_pipe_changes)
> + num_active_pipe = hweight32(intel_state-
> >active_crtcs);
> + else
> + num_active_pipe = hweight32(dev_priv->active_crtcs);
Why don't we just trust intel_state->active_crtcs even when
active_pipe_changes is false?
> +
> + total_pipe_bw = max_pipe_bw * num_active_pipe;
> +
> + display_bw_per = DIV_ROUND_UP_ULL(total_pipe_bw * 100,
> system_bw * 1000);
Why ULL?
Also, if everything is KHz as it's supposed to be, the *100 and *1000
don't make sense.
> +
> + /*
> + * If there is any Ytile plane enabled and arbitrated
> display
> + * bandwidth > 20% of raw system memory bandwidth
> + * Enale Y-tile related WA
> + *
> + * If memory is dual channel single rank, Xtile limit = 35%,
> else Xtile
> + * limit = 60%
> + * If there is no Ytile plane enabled and
> + * arbitrated display bandwidth > Xtile limit
> + * Enable X-tile realated WA
> + */
> + if (y_tile_enabled && (display_bw_per > 20))
> + intel_state->wm_results.mem_wa =
> WATERMARK_WA_Y_TILED;
> + else {
> +
> + if (memdev_info->rank_valid)
> + rank = memdev_info->rank;
> + else
> + rank = DRAM_RANK_DUAL; /* Assume we are dual
> rank */
When in doubt, apply the most restrictive workaround to avoid the
possibility of underruns. So here it's safer to assume
DRAM_RANK_SINGLE.
> +
> + if ((rank == DRAM_RANK_SINGLE) &&
> + (memdev_info->num_channels
> == 2))
> + x_tile_per = 35;
> + else
> + x_tile_per = 60;
> +
> + if (display_bw_per > x_tile_per)
> + intel_state->wm_results.mem_wa =
> WATERMARK_WA_X_TILED;
> + }
> + return;
> +
> +exit:
> + intel_state->wm_results.mem_wa = WATERMARK_WA_NONE;
> +}
> +
> +static void
> skl_copy_wm_for_pipe(struct skl_wm_values *dst,
> struct skl_wm_values *src,
> enum pipe pipe)
> @@ -4131,6 +4275,8 @@ skl_compute_wm(struct drm_atomic_state *state)
> /* Clear all dirty flags */
> results->dirty_pipes = 0;
>
> + skl_set_memory_bandwidth_wm_wa(state);
> +
> ret = skl_include_affected_pipes(state);
> if (ret)
> return ret;
More information about the Intel-gfx
mailing list