[Intel-gfx] [PATCH v7 8/8] drm/i915/gen9: WM memory bandwidth related workaround
Mahesh Kumar
mahesh1.kumar at intel.com
Wed Feb 15 15:00:35 UTC 2017
Hi,
On Thursday 15 December 2016 10:37 PM, Paulo Zanoni wrote:
> Em Qui, 2016-12-01 às 21:19 +0530, Mahesh Kumar escreveu:
>> This patch implemnets Workariunds related to display arbitrated
>> memory
>> bandwidth. These WA are applicabe for all gen-9 based platforms.
> 3 typos above.
>
> The WA is already implemented. What the patch does is that it opts-out
> of the WA in case we conclude it's safe. Please say this in the commit
> message.
updated the commit msg
>> Changes since v1:
>> - Rebase on top of Paulo's patch series
>> Changes since v2:
>> - Address review comments
>> - Rebase/rework as per other patch changes in series
>> Changes since v3:
>> - Rework based on Maarten's comments
>>
>> Signed-off-by: Mahesh Kumar <mahesh1.kumar at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 11 +++
>> drivers/gpu/drm/i915/intel_display.c | 24 ++++++
>> drivers/gpu/drm/i915/intel_pm.c | 155
>> +++++++++++++++++++++++++++++++++--
>> 3 files changed, 181 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 69213a4..3126259 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1216,6 +1216,13 @@ enum intel_sbi_destination {
>> SBI_MPHY,
>> };
>>
>> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
>> +enum watermark_memory_wa {
>> + WATERMARK_WA_NONE,
>> + WATERMARK_WA_X_TILED,
>> + WATERMARK_WA_Y_TILED,
>> +};
>> +
>> #define QUIRK_PIPEA_FORCE (1<<0)
>> #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>> #define QUIRK_INVERT_BRIGHTNESS (1<<2)
>> @@ -1788,6 +1795,10 @@ struct skl_ddb_allocation {
>>
>> struct skl_wm_values {
>> unsigned dirty_pipes;
>> + /* any WaterMark memory workaround Required */
> CapitaliZation is weird Here. Besides, no need for the comment.
>
>
>> + enum watermark_memory_wa mem_wa;
>> + uint32_t pipe_bw_kbps[I915_MAX_PIPES];
>> + bool pipe_ytiled[I915_MAX_PIPES];
>> struct skl_ddb_allocation ddb;
>> };
>>
>> diff --git a/drivers/gpu/drm/i915/intel_display.c
>> b/drivers/gpu/drm/i915/intel_display.c
>> index 5d11002..f8da488 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -14574,6 +14574,28 @@ static void intel_atomic_track_fbs(struct
>> drm_atomic_state *state)
>> to_intel_plane(plane)-
>>> frontbuffer_bit);
>> }
>>
>> +static void
>> +intel_update_wm_bw_wa(struct drm_atomic_state *state)
>> +{
>> + struct intel_atomic_state *intel_state =
>> to_intel_atomic_state(state);
>> + struct drm_i915_private *dev_priv = to_i915(state->dev);
>> + const struct drm_crtc *crtc;
>> + const struct drm_crtc_state *cstate;
>> + struct skl_wm_values *results = &intel_state->wm_results;
>> + struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
>> + int i;
>> +
>> + if (!IS_GEN9(dev_priv))
>> + return;
>> +
>> + for_each_crtc_in_state(state, crtc, cstate, i) {
>> + hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i];
>> + hw_vals->pipe_ytiled[i] = results->pipe_ytiled[i];
>> + }
>> +
>> + hw_vals->mem_wa = results->mem_wa;
>> +}
> I think we can just patch skl_copy_wm_for_pipe() to also copy the
> fields we need instead of adding this function. Whouldn't that be much
> simpler? Anyway, this one looks correct, so no need to change if you
> don't want.
>
>
>> +
>> /**
>> * intel_atomic_commit - commit validated state object
>> * @dev: DRM device
>> @@ -14614,6 +14636,8 @@ static int intel_atomic_commit(struct
>> drm_device *dev,
>> intel_shared_dpll_commit(state);
>> intel_atomic_track_fbs(state);
>>
>> + intel_update_wm_bw_wa(state);
>> +
>> if (intel_state->modeset) {
>> memcpy(dev_priv->min_pixclk, intel_state-
>>> min_pixclk,
>> sizeof(intel_state->min_pixclk));
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c
>> b/drivers/gpu/drm/i915/intel_pm.c
>> index cc8fc84..fda6e1e 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -2878,11 +2878,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
>>
>> #define SKL_SAGV_BLOCK_TIME 30 /* µs */
>>
>> -/*
>> - * FIXME: We still don't have the proper code detect if we need to
>> apply the WA,
>> - * so assume we'll always need it in order to avoid underruns.
>> - */
>> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
>> +static bool intel_needs_memory_bw_wa(struct intel_atomic_state
>> *state)
> Why are we renaming this function?
>
> Also, in my last review I suggested that we kill this function. I don't
> think it makes sense to have this function anymore.
>
> Besides, function intel_can_enable_sagv() needs to look at the value we
> assigned to enum watermark_memory_wa, not get a true/false based on
> platform version.
removed the function
>
>
>> {
>> struct drm_i915_private *dev_priv = to_i915(state-
>>> base.dev);
>>
>> @@ -3056,7 +3052,7 @@ bool intel_can_enable_sagv(struct
>> drm_atomic_state *state)
>>
>> latency = dev_priv->wm.skl_latency[level];
>>
>> - if (skl_needs_memory_bw_wa(intel_state) &&
>> + if (intel_needs_memory_bw_wa(intel_state) &&
>> plane->base.state->fb->modifier ==
>> I915_FORMAT_MOD_X_TILED)
>> latency += 15;
>> @@ -3584,7 +3580,7 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>> uint32_t y_min_scanlines;
>> struct intel_atomic_state *state =
>> to_intel_atomic_state(cstate->base.state);
>> - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
>> + enum watermark_memory_wa mem_wa;
>> bool y_tiled, x_tiled;
>>
>> if (latency == 0 || !cstate->base.active || !intel_pstate-
>>> base.visible) {
>> @@ -3600,7 +3596,8 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>> if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
>> latency += 4;
>>
>> - if (apply_memory_bw_wa && x_tiled)
>> + mem_wa = state->wm_results.mem_wa;
> This assignment could have been done during declaration.
done
>
>
>> + if (mem_wa != WATERMARK_WA_NONE && x_tiled)
>> latency += 15;
>>
>> width = drm_rect_width(&intel_pstate->base.src) >> 16;
>> @@ -3635,7 +3632,7 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>> y_min_scanlines = 4;
>> }
>>
>> - if (apply_memory_bw_wa)
>> + if (mem_wa == WATERMARK_WA_Y_TILED)
>> y_min_scanlines *= 2;
>>
>> plane_bytes_per_line = width * cpp;
>> @@ -4063,6 +4060,15 @@ skl_compute_ddb(struct drm_atomic_state
>> *state)
>> }
>>
>> /*
>> + * If Watermark workaround is changed we need to recalculate
>> + * watermark values for all active pipes
>> + */
>> + if (intel_state->wm_results.mem_wa != dev_priv-
>>> wm.skl_hw.mem_wa) {
>> + realloc_pipes = ~0;
>> + intel_state->wm_results.dirty_pipes = ~0;
>> + }
> But then skl_ddb_add_affected_planes() only actually adds to the commit
> the planes that have different DDB partitioning. It seems to me that it
> may be possible to have a case where the WA changes and the DDB stays
> the same, so we need to find a way to include every CRTC there. Maybe
> we could just go and call the function that adds every CRTC here.
modified this part of code to calculate WM for all pipes if WA is
changing & add pipe only if WM values are changing.
We are holding a ww_mutex, so this operation should be race condition free.
>
>> +
>> + /*
>> * We're not recomputing for the pipes not included in the
>> commit, so
>> * make sure we start with the current state.
>> */
>> @@ -4087,6 +4093,133 @@ skl_compute_ddb(struct drm_atomic_state
>> *state)
>> return 0;
>> }
>>
>> +static int
>> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
>> +{
>> + struct drm_device *dev = state->dev;
>> + struct drm_i915_private *dev_priv = to_i915(dev);
>> + struct intel_atomic_state *intel_state =
>> to_intel_atomic_state(state);
>> + struct memdev_info *memdev_info = &dev_priv->memdev_info;
>> + struct skl_wm_values *results = &intel_state->wm_results;
>> + struct drm_crtc *crtc;
>> + struct drm_crtc_state *cstate;
>> + int active_pipes = 0;
>> + uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps;
>> + int display_bw_percentage;
>> + bool y_tile_enabled = false;
>> + int ret, i;
>> +
>> + if (!intel_needs_memory_bw_wa(intel_state)) {
>> + results->mem_wa = WATERMARK_WA_NONE;
>> + return 0;
>> + }
>> +
>> + if (!memdev_info->valid)
>> + goto exit;
> There's no reason to use a label if it's only called here. Just move
> all that code to the if statement.
done
>
>
>> +
>> + ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
>> + state->acquire_ctx);
>> + if (ret)
>> + return ret;
> Why are we getting this here? Why this lock specifically? What's it
> protecting?
>
> Doc says:
> - ww mutex protecting connector state and routing
> - protects connector->encoder and encoder->crtc links
>
> We're not touching any of that here.
Created a new ww_mutex lock to prevent wm related calculation, when it
may affect other CRTC's also.
using that now instead of connection_mutex.
>
>
>> +
>> + memcpy(results->pipe_bw_kbps, dev_priv-
>>> wm.skl_hw.pipe_bw_kbps,
>> + sizeof(results->pipe_bw_kbps));
>> + memcpy(results->pipe_ytiled, dev_priv-
>>> wm.skl_hw.pipe_ytiled,
>> + sizeof(results->pipe_ytiled));
>> +
>> + for_each_crtc_in_state(state, crtc, cstate, i) {
>> + struct drm_plane *plane;
>> + const struct drm_plane_state *pstate;
>> + int active_planes = 0;
>> + uint32_t max_plane_bw_kbps = 0;
>> +
>> + results->pipe_ytiled[i] = false;
>> +
>> + if (!cstate->active) {
>> + results->pipe_bw_kbps[i] = 0;
>> + continue;
>> + }
>> +
>> + drm_atomic_crtc_state_for_each_plane_state(plane,
>> pstate,
>> + csta
>> te) {
>> + struct drm_framebuffer *fb;
>> + uint32_t plane_bw_kbps;
>> + enum plane_id id = to_intel_plane(plane)-
>>> id;
>> +
>> + if (!pstate->visible)
>> + continue;
>> +
>> + if (WARN_ON(!pstate->fb))
>> + continue;
>> +
>> + if (id == PLANE_CURSOR)
>> + continue;
>> +
>> + fb = pstate->fb;
>> + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED
>> ||
>> + fb->modifier ==
>> I915_FORMAT_MOD_Yf_TILED))
>> + results->pipe_ytiled[i] = true;
>> +
>> + plane_bw_kbps =
>> skl_adjusted_plane_pixel_rate(
>> + to_intel_crtc_state(
>> cstate),
>> + to_intel_plane_state
>> (pstate));
>> + max_plane_bw_kbps = max(plane_bw_kbps,
>> + max_plane_bw
>> _kbps);
>> + active_planes++;
>> + }
>> + results->pipe_bw_kbps[i] = max_plane_bw_kbps *
>> active_planes;
>> + }
>> +
>> + for_each_pipe(dev_priv, i) {
>> + if (results->pipe_bw_kbps[i]) {
>> + max_pipe_bw_kbps = max(max_pipe_bw_kbps,
>> + results->pipe_bw_kbps[i]);
>> + active_pipes++;
>> + }
>> + if (results->pipe_ytiled[i])
>> + y_tile_enabled = true;
>> + }
>> +
>> + total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes;
>> + display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps
>> * 100,
>> + memdev_info-
>>> bandwidth_kbps);
> Shouldn't this be just DIV_ROUND_UP()? I don't see 64 bit variables
> here, nor the possibility of overflows.
If we have 3 4K pannels & each have 3 planes enabled with scaling then
this value theoretically may go beyond 32bits,
type-casted the variable now to 64bit.
536MHz * 3 planes * 3 pipes * pipe_scaler_ratio * plane_scaler_ratio *
100 = 536000 * 3 * 3 * (1.9 *1.9 (hscale & vscale)) * (1.9 * 1.9 (pipe
hscale & vscale)) * 100 = 6286685040 = 0x1 76B7 3370
>
>
>> +
>> + /*
>> + * If there is any Ytile plane enabled and arbitrated
>> display
>> + * bandwidth > 20% of raw system memory bandwidth
>> + * Enale Y-tile related WA
>> + *
>> + * If memory is dual channel single rank, Xtile limit = 35%,
>> else Xtile
>> + * limit = 60%
>> + * If there is no Ytile plane enabled and
>> + * arbitrated display bandwidth > Xtile limit
>> + * Enable X-tile realated WA
>> + */
>> + if (y_tile_enabled && (display_bw_percentage > 20))
>> + results->mem_wa = WATERMARK_WA_Y_TILED;
>> + else {
>> + int x_tile_percentage = 60;
>> + enum rank rank = DRAM_RANK_SINGLE;
>> +
>> + if (memdev_info->rank != DRAM_RANK_INVALID)
>> + rank = memdev_info->rank;
> I really think that the previous patch should prevent this. If
> memdev_info->rank is invalid then memdev_info->valid should also be
> false and we'd have returned at the beginning of the function. With
> that, this part of the code could be simplified a little bit since then
> we'd have no need to choose a default rank.
>
done
Regards,
-Mahesh
>
>> +
>> + if ((rank == DRAM_RANK_SINGLE) &&
>> + (memdev_info->num_channels
>> == 2))
>> + x_tile_percentage = 35;
>> +
>> + if (display_bw_percentage > x_tile_percentage)
>> + results->mem_wa = WATERMARK_WA_X_TILED;
>> + }
>> +
>> + return 0;
>> +
>> +exit:
>> + results->mem_wa = WATERMARK_WA_Y_TILED;
>> + return 0;
>> +}
>> +
>> +
> Two blank lines here.
>
>
>> static void
>> skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>> struct skl_wm_values *src,
>> @@ -4162,6 +4295,10 @@ skl_compute_wm(struct drm_atomic_state *state)
>> /* Clear all dirty flags */
>> results->dirty_pipes = 0;
>>
>> + ret = skl_compute_memory_bandwidth_wm_wa(state);
>> + if (ret)
>> + return ret;
>> +
>> ret = skl_compute_ddb(state);
>> if (ret)
>> return ret;
More information about the Intel-gfx
mailing list