[Intel-gfx] [PATCH v4 4/8] drm/i915/gen9: WM memory bandwidth related workaround

Thu Nov 10 05:54:48 UTC 2016

Hi,


On Friday 04 November 2016 10:39 PM, Paulo Zanoni wrote:
> Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
>> This patch implemnets Workariunds related to display arbitrated
>> memory
>> bandwidth. These WA are applicabe for all gen-9 based platforms.
>>
>> Changes since v1:
>>   - Rebase on top of Paulo's patch series
>> Changes since v2:
>>   - Rebase/rework after addressing Paulo's comments in previous patch
> A lot of this code has changed since then, so this will need a
> significant rebase. In the meantime, I added skl_needs_memory_bw_wa()
> and we're now applying the WA by default: we just won't apply the WA
> when we're pretty sure we don't need to. This helps avoiding underruns
> by default.
>
> See more below.
make sense, Will change default WA to WA_T_TILED
>> Signed-off-by: "Kumar, Mahesh" <mahesh1.kumar at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h  |   9 +++
>>   drivers/gpu/drm/i915/intel_drv.h |  11 +++
>>   drivers/gpu/drm/i915/intel_pm.c  | 146
>> +++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 166 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index adbd9aa..c169360 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1092,6 +1092,13 @@ enum intel_sbi_destination {
>>   	SBI_MPHY,
>>   };
>>   
>> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
>> +enum watermark_memory_wa {
>> +	WATERMARK_WA_NONE,
>> +	WATERMARK_WA_X_TILED,
>> +	WATERMARK_WA_Y_TILED,
>> +};
>> +
>>   #define QUIRK_PIPEA_FORCE (1<<0)
>>   #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>>   #define QUIRK_INVERT_BRIGHTNESS (1<<2)
>> @@ -1644,6 +1651,8 @@ struct skl_ddb_allocation {
>>   
>>   struct skl_wm_values {
>>   	unsigned dirty_pipes;
>> +	/* any WaterMark memory workaround Required */
> We can remove this comment since it doesn't say anything the variable
> name doesn't.
>
>> +	enum watermark_memory_wa mem_wa;
> Now that we have a proper variable in the state struct, it probably
> makes sense to just kill skl_needs_memory_bw_wa() and read this
> variable when we need to.
>
>
>>   	struct skl_ddb_allocation ddb;
>>   	uint32_t wm_linetime[I915_MAX_PIPES];
>>   	uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h
>> b/drivers/gpu/drm/i915/intel_drv.h
>> index f48e79a..2c1897b 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct
>> drm_atomic_state *state,
>>   	return to_intel_crtc_state(crtc_state);
>>   }
>>   
>> +static inline struct intel_crtc_state *
>> +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
>> +				      struct intel_crtc *crtc)
>> +{
>> +	struct drm_crtc_state *crtc_state;
>> +
>> +	crtc_state = drm_atomic_get_existing_crtc_state(state,
>> &crtc->base);
>> +
>> +	return to_intel_crtc_state(crtc_state);
> I really don't like the idea of calling to_intel_crtc_state() on a
> potentially NULL pointer so the caller of this function will also check
> for NULL. Even though it works today, I still think it's unsafe
> practice. Please check crtc_state for NULL directly and then return
> NULL.
>
> Also, I think this function should be extracted to its own commit, and
> we'd probably be able to find some callers in the existing i915 code.
Will extract this function & include NULL check for crtc_state itself
>
>> +}
>> +
>>   static inline struct intel_plane_state *
>>   intel_atomic_get_existing_plane_state(struct drm_atomic_state
>> *state,
>>   				      struct intel_plane *plane)
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c
>> b/drivers/gpu/drm/i915/intel_pm.c
>> index 84ec6b1..5b8f715 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   {
>>   	struct drm_plane_state *pstate = &intel_pstate->base;
>>   	struct drm_framebuffer *fb = pstate->fb;
>> +	struct intel_atomic_state *intel_state =
>> +			to_intel_atomic_state(cstate->base.state);
>>   	uint32_t latency = dev_priv->wm.skl_latency[level];
>>   	uint32_t method1, method2;
>>   	uint32_t plane_bytes_per_line, plane_blocks_per_line;
>> @@ -3598,10 +3600,17 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   	uint32_t width = 0, height = 0;
>>   	uint32_t plane_pixel_rate;
>>   	uint32_t y_tile_minimum, y_min_scanlines;
>> +	enum watermark_memory_wa mem_wa;
>>   
>>   	if (latency == 0 || !cstate->base.active || !intel_pstate-
>>> base.visible)
>>   		return 0;
>>   
>> +	mem_wa = intel_state ? intel_state->wm_results.mem_wa :
>> WATERMARK_WA_NONE;
>> +	if (mem_wa != WATERMARK_WA_NONE) {
>> +		if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
>> +			latency += 15;
>> +	}
>> +
>>   	width = drm_rect_width(&intel_pstate->base.src) >> 16;
>>   	height = drm_rect_height(&intel_pstate->base.src) >> 16;
>>   
>> @@ -3634,6 +3643,9 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   		y_min_scanlines = 4;
>>   	}
>>   
>> +	if (mem_wa == WATERMARK_WA_Y_TILED)
>> +		y_min_scanlines *= 2;
> The changes to this function will need to be rebased. But it's
> interesting that my interpretation regarding this *= 2 was different.
> After an email to the spec authors it seems your interpretation is the
> right one...
>
>
>> +
>>   	plane_bytes_per_line = width * cpp;
>>   	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
>>   	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
>> @@ -4075,6 +4087,15 @@ skl_include_affected_pipes(struct
>> drm_atomic_state *state)
>>   		intel_state->wm_results.dirty_pipes = ~0;
>>   	}
>>   
>> +	/*
>> +	 * If Watermark workaround is changed we need to recalculate
>> +	 * watermark values for all active pipes
>> +	 */
>> +	if (intel_state->wm_results.mem_wa != dev_priv-
>>> wm.skl_hw.mem_wa) {
>> +		realloc_pipes = ~0;
>> +		intel_state->wm_results.dirty_pipes = ~0;
>> +	}
>> +
> Things have changed since this patch was written. I'd recommend moving
> this to skl_set_memory_bandwidth_wa().
Working on re-basing the patch.
>
>>   	for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
>>   		struct intel_crtc_state *cstate;
>>   
>> @@ -4087,6 +4108,129 @@ skl_include_affected_pipes(struct
>> drm_atomic_state *state)
>>   }
>>   
>>   static void
>> +skl_set_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
> We're not really setting anything here: we're computing. Rename to
> skl_compute_wm_memory_bw_wa?
>
>
>> +{
>> +	struct drm_device *dev = state->dev;
>> +	struct drm_i915_private *dev_priv = to_i915(dev);
>> +	struct intel_crtc *intel_crtc;
>> +	struct intel_plane_state *intel_pstate;
>> +	struct intel_atomic_state *intel_state =
>> to_intel_atomic_state(state);
>> +	struct memdev_info *memdev_info = &dev_priv->memdev_info;
>> +	int num_active_plane, num_active_pipe;
> I like to use plural in the number-of variables, since we're counting
> the number of active planeS, not the number of the active plane.
>
>
>> +	uint32_t plane_bw, max_plane_bw, pipe_bw, max_pipe_bw;
>> +	uint32_t total_pipe_bw;
>> +	uint32_t system_bw = 0;
>> +	uint32_t rank;
>> +	int x_tile_per;
>> +	int display_bw_per;
> I read this and kept thinking "x tiles per what?", took me a while to
> realize it's percentage. It's probably better to just use a long self-
> documenting name here than to use an ambiguous abbreviation.
>
> Also, I'd just go with uint32_t in the percentages too to avoid any
> possible problems with the uin32_t calculations.
>
> And perhaps some declarations could be moved to smaller inner scopes
> below.
>
>
>> +	bool y_tile_enabled = false;
>> +
> if (!platforms_that_require_the_wa) {
> 	wa = WATERMARK_WA_NONE;
> 	return;
> }
this function is not called by any GEN other than GEN9, will it be ok to 
add "if (!GEN9)" check?
>> +	if (!memdev_info->valid)
>> +		goto exit;
> Our default behavior should be to apply the WA then in doubt, not
> to avoid it, so the return value here and in the other error cases
> should be WATERAMARK_WA_Y_TILED.
>
> Also, you can avoid the gotos by just setting mem_wa at the beginning
> of the function, then you can just "return" later instead of goto.
>
>
>> +
>> +	system_bw = memdev_info->mem_speed_khz * memdev_info-
>>> num_channels *
>> +				memdev_info->channel_width_bytes;
>> +
>> +	if (!system_bw)
>> +		goto exit;
> This shouldn't be possible. Otherwise, the previous patch needs to be
> fixed in a way to not allow system_bw to end up as zero. Please either
> remove the check or change to "if (WARN_ON(!system_bw))".
yes, ideally this should not be possible, but what if because of any 
reason BIOS is not writing the register OR we don't have BIOS altogether?

If we add WARN_ON here, it'll flood the logs, we can add WARN_ON_ONCE, 
but we are anyway printing the warning in previous patch, if frequency 
or any other variable calculated is ZERO, IMO will add more prints in 
previous patch instead of adding here.
>
>> +
>> +	max_pipe_bw = 0;
>> +	for_each_intel_crtc(dev, intel_crtc) {
>> +		struct intel_crtc_state *cstate;
>> +		struct intel_plane *plane;
>> +
>> +		/*
>> +		 * If CRTC is part of current atomic commit, get
>> crtc state from
>> +		 * existing CRTC state. else take the cached CRTC
>> state
>> +		 */
>> +		cstate = NULL;
>> +		if (state)
> If state is NULL we'll segfault way before this point, so there's no
> need for this check.
>
>
>> +			cstate =
>> intel_atomic_get_existing_crtc_state(state,
>> +					intel_crtc);
>> +		if (!cstate)
>> +			cstate = to_intel_crtc_state(intel_crtc-
>>> base.state);
>> +
>> +		if (!cstate->base.active)
>> +			continue;
>> +
>> +		num_active_plane = 0;
>> +		max_plane_bw = 0;
>> +		for_each_intel_plane_mask(dev, plane, cstate-
>>> base.plane_mask) {
>> +			struct drm_framebuffer *fb = NULL;
>> +
>> +			intel_pstate = NULL;
>> +			if (state)
> Same here: no need to check for state.
>
>> +				intel_pstate =
>> +				intel_atomic_get_existing_plane_stat
>> e(state,
>> +									
>> plane);
>> +			if (!intel_pstate)
>> +				intel_pstate =
>> +					to_intel_plane_state(plane-
>>> base.state);
>> +
>> +			WARN_ON(!intel_pstate->base.fb);
> if (WARN_ON(!intel_pstate->base.fb))
> 	return;
>
> Then we can just forget about checking for fb again below.
>
>> +
>> +			if (!intel_pstate->base.visible)
>> +				continue;
> Don't we also need to exclude the cursor here?
yes, cursor should be excluded, will add that. thanks,
>
>
>> +
>> +			fb = intel_pstate->base.fb;
>> +			if (fb && (fb->modifier[0] ==
>> I915_FORMAT_MOD_Y_TILED ||
>> +				fb->modifier[0] ==
>> I915_FORMAT_MOD_Yf_TILED))
>> +				y_tile_enabled = true;
>> +
>> +			plane_bw =
>> skl_adjusted_plane_pixel_rate(cstate,
>> +								inte
>> l_pstate);
>> +			max_plane_bw = max(plane_bw, max_plane_bw);
>> +			num_active_plane++;
>> +		}
>> +		pipe_bw = max_plane_bw * num_active_plane;
>> +		max_pipe_bw = max(pipe_bw, max_pipe_bw);
>> +	}
>> +
>> +	if (intel_state->active_pipe_changes)
>> +		num_active_pipe = hweight32(intel_state-
>>> active_crtcs);
>> +	else
>> +		num_active_pipe = hweight32(dev_priv->active_crtcs);
> Why don't we just trust intel_state->active_crtcs even when
> active_pipe_changes is false?
I really didn't dig into that code & reused the code from different 
function. will check into this
>
>
>> +
>> +	total_pipe_bw = max_pipe_bw * num_active_pipe;
>> +
>> +	display_bw_per = DIV_ROUND_UP_ULL(total_pipe_bw * 100,
>> system_bw * 1000);
> Why ULL?
>
> Also, if everything is KHz as it's supposed to be, the *100 and *1000
> don't make sense.
In previous patch I converted the frequency to MHz... need to fix that, 
then *1000 not be required,
*100 is still required to get the percentage.
>
>
>> +
>> +	/*
>> +	 * If there is any Ytile plane enabled and arbitrated
>> display
>> +	 * bandwidth > 20% of raw system memory bandwidth
>> +	 * Enale Y-tile related WA
>> +	 *
>> +	 * If memory is dual channel single rank, Xtile limit = 35%,
>> else Xtile
>> +	 * limit = 60%
>> +	 * If there is no Ytile plane enabled and
>> +	 * arbitrated display bandwidth > Xtile limit
>> +	 * Enable X-tile realated WA
>> +	 */
>> +	if (y_tile_enabled && (display_bw_per > 20))
>> +		intel_state->wm_results.mem_wa =
>> WATERMARK_WA_Y_TILED;
>> +	else {
>> +
>> +		if (memdev_info->rank_valid)
>> +			rank = memdev_info->rank;
>> +		else
>> +			rank = DRAM_RANK_DUAL; /* Assume we are dual
>> rank */
> When in doubt, apply the most restrictive workaround to avoid the
> possibility of underruns. So here it's safer to assume
> DRAM_RANK_SINGLE.
OK...

thanks,

Regards,
-Mahesh
>
>> +
>> +		if ((rank == DRAM_RANK_SINGLE) &&
>> +					(memdev_info->num_channels
>> == 2))
>> +			x_tile_per = 35;
>> +		else
>> +			x_tile_per = 60;
>> +
>> +		if (display_bw_per > x_tile_per)
>> +			intel_state->wm_results.mem_wa =
>> WATERMARK_WA_X_TILED;
>> +	}
>> +	return;
>> +
>> +exit:
>> +	intel_state->wm_results.mem_wa = WATERMARK_WA_NONE;
>> +}
>> +
>> +static void
>>   skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>>   		     struct skl_wm_values *src,
>>   		     enum pipe pipe)
>> @@ -4131,6 +4275,8 @@ skl_compute_wm(struct drm_atomic_state *state)
>>   	/* Clear all dirty flags */
>>   	results->dirty_pipes = 0;
>>   
>> +	skl_set_memory_bandwidth_wm_wa(state);
>> +
>>   	ret = skl_include_affected_pipes(state);
>>   	if (ret)
>>   		return ret;