[Intel-gfx] [PATCH v6 8/8] drm/i915/gen9: WM memory bandwidth related workaround

Tue Nov 29 13:47:54 UTC 2016


On Tuesday 29 November 2016 03:16 PM, Lankhorst, Maarten wrote:
> Mahesh Kumar schreef op di 29-11-2016 om 11:12 [+0530]:
>> Hi,
>>
>>
>> On Thursday 24 November 2016 06:21 PM, Lankhorst, Maarten wrote:
>>> Mahesh Kumar schreef op do 24-11-2016 om 09:31 [+0530]:
>>>> This patch implemnets Workariunds related to display arbitrated
>>>> memory
>>>> bandwidth. These WA are applicabe for all gen-9 based platforms.
>>>>
>>>> Changes since v1:
>>>>    - Rebase on top of Paulo's patch series
>>>> Changes since v2:
>>>>    - Address review comments
>>>>    - Rebase/rework as per other patch changes in series
>>>>
>>>> Signed-off-by: Mahesh Kumar <mahesh1.kumar at intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/i915_drv.h |   9 +++
>>>>    drivers/gpu/drm/i915/intel_pm.c | 149
>>>> +++++++++++++++++++++++++++++++++++++---
>>>>    2 files changed, 149 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>> index 4e2f17f..2b673c6 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -1193,6 +1193,13 @@ enum intel_sbi_destination {
>>>>    	SBI_MPHY,
>>>>    };
>>>>    
>>>> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
>>>> +enum watermark_memory_wa {
>>>> +	WATERMARK_WA_NONE,
>>>> +	WATERMARK_WA_X_TILED,
>>>> +	WATERMARK_WA_Y_TILED,
>>>> +};
>>>> +
>>>>    #define QUIRK_PIPEA_FORCE (1<<0)
>>>>    #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>>>>    #define QUIRK_INVERT_BRIGHTNESS (1<<2)
>>>> @@ -1764,6 +1771,8 @@ struct skl_ddb_allocation {
>>>>    
>>>>    struct skl_wm_values {
>>>>    	unsigned dirty_pipes;
>>>> +	/* any WaterMark memory workaround Required */
>>>> +	enum watermark_memory_wa mem_wa;
>>>>    	struct skl_ddb_allocation ddb;
>>>>    };
>>>>    
>>>> diff --git a/drivers/gpu/drm/i915/intel_pm.c
>>>> b/drivers/gpu/drm/i915/intel_pm.c
>>>> index 3e2dd8f..547bbda 100644
>>>> --- a/drivers/gpu/drm/i915/intel_pm.c
>>>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>>>> @@ -2889,11 +2889,7 @@ skl_wm_plane_id(const struct intel_plane
>>>> *plane)
>>>>    	}
>>>>    }
>>>>    
>>>> -/*
>>>> - * FIXME: We still don't have the proper code detect if we need
>>>> to
>>>> apply the WA,
>>>> - * so assume we'll always need it in order to avoid underruns.
>>>> - */
>>>> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state
>>>> *state)
>>>> +static bool intel_needs_memory_bw_wa(struct intel_atomic_state
>>>> *state)
>>>>    {
>>>>    	struct drm_i915_private *dev_priv = to_i915(state-
>>>>> base.dev);
>>>>    
>>>> @@ -3067,7 +3063,7 @@ bool intel_can_enable_sagv(struct
>>>> drm_atomic_state *state)
>>>>    
>>>>    		latency = dev_priv->wm.skl_latency[level];
>>>>    
>>>> -		if (skl_needs_memory_bw_wa(intel_state) &&
>>>> +		if (intel_needs_memory_bw_wa(intel_state) &&
>>>>    		    plane->base.state->fb->modifier ==
>>>>    		    I915_FORMAT_MOD_X_TILED)
>>>>    			latency += 15;
>>>> @@ -3597,7 +3593,7 @@ static int skl_compute_plane_wm(const
>>>> struct
>>>> drm_i915_private *dev_priv,
>>>>    	uint32_t y_min_scanlines;
>>>>    	struct intel_atomic_state *state =
>>>>    		to_intel_atomic_state(cstate->base.state);
>>>> -	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
>>>> +	enum watermark_memory_wa mem_wa;
>>>>    	bool y_tiled, x_tiled;
>>>>    
>>>>    	if (latency == 0 || !cstate->base.active ||
>>>> !intel_pstate-
>>>>> base.visible) {
>>>> @@ -3613,7 +3609,8 @@ static int skl_compute_plane_wm(const
>>>> struct
>>>> drm_i915_private *dev_priv,
>>>>    	if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
>>>>    		latency += 4;
>>>>    
>>>> -	if (apply_memory_bw_wa && x_tiled)
>>>> +	mem_wa = state->wm_results.mem_wa;
>>>> +	if (mem_wa != WATERMARK_WA_NONE && x_tiled)
>>>>    		latency += 15;
>>>>    
>>>>    	width = drm_rect_width(&intel_pstate->base.src) >> 16;
>>>> @@ -3648,7 +3645,7 @@ static int skl_compute_plane_wm(const
>>>> struct
>>>> drm_i915_private *dev_priv,
>>>>    		y_min_scanlines = 4;
>>>>    	}
>>>>    
>>>> -	if (apply_memory_bw_wa)
>>>> +	if (mem_wa == WATERMARK_WA_Y_TILED)
>>>>    		y_min_scanlines *= 2;
>>>>    
>>>>    	plane_bytes_per_line = width * cpp;
>>>> @@ -4077,6 +4074,15 @@ skl_compute_ddb(struct drm_atomic_state
>>>> *state)
>>>>    	}
>>>>    
>>>>    	/*
>>>> +	 * If Watermark workaround is changed we need to
>>>> recalculate
>>>> +	 * watermark values for all active pipes
>>>> +	 */
>>>> +	if (intel_state->wm_results.mem_wa != dev_priv-
>>>>> wm.skl_hw.mem_wa) {
>>>> +		realloc_pipes = ~0;
>>>> +		intel_state->wm_results.dirty_pipes = ~0;
>>>> +	}
>>>> +
>>>> +	/*
>>>>    	 * We're not recomputing for the pipes not included in
>>>> the
>>>> commit, so
>>>>    	 * make sure we start with the current state.
>>>>    	 */
>>>> @@ -4102,6 +4108,129 @@ skl_compute_ddb(struct drm_atomic_state
>>>> *state)
>>>>    }
>>>>    
>>>>    static void
>>>> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state
>>>> *state)
>>>> +{
>>>> +	struct drm_device *dev = state->dev;
>>>> +	struct drm_i915_private *dev_priv = to_i915(dev);
>>>> +	struct intel_crtc *intel_crtc;
>>>> +	struct intel_plane_state *intel_pstate;
>>>> +	struct intel_atomic_state *intel_state =
>>>> to_intel_atomic_state(state);
>>>> +	struct memdev_info *memdev_info = &dev_priv-
>>>>> memdev_info;
>>>> +	int num_active_pipes;
>>>> +	uint32_t max_pipe_bw_kbps, total_pipe_bw_kbps;
>>>> +	int display_bw_percentage;
>>>> +	bool y_tile_enabled = false;
>>>> +
>>>> +	if (!intel_needs_memory_bw_wa(intel_state)) {
>>>> +		intel_state->wm_results.mem_wa =
>>>> WATERMARK_WA_NONE;
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	if (!memdev_info->valid)
>>>> +		goto exit;
>>>> +
>>>> +	max_pipe_bw_kbps = 0;
>>>> +	num_active_pipes = 0;
>>>> +	for_each_intel_crtc(dev, intel_crtc) {
>>>> +		struct intel_crtc_state *cstate;
>>>> +		struct intel_plane *plane;
>>>> +		int num_active_planes;
>>>> +		uint32_t  max_plane_bw_kbps, pipe_bw_kbps;
>>>> +
>>>> +		/*
>>>> +		 * If CRTC is part of current atomic commit, get
>>>> crtc state from
>>>> +		 * existing CRTC state. else take the cached
>>>> CRTC
>>>> state
>>>> +		 */
>>>> +		cstate = NULL;
>>>> +		if (state)
>>>> +			cstate =
>>>> intel_atomic_get_existing_crtc_state(state,
>>>> +					intel_crtc);
>>>> +		if (!cstate)
>>>> +			cstate = to_intel_crtc_state(intel_crtc-
>>>>> base.state);
>>> This is really really not allowed. If you want to access
>>> crtc->state, you need to get the state.
>>>
>>> Since a w/a forces a full recalculation, best you can do is put the
>>> per
>>> pipe status in crtc_state->wm.skl as well. If it changes you could
>>> add
>>> all other pipes.
>> I really don't want to add all pipes in "state" until that is
>> necessary
>> otherwise it'll make our commit huge, that's why getting the cached
>> state.
>> Same goes for plane. Later if w/a changes then I'm  adding all the
>> CRTC's in relloc_pipes mask.
>>
>> 	if (intel_state->wm_results.mem_wa != dev_priv-
>>> wm.skl_hw.mem_wa)
>> If I hold the crtc->mutex or plane->mutex lock, It may cause
>> deadlock
>> with other commits.
>> I can keep a cached copy of plane memory bandwidth requirement in
>> "dev_priv->wm.skl_hw", but again if there are two parallel commits
>> it
>> may cause race.
>>
>> Can you please suggest what is the correct way of getting plane/crtc
>> state during check phase without have to actually commit them (add
>> in
>> global-state),
>> If any such thing is available in atomic framework?
> How often will the workaround status change in practice?
In Linux use-case status changes very rarely, If we connect multiple 
high resolution display & our DRAM's bandwidth is less we may have to 
enable the WA.
& it'll remain there until any display in unplugged OR any modeset with 
lower resolution.
On the other hand, In OS with multi-plane enabled, WA may change if any 
plane is enabled/disabled (based on total bandwidth requirement for display)
> You could do same as active_crtcs, protect changing the workaround
> current crtc's workaround mode in dev_priv->wm.skl.wa_state[pipe] with
> connection_mutex. If this means changing that the global workaround
> setting is changed, then also acquire all other crtc states.
This is not per-CRTC WA, this is complete display-system level WA. & 
depends upon total memory requirement of display.
As you suggested I can keep the status(bandwidth requirement) for each 
pipe in dev_priv->wm.skl.wa_state/bandwidth[pipe], and access it with 
some mutex.
Is connection mutex is right mutex to hold, what about wm_mutex?


Regards,
-Mahesh
>
> I can't say it's pretty though, it would need a good justification for
> how bad the problem is in practice plus pretty good documentation on
> why it works.
>
> ~Maarten