[Intel-gfx] [PATCH v6 8/8] drm/i915/gen9: WM memory bandwidth related workaround
Mahesh Kumar
mahesh1.kumar at intel.com
Tue Nov 29 13:47:54 UTC 2016
On Tuesday 29 November 2016 03:16 PM, Lankhorst, Maarten wrote:
> Mahesh Kumar schreef op di 29-11-2016 om 11:12 [+0530]:
>> Hi,
>>
>>
>> On Thursday 24 November 2016 06:21 PM, Lankhorst, Maarten wrote:
>>> Mahesh Kumar schreef op do 24-11-2016 om 09:31 [+0530]:
>>>> This patch implemnets Workariunds related to display arbitrated
>>>> memory
>>>> bandwidth. These WA are applicabe for all gen-9 based platforms.
>>>>
>>>> Changes since v1:
>>>> - Rebase on top of Paulo's patch series
>>>> Changes since v2:
>>>> - Address review comments
>>>> - Rebase/rework as per other patch changes in series
>>>>
>>>> Signed-off-by: Mahesh Kumar <mahesh1.kumar at intel.com>
>>>> ---
>>>> drivers/gpu/drm/i915/i915_drv.h | 9 +++
>>>> drivers/gpu/drm/i915/intel_pm.c | 149
>>>> +++++++++++++++++++++++++++++++++++++---
>>>> 2 files changed, 149 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>> index 4e2f17f..2b673c6 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -1193,6 +1193,13 @@ enum intel_sbi_destination {
>>>> SBI_MPHY,
>>>> };
>>>>
>>>> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
>>>> +enum watermark_memory_wa {
>>>> + WATERMARK_WA_NONE,
>>>> + WATERMARK_WA_X_TILED,
>>>> + WATERMARK_WA_Y_TILED,
>>>> +};
>>>> +
>>>> #define QUIRK_PIPEA_FORCE (1<<0)
>>>> #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>>>> #define QUIRK_INVERT_BRIGHTNESS (1<<2)
>>>> @@ -1764,6 +1771,8 @@ struct skl_ddb_allocation {
>>>>
>>>> struct skl_wm_values {
>>>> unsigned dirty_pipes;
>>>> + /* any WaterMark memory workaround Required */
>>>> + enum watermark_memory_wa mem_wa;
>>>> struct skl_ddb_allocation ddb;
>>>> };
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/intel_pm.c
>>>> b/drivers/gpu/drm/i915/intel_pm.c
>>>> index 3e2dd8f..547bbda 100644
>>>> --- a/drivers/gpu/drm/i915/intel_pm.c
>>>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>>>> @@ -2889,11 +2889,7 @@ skl_wm_plane_id(const struct intel_plane
>>>> *plane)
>>>> }
>>>> }
>>>>
>>>> -/*
>>>> - * FIXME: We still don't have the proper code detect if we need
>>>> to
>>>> apply the WA,
>>>> - * so assume we'll always need it in order to avoid underruns.
>>>> - */
>>>> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state
>>>> *state)
>>>> +static bool intel_needs_memory_bw_wa(struct intel_atomic_state
>>>> *state)
>>>> {
>>>> struct drm_i915_private *dev_priv = to_i915(state-
>>>>> base.dev);
>>>>
>>>> @@ -3067,7 +3063,7 @@ bool intel_can_enable_sagv(struct
>>>> drm_atomic_state *state)
>>>>
>>>> latency = dev_priv->wm.skl_latency[level];
>>>>
>>>> - if (skl_needs_memory_bw_wa(intel_state) &&
>>>> + if (intel_needs_memory_bw_wa(intel_state) &&
>>>> plane->base.state->fb->modifier ==
>>>> I915_FORMAT_MOD_X_TILED)
>>>> latency += 15;
>>>> @@ -3597,7 +3593,7 @@ static int skl_compute_plane_wm(const
>>>> struct
>>>> drm_i915_private *dev_priv,
>>>> uint32_t y_min_scanlines;
>>>> struct intel_atomic_state *state =
>>>> to_intel_atomic_state(cstate->base.state);
>>>> - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
>>>> + enum watermark_memory_wa mem_wa;
>>>> bool y_tiled, x_tiled;
>>>>
>>>> if (latency == 0 || !cstate->base.active ||
>>>> !intel_pstate-
>>>>> base.visible) {
>>>> @@ -3613,7 +3609,8 @@ static int skl_compute_plane_wm(const
>>>> struct
>>>> drm_i915_private *dev_priv,
>>>> if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
>>>> latency += 4;
>>>>
>>>> - if (apply_memory_bw_wa && x_tiled)
>>>> + mem_wa = state->wm_results.mem_wa;
>>>> + if (mem_wa != WATERMARK_WA_NONE && x_tiled)
>>>> latency += 15;
>>>>
>>>> width = drm_rect_width(&intel_pstate->base.src) >> 16;
>>>> @@ -3648,7 +3645,7 @@ static int skl_compute_plane_wm(const
>>>> struct
>>>> drm_i915_private *dev_priv,
>>>> y_min_scanlines = 4;
>>>> }
>>>>
>>>> - if (apply_memory_bw_wa)
>>>> + if (mem_wa == WATERMARK_WA_Y_TILED)
>>>> y_min_scanlines *= 2;
>>>>
>>>> plane_bytes_per_line = width * cpp;
>>>> @@ -4077,6 +4074,15 @@ skl_compute_ddb(struct drm_atomic_state
>>>> *state)
>>>> }
>>>>
>>>> /*
>>>> + * If Watermark workaround is changed we need to
>>>> recalculate
>>>> + * watermark values for all active pipes
>>>> + */
>>>> + if (intel_state->wm_results.mem_wa != dev_priv-
>>>>> wm.skl_hw.mem_wa) {
>>>> + realloc_pipes = ~0;
>>>> + intel_state->wm_results.dirty_pipes = ~0;
>>>> + }
>>>> +
>>>> + /*
>>>> * We're not recomputing for the pipes not included in
>>>> the
>>>> commit, so
>>>> * make sure we start with the current state.
>>>> */
>>>> @@ -4102,6 +4108,129 @@ skl_compute_ddb(struct drm_atomic_state
>>>> *state)
>>>> }
>>>>
>>>> static void
>>>> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state
>>>> *state)
>>>> +{
>>>> + struct drm_device *dev = state->dev;
>>>> + struct drm_i915_private *dev_priv = to_i915(dev);
>>>> + struct intel_crtc *intel_crtc;
>>>> + struct intel_plane_state *intel_pstate;
>>>> + struct intel_atomic_state *intel_state =
>>>> to_intel_atomic_state(state);
>>>> + struct memdev_info *memdev_info = &dev_priv-
>>>>> memdev_info;
>>>> + int num_active_pipes;
>>>> + uint32_t max_pipe_bw_kbps, total_pipe_bw_kbps;
>>>> + int display_bw_percentage;
>>>> + bool y_tile_enabled = false;
>>>> +
>>>> + if (!intel_needs_memory_bw_wa(intel_state)) {
>>>> + intel_state->wm_results.mem_wa =
>>>> WATERMARK_WA_NONE;
>>>> + return;
>>>> + }
>>>> +
>>>> + if (!memdev_info->valid)
>>>> + goto exit;
>>>> +
>>>> + max_pipe_bw_kbps = 0;
>>>> + num_active_pipes = 0;
>>>> + for_each_intel_crtc(dev, intel_crtc) {
>>>> + struct intel_crtc_state *cstate;
>>>> + struct intel_plane *plane;
>>>> + int num_active_planes;
>>>> + uint32_t max_plane_bw_kbps, pipe_bw_kbps;
>>>> +
>>>> + /*
>>>> + * If CRTC is part of current atomic commit, get
>>>> crtc state from
>>>> + * existing CRTC state. else take the cached
>>>> CRTC
>>>> state
>>>> + */
>>>> + cstate = NULL;
>>>> + if (state)
>>>> + cstate =
>>>> intel_atomic_get_existing_crtc_state(state,
>>>> + intel_crtc);
>>>> + if (!cstate)
>>>> + cstate = to_intel_crtc_state(intel_crtc-
>>>>> base.state);
>>> This is really really not allowed. If you want to access
>>> crtc->state, you need to get the state.
>>>
>>> Since a w/a forces a full recalculation, best you can do is put the
>>> per
>>> pipe status in crtc_state->wm.skl as well. If it changes you could
>>> add
>>> all other pipes.
>> I really don't want to add all pipes in "state" until that is
>> necessary
>> otherwise it'll make our commit huge, that's why getting the cached
>> state.
>> Same goes for plane. Later if w/a changes then I'm adding all the
>> CRTC's in relloc_pipes mask.
>>
>> if (intel_state->wm_results.mem_wa != dev_priv-
>>> wm.skl_hw.mem_wa)
>> If I hold the crtc->mutex or plane->mutex lock, It may cause
>> deadlock
>> with other commits.
>> I can keep a cached copy of plane memory bandwidth requirement in
>> "dev_priv->wm.skl_hw", but again if there are two parallel commits
>> it
>> may cause race.
>>
>> Can you please suggest what is the correct way of getting plane/crtc
>> state during check phase without have to actually commit them (add
>> in
>> global-state),
>> If any such thing is available in atomic framework?
> How often will the workaround status change in practice?
In Linux use-case status changes very rarely, If we connect multiple
high resolution display & our DRAM's bandwidth is less we may have to
enable the WA.
& it'll remain there until any display in unplugged OR any modeset with
lower resolution.
On the other hand, In OS with multi-plane enabled, WA may change if any
plane is enabled/disabled (based on total bandwidth requirement for display)
> You could do same as active_crtcs, protect changing the workaround
> current crtc's workaround mode in dev_priv->wm.skl.wa_state[pipe] with
> connection_mutex. If this means changing that the global workaround
> setting is changed, then also acquire all other crtc states.
This is not per-CRTC WA, this is complete display-system level WA. &
depends upon total memory requirement of display.
As you suggested I can keep the status(bandwidth requirement) for each
pipe in dev_priv->wm.skl.wa_state/bandwidth[pipe], and access it with
some mutex.
Is connection mutex is right mutex to hold, what about wm_mutex?
Regards,
-Mahesh
>
> I can't say it's pretty though, it would need a good justification for
> how bad the problem is in practice plus pretty good documentation on
> why it works.
>
> ~Maarten
More information about the Intel-gfx
mailing list