[Intel-gfx] [PATCH 10/11] drm/i915/skl: New ddb allocation algorithm
Mahesh Kumar
mahesh1.kumar at intel.com
Mon May 15 08:15:01 UTC 2017
Hi,
On Saturday 13 May 2017 03:54 AM, Matt Roper wrote:
> On Mon, May 08, 2017 at 05:19:01PM +0530, Mahesh Kumar wrote:
>> This patch implements the new DDB allocation algorithm recommended by
>> the HW team. The algorithm takes care of the scenario where we allocate
>> less DDB for the planes with a lower relative pixel rate, even though
>> they require more DDB to work.
>> It also takes care of enabling the same watermark level for each
>> plane in a crtc, for efficient power saving.
>>
>> Changes since v1:
>> - Rebase on top of Paulo's patch series
>>
>> Changes since v2:
>> - Fix the for loop condition to enable WM
>>
>> Changes since v3:
>> - Fix crash in cursor i-g-t reported by Maarten
>> - Rebase after addressing Paulo's comments
>> - Few other ULT fixes
>>
>> Changes since v4:
>> - Rebase on drm-tip
>> - Added separate function to enable WM levels
>>
>> Changes since v5:
>> - Fix a crash identified in skl-6770HQ system
>>
>> Signed-off-by: Mahesh Kumar <mahesh1.kumar at intel.com>
>> ---
>> drivers/gpu/drm/i915/intel_pm.c | 253 ++++++++++++++++++++++++----------------
>> 1 file changed, 152 insertions(+), 101 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index bcf5d2523e4a..92600cf42e12 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -3573,13 +3573,41 @@ skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active,
>> minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
>> }
>>
>> +static void
>> +skl_enable_plane_wm_levels(const struct drm_i915_private *dev_priv,
>> + uint16_t plane_ddb,
>> + uint16_t max_level,
>> + struct skl_plane_wm *wm)
>> +{
>> + int level;
>> + /*
>> + * Now enable all the levels in the WM structure which can be
>> + * enabled using the current DDB allocation
>> + */
>> + for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) {
>> + struct skl_wm_level *level_wm = &wm->wm[level];
>> +
>> + if (level > max_level || level_wm->plane_res_b == 0
>> + || level_wm->plane_res_l >= 31
>> + || level_wm->plane_res_b >= plane_ddb) {
>> + level_wm->plane_en = false;
>> + level_wm->plane_res_b = 0;
>> + level_wm->plane_res_l = 0;
>> + } else {
>> + level_wm->plane_en = true;
>> + }
>> + }
>> +}
>> +
>> static int
>> skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>> - struct skl_ddb_allocation *ddb /* out */)
>> + struct skl_ddb_allocation *ddb /* out */,
>> + struct skl_pipe_wm *pipe_wm)
>> {
>> struct drm_atomic_state *state = cstate->base.state;
>> struct drm_crtc *crtc = cstate->base.crtc;
>> struct drm_device *dev = crtc->dev;
>> + struct drm_i915_private *dev_priv = to_i915(dev);
>> struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> enum pipe pipe = intel_crtc->pipe;
>> struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
>> @@ -3592,6 +3620,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>> unsigned plane_data_rate[I915_MAX_PLANES] = {};
>> unsigned plane_y_data_rate[I915_MAX_PLANES] = {};
>> uint16_t total_min_blocks = 0;
>> + uint16_t total_level_ddb;
>> + int max_level, level;
>>
>> /* Clear the partitioning for disabled planes. */
>> memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
>> @@ -3630,10 +3660,43 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>> return -EINVAL;
>> }
>>
>> - alloc_size -= total_min_blocks;
>> - ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
>> + alloc_size -= minimum[PLANE_CURSOR];
>> + ddb->plane[pipe][PLANE_CURSOR].start = alloc->end -
>> + minimum[PLANE_CURSOR];
>> ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
>>
>> + for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) {
> Why do we walk backwards here (max level down to 0)? Shouldn't we be
> going the other direction so that we allocate blocks for WM0, then move
> on to the higher levels until we eventually fail? Maybe I'm missing
> something...
We are checking here for the levels which "can't be enabled", and pruning
those levels. That's the reason for walking from the max level down to 0.
>> + total_level_ddb = 0;
>> + for_each_plane_id_on_crtc(intel_crtc, plane_id) {
>> + /*
>> + * TODO: We should calculate watermark values for both the
>> + * Y and UV planes in case of the NV12 format and use both
>> + * values for the DDB calculation. NV12 is disabled as of
>> + * now, so only the single/UV plane value is used here.
>> + */
>> + struct skl_plane_wm *wm = &pipe_wm->planes[plane_id];
>> + uint16_t plane_res_b = wm->wm[level].plane_res_b;
>> + uint16_t min = minimum[plane_id] + y_minimum[plane_id];
>> +
>> + if (plane_id == PLANE_CURSOR)
>> + continue;
>> +
>> + total_level_ddb += max(plane_res_b, min);
>> + }
>> +
>> + if (total_level_ddb <= alloc_size)
>> + break;
>> + }
>> +
>> + if ((level < 0) || (total_min_blocks > alloc_size)) {
> If we had plenty of DDB space and could enable every single level, the
> backwards walk above would leave level=-1 when the loop ends. So then
> we'd complain here that we'd exceeded limitations and fail the commit,
> which doesn't seem right.
If we have plenty of DDB, then level 7 itself can be enabled and we break
out of the loop right there.
total_level_ddb is the DDB required to enable a given WM level :)
If we reach -1, that means we don't have sufficient DDB to enable even WM
level 0, and hence we fail the flip.
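
For illustration, here is a minimal standalone sketch of the same search
(the per-level totals and alloc_size below are made-up numbers, not driver
data): walk from the highest watermark level downwards and stop at the
first level whose total DDB requirement fits into the allocation; falling
through to -1 means even WM level 0 cannot be satisfied.

/*
 * Standalone sketch of the backwards level search. The per-level
 * requirements and alloc_size are hypothetical, not driver data.
 */
#include <stdio.h>

#define MAX_LEVEL 7

int main(void)
{
	/* Total blocks needed across all planes to enable each WM level. */
	const unsigned int total_level_ddb[MAX_LEVEL + 1] = {
		120, 150, 190, 240, 300, 380, 470, 580
	};
	const unsigned int alloc_size = 512; /* blocks left for this pipe */
	int level;

	/* The first (i.e. highest) level that fits for all planes wins. */
	for (level = MAX_LEVEL; level >= 0; level--)
		if (total_level_ddb[level] <= alloc_size)
			break;

	if (level < 0)
		printf("even WM0 does not fit -> fail with -EINVAL\n");
	else
		printf("max_level = %d (%u of %u blocks)\n",
		       level, total_level_ddb[level], alloc_size);

	return 0;
}

With the numbers above the loop stops at max_level = 6 (470 <= 512); if
alloc_size were below 120, it would fall through to -1 and the commit is
rejected, which is exactly the case the check in the patch guards against.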
-Mahesh
>
>
> Matt
>
>> + DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
>> + DRM_DEBUG_KMS("minimum required %d/%d\n", (level < 0) ?
>> + total_level_ddb : total_min_blocks, alloc_size);
>> + return -EINVAL;
>> + }
>> + max_level = level;
>> + alloc_size -= total_level_ddb;
>> +
>> /*
>> * 2. Distribute the remaining space in proportion to the amount of
>> * data each plane needs to fetch from memory.
>> @@ -3649,10 +3712,17 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>> start = alloc->start;
>> for_each_plane_id_on_crtc(intel_crtc, plane_id) {
>> unsigned int data_rate, y_data_rate;
>> - uint16_t plane_blocks, y_plane_blocks = 0;
>> -
>> - if (plane_id == PLANE_CURSOR)
>> + uint16_t plane_blocks = 0, y_plane_blocks = 0;
>> + struct skl_plane_wm *wm = &pipe_wm->planes[plane_id];
>> + uint16_t plane_res_b = wm->wm[max_level].plane_res_b;
>> +
>> + if (plane_id == PLANE_CURSOR) {
>> + plane_blocks =
>> + skl_ddb_entry_size(&ddb->plane[pipe][plane_id]);
>> + skl_enable_plane_wm_levels(dev_priv, plane_blocks,
>> + max_level, wm);
>> continue;
>> + }
>>
>> data_rate = plane_data_rate[plane_id];
>>
>> @@ -3661,33 +3731,36 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>> * promote the expression to 64 bits to avoid overflowing, the
>> * result is < available as data_rate / total_data_rate < 1
>> */
>> - plane_blocks = minimum[plane_id];
>> - plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
>> - total_data_rate);
>>
>> /* Leave disabled planes at (0,0) */
>> if (data_rate) {
>> + plane_blocks = max(minimum[plane_id], plane_res_b);
>> + plane_blocks += div_u64((uint64_t)alloc_size *
>> + data_rate, total_data_rate);
>> ddb->plane[pipe][plane_id].start = start;
>> ddb->plane[pipe][plane_id].end = start + plane_blocks;
>> + start += plane_blocks;
>> }
>>
>> - start += plane_blocks;
>> -
>> /*
>> * allocation for y_plane part of planar format:
>> + * TODO: Once we start calculating watermark values for both
>> + * the Y and UV planes, consider them for the initial allowed
>> + * wm blocks.
>> */
>> y_data_rate = plane_y_data_rate[plane_id];
>>
>> - y_plane_blocks = y_minimum[plane_id];
>> - y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
>> - total_data_rate);
>> -
>> if (y_data_rate) {
>> + y_plane_blocks = y_minimum[plane_id];
>> + y_plane_blocks += div_u64((uint64_t)alloc_size *
>> + y_data_rate, total_data_rate);
>> ddb->y_plane[pipe][plane_id].start = start;
>> ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks;
>> + start += y_plane_blocks;
>> }
>> -
>> - start += y_plane_blocks;
>> + skl_enable_plane_wm_levels(dev_priv,
>> + plane_blocks,
>> + max_level,
>> + wm);
>> }
>>
>> return 0;
>> @@ -3776,11 +3849,9 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst
>> static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>> struct intel_crtc_state *cstate,
>> const struct intel_plane_state *intel_pstate,
>> - uint16_t ddb_allocation,
>> int level,
>> uint16_t *out_blocks, /* out */
>> - uint8_t *out_lines, /* out */
>> - bool *enabled /* out */)
>> + uint8_t *out_lines /* out */)
>> {
>> struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
>> const struct drm_plane_state *pstate = &intel_pstate->base;
>> @@ -3803,10 +3874,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>> bool y_tiled, x_tiled;
>>
>> if (latency == 0 ||
>> - !intel_wm_plane_visible(cstate, intel_pstate)) {
>> - *enabled = false;
>> + !intel_wm_plane_visible(cstate, intel_pstate))
>> return 0;
>> - }
>>
>> y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
>> fb->modifier == I915_FORMAT_MOD_Yf_TILED;
>> @@ -3892,9 +3961,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>> if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
>> (plane_bytes_per_line / 512 < 1))
>> selected_result = method2;
>> - else if ((ddb_allocation && ddb_allocation /
>> - fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1)
>> - selected_result = min_fixed_16_16(method1, method2);
>> else if (latency >= linetime_us)
>> selected_result = method2;
>> else
>> @@ -3914,64 +3980,41 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>> }
>> }
>>
>> - if (res_blocks >= ddb_allocation || res_lines > 31) {
>> - *enabled = false;
>> -
>> - /*
>> - * If there are no valid level 0 watermarks, then we can't
>> - * support this display configuration.
>> - */
>> - if (level) {
>> - return 0;
>> - } else {
>> - struct drm_plane *plane = pstate->plane;
>> + if (res_lines >= 31 && level == 0) {
>> + struct drm_plane *plane = pstate->plane;
>>
>> - DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
>> - DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n",
>> - plane->base.id, plane->name,
>> - res_blocks, ddb_allocation, res_lines);
>> - return -EINVAL;
>> - }
>> + DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
>> + DRM_DEBUG_KMS("[PLANE:%d:%s] lines required = %u/31\n",
>> + plane->base.id, plane->name, res_lines);
>> }
>>
>> *out_blocks = res_blocks;
>> *out_lines = res_lines;
>> - *enabled = true;
>>
>> return 0;
>> }
>>
>> static int
>> skl_compute_wm_level(const struct drm_i915_private *dev_priv,
>> - struct skl_ddb_allocation *ddb,
>> struct intel_crtc_state *cstate,
>> const struct intel_plane_state *intel_pstate,
>> struct skl_plane_wm *wm)
>> {
>> - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
>> - struct drm_plane *plane = intel_pstate->base.plane;
>> - struct intel_plane *intel_plane = to_intel_plane(plane);
>> - uint16_t ddb_blocks;
>> - enum pipe pipe = intel_crtc->pipe;
>> int level, max_level = ilk_wm_max_level(dev_priv);
>> int ret;
>>
>> if (WARN_ON(!intel_pstate->base.fb))
>> return -EINVAL;
>>
>> - ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]);
>> -
>> for (level = 0; level <= max_level; level++) {
>> struct skl_wm_level *result = &wm->wm[level];
>>
>> ret = skl_compute_plane_wm(dev_priv,
>> cstate,
>> intel_pstate,
>> - ddb_blocks,
>> level,
>> &result->plane_res_b,
>> - &result->plane_res_l,
>> - &result->plane_en);
>> + &result->plane_res_l);
>> if (ret)
>> return ret;
>> }
>> @@ -4037,8 +4080,7 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
>>
>> wm = &pipe_wm->planes[plane_id];
>>
>> - ret = skl_compute_wm_level(dev_priv, ddb, cstate,
>> - intel_pstate, wm);
>> + ret = skl_compute_wm_level(dev_priv, cstate, intel_pstate, wm);
>> if (ret)
>> return ret;
>>
>> @@ -4152,6 +4194,45 @@ bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries,
>> return false;
>> }
>>
>> +static int
>> +skl_ddb_add_affected_planes(struct intel_crtc_state *cstate,
>> + const struct skl_pipe_wm *old_pipe_wm,
>> + const struct skl_pipe_wm *pipe_wm)
>> +{
>> + struct drm_atomic_state *state = cstate->base.state;
>> + struct drm_device *dev = state->dev;
>> + struct drm_crtc *crtc = cstate->base.crtc;
>> + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> + struct drm_i915_private *dev_priv = to_i915(dev);
>> + struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
>> + struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
>> + struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
>> + struct drm_plane_state *plane_state;
>> + struct drm_plane *plane;
>> + enum pipe pipe = intel_crtc->pipe;
>> +
>> + WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc));
>> +
>> + drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
>> + enum plane_id plane_id = to_intel_plane(plane)->id;
>> + const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id];
>> + const struct skl_plane_wm *old_wm = &old_pipe_wm->planes[plane_id];
>> +
>> + if ((skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
>> + &new_ddb->plane[pipe][plane_id]) &&
>> + skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id],
>> + &new_ddb->y_plane[pipe][plane_id])) &&
>> + !memcmp(wm, old_wm, sizeof(struct skl_plane_wm)))
>> + continue;
>> +
>> + plane_state = drm_atomic_get_plane_state(state, plane);
>> + if (IS_ERR(plane_state))
>> + return PTR_ERR(plane_state);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
>> const struct skl_pipe_wm *old_pipe_wm,
>> struct skl_pipe_wm *pipe_wm, /* out */
>> @@ -4165,6 +4246,18 @@ static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
>> if (ret)
>> return ret;
>>
>> + ret = skl_allocate_pipe_ddb(intel_cstate, ddb, pipe_wm);
>> + if (ret)
>> + return ret;
>> + /*
>> + * TODO: Do we still need to add planes to the state? As WM update
>> + * is no longer part of update_plane, WM for planes can be updated
>> + * irrespective of the update_plane call.
>> + */
>> + ret = skl_ddb_add_affected_planes(intel_cstate, old_pipe_wm, pipe_wm);
>> + if (ret)
>> + return ret;
>> +
>> if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
>> *changed = false;
>> else
>> @@ -4187,41 +4280,7 @@ pipes_modified(struct drm_atomic_state *state)
>> }
>>
>> static int
>> -skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
>> -{
>> - struct drm_atomic_state *state = cstate->base.state;
>> - struct drm_device *dev = state->dev;
>> - struct drm_crtc *crtc = cstate->base.crtc;
>> - struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> - struct drm_i915_private *dev_priv = to_i915(dev);
>> - struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
>> - struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
>> - struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
>> - struct drm_plane_state *plane_state;
>> - struct drm_plane *plane;
>> - enum pipe pipe = intel_crtc->pipe;
>> -
>> - WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc));
>> -
>> - drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
>> - enum plane_id plane_id = to_intel_plane(plane)->id;
>> -
>> - if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
>> - &new_ddb->plane[pipe][plane_id]) &&
>> - skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id],
>> - &new_ddb->y_plane[pipe][plane_id]))
>> - continue;
>> -
>> - plane_state = drm_atomic_get_plane_state(state, plane);
>> - if (IS_ERR(plane_state))
>> - return PTR_ERR(plane_state);
>> - }
>> -
>> - return 0;
>> -}
>> -
>> -static int
>> -skl_compute_ddb(struct drm_atomic_state *state)
>> +skl_include_affected_crtc(struct drm_atomic_state *state)
>> {
>> struct drm_device *dev = state->dev;
>> struct drm_i915_private *dev_priv = to_i915(dev);
>> @@ -4285,14 +4344,6 @@ skl_compute_ddb(struct drm_atomic_state *state)
>> cstate = intel_atomic_get_crtc_state(state, intel_crtc);
>> if (IS_ERR(cstate))
>> return PTR_ERR(cstate);
>> -
>> - ret = skl_allocate_pipe_ddb(cstate, ddb);
>> - if (ret)
>> - return ret;
>> -
>> - ret = skl_ddb_add_affected_planes(cstate);
>> - if (ret)
>> - return ret;
>> }
>>
>> return 0;
>> @@ -4373,7 +4424,7 @@ skl_compute_wm(struct drm_atomic_state *state)
>> /* Clear all dirty flags */
>> results->dirty_pipes = 0;
>>
>> - ret = skl_compute_ddb(state);
>> + ret = skl_include_affected_crtc(state);
>> if (ret)
>> return ret;
>>
>> --
>> 2.11.0
>>