[Intel-gfx] [PATCH 47/89 v11] drm/i915/skl: SKL Watermark Computation
Ville Syrjälä
ville.syrjala at linux.intel.com
Wed Oct 29 18:07:58 CET 2014
On Tue, Sep 23, 2014 at 12:13:50PM +0100, Damien Lespiau wrote:
> From: Pradeep Bhat <pradeep.bhat at intel.com>
>
> This patch implements the watermark algorithm and its necessary
> functions. Two function pointers skl_update_wm and
> skl_update_sprite_wm are provided. The skl_update_wm will update
> the watermarks for the crtc provided as an argument and then
> checks for change in DDB allocation for other active pipes and
> recomputes the watermarks for those Pipes and planes as well.
> Finally it does the register programming for all dirty pipes.
> The trigger of the Watermark double buffer registers will have
> to be once the plane configurations are done by the caller.
>
> v2: fixed the divide-by-0 error in the results computation func.
> Also reworked the PLANE_WM register values computation func to
> make it more compact. Incorporated all other review comments
> from Damien.
>
> v3: Changed the skl_compute_plane_wm function to now return success
> or failure. Also the result blocks and lines are computed here
> instead of in skl_compute_wm_results function.
>
> v4: Adjust skl_ddb_alloc_changed() to the new planes/cursor split
> (Damien)
>
> v5: Reworked the affected functions to implement new plane/cursor
> split.
>
> v6: Rework the logic that triggers the DDB allocation and WM computation
> of skl_update_other_pipe_wm() to not depend on non-computed DDB
> values.
> Always give a valid cursor_width (at boot it's 0) to keep the
> invariant that we consider the cursor plane always enabled.
> Otherwise we end up dividing by 0 in skl_compute_plane_wm()
> (Damien Lespiau)
>
> v7: Spell out allocation
> skl_ddb_ functions should have the ddb as first argument
> Make the skl_ddb_alloc_changed() parameters const
> (Damien)
>
> v8: Rebase on top of the crtc->primary changes
>
> v9: Split the staging results structure to not exceed the 1Kb stack
> allocation in skl_update_wm()
>
> v10: Make skl_pipe_pixel_rate() take a pointer to the pipe config
> Add a comment about overflow considerations for skl_wm_method1()
> Various additions of const
> Various use of sizeof(variable) instead of sizeof(type)
> Various move of variable definitions to a narrower scope
> Zero initialize some stack allocated structures to make sure we
> don't have garbage in case we don't write all the values
> (Ville)
>
> v11: Remove non-necessary default number of blocks/lines when the plane
> is disabled (Ville)
>
> Signed-off-by: Pradeep Bhat <pradeep.bhat at intel.com>
> Signed-off-by: Damien Lespiau <damien.lespiau at intel.com>
Looks OK. The fixup to move the validity check still seems to need a
bit of tweaking but otherwise I think it's sane enough.
I seem to recall that I went through all of the little details the
first time around, so I decided to not do that this time. Hopefully
my memory is correct ;)
Reviewed-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 12 +-
> drivers/gpu/drm/i915/intel_pm.c | 422 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 433 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0301a91..cde5136 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1722,8 +1722,18 @@ struct drm_i915_private {
> */
> uint16_t skl_latency[8];
>
> + /*
> + * The skl_wm_values structure is a bit too big for stack
> + * allocation, so we keep the staging struct where we store
> + * intermediate results here instead.
> + */
> + struct skl_wm_values skl_results;
> +
> /* current hardware state */
> - struct ilk_wm_values hw;
> + union {
> + struct ilk_wm_values hw;
> + struct skl_wm_values skl_hw;
> + };
> } wm;
>
> struct i915_runtime_pm pm;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 768890e..e69a833 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2957,6 +2957,426 @@ static bool ilk_disable_lp_wm(struct drm_device *dev)
> return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
> }
>
> +static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_config *config)
> +{
> + /* TODO: Take into account the scalers once we support them */
> + return config->adjusted_mode.crtc_clock;
> +}
> +
> +/*
> + * The max latency should be 257 (max the punit can code is 255 and we add 2us
> + * for the read latency) and bytes_per_pixel should always be <= 8, so that
> + * should allow pixel_rate up to ~2 GHz which seems sufficient since max
> + * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
> +*/
> +static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
> + uint32_t latency)
> +{
> + uint32_t wm_intermediate_val, ret;
> +
> + if (latency == 0)
> + return UINT_MAX;
> +
> + wm_intermediate_val = latency * pixel_rate * bytes_per_pixel;
> + ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
> +
> + return ret;
> +}
> +
> +static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
> + uint32_t horiz_pixels, uint8_t bytes_per_pixel,
> + uint32_t latency)
> +{
> + uint32_t ret, plane_bytes_per_line, wm_intermediate_val;
> +
> + if (latency == 0)
> + return UINT_MAX;
> +
> + plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
> + wm_intermediate_val = latency * pixel_rate;
> + ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
> + plane_bytes_per_line;
> +
> + return ret;
> +}
> +
> +static void skl_compute_transition_wm(struct drm_crtc *crtc,
> + struct skl_pipe_wm_parameters *params,
> + struct skl_pipe_wm *pipe_wm)
> +{
> + /*
> + * For now it is suggested to use the LP0 wm val of corresponding
> + * plane as transition wm val. This is done while computing results.
> + */
> + if (!params->active)
> + return;
> +}
> +
> +static uint32_t
> +skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
> +{
> + if (!intel_crtc_active(crtc))
> + return 0;
> +
> + return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
> +
> +}
> +
> +static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
> + const struct intel_crtc *intel_crtc)
> +{
> + struct drm_device *dev = intel_crtc->base.dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
> + enum pipe pipe = intel_crtc->pipe;
> +
> + if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
> + sizeof(new_ddb->plane[pipe])))
> + return true;
> +
> + if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe],
> + sizeof(new_ddb->cursor[pipe])))
> + return true;
> +
> + return false;
> +}
> +
> +static void skl_compute_wm_global_parameters(struct drm_device *dev,
> + struct intel_wm_config *config)
> +{
> + struct drm_crtc *crtc;
> + struct drm_plane *plane;
> +
> + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
> + config->num_pipes_active += intel_crtc_active(crtc);
> +
> + /* FIXME: I don't think we need those two global parameters on SKL */
> + list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> + struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> + config->sprites_enabled |= intel_plane->wm.enabled;
> + config->sprites_scaled |= intel_plane->wm.scaled;
> + }
> +}
> +
> +static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
> + struct skl_pipe_wm_parameters *p)
> +{
> + struct drm_device *dev = crtc->dev;
> + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> + enum pipe pipe = intel_crtc->pipe;
> + struct drm_plane *plane;
> + int i = 1; /* Index for sprite planes start */
> +
> + p->active = intel_crtc_active(crtc);
> + if (p->active) {
> + p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal;
> + p->pixel_rate = skl_pipe_pixel_rate(&intel_crtc->config);
> +
> + /*
> + * For now, assume primary and cursor planes are always enabled.
> + */
> + p->plane[0].enabled = true;
> + p->plane[0].bytes_per_pixel =
> + crtc->primary->fb->bits_per_pixel / 8;
> + p->plane[0].horiz_pixels = intel_crtc->config.pipe_src_w;
> + p->plane[0].vert_pixels = intel_crtc->config.pipe_src_h;
> +
> + p->cursor.enabled = true;
> + p->cursor.bytes_per_pixel = 4;
> + p->cursor.horiz_pixels = intel_crtc->cursor_width ?
> + intel_crtc->cursor_width : 64;
> + }
> +
> + list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> + struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> + if (intel_plane->pipe == pipe)
> + p->plane[i++] = intel_plane->wm;
> + }
> +}
> +
> +static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p,
> + struct intel_plane_wm_parameters *p_params,
> + uint16_t max_page_buff_alloc,
> + uint32_t mem_value,
> + uint16_t *res_blocks, /* out */
> + uint8_t *res_lines /* out */)
> +{
> + uint32_t method1, method2, plane_bytes_per_line;
> + uint32_t result_bytes;
> +
> + if (!p->active || !p_params->enabled)
> + return false;
> +
> + method1 = skl_wm_method1(p->pixel_rate,
> + p_params->bytes_per_pixel,
> + mem_value);
> + method2 = skl_wm_method2(p->pixel_rate,
> + p->pipe_htotal,
> + p_params->horiz_pixels,
> + p_params->bytes_per_pixel,
> + mem_value);
> +
> + plane_bytes_per_line = p_params->horiz_pixels *
> + p_params->bytes_per_pixel;
> +
> + /* For now xtile and linear */
> + if (((max_page_buff_alloc * 512) / plane_bytes_per_line) >= 1)
> + result_bytes = min(method1, method2);
> + else
> + result_bytes = method1;
> +
> + *res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1;
> + *res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line);
> +
> + return true;
> +}
> +
> +static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
> + struct skl_ddb_allocation *ddb,
> + struct skl_pipe_wm_parameters *p,
> + enum pipe pipe,
> + int level,
> + int num_planes,
> + struct skl_wm_level *result)
> +{
> + uint16_t latency = dev_priv->wm.skl_latency[level];
> + uint16_t ddb_blocks;
> + int i;
> +
> + for (i = 0; i < num_planes; i++) {
> + ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
> +
> + result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i],
> + ddb_blocks,
> + latency,
> + &result->plane_res_b[i],
> + &result->plane_res_l[i]);
> + }
> +
> + ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]);
> + result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks,
> + latency, &result->cursor_res_b,
> + &result->cursor_res_l);
> +}
> +
> +static void skl_compute_pipe_wm(struct drm_crtc *crtc,
> + struct skl_ddb_allocation *ddb,
> + struct skl_pipe_wm_parameters *params,
> + struct skl_pipe_wm *pipe_wm)
> +{
> + struct drm_device *dev = crtc->dev;
> + const struct drm_i915_private *dev_priv = dev->dev_private;
> + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> + int level, max_level = ilk_wm_max_level(dev);
> +
> + for (level = 0; level <= max_level; level++) {
> + skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
> + level, intel_num_planes(intel_crtc),
> + &pipe_wm->wm[level]);
> + }
> + pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
> +
> + skl_compute_transition_wm(crtc, params, pipe_wm);
> +}
> +
> +static void skl_compute_wm_results(struct drm_device *dev,
> + struct skl_pipe_wm_parameters *p,
> + struct skl_pipe_wm *p_wm,
> + struct skl_wm_values *r,
> + struct intel_crtc *intel_crtc)
> +{
> + int level, max_level = ilk_wm_max_level(dev);
> + enum pipe pipe = intel_crtc->pipe;
> +
> + for (level = 0; level <= max_level; level++) {
> + uint16_t ddb_blocks;
> + uint32_t temp;
> + int i;
> +
> + for (i = 0; i < intel_num_planes(intel_crtc); i++) {
> + temp = 0;
> + ddb_blocks = skl_ddb_entry_size(&r->ddb.plane[pipe][i]);
> +
> + if ((p_wm->wm[level].plane_res_b[i] > ddb_blocks) ||
> + (p_wm->wm[level].plane_res_l[i] > 31))
> + p_wm->wm[level].plane_en[i] = false;
> +
> + temp |= p_wm->wm[level].plane_res_l[i] <<
> + PLANE_WM_LINES_SHIFT;
> + temp |= p_wm->wm[level].plane_res_b[i];
> + if (p_wm->wm[level].plane_en[i])
> + temp |= PLANE_WM_EN;
> +
> + r->plane[pipe][i][level] = temp;
> + /* Use the LP0 WM value for transition WM for now. */
> + if (level == 0)
> + r->plane_trans[pipe][i] = temp;
> + }
> +
> + temp = 0;
> + ddb_blocks = skl_ddb_entry_size(&r->ddb.cursor[pipe]);
> +
> + if ((p_wm->wm[level].cursor_res_b > ddb_blocks) ||
> + (p_wm->wm[level].cursor_res_l > 31))
> + p_wm->wm[level].cursor_en = false;
> +
> + temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT;
> + temp |= p_wm->wm[level].cursor_res_b;
> +
> + if (p_wm->wm[level].cursor_en)
> + temp |= PLANE_WM_EN;
> +
> + r->cursor[pipe][level] = temp;
> + /* Use the LP0 WM value for transition WM for now. */
> + if (level == 0)
> + r->cursor_trans[pipe] = temp;
> +
> + }
> +
> + r->wm_linetime[pipe] = p_wm->linetime;
> +}
> +
> +static void skl_write_wm_values(struct drm_i915_private *dev_priv,
> + const struct skl_wm_values *new)
> +{
> + struct drm_device *dev = dev_priv->dev;
> + struct intel_crtc *crtc;
> +
> + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
> + int i, level, max_level = ilk_wm_max_level(dev);
> + enum pipe pipe = crtc->pipe;
> +
> + if (new->dirty[pipe]) {
> + I915_WRITE(PIPE_WM_LINETIME(pipe),
> + new->wm_linetime[pipe]);
> +
> + for (level = 0; level <= max_level; level++) {
> + for (i = 0; i < intel_num_planes(crtc); i++)
> + I915_WRITE(PLANE_WM(pipe, i, level),
> + new->plane[pipe][i][level]);
> + I915_WRITE(CUR_WM(pipe, level),
> + new->cursor[pipe][level]);
> + }
> + for (i = 0; i < intel_num_planes(crtc); i++)
> + I915_WRITE(PLANE_WM_TRANS(pipe, i),
> + new->plane_trans[pipe][i]);
> + I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
> + }
> + }
> +
> + dev_priv->wm.skl_hw = *new;
> +}
> +
> +static bool skl_update_pipe_wm(struct drm_crtc *crtc,
> + struct skl_pipe_wm_parameters *params,
> + struct intel_wm_config *config,
> + struct skl_ddb_allocation *ddb, /* out */
> + struct skl_pipe_wm *pipe_wm /* out */)
> +{
> + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +
> + skl_compute_wm_pipe_parameters(crtc, params);
> + skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
> +
> + if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
> + return false;
> +
> + intel_crtc->wm.skl_active = *pipe_wm;
> + return true;
> +}
> +
> +static void skl_update_other_pipe_wm(struct drm_device *dev,
> + struct drm_crtc *crtc,
> + struct intel_wm_config *config,
> + struct skl_wm_values *r)
> +{
> + struct intel_crtc *intel_crtc;
> + struct intel_crtc *this_crtc = to_intel_crtc(crtc);
> +
> + /*
> + * If the WM update hasn't changed the allocation for this_crtc (the
> + * crtc we are currently computing the new WM values for), other
> + * enabled crtcs will keep the same allocation and we don't need to
> + * recompute anything for them.
> + */
> + if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
> + return;
> +
> + /*
> + * Otherwise, because of this_crtc being freshly enabled/disabled, the
> + * other active pipes need new DDB allocation and WM values.
> + */
> + list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
> + base.head) {
> + struct skl_pipe_wm_parameters params = {};
> + struct skl_pipe_wm pipe_wm = {};
> + bool wm_changed;
> +
> + if (this_crtc->pipe == intel_crtc->pipe)
> + continue;
> +
> + if (!intel_crtc->active)
> + continue;
> +
> + wm_changed = skl_update_pipe_wm(&intel_crtc->base,
> + &params, config,
> + &r->ddb, &pipe_wm);
> +
> + /*
> + * If we end up re-computing the other pipe WM values, it's
> + * because it was really needed, so we expect the WM values to
> + * be different.
> + */
> + WARN_ON(!wm_changed);
> +
> + skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
> + r->dirty[intel_crtc->pipe] = true;
> + }
> +}
> +
> +static void skl_update_wm(struct drm_crtc *crtc)
> +{
> + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> + struct drm_device *dev = crtc->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct skl_pipe_wm_parameters params = {};
> + struct skl_wm_values *results = &dev_priv->wm.skl_results;
> + struct skl_pipe_wm pipe_wm = {};
> + struct intel_wm_config config = {};
> +
> + memset(results, 0, sizeof(*results));
> +
> + skl_compute_wm_global_parameters(dev, &config);
> +
> + if (!skl_update_pipe_wm(crtc, &params, &config,
> + &results->ddb, &pipe_wm))
> + return;
> +
> + skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
> + results->dirty[intel_crtc->pipe] = true;
> +
> + skl_update_other_pipe_wm(dev, crtc, &config, results);
> + skl_write_wm_values(dev_priv, results);
> +}
> +
> +static void
> +skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
> + uint32_t sprite_width, uint32_t sprite_height,
> + int pixel_size, bool enabled, bool scaled)
> +{
> + struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> + intel_plane->wm.enabled = enabled;
> + intel_plane->wm.scaled = scaled;
> + intel_plane->wm.horiz_pixels = sprite_width;
> + intel_plane->wm.vert_pixels = sprite_height;
> + intel_plane->wm.bytes_per_pixel = pixel_size;
> +
> + skl_update_wm(crtc);
> +}
> +
> static void ilk_update_wm(struct drm_crtc *crtc)
> {
> struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> @@ -7486,6 +7906,8 @@ void intel_init_pm(struct drm_device *dev)
> skl_setup_wm_latency(dev);
>
> dev_priv->display.init_clock_gating = gen9_init_clock_gating;
> + dev_priv->display.update_wm = skl_update_wm;
> + dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
> } else if (HAS_PCH_SPLIT(dev)) {
> ilk_setup_wm_latency(dev);
>
> --
> 1.8.3.1
--
Ville Syrjälä
Intel OTC
More information about the Intel-gfx
mailing list