[Intel-gfx] [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW

Ville Syrjälä ville.syrjala at linux.intel.com
Wed May 22 09:25:19 CEST 2013


On Tue, May 21, 2013 at 06:24:38PM -0300, Paulo Zanoni wrote:
> Hi
> 
> 2013/5/20 Ville Syrjälä <ville.syrjala at linux.intel.com>:
> > On Thu, May 09, 2013 at 05:13:41PM -0300, Paulo Zanoni wrote:
> >> From: Paulo Zanoni <paulo.r.zanoni at intel.com>
> >>
> >> We were previously calling sandybridge_update_wm on HSW, but the SNB
> >> function didn't really match the HSW specification, so we were just
> >> writing the wrong values. For example, I was not seeing any LP
> >> watermark ever enabled.
> >>
> >> This patch implements the haswell_update_wm function as described in
> >> our specification. The values match the "Haswell Watermark Calculator"
> >> too.
> >>
> >> With this patch I can finally see us reaching PC7 state with screen
> >> sizes <= 1920x1080.
> >>
> >> The only thing I see we're missing is to properly account for the case
> >> where the primary plane is disabled. We still don't do this and I
> >> believe we'll need some small reworks on intel_sprite.c if we want to
> >> do this correctly, so let's leave this feature for a future patch.
> >>
> >> v2: - Refactor code in the hope that it will be more useful for
> >>       Ville's rework
> >>     - Immpletement the 2 pieces missing on v1: sprite watermarks and
> >>       support for 5/6 data buffer partitioning.
> >>
> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
> >> ---
> >>  drivers/gpu/drm/i915/i915_reg.h     |    7 +
> >>  drivers/gpu/drm/i915/intel_drv.h    |   12 +
> >>  drivers/gpu/drm/i915/intel_pm.c     |  522 +++++++++++++++++++++++++++++++++--
> >>  drivers/gpu/drm/i915/intel_sprite.c |    6 +-
> >>  4 files changed, 527 insertions(+), 20 deletions(-)
> >>
> >> Hi
> >>
> >> I had some discussions with Ville based on his plans to rework the watermarks
> >> code, so I decided to do a little refactoring on the previous patch in order to
> >> make it easier for him. Now we have a clear split between the 3 steps: (i)
> >> gather the WM parameters, (ii) calculate the WM values and (iii) write the
> >> values to the registers. My idea is that he'll be able to take parts of my
> >> Haswell-specific code and make them become usable by the other hardware
> >> generations. He'll probably have to rename some of the hsw_ structs and move
> >> them around, but I hope my code can be used as a starting point for his rework.
> >>
> >> In addition to the refactoring I also added support for sprite watermarks and
> >> the 5/6 data buffer partitioning mode, so we should be "feature complete".
> >>
> >> I checked the values set by the Kernel and they do match the watermarks
> >> calculator.
> >>
> >> Thanks,
> >> Paulo
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> >> index 92dcbe3..33b5de3 100644
> >> --- a/drivers/gpu/drm/i915/i915_reg.h
> >> +++ b/drivers/gpu/drm/i915/i915_reg.h
> >> @@ -3055,6 +3055,10 @@
> >>  #define WM3S_LP_IVB          0x45128
> >>  #define  WM1S_LP_EN          (1<<31)
> >>
> >> +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
> >> +     (WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
> >> +      ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
> >> +
> >>  /* Memory latency timer register */
> >>  #define MLTR_ILK             0x11222
> >>  #define  MLTR_WM1_SHIFT              0
> >> @@ -4938,6 +4942,9 @@
> >>  #define  SFUSE_STRAP_DDIC_DETECTED   (1<<1)
> >>  #define  SFUSE_STRAP_DDID_DETECTED   (1<<0)
> >>
> >> +#define WM_MISC                              0x45260
> >> +#define  WM_MISC_DATA_PARTITION_5_6  (1 << 0)
> >> +
> >>  #define WM_DBG                               0x45280
> >>  #define  WM_DBG_DISALLOW_MULTIPLE_LP (1<<0)
> >>  #define  WM_DBG_DISALLOW_MAXFIFO     (1<<1)
> >> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> >> index 80b417a..b8376e1 100644
> >> --- a/drivers/gpu/drm/i915/intel_drv.h
> >> +++ b/drivers/gpu/drm/i915/intel_drv.h
> >> @@ -319,6 +319,18 @@ struct intel_plane {
> >>       unsigned int crtc_w, crtc_h;
> >>       uint32_t src_x, src_y;
> >>       uint32_t src_w, src_h;
> >> +
> >> +     /* Since we need to change the watermarks before/after
> >> +      * enabling/disabling the planes, we need to store the parameters here
> >> +      * as the other pieces of the struct may not reflect the values we want
> >> +      * for the watermark calculations. Currently only Haswell uses this.
> >> +      */
> >> +     struct plane_watermark_parameters {
> >> +             bool enable;
> >> +             int horiz_pixels;
> >> +             int bytes_per_pixel;
> >
> > These could be unsigned.
> 
> The udpate_sprite_wm defines pixel_size as int and sprite_width as
> uint32_t, so it would make sense to change horiz_pixels to uint32_t.
> But the other "horiz_pixels" values used by this code come from struct
> drm_display_mode, which uses "int" for everything. No matter what we
> do, we'll have a mess between int and uint32_t, and "int" seems to be
> used much more than unsigned, so I chose it... But if you still think
> we should go unsigned, we can certainly do that...

I usually prefer unsigned because it serves as a clear hint that we
don't have negative values ever, and the compiler can optimize away
power of two divisiions, and we get an extra bit.

> 
> >
> >> +     } wm;
> >> +
> >>       void (*update_plane)(struct drm_plane *plane,
> >>                            struct drm_framebuffer *fb,
> >>                            struct drm_i915_gem_object *obj,
> >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> >> index 478518d..afc4705 100644
> >> --- a/drivers/gpu/drm/i915/intel_pm.c
> >> +++ b/drivers/gpu/drm/i915/intel_pm.c
> >> @@ -2068,20 +2068,236 @@ static void ivybridge_update_wm(struct drm_device *dev)
> >>                  cursor_wm);
> >>  }
> >>
> >> -static void
> >> -haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
> >> +static int hsw_wm_get_pixel_rate(struct drm_device *dev,
> >> +                              struct drm_crtc *crtc)
> >> +{
> >> +     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >> +     int pixel_rate, pfit_size;
> >> +
> >> +     if (intel_crtc->config.pixel_target_clock)
> >> +             pixel_rate = intel_crtc->config.pixel_target_clock;
> >> +     else
> >> +             pixel_rate = intel_crtc->config.adjusted_mode.clock;
> >> +
> >> +     /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
> >> +      * adjust the pixel_rate here. */
> >> +
> >> +     pfit_size = intel_crtc->config.pch_pfit.size;
> >> +     if (pfit_size) {
> >> +             int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
> >> +
> >> +             x = (pfit_size >> 16) & 0xFFFF;
> >> +             y = pfit_size & 0xFFFF;
> >> +             crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
> >> +             crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
> >> +
> >> +             hscale = crtc_x * 1000 / x;
> >> +             vscale = crtc_y * 1000 / y;
> >
> > That should be 'requested_mode / pfit_size'
> 
> Why? What's wrong with the current code?

pfit_size is the panel fitter output size, requested_mode is the input
size (pipesrc).

> 
> 
> >
> >> +             hscale = (hscale < 1000) ? 1000 : hscale;
> >> +             vscale = (vscale < 1000) ? 1000 : vscale;
> >> +             totscale = hscale * vscale / 1000;
> >> +             pixel_rate = pixel_rate * totscale / 1000;
> >
> > This whole things seems to lose quite of bit of precision. There are a few
> > more bits available if we want to keep it in 32bit land, but maybe just
> > doing the obvious 64bit thing would be better.
> 
> I try to avoid 64 bit division because it fails to compile on 32 bit
> machines.

Obviously you need to use the correct macro for the div.

> Also, we write all the values to 32-bit registers. And the
> precision used here is even higher than the one used on the examples
> in BSpec. But I can increase it, no problem.

I'm generally too lazy to think through how much precision would be
acceptable to avoid getting the wrong answer in the end. Hence 64bit
maths could be the easy choice.

In any case I'd go with unsigned and use shifts/power of two multipliers
for the fixed point math. The compiler could then optimize the
divisions away.

> 
> 
> >
> >> +     }
> >> +
> >> +     return pixel_rate;
> >> +}
> >> +
> >> +static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
> >> +{
> >> +     int ret;
> >> +
> >> +     ret = pixel_rate * bytes_per_pixel * latency;
> >
> > This isn't quite overflow safe. latency max is "0x1ff*5" -> 12 bits,
> > bytes_per_pixel could be as high as 8 (when/if we enable 64bit formats)
> > -> another 4 bits, which leaves only 16 bits for pixel_rate (assuming we
> > make this all unsigned, which we should).
> 
> Will fix.
> 
> 
> >
> >> +     ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
> >> +     return ret;
> >> +}
> >> +
> >> +static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
> >> +                       int bytes_per_pixel, int latency)
> >> +{
> >> +     int ret;
> >> +
> >> +     ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
> >> +     ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
> >> +     ret = DIV_ROUND_UP(ret, 64) + 2;
> >
> > Maybe we should just go for 64bit math for these guys as well?
> 
> I prefer to keep it as 32bit since there are divisions and also all
> these values should be written in 32bit registers.
> 
> 
> >
> >> +     return ret;
> >> +}
> >> +
> >> +static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
> >> +{
> >> +     return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
> >> +}
> >> +
> >> +struct hsw_pipe_wm_parameters {
> >> +     bool active;
> >> +     bool sprite_enabled;
> >> +     int pipe_htotal;
> >> +     int pixel_rate;
> >> +     int pri_bytes_per_pixel;
> >> +     int spr_bytes_per_pixel;
> >> +     int cur_bytes_per_pixel;
> >> +     int pri_horiz_pixels;
> >> +     int spr_horiz_pixels;
> >> +     int cur_horiz_pixels;
> >> +};
> >> +
> >> +struct hsw_wm_maximums {
> >> +     int pri;
> >> +     int spr;
> >> +     int cur;
> >> +     int fbc;
> >> +};
> >> +
> >> +struct hsw_lp_wm_result {
> >> +     bool enable;
> >> +     bool fbc_enable;
> >> +     int pri_val;
> >> +     int spr_val;
> >> +     int cur_val;
> >> +     int fbc_val;
> >> +};
> >
> > All this stuff can be unsigned too.
> 
> But these values are always numbers between 0 and 768 on Haswell, so
> we're very safe using int.

Safe, but not self-documenting. I was also thinking we might consider
saving some bytes and going w/ uint16_t or something.

> 
> 
> >
> >> +
> >> +struct hsw_wm_values {
> >> +     uint32_t wm_pipe[3];
> >> +     uint32_t wm_lp[3];
> >> +     uint32_t wm_lp_spr[3];
> >> +     uint32_t wm_linetime[3];
> >> +     bool enable_fbc_wm;
> >> +};
> >> +
> >> +static void hsw_compute_lp_wm(int mem_value, struct hsw_wm_maximums *max,
> >> +                           struct hsw_pipe_wm_parameters *params,
> >> +                           struct hsw_lp_wm_result *result)
> >> +{
> >> +     enum pipe pipe;
> >> +     int pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
> >> +     int method1, method2;
> >> +
> >> +     for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
> >> +             struct hsw_pipe_wm_parameters *p = &params[pipe];
> >> +
> >> +             if (p->active) {
> >> +                     /* TODO: for now, assume the primary plane is always
> >> +                      * enabled. */
> >> +                     method1 = hsw_wm_method1(p->pixel_rate,
> >> +                                              p->pri_bytes_per_pixel,
> >> +                                              mem_value);
> >> +                     method2 = hsw_wm_method2(p->pixel_rate,
> >> +                                              p->pipe_htotal,
> >> +                                              p->pri_horiz_pixels,
> >> +                                              p->pri_bytes_per_pixel,
> >> +                                              mem_value);
> >> +                     pri_val[pipe] = min(method1, method2);
> >> +
> >> +                     if (p->sprite_enabled) {
> >> +                             method1 = hsw_wm_method1(p->pixel_rate,
> >> +                                                      p->spr_bytes_per_pixel,
> >> +                                                      mem_value);
> >> +                             method2 = hsw_wm_method2(p->pixel_rate,
> >> +                                                      p->pipe_htotal,
> >> +                                                      p->spr_horiz_pixels,
> >> +                                                      p->spr_bytes_per_pixel,
> >> +                                                      mem_value);
> >> +                             spr_val[pipe] = min(method1, method2);
> >> +                     } else {
> >> +                             spr_val[pipe] = 0;
> >> +                     }
> >> +
> >> +                     cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
> >> +                                                    p->pipe_htotal,
> >> +                                                    p->cur_horiz_pixels,
> >> +                                                    p->cur_bytes_per_pixel,
> >> +                                                    mem_value);
> >> +                     fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
> >> +                                                p->pri_horiz_pixels,
> >> +                                                p->pri_bytes_per_pixel);
> >> +             } else {
> >> +                     pri_val[pipe] = 0;
> >> +                     spr_val[pipe] = 0;
> >> +                     cur_val[pipe] = 0;
> >> +                     fbc_val[pipe] = 0;
> >> +             }
> >> +     }
> >> +
> >> +     result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
> >> +     result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
> >> +     result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
> >> +     result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
> >> +
> >> +     if (result->fbc_val > max->fbc) {
> >> +             result->fbc_enable = false;
> >> +             result->fbc_val = 0;
> >> +     } else {
> >> +             result->fbc_enable = true;
> >> +     }
> >> +
> >> +     result->enable = result->pri_val <= max->pri &&
> >> +                      result->spr_val <= max->spr &&
> >> +                      result->cur_val <= max->cur;
> >> +}
> >> +
> >> +static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
> >> +                                 int mem_value, enum pipe pipe,
> >> +                                 struct hsw_pipe_wm_parameters *params)
> >> +{
> >> +     int pri_val, cur_val, spr_val, method1, method2;
> >> +
> >> +     if (params->active) {
> >> +             /* TODO: for now, assume the primary plane is always enabled. */
> >> +             pri_val = hsw_wm_method1(params->pixel_rate,
> >> +                                      params->pri_bytes_per_pixel,
> >> +                                      mem_value);
> >> +
> >> +             if (params->sprite_enabled) {
> >> +                     method1 = hsw_wm_method1(params->pixel_rate,
> >> +                                              params->spr_bytes_per_pixel,
> >> +                                              mem_value);
> >> +                     method2 = hsw_wm_method2(params->pixel_rate,
> >> +                                              params->pipe_htotal,
> >> +                                              params->spr_horiz_pixels,
> >> +                                              params->spr_bytes_per_pixel,
> >> +                                              mem_value);
> >> +                     spr_val = min(method1, method2);
> >> +             } else {
> >> +                     spr_val = 0;
> >> +             }
> >> +
> >> +             cur_val = hsw_wm_method2(params->pixel_rate,
> >> +                                      params->pipe_htotal,
> >> +                                      params->cur_horiz_pixels,
> >> +                                      params->cur_bytes_per_pixel,
> >> +                                      mem_value);
> >> +     } else {
> >> +             pri_val = 0;
> >> +             spr_val = 0;
> >> +             cur_val = 0;
> >> +     }
> >> +
> >> +     WARN(pri_val > 127,
> >> +          "Primary WM error, mode not supported for pipe %c\n",
> >> +          pipe_name(pipe));
> >> +     WARN(spr_val > 127,
> >> +          "Sprite WM error, mode not supported for pipe %c\n",
> >> +          pipe_name(pipe));
> >> +     WARN(cur_val > 63,
> >> +          "Cursor WM error, mode not supported for pipe %c\n",
> >> +          pipe_name(pipe));
> >> +
> >> +     return (pri_val << WM0_PIPE_PLANE_SHIFT) |
> >> +            (spr_val << WM0_PIPE_SPRITE_SHIFT) |
> >> +            cur_val;
> >> +}
> >> +
> >> +static uint32_t
> >> +hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
> >>  {
> >>       struct drm_i915_private *dev_priv = dev->dev_private;
> >>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >> -     enum pipe pipe = intel_crtc->pipe;
> >>       struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
> >>       int target_clock;
> >>       u32 linetime, ips_linetime;
> >>
> >> -     if (!intel_crtc_active(crtc)) {
> >> -             I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
> >> -             return;
> >> -     }
> >> +     if (!intel_crtc_active(crtc))
> >> +             return 0;
> >>
> >>       if (intel_crtc->config.pixel_target_clock)
> >>               target_clock = intel_crtc->config.pixel_target_clock;
> >> @@ -2095,29 +2311,296 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
> >>       ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
> >>                                        intel_ddi_get_cdclk_freq(dev_priv));
> >>
> >> -     I915_WRITE(PIPE_WM_LINETIME(pipe),
> >> -                PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
> >> -                PIPE_WM_LINETIME_TIME(linetime));
> >> +     return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
> >> +            PIPE_WM_LINETIME_TIME(linetime);
> >>  }
> >>
> >> -static void haswell_update_wm(struct drm_device *dev)
> >> +static void hsw_compute_wm_parameters(struct drm_device *dev,
> >> +                                   struct hsw_pipe_wm_parameters *params,
> >> +                                   uint32_t *wm,
> >> +                                   struct hsw_wm_maximums *lp_max_1_2,
> >> +                                   struct hsw_wm_maximums *lp_max_5_6)
> >>  {
> >>       struct drm_i915_private *dev_priv = dev->dev_private;
> >>       struct drm_crtc *crtc;
> >> +     struct drm_plane *plane;
> >> +     uint64_t sskpd = I915_READ64(MCH_SSKPD);
> >>       enum pipe pipe;
> >> +     int pipes_active = 0, sprites_enabled = 0;
> >>
> >> -     /* Disable the LP WMs before changine the linetime registers. This is
> >> -      * just a temporary code that will be replaced soon. */
> >> -     I915_WRITE(WM3_LP_ILK, 0);
> >> -     I915_WRITE(WM2_LP_ILK, 0);
> >> -     I915_WRITE(WM1_LP_ILK, 0);
> >> +     if ((sskpd >> 56) & 0xFF)
> >> +             wm[0] = (sskpd >> 56) & 0xFF;
> >> +     else
> >> +             wm[0] = sskpd & 0xF;
> >> +     wm[1] = ((sskpd >> 4) & 0xFF) * 5;
> >> +     wm[2] = ((sskpd >> 12) & 0xFF) * 5;
> >> +     wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
> >> +     wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
> >> +
> >> +     for_each_pipe(pipe) {
> >> +             struct intel_crtc *intel_crtc;
> >> +
> >> +             crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> >> +             intel_crtc = to_intel_crtc(crtc);
> >> +
> >> +             params[pipe].active = intel_crtc_active(crtc);
> >> +             if (!params[pipe].active)
> >> +                     continue;
> >> +
> >> +             pipes_active++;
> >> +
> >> +             params[pipe].pipe_htotal =
> >> +                     intel_crtc->config.adjusted_mode.htotal;
> >> +             params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
> >> +             params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
> >> +             params[pipe].cur_bytes_per_pixel = 4;
> >> +             params[pipe].pri_horiz_pixels =
> >> +                     intel_crtc->config.adjusted_mode.hdisplay;
> >> +             params[pipe].cur_horiz_pixels = 64;
> >> +     }
> >> +
> >> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> >> +             struct intel_plane *intel_plane = to_intel_plane(plane);
> >> +
> >> +             pipe = intel_plane->pipe;
> >> +             params[pipe].sprite_enabled = intel_plane->wm.enable;
> >> +             params[pipe].spr_bytes_per_pixel =
> >> +                     intel_plane->wm.bytes_per_pixel;
> >> +             params[pipe].spr_horiz_pixels = intel_plane->wm.horiz_pixels;
> >> +
> >> +             if (params[pipe].sprite_enabled)
> >> +                     sprites_enabled++;
> >> +     }
> >> +
> >> +     if (pipes_active > 1) {
> >> +             lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
> >> +             lp_max_1_2->spr = lp_max_5_6->spr = 128;
> >> +             lp_max_1_2->cur = lp_max_5_6->cur = 64;
> >> +     } else {
> >> +             lp_max_1_2->pri = sprites_enabled ? 384 : 768;
> >> +             lp_max_5_6->pri = sprites_enabled ? 128 : 768;
> >> +             lp_max_1_2->spr = 384;
> >> +             lp_max_5_6->spr = 640;
> >> +             lp_max_1_2->cur = lp_max_5_6->cur = 255;
> >> +     }
> >> +     lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
> >> +}
> >> +
> >> +static void hsw_compute_wm_results(struct drm_device *dev,
> >> +                                struct hsw_pipe_wm_parameters *params,
> >> +                                uint32_t *wm,
> >> +                                struct hsw_wm_maximums *lp_maximums,
> >> +                                struct hsw_wm_values *results)
> >> +{
> >> +     struct drm_i915_private *dev_priv = dev->dev_private;
> >> +     struct drm_crtc *crtc;
> >> +     struct hsw_lp_wm_result lp_results[4];
> >> +     enum pipe pipe;
> >> +     int i;
> >> +
> >> +     hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
> >> +     hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
> >> +     hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
> >> +     hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
> >> +
> >> +     /* The spec says it is preferred to disable FBC WMs instead of disabling
> >> +      * a WM level. */
> >> +     results->enable_fbc_wm = true;
> >> +     for (i = 0; i < 4; i++) {
> >> +             if (lp_results[i].enable && !lp_results[i].fbc_enable) {
> >> +                     results->enable_fbc_wm = false;
> >> +                     break;
> >> +             }
> >> +     }
> >> +
> >> +     if (lp_results[3].enable) {
> >> +             results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
> >> +                                               lp_results[3].pri_val,
> >> +                                               lp_results[3].cur_val);
> >> +             results->wm_lp_spr[2] = lp_results[3].spr_val;
> >> +     } else if (lp_results[2].enable) {
> >> +             results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> >> +                                               lp_results[2].pri_val,
> >> +                                               lp_results[2].cur_val);
> >> +             results->wm_lp_spr[2] = lp_results[2].spr_val;
> >> +     } else {
> >> +             results->wm_lp[2] = 0;
> >> +             results->wm_lp_spr[2] = 0;
> >> +     }
> >> +
> >> +     if (lp_results[3].enable && lp_results[2].enable) {
> >> +             results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> >> +                                               lp_results[2].pri_val,
> >> +                                               lp_results[2].cur_val);
> >> +             results->wm_lp_spr[1] = lp_results[2].spr_val;
> >> +     } else if (!lp_results[3].enable && lp_results[1].enable) {
> >> +             results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
> >> +                                               lp_results[1].pri_val,
> >> +                                               lp_results[1].cur_val);
> >> +             results->wm_lp_spr[1] = lp_results[1].spr_val;
> >> +     } else {
> >> +             results->wm_lp[1] = 0;
> >> +             results->wm_lp_spr[1] = 0;
> >> +     }
> >> +
> >> +     if (lp_results[0].enable) {
> >> +             results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
> >> +                                               lp_results[0].pri_val,
> >> +                                               lp_results[0].cur_val);
> >> +             results->wm_lp_spr[0] = lp_results[0].spr_val;
> >> +     } else {
> >> +             results->wm_lp[0] = 0;
> >> +             results->wm_lp_spr[0] = 0;
> >> +     }
> >> +
> >> +     for_each_pipe(pipe)
> >> +             results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
> >> +                                                          pipe,
> >> +                                                          &params[pipe]);
> >>
> >>       for_each_pipe(pipe) {
> >>               crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> >> -             haswell_update_linetime_wm(dev, crtc);
> >> +             results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc);
> >> +     }
> >> +}
> >> +
> >> +/* Find the result with the highest level enabled. Check for enable_fbc_wm in
> >> + * case both are at the same level. Prefer r1 in case they're the same. */
> >> +struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
> >> +                                        struct hsw_wm_values *r2)
> >> +{
> >> +     int i, val_r1 = 0, val_r2 = 0;
> >> +
> >> +     for (i = 0; i < 3; i++) {
> >> +             if (r1->wm_lp[i] & WM3_LP_EN)
> >> +                     val_r1 |= (1 << i);
> >> +             if (r2->wm_lp[i] & WM3_LP_EN)
> >> +                     val_r2 |= (1 << i);
> >> +     }
> >> +
> >> +     if (val_r1 == val_r2) {
> >> +             if (r2->enable_fbc_wm && !r1->enable_fbc_wm)
> >> +                     return r2;
> >> +             else
> >> +                     return r1;
> >> +     } else if (val_r1 > val_r2) {
> >> +             return r1;
> >> +     } else {
> >> +             return r2;
> >> +     }
> >> +}
> >> +
> >> +/*
> >> + * The spec says we shouldn't write when we don't need, because every write
> >> + * causes WMs to be re-evaluated, expending some power.
> >> + */
> >> +static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
> >> +                             struct hsw_wm_values *results)
> >> +{
> >> +     struct hsw_wm_values previous;
> >> +     uint32_t val;
> >> +
> >> +     val = I915_READ(DISP_ARB_CTL);
> >> +     if (results->enable_fbc_wm)
> >> +             val &= ~DISP_FBC_WM_DIS;
> >> +     else
> >> +             val |= DISP_FBC_WM_DIS;
> >> +     I915_WRITE(DISP_ARB_CTL, val);
> >> +
> >> +     previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
> >> +     previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
> >> +     previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB);
> >> +     previous.wm_lp[0] = I915_READ(WM1_LP_ILK);
> >> +     previous.wm_lp[1] = I915_READ(WM2_LP_ILK);
> >> +     previous.wm_lp[2] = I915_READ(WM3_LP_ILK);
> >> +     previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
> >> +     previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
> >> +     previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
> >> +     previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
> >> +     previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
> >> +     previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
> >> +
> >> +     if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
> >> +         memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
> >> +         memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
> >> +         memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0)
> >> +             return;
> >> +
> >> +     if (previous.wm_lp[2] != 0)
> >> +             I915_WRITE(WM3_LP_ILK, 0);
> >> +     if (previous.wm_lp[1] != 0)
> >> +             I915_WRITE(WM2_LP_ILK, 0);
> >> +     if (previous.wm_lp[0] != 0)
> >> +             I915_WRITE(WM1_LP_ILK, 0);
> >> +
> >> +     if (previous.wm_pipe[0] != results->wm_pipe[0])
> >> +             I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
> >> +     if (previous.wm_pipe[1] != results->wm_pipe[1])
> >> +             I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
> >> +     if (previous.wm_pipe[2] != results->wm_pipe[2])
> >> +             I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
> >> +
> >> +     if (previous.wm_linetime[0] != results->wm_linetime[0])
> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
> >> +     if (previous.wm_linetime[1] != results->wm_linetime[1])
> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
> >> +     if (previous.wm_linetime[2] != results->wm_linetime[2])
> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
> >> +
> >> +     if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
> >> +             I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
> >> +     if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
> >> +             I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
> >> +     if (previous.wm_lp_spr[2] != results->wm_lp_spr[2])
> >> +             I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
> >> +
> >> +     if (results->wm_lp[0] != 0)
> >> +             I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
> >> +     if (results->wm_lp[1] != 0)
> >> +             I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
> >> +     if (results->wm_lp[2] != 0)
> >> +             I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
> >> +}
> >> +
> >> +static void haswell_update_wm(struct drm_device *dev)
> >> +{
> >> +     struct drm_i915_private *dev_priv = dev->dev_private;
> >> +     struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
> >> +     struct hsw_pipe_wm_parameters params[3];
> >> +     struct hsw_wm_values results_1_2, results_5_6, *best_results;
> >> +     uint32_t wm[5];
> >> +
> >> +     hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
> >> +
> >> +     hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
> >> +     if (lp_max_1_2.pri != lp_max_5_6.pri) {
> >> +             hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
> >> +                                    &results_5_6);
> >> +             best_results = hsw_find_best_result(&results_1_2, &results_5_6);
> >
> > You don't seem to actually write WM_MISC with the select partititioning
> > value.
> 
> Oops, my bug.
> 
> 
> > Also I'm not at all sure when WM_MISC gets updated.
> 
> WM paritioning is only related to the LP watermarks, so I imagine it
> should be safe to change this when the LP watermarks are disabled.

We still don't know if it's double buffered or not.

> 
> 
> > The IVB and
> > older ARB_CTL2 DDB partitioning bit was double buffered on the LP pipe,
> > but on HSW it's not documented. Maybe it's just a bad idea to try and
> > change the partitioning when multiple pipes are enabled. We still need
> > to figure out if it's double buffered though...
> 
> The partitioning is only useful when there's a single pipe enabled. If
> we have multiple pipes, then the maximums will be the same as in the
> 1/2 case, so we'll choose to use 1/2 partitioning.

You mean the maximums documented in the Bspec. I'm assuming those are
just the computed values based on the total FIFO size, number of pipes,
and the split. I guess they never expected the 5/6 split to be used w/
multiple pipes, so the resulting FIFO sizes are not explicitly
documented there.

In my patch I compute those values instead of requiring such a list of
hardcoded values. So my code could give you the 5/6 value w/ multiple
pipes.

Or I could be wrong, and the hardware might ignore the 5/6 split setting
with multiple pipes, or it might fall over. Who knows. Unfortunately
such detauls are not really spelled out in the spec.

> 
> 
> >
> > BSpec also says that the 5/6 split should only be selected when FBC is
> > enabled. Not sure if that's a real HW constraint or not. I would
> > definately want to enable it when the primary plane is disabled for
> > example.
> 
> Yeah, it totally makes sense when the primary is disabled. We should
> ask for clarification regarding this.
> 
> Thanks for the review!
> 
> >
> >> +     } else {
> >> +             best_results = &results_1_2;
> >>       }
> >>
> >> -     sandybridge_update_wm(dev);
> >> +     hsw_write_wm_values(dev_priv, best_results);
> >> +}
> >> +
> >> +static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
> >> +                                  uint32_t sprite_width, int pixel_size)
> >> +{
> >> +     struct drm_plane *plane;
> >> +
> >> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> >> +             struct intel_plane *intel_plane = to_intel_plane(plane);
> >> +
> >> +             if (intel_plane->pipe == pipe) {
> >> +                     intel_plane->wm.enable = true;
> >> +                     intel_plane->wm.horiz_pixels = sprite_width + 1;
> >> +                     intel_plane->wm.bytes_per_pixel = pixel_size;
> >> +                     break;
> >> +             }
> >> +     }
> >> +
> >> +     haswell_update_wm(dev);
> >>  }
> >>
> >>  static bool
> >> @@ -4564,7 +5047,8 @@ void intel_init_pm(struct drm_device *dev)
> >>               } else if (IS_HASWELL(dev)) {
> >>                       if (I915_READ64(MCH_SSKPD)) {
> >>                               dev_priv->display.update_wm = haswell_update_wm;
> >> -                             dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
> >> +                             dev_priv->display.update_sprite_wm =
> >> +                                     haswell_update_sprite_wm;
> >>                       } else {
> >>                               DRM_DEBUG_KMS("Failed to read display plane latency. "
> >>                                             "Disable CxSR\n");
> >> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
> >> index 19b9cb9..95b39ef 100644
> >> --- a/drivers/gpu/drm/i915/intel_sprite.c
> >> +++ b/drivers/gpu/drm/i915/intel_sprite.c
> >> @@ -335,8 +335,12 @@ ivb_disable_plane(struct drm_plane *plane)
> >>
> >>       dev_priv->sprite_scaling_enabled &= ~(1 << pipe);
> >>
> >> +     if (IS_HASWELL(dev))
> >> +             intel_plane->wm.enable = false;
> >> +
> >>       /* potentially re-enable LP watermarks */
> >> -     if (scaling_was_enabled && !dev_priv->sprite_scaling_enabled)
> >> +     if ((scaling_was_enabled && !dev_priv->sprite_scaling_enabled) ||
> >> +         IS_HASWELL(dev))
> >>               intel_update_watermarks(dev);
> >>  }
> >>
> >> --
> >> 1.7.10.4
> >>
> >> _______________________________________________
> >> Intel-gfx mailing list
> >> Intel-gfx at lists.freedesktop.org
> >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
> > --
> > Ville Syrjälä
> > Intel OTC
> 
> 
> 
> -- 
> Paulo Zanoni

-- 
Ville Syrjälä
Intel OTC



More information about the Intel-gfx mailing list