[Intel-gfx] [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW

Fri May 24 17:05:15 CEST 2013

2013/5/22 Ville Syrjälä <ville.syrjala at linux.intel.com>:
> On Tue, May 21, 2013 at 06:24:38PM -0300, Paulo Zanoni wrote:
>> Hi
>>
>> 2013/5/20 Ville Syrjälä <ville.syrjala at linux.intel.com>:
>> > On Thu, May 09, 2013 at 05:13:41PM -0300, Paulo Zanoni wrote:
>> >> From: Paulo Zanoni <paulo.r.zanoni at intel.com>
>> >>
>> >> We were previously calling sandybridge_update_wm on HSW, but the SNB
>> >> function didn't really match the HSW specification, so we were just
>> >> writing the wrong values. For example, I was not seeing any LP
>> >> watermark ever enabled.
>> >>
>> >> This patch implements the haswell_update_wm function as described in
>> >> our specification. The values match the "Haswell Watermark Calculator"
>> >> too.
>> >>
>> >> With this patch I can finally see us reaching PC7 state with screen
>> >> sizes <= 1920x1080.
>> >>
>> >> The only thing I see we're missing is to properly account for the case
>> >> where the primary plane is disabled. We still don't do this and I
>> >> believe we'll need some small reworks on intel_sprite.c if we want to
>> >> do this correctly, so let's leave this feature for a future patch.
>> >>
>> >> v2: - Refactor code in the hope that it will be more useful for
>> >>       Ville's rework
>> >>     - Immpletement the 2 pieces missing on v1: sprite watermarks and
>> >>       support for 5/6 data buffer partitioning.
>> >>
>> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
>> >> ---
>> >>  drivers/gpu/drm/i915/i915_reg.h     |    7 +
>> >>  drivers/gpu/drm/i915/intel_drv.h    |   12 +
>> >>  drivers/gpu/drm/i915/intel_pm.c     |  522 +++++++++++++++++++++++++++++++++--
>> >>  drivers/gpu/drm/i915/intel_sprite.c |    6 +-
>> >>  4 files changed, 527 insertions(+), 20 deletions(-)
>> >>
>> >> Hi
>> >>
>> >> I had some discussions with Ville based on his plans to rework the watermarks
>> >> code, so I decided to do a little refactoring on the previous patch in order to
>> >> make it easier for him. Now we have a clear split between the 3 steps: (i)
>> >> gather the WM parameters, (ii) calculate the WM values and (iii) write the
>> >> values to the registers. My idea is that he'll be able to take parts of my
>> >> Haswell-specific code and make them become usable by the other hardware
>> >> generations. He'll probably have to rename some of the hsw_ structs and move
>> >> them around, but I hope my code can be used as a starting point for his rework.
>> >>
>> >> In addition to the refactoring I also added support for sprite watermarks and
>> >> the 5/6 data buffer partitioning mode, so we should be "feature complete".
>> >>
>> >> I checked the values set by the Kernel and they do match the watermarks
>> >> calculator.
>> >>
>> >> Thanks,
>> >> Paulo
>> >>
>> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> >> index 92dcbe3..33b5de3 100644
>> >> --- a/drivers/gpu/drm/i915/i915_reg.h
>> >> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> >> @@ -3055,6 +3055,10 @@
>> >>  #define WM3S_LP_IVB          0x45128
>> >>  #define  WM1S_LP_EN          (1<<31)
>> >>
>> >> +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
>> >> +     (WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
>> >> +      ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
>> >> +
>> >>  /* Memory latency timer register */
>> >>  #define MLTR_ILK             0x11222
>> >>  #define  MLTR_WM1_SHIFT              0
>> >> @@ -4938,6 +4942,9 @@
>> >>  #define  SFUSE_STRAP_DDIC_DETECTED   (1<<1)
>> >>  #define  SFUSE_STRAP_DDID_DETECTED   (1<<0)
>> >>
>> >> +#define WM_MISC                              0x45260
>> >> +#define  WM_MISC_DATA_PARTITION_5_6  (1 << 0)
>> >> +
>> >>  #define WM_DBG                               0x45280
>> >>  #define  WM_DBG_DISALLOW_MULTIPLE_LP (1<<0)
>> >>  #define  WM_DBG_DISALLOW_MAXFIFO     (1<<1)
>> >> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> >> index 80b417a..b8376e1 100644
>> >> --- a/drivers/gpu/drm/i915/intel_drv.h
>> >> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> >> @@ -319,6 +319,18 @@ struct intel_plane {
>> >>       unsigned int crtc_w, crtc_h;
>> >>       uint32_t src_x, src_y;
>> >>       uint32_t src_w, src_h;
>> >> +
>> >> +     /* Since we need to change the watermarks before/after
>> >> +      * enabling/disabling the planes, we need to store the parameters here
>> >> +      * as the other pieces of the struct may not reflect the values we want
>> >> +      * for the watermark calculations. Currently only Haswell uses this.
>> >> +      */
>> >> +     struct plane_watermark_parameters {
>> >> +             bool enable;
>> >> +             int horiz_pixels;
>> >> +             int bytes_per_pixel;
>> >
>> > These could be unsigned.
>>
>> The udpate_sprite_wm defines pixel_size as int and sprite_width as
>> uint32_t, so it would make sense to change horiz_pixels to uint32_t.
>> But the other "horiz_pixels" values used by this code come from struct
>> drm_display_mode, which uses "int" for everything. No matter what we
>> do, we'll have a mess between int and uint32_t, and "int" seems to be
>> used much more than unsigned, so I chose it... But if you still think
>> we should go unsigned, we can certainly do that...
>
> I usually prefer unsigned because it serves as a clear hint that we
> don't have negative values ever, and the compiler can optimize away
> power of two divisiions, and we get an extra bit.
>
>>
>> >
>> >> +     } wm;
>> >> +
>> >>       void (*update_plane)(struct drm_plane *plane,
>> >>                            struct drm_framebuffer *fb,
>> >>                            struct drm_i915_gem_object *obj,
>> >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> >> index 478518d..afc4705 100644
>> >> --- a/drivers/gpu/drm/i915/intel_pm.c
>> >> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> >> @@ -2068,20 +2068,236 @@ static void ivybridge_update_wm(struct drm_device *dev)
>> >>                  cursor_wm);
>> >>  }
>> >>
>> >> -static void
>> >> -haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>> >> +static int hsw_wm_get_pixel_rate(struct drm_device *dev,
>> >> +                              struct drm_crtc *crtc)
>> >> +{
>> >> +     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> >> +     int pixel_rate, pfit_size;
>> >> +
>> >> +     if (intel_crtc->config.pixel_target_clock)
>> >> +             pixel_rate = intel_crtc->config.pixel_target_clock;
>> >> +     else
>> >> +             pixel_rate = intel_crtc->config.adjusted_mode.clock;
>> >> +
>> >> +     /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
>> >> +      * adjust the pixel_rate here. */
>> >> +
>> >> +     pfit_size = intel_crtc->config.pch_pfit.size;
>> >> +     if (pfit_size) {
>> >> +             int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
>> >> +
>> >> +             x = (pfit_size >> 16) & 0xFFFF;
>> >> +             y = pfit_size & 0xFFFF;
>> >> +             crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
>> >> +             crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
>> >> +
>> >> +             hscale = crtc_x * 1000 / x;
>> >> +             vscale = crtc_y * 1000 / y;
>> >
>> > That should be 'requested_mode / pfit_size'
>>
>> Why? What's wrong with the current code?
>
> pfit_size is the panel fitter output size, requested_mode is the input
> size (pipesrc).

If we use adjusted_mode we'll get the panel's native mode, if we use
requested_mode we'll get the mode seen by xrandr. My interpretation by
checking Haswell Watermarks Calculator and the BSpec description is
that I should use adjusted_mode. And I'm already using pfit_size, so
no need to discuss it. Besides this, I believe I have implemented all
the other suggestions.

Thanks for the review,
Paulo

>
>>
>>
>> >
>> >> +             hscale = (hscale < 1000) ? 1000 : hscale;
>> >> +             vscale = (vscale < 1000) ? 1000 : vscale;
>> >> +             totscale = hscale * vscale / 1000;
>> >> +             pixel_rate = pixel_rate * totscale / 1000;
>> >
>> > This whole things seems to lose quite of bit of precision. There are a few
>> > more bits available if we want to keep it in 32bit land, but maybe just
>> > doing the obvious 64bit thing would be better.
>>
>> I try to avoid 64 bit division because it fails to compile on 32 bit
>> machines.
>
> Obviously you need to use the correct macro for the div.
>
>> Also, we write all the values to 32-bit registers. And the
>> precision used here is even higher than the one used on the examples
>> in BSpec. But I can increase it, no problem.
>
> I'm generally too lazy to think through how much precision would be
> acceptable to avoid getting the wrong answer in the end. Hence 64bit
> maths could be the easy choice.
>
> In any case I'd go with unsigned and use shifts/power of two multipliers
> for the fixed point math. The compiler could then optimize the
> divisions away.
>
>>
>>
>> >
>> >> +     }
>> >> +
>> >> +     return pixel_rate;
>> >> +}
>> >> +
>> >> +static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
>> >> +{
>> >> +     int ret;
>> >> +
>> >> +     ret = pixel_rate * bytes_per_pixel * latency;
>> >
>> > This isn't quite overflow safe. latency max is "0x1ff*5" -> 12 bits,
>> > bytes_per_pixel could be as high as 8 (when/if we enable 64bit formats)
>> > -> another 4 bits, which leaves only 16 bits for pixel_rate (assuming we
>> > make this all unsigned, which we should).
>>
>> Will fix.
>>
>>
>> >
>> >> +     ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
>> >> +     return ret;
>> >> +}
>> >> +
>> >> +static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
>> >> +                       int bytes_per_pixel, int latency)
>> >> +{
>> >> +     int ret;
>> >> +
>> >> +     ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
>> >> +     ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
>> >> +     ret = DIV_ROUND_UP(ret, 64) + 2;
>> >
>> > Maybe we should just go for 64bit math for these guys as well?
>>
>> I prefer to keep it as 32bit since there are divisions and also all
>> these values should be written in 32bit registers.
>>
>>
>> >
>> >> +     return ret;
>> >> +}
>> >> +
>> >> +static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
>> >> +{
>> >> +     return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
>> >> +}
>> >> +
>> >> +struct hsw_pipe_wm_parameters {
>> >> +     bool active;
>> >> +     bool sprite_enabled;
>> >> +     int pipe_htotal;
>> >> +     int pixel_rate;
>> >> +     int pri_bytes_per_pixel;
>> >> +     int spr_bytes_per_pixel;
>> >> +     int cur_bytes_per_pixel;
>> >> +     int pri_horiz_pixels;
>> >> +     int spr_horiz_pixels;
>> >> +     int cur_horiz_pixels;
>> >> +};
>> >> +
>> >> +struct hsw_wm_maximums {
>> >> +     int pri;
>> >> +     int spr;
>> >> +     int cur;
>> >> +     int fbc;
>> >> +};
>> >> +
>> >> +struct hsw_lp_wm_result {
>> >> +     bool enable;
>> >> +     bool fbc_enable;
>> >> +     int pri_val;
>> >> +     int spr_val;
>> >> +     int cur_val;
>> >> +     int fbc_val;
>> >> +};
>> >
>> > All this stuff can be unsigned too.
>>
>> But these values are always numbers between 0 and 768 on Haswell, so
>> we're very safe using int.
>
> Safe, but not self-documenting. I was also thinking we might consider
> saving some bytes and going w/ uint16_t or something.
>
>>
>>
>> >
>> >> +
>> >> +struct hsw_wm_values {
>> >> +     uint32_t wm_pipe[3];
>> >> +     uint32_t wm_lp[3];
>> >> +     uint32_t wm_lp_spr[3];
>> >> +     uint32_t wm_linetime[3];
>> >> +     bool enable_fbc_wm;
>> >> +};
>> >> +
>> >> +static void hsw_compute_lp_wm(int mem_value, struct hsw_wm_maximums *max,
>> >> +                           struct hsw_pipe_wm_parameters *params,
>> >> +                           struct hsw_lp_wm_result *result)
>> >> +{
>> >> +     enum pipe pipe;
>> >> +     int pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
>> >> +     int method1, method2;
>> >> +
>> >> +     for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
>> >> +             struct hsw_pipe_wm_parameters *p = &params[pipe];
>> >> +
>> >> +             if (p->active) {
>> >> +                     /* TODO: for now, assume the primary plane is always
>> >> +                      * enabled. */
>> >> +                     method1 = hsw_wm_method1(p->pixel_rate,
>> >> +                                              p->pri_bytes_per_pixel,
>> >> +                                              mem_value);
>> >> +                     method2 = hsw_wm_method2(p->pixel_rate,
>> >> +                                              p->pipe_htotal,
>> >> +                                              p->pri_horiz_pixels,
>> >> +                                              p->pri_bytes_per_pixel,
>> >> +                                              mem_value);
>> >> +                     pri_val[pipe] = min(method1, method2);
>> >> +
>> >> +                     if (p->sprite_enabled) {
>> >> +                             method1 = hsw_wm_method1(p->pixel_rate,
>> >> +                                                      p->spr_bytes_per_pixel,
>> >> +                                                      mem_value);
>> >> +                             method2 = hsw_wm_method2(p->pixel_rate,
>> >> +                                                      p->pipe_htotal,
>> >> +                                                      p->spr_horiz_pixels,
>> >> +                                                      p->spr_bytes_per_pixel,
>> >> +                                                      mem_value);
>> >> +                             spr_val[pipe] = min(method1, method2);
>> >> +                     } else {
>> >> +                             spr_val[pipe] = 0;
>> >> +                     }
>> >> +
>> >> +                     cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
>> >> +                                                    p->pipe_htotal,
>> >> +                                                    p->cur_horiz_pixels,
>> >> +                                                    p->cur_bytes_per_pixel,
>> >> +                                                    mem_value);
>> >> +                     fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
>> >> +                                                p->pri_horiz_pixels,
>> >> +                                                p->pri_bytes_per_pixel);
>> >> +             } else {
>> >> +                     pri_val[pipe] = 0;
>> >> +                     spr_val[pipe] = 0;
>> >> +                     cur_val[pipe] = 0;
>> >> +                     fbc_val[pipe] = 0;
>> >> +             }
>> >> +     }
>> >> +
>> >> +     result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
>> >> +     result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
>> >> +     result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
>> >> +     result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
>> >> +
>> >> +     if (result->fbc_val > max->fbc) {
>> >> +             result->fbc_enable = false;
>> >> +             result->fbc_val = 0;
>> >> +     } else {
>> >> +             result->fbc_enable = true;
>> >> +     }
>> >> +
>> >> +     result->enable = result->pri_val <= max->pri &&
>> >> +                      result->spr_val <= max->spr &&
>> >> +                      result->cur_val <= max->cur;
>> >> +}
>> >> +
>> >> +static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
>> >> +                                 int mem_value, enum pipe pipe,
>> >> +                                 struct hsw_pipe_wm_parameters *params)
>> >> +{
>> >> +     int pri_val, cur_val, spr_val, method1, method2;
>> >> +
>> >> +     if (params->active) {
>> >> +             /* TODO: for now, assume the primary plane is always enabled. */
>> >> +             pri_val = hsw_wm_method1(params->pixel_rate,
>> >> +                                      params->pri_bytes_per_pixel,
>> >> +                                      mem_value);
>> >> +
>> >> +             if (params->sprite_enabled) {
>> >> +                     method1 = hsw_wm_method1(params->pixel_rate,
>> >> +                                              params->spr_bytes_per_pixel,
>> >> +                                              mem_value);
>> >> +                     method2 = hsw_wm_method2(params->pixel_rate,
>> >> +                                              params->pipe_htotal,
>> >> +                                              params->spr_horiz_pixels,
>> >> +                                              params->spr_bytes_per_pixel,
>> >> +                                              mem_value);
>> >> +                     spr_val = min(method1, method2);
>> >> +             } else {
>> >> +                     spr_val = 0;
>> >> +             }
>> >> +
>> >> +             cur_val = hsw_wm_method2(params->pixel_rate,
>> >> +                                      params->pipe_htotal,
>> >> +                                      params->cur_horiz_pixels,
>> >> +                                      params->cur_bytes_per_pixel,
>> >> +                                      mem_value);
>> >> +     } else {
>> >> +             pri_val = 0;
>> >> +             spr_val = 0;
>> >> +             cur_val = 0;
>> >> +     }
>> >> +
>> >> +     WARN(pri_val > 127,
>> >> +          "Primary WM error, mode not supported for pipe %c\n",
>> >> +          pipe_name(pipe));
>> >> +     WARN(spr_val > 127,
>> >> +          "Sprite WM error, mode not supported for pipe %c\n",
>> >> +          pipe_name(pipe));
>> >> +     WARN(cur_val > 63,
>> >> +          "Cursor WM error, mode not supported for pipe %c\n",
>> >> +          pipe_name(pipe));
>> >> +
>> >> +     return (pri_val << WM0_PIPE_PLANE_SHIFT) |
>> >> +            (spr_val << WM0_PIPE_SPRITE_SHIFT) |
>> >> +            cur_val;
>> >> +}
>> >> +
>> >> +static uint32_t
>> >> +hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>> >>  {
>> >>       struct drm_i915_private *dev_priv = dev->dev_private;
>> >>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> >> -     enum pipe pipe = intel_crtc->pipe;
>> >>       struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
>> >>       int target_clock;
>> >>       u32 linetime, ips_linetime;
>> >>
>> >> -     if (!intel_crtc_active(crtc)) {
>> >> -             I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
>> >> -             return;
>> >> -     }
>> >> +     if (!intel_crtc_active(crtc))
>> >> +             return 0;
>> >>
>> >>       if (intel_crtc->config.pixel_target_clock)
>> >>               target_clock = intel_crtc->config.pixel_target_clock;
>> >> @@ -2095,29 +2311,296 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>> >>       ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
>> >>                                        intel_ddi_get_cdclk_freq(dev_priv));
>> >>
>> >> -     I915_WRITE(PIPE_WM_LINETIME(pipe),
>> >> -                PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
>> >> -                PIPE_WM_LINETIME_TIME(linetime));
>> >> +     return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
>> >> +            PIPE_WM_LINETIME_TIME(linetime);
>> >>  }
>> >>
>> >> -static void haswell_update_wm(struct drm_device *dev)
>> >> +static void hsw_compute_wm_parameters(struct drm_device *dev,
>> >> +                                   struct hsw_pipe_wm_parameters *params,
>> >> +                                   uint32_t *wm,
>> >> +                                   struct hsw_wm_maximums *lp_max_1_2,
>> >> +                                   struct hsw_wm_maximums *lp_max_5_6)
>> >>  {
>> >>       struct drm_i915_private *dev_priv = dev->dev_private;
>> >>       struct drm_crtc *crtc;
>> >> +     struct drm_plane *plane;
>> >> +     uint64_t sskpd = I915_READ64(MCH_SSKPD);
>> >>       enum pipe pipe;
>> >> +     int pipes_active = 0, sprites_enabled = 0;
>> >>
>> >> -     /* Disable the LP WMs before changine the linetime registers. This is
>> >> -      * just a temporary code that will be replaced soon. */
>> >> -     I915_WRITE(WM3_LP_ILK, 0);
>> >> -     I915_WRITE(WM2_LP_ILK, 0);
>> >> -     I915_WRITE(WM1_LP_ILK, 0);
>> >> +     if ((sskpd >> 56) & 0xFF)
>> >> +             wm[0] = (sskpd >> 56) & 0xFF;
>> >> +     else
>> >> +             wm[0] = sskpd & 0xF;
>> >> +     wm[1] = ((sskpd >> 4) & 0xFF) * 5;
>> >> +     wm[2] = ((sskpd >> 12) & 0xFF) * 5;
>> >> +     wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
>> >> +     wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
>> >> +
>> >> +     for_each_pipe(pipe) {
>> >> +             struct intel_crtc *intel_crtc;
>> >> +
>> >> +             crtc = dev_priv->pipe_to_crtc_mapping[pipe];
>> >> +             intel_crtc = to_intel_crtc(crtc);
>> >> +
>> >> +             params[pipe].active = intel_crtc_active(crtc);
>> >> +             if (!params[pipe].active)
>> >> +                     continue;
>> >> +
>> >> +             pipes_active++;
>> >> +
>> >> +             params[pipe].pipe_htotal =
>> >> +                     intel_crtc->config.adjusted_mode.htotal;
>> >> +             params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
>> >> +             params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
>> >> +             params[pipe].cur_bytes_per_pixel = 4;
>> >> +             params[pipe].pri_horiz_pixels =
>> >> +                     intel_crtc->config.adjusted_mode.hdisplay;
>> >> +             params[pipe].cur_horiz_pixels = 64;
>> >> +     }
>> >> +
>> >> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
>> >> +             struct intel_plane *intel_plane = to_intel_plane(plane);
>> >> +
>> >> +             pipe = intel_plane->pipe;
>> >> +             params[pipe].sprite_enabled = intel_plane->wm.enable;
>> >> +             params[pipe].spr_bytes_per_pixel =
>> >> +                     intel_plane->wm.bytes_per_pixel;
>> >> +             params[pipe].spr_horiz_pixels = intel_plane->wm.horiz_pixels;
>> >> +
>> >> +             if (params[pipe].sprite_enabled)
>> >> +                     sprites_enabled++;
>> >> +     }
>> >> +
>> >> +     if (pipes_active > 1) {
>> >> +             lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
>> >> +             lp_max_1_2->spr = lp_max_5_6->spr = 128;
>> >> +             lp_max_1_2->cur = lp_max_5_6->cur = 64;
>> >> +     } else {
>> >> +             lp_max_1_2->pri = sprites_enabled ? 384 : 768;
>> >> +             lp_max_5_6->pri = sprites_enabled ? 128 : 768;
>> >> +             lp_max_1_2->spr = 384;
>> >> +             lp_max_5_6->spr = 640;
>> >> +             lp_max_1_2->cur = lp_max_5_6->cur = 255;
>> >> +     }
>> >> +     lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
>> >> +}
>> >> +
>> >> +static void hsw_compute_wm_results(struct drm_device *dev,
>> >> +                                struct hsw_pipe_wm_parameters *params,
>> >> +                                uint32_t *wm,
>> >> +                                struct hsw_wm_maximums *lp_maximums,
>> >> +                                struct hsw_wm_values *results)
>> >> +{
>> >> +     struct drm_i915_private *dev_priv = dev->dev_private;
>> >> +     struct drm_crtc *crtc;
>> >> +     struct hsw_lp_wm_result lp_results[4];
>> >> +     enum pipe pipe;
>> >> +     int i;
>> >> +
>> >> +     hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
>> >> +     hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
>> >> +     hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
>> >> +     hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
>> >> +
>> >> +     /* The spec says it is preferred to disable FBC WMs instead of disabling
>> >> +      * a WM level. */
>> >> +     results->enable_fbc_wm = true;
>> >> +     for (i = 0; i < 4; i++) {
>> >> +             if (lp_results[i].enable && !lp_results[i].fbc_enable) {
>> >> +                     results->enable_fbc_wm = false;
>> >> +                     break;
>> >> +             }
>> >> +     }
>> >> +
>> >> +     if (lp_results[3].enable) {
>> >> +             results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
>> >> +                                               lp_results[3].pri_val,
>> >> +                                               lp_results[3].cur_val);
>> >> +             results->wm_lp_spr[2] = lp_results[3].spr_val;
>> >> +     } else if (lp_results[2].enable) {
>> >> +             results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
>> >> +                                               lp_results[2].pri_val,
>> >> +                                               lp_results[2].cur_val);
>> >> +             results->wm_lp_spr[2] = lp_results[2].spr_val;
>> >> +     } else {
>> >> +             results->wm_lp[2] = 0;
>> >> +             results->wm_lp_spr[2] = 0;
>> >> +     }
>> >> +
>> >> +     if (lp_results[3].enable && lp_results[2].enable) {
>> >> +             results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
>> >> +                                               lp_results[2].pri_val,
>> >> +                                               lp_results[2].cur_val);
>> >> +             results->wm_lp_spr[1] = lp_results[2].spr_val;
>> >> +     } else if (!lp_results[3].enable && lp_results[1].enable) {
>> >> +             results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
>> >> +                                               lp_results[1].pri_val,
>> >> +                                               lp_results[1].cur_val);
>> >> +             results->wm_lp_spr[1] = lp_results[1].spr_val;
>> >> +     } else {
>> >> +             results->wm_lp[1] = 0;
>> >> +             results->wm_lp_spr[1] = 0;
>> >> +     }
>> >> +
>> >> +     if (lp_results[0].enable) {
>> >> +             results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
>> >> +                                               lp_results[0].pri_val,
>> >> +                                               lp_results[0].cur_val);
>> >> +             results->wm_lp_spr[0] = lp_results[0].spr_val;
>> >> +     } else {
>> >> +             results->wm_lp[0] = 0;
>> >> +             results->wm_lp_spr[0] = 0;
>> >> +     }
>> >> +
>> >> +     for_each_pipe(pipe)
>> >> +             results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
>> >> +                                                          pipe,
>> >> +                                                          &params[pipe]);
>> >>
>> >>       for_each_pipe(pipe) {
>> >>               crtc = dev_priv->pipe_to_crtc_mapping[pipe];
>> >> -             haswell_update_linetime_wm(dev, crtc);
>> >> +             results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc);
>> >> +     }
>> >> +}
>> >> +
>> >> +/* Find the result with the highest level enabled. Check for enable_fbc_wm in
>> >> + * case both are at the same level. Prefer r1 in case they're the same. */
>> >> +struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
>> >> +                                        struct hsw_wm_values *r2)
>> >> +{
>> >> +     int i, val_r1 = 0, val_r2 = 0;
>> >> +
>> >> +     for (i = 0; i < 3; i++) {
>> >> +             if (r1->wm_lp[i] & WM3_LP_EN)
>> >> +                     val_r1 |= (1 << i);
>> >> +             if (r2->wm_lp[i] & WM3_LP_EN)
>> >> +                     val_r2 |= (1 << i);
>> >> +     }
>> >> +
>> >> +     if (val_r1 == val_r2) {
>> >> +             if (r2->enable_fbc_wm && !r1->enable_fbc_wm)
>> >> +                     return r2;
>> >> +             else
>> >> +                     return r1;
>> >> +     } else if (val_r1 > val_r2) {
>> >> +             return r1;
>> >> +     } else {
>> >> +             return r2;
>> >> +     }
>> >> +}
>> >> +
>> >> +/*
>> >> + * The spec says we shouldn't write when we don't need, because every write
>> >> + * causes WMs to be re-evaluated, expending some power.
>> >> + */
>> >> +static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
>> >> +                             struct hsw_wm_values *results)
>> >> +{
>> >> +     struct hsw_wm_values previous;
>> >> +     uint32_t val;
>> >> +
>> >> +     val = I915_READ(DISP_ARB_CTL);
>> >> +     if (results->enable_fbc_wm)
>> >> +             val &= ~DISP_FBC_WM_DIS;
>> >> +     else
>> >> +             val |= DISP_FBC_WM_DIS;
>> >> +     I915_WRITE(DISP_ARB_CTL, val);
>> >> +
>> >> +     previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
>> >> +     previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
>> >> +     previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB);
>> >> +     previous.wm_lp[0] = I915_READ(WM1_LP_ILK);
>> >> +     previous.wm_lp[1] = I915_READ(WM2_LP_ILK);
>> >> +     previous.wm_lp[2] = I915_READ(WM3_LP_ILK);
>> >> +     previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
>> >> +     previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
>> >> +     previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
>> >> +     previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
>> >> +     previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
>> >> +     previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
>> >> +
>> >> +     if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
>> >> +         memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
>> >> +         memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
>> >> +         memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0)
>> >> +             return;
>> >> +
>> >> +     if (previous.wm_lp[2] != 0)
>> >> +             I915_WRITE(WM3_LP_ILK, 0);
>> >> +     if (previous.wm_lp[1] != 0)
>> >> +             I915_WRITE(WM2_LP_ILK, 0);
>> >> +     if (previous.wm_lp[0] != 0)
>> >> +             I915_WRITE(WM1_LP_ILK, 0);
>> >> +
>> >> +     if (previous.wm_pipe[0] != results->wm_pipe[0])
>> >> +             I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
>> >> +     if (previous.wm_pipe[1] != results->wm_pipe[1])
>> >> +             I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
>> >> +     if (previous.wm_pipe[2] != results->wm_pipe[2])
>> >> +             I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
>> >> +
>> >> +     if (previous.wm_linetime[0] != results->wm_linetime[0])
>> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
>> >> +     if (previous.wm_linetime[1] != results->wm_linetime[1])
>> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
>> >> +     if (previous.wm_linetime[2] != results->wm_linetime[2])
>> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
>> >> +
>> >> +     if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
>> >> +             I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
>> >> +     if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
>> >> +             I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
>> >> +     if (previous.wm_lp_spr[2] != results->wm_lp_spr[2])
>> >> +             I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
>> >> +
>> >> +     if (results->wm_lp[0] != 0)
>> >> +             I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
>> >> +     if (results->wm_lp[1] != 0)
>> >> +             I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
>> >> +     if (results->wm_lp[2] != 0)
>> >> +             I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
>> >> +}
>> >> +
>> >> +static void haswell_update_wm(struct drm_device *dev)
>> >> +{
>> >> +     struct drm_i915_private *dev_priv = dev->dev_private;
>> >> +     struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
>> >> +     struct hsw_pipe_wm_parameters params[3];
>> >> +     struct hsw_wm_values results_1_2, results_5_6, *best_results;
>> >> +     uint32_t wm[5];
>> >> +
>> >> +     hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
>> >> +
>> >> +     hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
>> >> +     if (lp_max_1_2.pri != lp_max_5_6.pri) {
>> >> +             hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
>> >> +                                    &results_5_6);
>> >> +             best_results = hsw_find_best_result(&results_1_2, &results_5_6);
>> >
>> > You don't seem to actually write WM_MISC with the select partititioning
>> > value.
>>
>> Oops, my bug.
>>
>>
>> > Also I'm not at all sure when WM_MISC gets updated.
>>
>> WM paritioning is only related to the LP watermarks, so I imagine it
>> should be safe to change this when the LP watermarks are disabled.
>
> We still don't know if it's double buffered or not.
>
>>
>>
>> > The IVB and
>> > older ARB_CTL2 DDB partitioning bit was double buffered on the LP pipe,
>> > but on HSW it's not documented. Maybe it's just a bad idea to try and
>> > change the partitioning when multiple pipes are enabled. We still need
>> > to figure out if it's double buffered though...
>>
>> The partitioning is only useful when there's a single pipe enabled. If
>> we have multiple pipes, then the maximums will be the same as in the
>> 1/2 case, so we'll choose to use 1/2 partitioning.
>
> You mean the maximums documented in the Bspec. I'm assuming those are
> just the computed values based on the total FIFO size, number of pipes,
> and the split. I guess they never expected the 5/6 split to be used w/
> multiple pipes, so the resulting FIFO sizes are not explicitly
> documented there.
>
> In my patch I compute those values instead of requiring such a list of
> hardcoded values. So my code could give you the 5/6 value w/ multiple
> pipes.
>
> Or I could be wrong, and the hardware might ignore the 5/6 split setting
> with multiple pipes, or it might fall over. Who knows. Unfortunately
> such detauls are not really spelled out in the spec.
>
>>
>>
>> >
>> > BSpec also says that the 5/6 split should only be selected when FBC is
>> > enabled. Not sure if that's a real HW constraint or not. I would
>> > definately want to enable it when the primary plane is disabled for
>> > example.
>>
>> Yeah, it totally makes sense when the primary is disabled. We should
>> ask for clarification regarding this.
>>
>> Thanks for the review!
>>
>> >
>> >> +     } else {
>> >> +             best_results = &results_1_2;
>> >>       }
>> >>
>> >> -     sandybridge_update_wm(dev);
>> >> +     hsw_write_wm_values(dev_priv, best_results);
>> >> +}
>> >> +
>> >> +static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
>> >> +                                  uint32_t sprite_width, int pixel_size)
>> >> +{
>> >> +     struct drm_plane *plane;
>> >> +
>> >> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
>> >> +             struct intel_plane *intel_plane = to_intel_plane(plane);
>> >> +
>> >> +             if (intel_plane->pipe == pipe) {
>> >> +                     intel_plane->wm.enable = true;
>> >> +                     intel_plane->wm.horiz_pixels = sprite_width + 1;
>> >> +                     intel_plane->wm.bytes_per_pixel = pixel_size;
>> >> +                     break;
>> >> +             }
>> >> +     }
>> >> +
>> >> +     haswell_update_wm(dev);
>> >>  }
>> >>
>> >>  static bool
>> >> @@ -4564,7 +5047,8 @@ void intel_init_pm(struct drm_device *dev)
>> >>               } else if (IS_HASWELL(dev)) {
>> >>                       if (I915_READ64(MCH_SSKPD)) {
>> >>                               dev_priv->display.update_wm = haswell_update_wm;
>> >> -                             dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
>> >> +                             dev_priv->display.update_sprite_wm =
>> >> +                                     haswell_update_sprite_wm;
>> >>                       } else {
>> >>                               DRM_DEBUG_KMS("Failed to read display plane latency. "
>> >>                                             "Disable CxSR\n");
>> >> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
>> >> index 19b9cb9..95b39ef 100644
>> >> --- a/drivers/gpu/drm/i915/intel_sprite.c
>> >> +++ b/drivers/gpu/drm/i915/intel_sprite.c
>> >> @@ -335,8 +335,12 @@ ivb_disable_plane(struct drm_plane *plane)
>> >>
>> >>       dev_priv->sprite_scaling_enabled &= ~(1 << pipe);
>> >>
>> >> +     if (IS_HASWELL(dev))
>> >> +             intel_plane->wm.enable = false;
>> >> +
>> >>       /* potentially re-enable LP watermarks */
>> >> -     if (scaling_was_enabled && !dev_priv->sprite_scaling_enabled)
>> >> +     if ((scaling_was_enabled && !dev_priv->sprite_scaling_enabled) ||
>> >> +         IS_HASWELL(dev))
>> >>               intel_update_watermarks(dev);
>> >>  }
>> >>
>> >> --
>> >> 1.7.10.4
>> >>
>> >> _______________________________________________
>> >> Intel-gfx mailing list
>> >> Intel-gfx at lists.freedesktop.org
>> >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>> >
>> > --
>> > Ville Syrjälä
>> > Intel OTC
>>
>>
>>
>> --
>> Paulo Zanoni
>
> --
> Ville Syrjälä
> Intel OTC

-- 
Paulo Zanoni