[Mesa-dev] [PATCH V2 3/4] intel: Add multisample scaled blitting in blorp engine

Wed May 29 15:59:13 PDT 2013

On Wed, May 29, 2013 at 10:06 AM, Anuj Phogat <anuj.phogat at gmail.com> wrote:
> On Fri, May 24, 2013 at 12:01 PM, Paul Berry <stereotype441 at gmail.com> wrote:
>> On 16 May 2013 11:44, Anuj Phogat <anuj.phogat at gmail.com> wrote:
>>>
>>> In traditional multisampled framebuffer rendering, color samples must be
>>> explicitly resolved via BlitFramebuffer before doing the scaled blitting
>>> of the framebuffer. So, scaled blitting of a multisample framebuffer
>>> takes two separate calls to BlitFramebuffer.
>>>
>>> This patch implements the functionality of doing multisampled scaled
>>> resolve using just one BlitFramebuffer call. Important changes involved
>>> in this patch are listed below:
>>>     - Use float registers to scale and offset texture coordinates.
>>>     - Change offset computation to consider float coordinates.
>>>     - Round the scaled coordinates down to nearest integer.
>>>     - Modify src texture coordinates clipping to account for scaling..
>>>     - Linear filter is not yet implemented in blorp. So, don't use
>>>       blorp engine to do single sampled scaled blitting.
>>>
>>> Note: Observed no piglit regressions on sandybridge & ivybridge with
>>> these changes.
>>>
>>> Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_blorp.h          |  23 ++--
>>>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp   | 143
>>> +++++++++++++++----------
>>>  src/mesa/drivers/dri/i965/brw_reg.h            |   7 --
>>>  src/mesa/drivers/dri/intel/intel_mipmap_tree.c |   2 +
>>>  4 files changed, 102 insertions(+), 73 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h
>>> b/src/mesa/drivers/dri/i965/brw_blorp.h
>>> index 70e3933..a40324b 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
>>> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
>>> @@ -40,9 +40,10 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
>>>                          unsigned src_level, unsigned src_layer,
>>>                          struct intel_mipmap_tree *dst_mt,
>>>                          unsigned dst_level, unsigned dst_layer,
>>> -                        int src_x0, int src_y0,
>>> -                        int dst_x0, int dst_y0,
>>> -                        int dst_x1, int dst_y1,
>>> +                        float src_x0, float src_y0,
>>> +                        float src_x1, float src_y1,
>>> +                        float dst_x0, float dst_y0,
>>> +                        float dst_x1, float dst_y1,
>>>                          bool mirror_x, bool mirror_y);
>>>
>>>  bool
>>> @@ -158,11 +159,11 @@ public:
>>>
>>>  struct brw_blorp_coord_transform_params
>>>  {
>>> -   void setup(GLuint src0, GLuint dst0, GLuint dst1,
>>> +   void setup(GLfloat src0, GLfloat src1, GLfloat dst0, GLfloat dst1,
>>>                bool mirror);
>>>
>>> -   int32_t multiplier;
>>> -   int32_t offset;
>>> +   float multiplier;
>>> +   float offset;
>>>  };
>>>
>>>
>>> @@ -304,6 +305,9 @@ struct brw_blorp_blit_prog_key
>>>      * than one sample per pixel.
>>>      */
>>>     bool persample_msaa_dispatch;
>>> +
>>> +   /* True for scaled blitting. */
>>> +   bool blit_scaled;
>>>  };
>>>
>>>  class brw_blorp_blit_params : public brw_blorp_params
>>> @@ -314,9 +318,10 @@ public:
>>>                           unsigned src_level, unsigned src_layer,
>>>                           struct intel_mipmap_tree *dst_mt,
>>>                           unsigned dst_level, unsigned dst_layer,
>>> -                         GLuint src_x0, GLuint src_y0,
>>> -                         GLuint dst_x0, GLuint dst_y0,
>>> -                         GLuint width, GLuint height,
>>> +                         GLfloat src_x0, GLfloat src_y0,
>>> +                         GLfloat src_x1, GLfloat src_y1,
>>> +                         GLfloat dst_x0, GLfloat dst_y0,
>>> +                         GLfloat dst_x1, GLfloat dst_y1,
>>>                           bool mirror_x, bool mirror_y);
>>>
>>>     virtual uint32_t get_wm_prog(struct brw_context *brw,
>>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>>> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>>> index b7ee92b..19169ef 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>>> @@ -41,11 +41,11 @@
>>>   * If coord0 > coord1, swap them and invert the "mirror" boolean.
>>>   */
>>>  static inline void
>>> -fixup_mirroring(bool &mirror, GLint &coord0, GLint &coord1)
>>> +fixup_mirroring(bool &mirror, GLfloat &coord0, GLfloat &coord1)
>>>  {
>>>     if (coord0 > coord1) {
>>>        mirror = !mirror;
>>> -      GLint tmp = coord0;
>>> +      GLfloat tmp = coord0;
>>>        coord0 = coord1;
>>>        coord1 = tmp;
>>>     }
>>> @@ -67,9 +67,10 @@ fixup_mirroring(bool &mirror, GLint &coord0, GLint
>>> &coord1)
>>>   * coordinates, by swapping the roles of src and dst.
>>>   */
>>>  static inline bool
>>> -clip_or_scissor(bool mirror, GLint &src_x0, GLint &src_x1, GLint &dst_x0,
>>> -                GLint &dst_x1, GLint fb_xmin, GLint fb_xmax)
>>> +clip_or_scissor(bool mirror, GLfloat &src_x0, GLfloat &src_x1, GLfloat
>>> &dst_x0,
>>> +                GLfloat &dst_x1, GLfloat fb_xmin, GLfloat fb_xmax)
>>>  {
>>> +   float scale = (float) (src_x1 - src_x0) / (dst_x1 - dst_x0);
>>>     /* If we are going to scissor everything away, stop. */
>>>     if (!(fb_xmin < fb_xmax &&
>>>           dst_x0 < fb_xmax &&
>>> @@ -105,8 +106,8 @@ clip_or_scissor(bool mirror, GLint &src_x0, GLint
>>> &src_x1, GLint &dst_x0,
>>>     /* Adjust the source rectangle to remove the pixels corresponding to
>>> those
>>>      * that were clipped/scissored out of the destination rectangle.
>>>      */
>>> -   src_x0 += pixels_clipped_left;
>>> -   src_x1 -= pixels_clipped_right;
>>> +   src_x0 += pixels_clipped_left * scale;
>>> +   src_x1 -= pixels_clipped_right * scale;
>>>
>>>     return true;
>>>  }
>>> @@ -127,9 +128,10 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
>>>                          unsigned src_level, unsigned src_layer,
>>>                          struct intel_mipmap_tree *dst_mt,
>>>                          unsigned dst_level, unsigned dst_layer,
>>> -                        int src_x0, int src_y0,
>>> -                        int dst_x0, int dst_y0,
>>> -                        int dst_x1, int dst_y1,
>>> +                        float src_x0, float src_y0,
>>> +                        float src_x1, float src_y1,
>>> +                        float dst_x0, float dst_y0,
>>> +                        float dst_x1, float dst_y1,
>>>                          bool mirror_x, bool mirror_y)
>>>  {
>>>     intel_miptree_slice_resolve_depth(intel, src_mt, src_level,
>>> src_layer);
>>> @@ -139,6 +141,7 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
>>>                                  src_mt, src_level, src_layer,
>>>                                  dst_mt, dst_level, dst_layer,
>>>                                  src_x0, src_y0,
>>> +                                src_x1, src_y1,
>>>                                  dst_x0, dst_y0,
>>>                                  dst_x1, dst_y1,
>>>                                  mirror_x, mirror_y);
>>> @@ -151,8 +154,8 @@ static void
>>>  do_blorp_blit(struct intel_context *intel, GLbitfield buffer_bit,
>>>                struct intel_renderbuffer *src_irb,
>>>                struct intel_renderbuffer *dst_irb,
>>> -              GLint srcX0, GLint srcY0,
>>> -              GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
>>> +              GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
>>> +              GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
>>>                bool mirror_x, bool mirror_y)
>>>  {
>>>     /* Find source/dst miptrees */
>>> @@ -163,7 +166,8 @@ do_blorp_blit(struct intel_context *intel, GLbitfield
>>> buffer_bit,
>>>     brw_blorp_blit_miptrees(intel,
>>>                             src_mt, src_irb->mt_level, src_irb->mt_layer,
>>>                             dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
>>> -                           srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
>>> +                           srcX0, srcY0, srcX1, srcY1,
>>> +                           dstX0, dstY0, dstX1, dstY1,
>>>                             mirror_x, mirror_y);
>>>
>>>     intel_renderbuffer_set_needs_downsample(dst_irb);
>>> @@ -203,8 +207,8 @@ formats_match(GLbitfield buffer_bit, struct
>>> intel_renderbuffer *src_irb,
>>>
>>>  static bool
>>>  try_blorp_blit(struct intel_context *intel,
>>> -               GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
>>> -               GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
>>> +               GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat
>>> srcY1,
>>> +               GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat
>>> dstY1,
>>>                 GLenum filter, GLbitfield buffer_bit)
>>>  {
>>>     struct gl_context *ctx = &intel->ctx;
>>> @@ -224,9 +228,13 @@ try_blorp_blit(struct intel_context *intel,
>>>     fixup_mirroring(mirror_y, srcY0, srcY1);
>>>     fixup_mirroring(mirror_y, dstY0, dstY1);
>>>
>>> -   /* Make sure width and height match */
>>> -   if (srcX1 - srcX0 != dstX1 - dstX0) return false;
>>> -   if (srcY1 - srcY0 != dstY1 - dstY0) return false;
>>> +   /* Linear filtering is not yet implemented in blorp. So, do not use
>>> blorp
>>> +    * engine for single sampled scaled blits.
>>> +    */
>>> +   if ((srcX1 - srcX0 != dstX1 - dstX0 ||
>>> +        srcY1 - srcY0 != dstY1 - dstY0) &&
>>> +       read_fb->Visual.samples == 0)
>>
>>
>> It's a little strange that your comment mentions linear filtering but the
>> code doesn't.  How about this instead (I believe it's equivalent)?
>>
>> if ((srcX1 - srcX0 != dstX1 - dstX0 ||
>>      srcY1 - srcY0 != dstY1 - dstY0) &&
>>      filter == GL_LINEAR)
>>
> Yeah. This is what I intended to write initially. But making this change causes
> a piglit subtest fail.
> ./bin/fbo-blit-stretch -auto
> 45x79 (13, 33)-(30, 44) => 200x150 (19, 23)-(87, 67) stretch_x stretch_y nearest
> Probe at (55,23)
>   Expected: 0.000000 1.000000 0.000000
>   Observed: 1.000000 0.000000 0.000000
>
> Test seems to fail at the edge pixel. Other sub tests with stretch_x stretch_y
> nearest passes. This sub test looks too strict at the edges.
> No failures are observed in gles3 CTS with above change.
> I have three options now:
> 1. Hold the patch and investigate the failing piglit test case.
> 2. Push the patch without suggested change.
> 3. Relying on gles3 CTS results, push the patch with suggested change.
> I'm little inclined towards 3. What do you think?
>
I missed a failure in es3 conform
framebuffer_blit_functionality_magnifying_blit.test :(
It looks like a genuine bug in my implementation. I'll investigate it
before pushing the patch.

>>>
>>> +      return false;
>>>
>>>     /* If the destination rectangle needs to be clipped or scissored, do
>>> so.
>>>      */
>>> @@ -278,7 +286,8 @@ try_blorp_blit(struct intel_context *intel,
>>>           dst_irb =
>>> intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
>>>          if (dst_irb)
>>>              do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0,
>>> srcY0,
>>> -                          dstX0, dstY0, dstX1, dstY1, mirror_x,
>>> mirror_y);
>>> +                          srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>>> +                          mirror_x, mirror_y);
>>>        }
>>>        break;
>>>     case GL_DEPTH_BUFFER_BIT:
>>> @@ -289,7 +298,8 @@ try_blorp_blit(struct intel_context *intel,
>>>        if (!formats_match(buffer_bit, src_irb, dst_irb))
>>>           return false;
>>>        do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>>> -                    dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
>>> +                    srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>>> +                    mirror_x, mirror_y);
>>>        break;
>>>     case GL_STENCIL_BUFFER_BIT:
>>>        src_irb =
>>> @@ -299,7 +309,8 @@ try_blorp_blit(struct intel_context *intel,
>>>        if (!formats_match(buffer_bit, src_irb, dst_irb))
>>>           return false;
>>>        do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>>> -                    dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
>>> +                    srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>>> +                    mirror_x, mirror_y);
>>>        break;
>>>     default:
>>>        assert(false);
>>> @@ -347,6 +358,7 @@ brw_blorp_copytexsubimage(struct intel_context *intel,
>>>      */
>>>
>>>     int srcY1 = srcY0 + height;
>>> +   int srcX1 = srcX0 + width;
>>>     int dstX1 = dstX0 + width;
>>>     int dstY1 = dstY0 + height;
>>>
>>> @@ -364,7 +376,8 @@ brw_blorp_copytexsubimage(struct intel_context *intel,
>>>     brw_blorp_blit_miptrees(intel,
>>>                             src_mt, src_irb->mt_level, src_irb->mt_layer,
>>>                             dst_mt, dst_image->Level, dst_image->Face,
>>> -                           srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
>>> +                           srcX0, srcY0, srcX1, srcY1,
>>> +                           dstX0, dstY0, dstX1, dstY1,
>>>                             false, mirror_y);
>>>
>>>     /* If we're copying to a packed depth stencil texture and the source
>>> @@ -380,7 +393,8 @@ brw_blorp_copytexsubimage(struct intel_context *intel,
>>>        brw_blorp_blit_miptrees(intel,
>>>                                src_irb->mt, src_irb->mt_level,
>>> src_irb->mt_layer,
>>>                                dst_mt, dst_image->Level, dst_image->Face,
>>> -                              srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
>>> +                              srcX0, srcY0, srcX1, srcY1,
>>> +                              dstX0, dstY0, dstX1, dstY1,
>>>                                false, mirror_y);
>>>     }
>>>
>>> @@ -590,7 +604,7 @@ private:
>>>     void encode_msaa(unsigned num_samples, intel_msaa_layout layout);
>>>     void decode_msaa(unsigned num_samples, intel_msaa_layout layout);
>>>     void kill_if_outside_dst_rect();
>>> -   void translate_dst_to_src(unsigned intel_gen);
>>> +   void translate_dst_to_src();
>>>     void single_to_blend();
>>>     void manual_blend(unsigned num_samples);
>>>     void sample(struct brw_reg dst);
>>> @@ -772,7 +786,7 @@ brw_blorp_blit_program::compile(struct brw_context
>>> *brw,
>>>        kill_if_outside_dst_rect();
>>>
>>>     /* Next, apply a translation to obtain coordinates in the source
>>> image. */
>>> -   translate_dst_to_src(brw->intel.gen);
>>> +   translate_dst_to_src();
>>>
>>>     /* If the source image is not multisampled, then we want to fetch
>>> sample
>>>      * number 0, because that's the only sample there is.
>>> @@ -844,7 +858,7 @@ brw_blorp_blit_program::alloc_push_const_regs(int
>>> base_reg)
>>>  #define CONST_LOC(name) offsetof(brw_blorp_wm_push_constants, name)
>>>  #define ALLOC_REG(name) \
>>>     this->name = \
>>> -      brw_ud1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) /
>>> 4)
>>> +      brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) /
>>> 4)
>>>
>>>     ALLOC_REG(dst_x0);
>>>     ALLOC_REG(dst_x1);
>>> @@ -1322,29 +1336,37 @@ brw_blorp_blit_program::kill_if_outside_dst_rect()
>>>   * coordinates.
>>>   */
>>>  void
>>> -brw_blorp_blit_program::translate_dst_to_src(unsigned intel_gen)
>>> +brw_blorp_blit_program::translate_dst_to_src()
>>>  {
>>> +   struct brw_reg X_f = retype(X, BRW_REGISTER_TYPE_F);
>>> +   struct brw_reg Y_f = retype(Y, BRW_REGISTER_TYPE_F);
>>> +   struct brw_reg Xp_f = retype(Xp, BRW_REGISTER_TYPE_F);
>>> +   struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
>>> +
>>>     brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
>>> -   /* For mul instruction:
>>> -    * On SNB when both src0 and src1 are of type D or UD, only the low 16
>>> bits
>>> -    * of each element of src0 are used.
>>> -    * On IVB when both src0 and src1 are of type D or UD, only the low 16
>>> bits
>>> -    * of each element of src1 are used.
>>> -    * multiplier can be positive or negative. So keep the multiplier in a
>>> src
>>> -    * register which don't get truncated during multiplication.
>>> -    */
>>> -   if (intel_gen == 6) {
>>> -      brw_MUL(&func, Xp, X, x_transform.multiplier);
>>> -      brw_MUL(&func, Yp, Y, y_transform.multiplier);
>>> +   /* Move the UD coordinates to float registers. */
>>> +   brw_MOV(&func, Xp_f, X);
>>> +   brw_MOV(&func, Yp_f, Y);
>>> +   /* Scale and offset */
>>> +   brw_MUL(&func, X_f, Xp_f, x_transform.multiplier);
>>> +   brw_MUL(&func, Y_f, Yp_f, y_transform.multiplier);
>>> +   brw_ADD(&func, X_f, X_f, x_transform.offset);
>>> +   brw_ADD(&func, Y_f, Y_f, y_transform.offset);
>>> +   if (key->blit_scaled) {
>>> +      /* Round the float coordinates down to nearest integer. */
>>> +      brw_RNDD(&func, Xp_f, X_f);
>>> +      brw_RNDD(&func, Yp_f, Y_f);
>>> +      /* Move the float coordinates back to UD registers. */
>>> +      brw_MOV(&func, X, Xp_f);
>>> +      brw_MOV(&func, Y, Yp_f);
>>>     }
>>>     else {
>>> -      brw_MUL(&func, Xp, x_transform.multiplier, X);
>>> -      brw_MUL(&func, Yp, y_transform.multiplier, Y);
>>> +      /* Move the float coordinates back to UD registers. */
>>> +      brw_MOV(&func, Xp, X_f);
>>> +      brw_MOV(&func, Yp, Y_f);
>>> +      SWAP_XY_AND_XPYP();
>>>     }
>>
>>
>> I don't believe key->blit_scaled is needed.  Both branches of the if
>> statement compute the same thing, since ((int) f) == ((int) RNDD(f)).  Let's
>> just use the code in the "else" block, since that's fewer instructions.
>>
> right. consider it fixed.
>>>
>>> -   brw_ADD(&func, Xp, Xp, x_transform.offset);
>>> -   brw_ADD(&func, Yp, Yp, y_transform.offset);
>>>     brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
>>> -   SWAP_XY_AND_XPYP();
>>>  }
>>>
>>>  /**
>>> @@ -1709,29 +1731,30 @@ brw_blorp_blit_program::render_target_write()
>>>
>>>
>>>  void
>>> -brw_blorp_coord_transform_params::setup(GLuint src0, GLuint dst0, GLuint
>>> dst1,
>>> +brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
>>> +                                        GLfloat dst0, GLfloat dst1,
>>>                                          bool mirror)
>>>  {
>>> +   float scale = (src1 - src0) / (dst1 - dst0);
>>>     if (!mirror) {
>>>        /* When not mirroring a coordinate (say, X), we need:
>>> -       *   x' - src_x0 = x - dst_x0
>>> +       *   x' - src_x0 = x - dst_x0 + 0.5
>>>         * Therefore:
>>> -       *   x' = 1*x + (src_x0 - dst_x0)
>>> +       *   x' = 1*x + (src_x0 - dst_x0 + 0.5)
>>>         */
>>> -      multiplier = 1;
>>> -      offset = (int) (src0 - dst0);
>>> +      multiplier = scale;
>>> +      offset = (src0 - dst0 + 0.5) * scale;
>>>     } else {
>>>        /* When mirroring X we need:
>>> -       *   x' - src_x0 = dst_x1 - x - 1
>>> +       *   x' - src_x0 = dst_x1 - x - 0.5
>>>         * Therefore:
>>> -       *   x' = -1*x + (src_x0 + dst_x1 - 1)
>>> +       *   x' = -1*x + (src_x0 + dst_x1 - 0.5)
>>>         */
>>> -      multiplier = -1;
>>> -      offset = src0 + dst1 - 1;
>>> +      multiplier = -scale;
>>> +      offset = (src0 + dst1 - 0.5) * scale;
>>>     }
>>>  }
>>
>>
>> It would be nice to have a comment in the above function saying why the
>> 0.5's are necessary.  If I'm not mistaken, the reason they are necessary is
>> that the blorp program uses "round toward zero" to convert the transformed
>> floating point coordinates to integer coordinates, whereas the behaviour we
>> actually want is "round to nearest", so 0.5 provides the necessary
>> correction.
>>
> I'll put this comment in the code.
>>>
>>>
>>> -
>>>  /**
>>>   * Determine which MSAA layout the GPU pipeline should be configured for,
>>>   * based on the chip generation, the number of samples, and the true
>>> layout of
>>> @@ -1766,9 +1789,10 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
>>> brw_context *brw,
>>>                                               unsigned src_level, unsigned
>>> src_layer,
>>>                                               struct intel_mipmap_tree
>>> *dst_mt,
>>>                                               unsigned dst_level, unsigned
>>> dst_layer,
>>> -                                             GLuint src_x0, GLuint
>>> src_y0,
>>> -                                             GLuint dst_x0, GLuint
>>> dst_y0,
>>> -                                             GLuint dst_x1, GLuint
>>> dst_y1,
>>> +                                             GLfloat src_x0, GLfloat
>>> src_y0,
>>> +                                             GLfloat src_x1, GLfloat
>>> src_y1,
>>> +                                             GLfloat dst_x0, GLfloat
>>> dst_y0,
>>> +                                             GLfloat dst_x1, GLfloat
>>> dst_y1,
>>>                                               bool mirror_x, bool
>>> mirror_y)
>>>  {
>>>     src.set(brw, src_mt, src_level, src_layer);
>>> @@ -1834,6 +1858,11 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
>>> brw_context *brw,
>>>        wm_prog_key.persample_msaa_dispatch = true;
>>>     }
>>>
>>> +   /* Scaled blitting or not. */
>>> +   wm_prog_key.blit_scaled =
>>> +      ((dst_x1 - dst_x0) == (src_x1 - src_x0) &&
>>> +       (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true;
>>> +
>>
>>
>> This should also be able to go away.
>>
>> With that fixed, this patch is:
>>
>> Reviewed-by: Paul Berry <stereotype441 at gmail.com>
>>
> Thanks for reviewing Paul.