[Mesa-dev] [PATCH V2 3/3] i965/blorp: Add support for single sample scaled blit with bilinear filter

Wed Aug 14 11:46:54 PDT 2013

On Wed, Aug 14, 2013 at 10:25 AM, Paul Berry <stereotype441 at gmail.com> wrote:
> On 9 August 2013 19:01, Anuj Phogat <anuj.phogat at gmail.com> wrote:
>>
>> Currently single sample scaled blits with GL_LINEAR filter falls
>> back to meta path. Patch removes this limitation in BLORP engine
>> and implements single sample scaled blit with bilinear filter.
>> No piglit, gles3 regressions are obeserved with this patch. Piglit
>> test case patches to verify this implementation are out on piglit
>> mailing list.
>>
>> V2:
>> Use "sample" message in place of "LD" message to utilize the
>> linear/nearest
>> filtering (of single-sampled texture) functionality built in to hardware.
>> Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
>
>
> Thanks for re-working this, Anuj.  I think we're getting close to converging
> on a solution.
>
> My key concern right now is that there are several things that the blorp
> compiler should do differently we're doing scaled GL_LINEAR blits:
>
> 1 translate_dst_to_src() shouldn't round the floating point coordinates to
> integers
> 2 compile() should do its second call to sample() (the one that occurs when
> we're not blending)
> 3 compile() shouldn't call encode_msaa(), translate_tiling(), or
> decode_msaa()
> 4 texture_lookup() should use retype({X,Y}, BRW_REGISTER_TYPE_F) as the
> source of its MOV operations
>
All of these conditions are satisfied by this patch.
> It's not obvious from reading the code that these will all happen under the
> right conditions.
>
> I'd recommend adding a new boolean to brw_blorp_blit_prog_key to indicate
> when scaled GL_LINEAR blits are being done, and have all four behavioural
> changes above happen in response to that boolean.
>
Yes, It's not obvious by just looking at the code. I actually had this
bool variable in my earlier patch but removed it to avoid extra variables
:(. I'll update my patch to make the conditions more obvious.

> Two other minor comments below:
>
>>
>> ---
>>  src/mesa/drivers/dri/i965/brw_blorp.h         |  5 +-
>>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 79
>> ++++++++++++++++-----------
>>  src/mesa/drivers/dri/i965/gen7_blorp.cpp      |  9 ++-
>>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  4 +-
>>  4 files changed, 60 insertions(+), 37 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h
>> b/src/mesa/drivers/dri/i965/brw_blorp.h
>> index 49862b8..6a3a429 100644
>> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
>> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
>> @@ -44,7 +44,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>>                          float src_x1, float src_y1,
>>                          float dst_x0, float dst_y0,
>>                          float dst_x1, float dst_y1,
>> -                        bool mirror_x, bool mirror_y);
>> +                        GLenum filter, bool mirror_x, bool mirror_y);
>>
>>  bool
>>  brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
>> @@ -232,6 +232,7 @@ public:
>>     bool use_wm_prog;
>>     brw_blorp_wm_push_constants wm_push_consts;
>>     bool color_write_disable[4];
>> +   GLenum tex_filter;
>>  };
>>
>>
>> @@ -347,7 +348,7 @@ public:
>>                           GLfloat src_x1, GLfloat src_y1,
>>                           GLfloat dst_x0, GLfloat dst_y0,
>>                           GLfloat dst_x1, GLfloat dst_y1,
>> -                         bool mirror_x, bool mirror_y);
>> +                         GLenum filter, bool mirror_x, bool mirror_y);
>>
>>     virtual uint32_t get_wm_prog(struct brw_context *brw,
>>                                  brw_blorp_prog_data **prog_data) const;
>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> index 8c0db48..128940d 100644
>> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> @@ -133,7 +133,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>>                          float src_x1, float src_y1,
>>                          float dst_x0, float dst_y0,
>>                          float dst_x1, float dst_y1,
>> -                        bool mirror_x, bool mirror_y)
>> +                        GLenum filter, bool mirror_x, bool mirror_y)
>>  {
>>     /* Get ready to blit.  This includes depth resolving the src and dst
>>      * buffers if necessary.  Note: it's not necessary to do a color
>> resolve on
>> @@ -161,7 +161,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>>                                  src_x1, src_y1,
>>                                  dst_x0, dst_y0,
>>                                  dst_x1, dst_y1,
>> -                                mirror_x, mirror_y);
>> +                                filter, mirror_x, mirror_y);
>>     brw_blorp_exec(brw, &params);
>>
>>     intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level,
>> dst_layer);
>> @@ -173,7 +173,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
>> buffer_bit,
>>                struct intel_renderbuffer *dst_irb,
>>                GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
>>                GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
>> -              bool mirror_x, bool mirror_y)
>> +              GLenum filter, bool mirror_x, bool mirror_y)
>>  {
>>     /* Find source/dst miptrees */
>>     struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
>> @@ -185,7 +185,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
>> buffer_bit,
>>                             dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
>>                             srcX0, srcY0, srcX1, srcY1,
>>                             dstX0, dstY0, dstX1, dstY1,
>> -                           mirror_x, mirror_y);
>> +                           filter, mirror_x, mirror_y);
>>
>>     intel_renderbuffer_set_needs_downsample(dst_irb);
>>  }
>> @@ -245,14 +245,6 @@ try_blorp_blit(struct brw_context *brw,
>>     fixup_mirroring(mirror_y, srcY0, srcY1);
>>     fixup_mirroring(mirror_y, dstY0, dstY1);
>>
>> -   /* Linear filtering is not yet implemented in blorp engine. So,
>> fallback
>> -    * to other blit paths.
>> -    */
>> -   if ((srcX1 - srcX0 != dstX1 - dstX0 ||
>> -        srcY1 - srcY0 != dstY1 - dstY0) &&
>> -       filter == GL_LINEAR)
>> -      return false;
>> -
>>     /* If the destination rectangle needs to be clipped or scissored, do
>> so.
>>      */
>>     if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
>> @@ -304,7 +296,7 @@ try_blorp_blit(struct brw_context *brw,
>>          if (dst_irb)
>>              do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0,
>> srcY0,
>>                            srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>> -                          mirror_x, mirror_y);
>> +                          filter, mirror_x, mirror_y);
>>        }
>>        break;
>>     case GL_DEPTH_BUFFER_BIT:
>> @@ -316,7 +308,7 @@ try_blorp_blit(struct brw_context *brw,
>>           return false;
>>        do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>>                      srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>> -                    mirror_x, mirror_y);
>> +                    filter, mirror_x, mirror_y);
>>        break;
>>     case GL_STENCIL_BUFFER_BIT:
>>        src_irb =
>> @@ -327,7 +319,7 @@ try_blorp_blit(struct brw_context *brw,
>>           return false;
>>        do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>>                      srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>> -                    mirror_x, mirror_y);
>> +                    filter, mirror_x, mirror_y);
>>        break;
>>     default:
>>        assert(false);
>> @@ -396,7 +388,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
>>                             dst_mt, dst_image->Level, dst_image->Face +
>> slice,
>>                             srcX0, srcY0, srcX1, srcY1,
>>                             dstX0, dstY0, dstX1, dstY1,
>> -                           false, mirror_y);
>> +                           GL_NEAREST, false, mirror_y);
>>
>>     /* If we're copying to a packed depth stencil texture and the source
>>      * framebuffer has separate stencil, we need to also copy the stencil
>> data
>> @@ -420,7 +412,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
>>                                   dst_image->Face + slice,
>>                                   srcX0, srcY0, srcX1, srcY1,
>>                                   dstX0, dstY0, dstX1, dstY1,
>> -                                 false, mirror_y);
>> +                                 GL_NEAREST, false, mirror_y);
>>        }
>>     }
>>
>> @@ -873,15 +865,22 @@ brw_blorp_blit_program::compile(struct brw_context
>> *brw,
>>           decode_msaa(key->tex_samples, key->tex_layout);
>>        }
>>
>> -      /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
>> offset)).
>> -       *
>> -       * In other words: X, Y, and S now contain values which, when
>> passed to
>> -       * the texturing unit, will cause data to be read from the correct
>> -       * memory location.  So we can fetch the texel now.
>> -       */
>> -      if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
>> -         mcs_fetch();
>> -      texel_fetch(texture_data[0]);
>> +      if (key->src_samples == 0 &&
>> +          key->dst_samples == 0 &&
>> +          !key->src_tiled_w) {
>> +         sample(texture_data[0]);
>> +      }
>> +      else {
>
>
> Style nit: we usually combine "} else {" on one line.
>
>>
>> +         /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
>> offset)).
>> +          *
>> +          * In other words: X, Y, and S now contain values which, when
>> passed to
>> +          * the texturing unit, will cause data to be read from the
>> correct
>> +          * memory location.  So we can fetch the texel now.
>> +          */
>> +         if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
>> +            mcs_fetch();
>> +         texel_fetch(texture_data[0]);
>> +      }
>>     }
>>
>>     /* Finally, write the fetched (or blended) value to the render target
>> and
>> @@ -1442,14 +1441,17 @@ brw_blorp_blit_program::translate_dst_to_src()
>>        brw_RNDD(&func, Yp_f, Y_f);
>>        brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));
>>        brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
>> -   } else {
>> +      SWAP_XY_AND_XPYP();
>> +   } else if (key->src_samples > 1 ||
>> +              key->dst_samples > 1 ||
>> +              key->src_tiled_w) {
>>        /* Round the float coordinates down to nearest integer by moving to
>>         * UD registers.
>>         */
>>        brw_MOV(&func, Xp, X_f);
>>        brw_MOV(&func, Yp, Y_f);
>> +      SWAP_XY_AND_XPYP();
>>     }
>> -   SWAP_XY_AND_XPYP();
>>     brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
>>  }
>>
>> @@ -1873,10 +1875,18 @@ brw_blorp_blit_program::texture_lookup(struct
>> brw_reg dst,
>>     for (int arg = 0; arg < num_args; ++arg) {
>>        switch (args[arg]) {
>>        case SAMPLER_MESSAGE_ARG_U_FLOAT:
>> -         brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), X);
>> +         if (s_is_zero)
>> +            brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F),
>> +                    retype(X, BRW_REGISTER_TYPE_F));
>> +         else
>> +            brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), X);
>>           break;
>>        case SAMPLER_MESSAGE_ARG_V_FLOAT:
>> -         brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), Y);
>> +         if (s_is_zero)
>> +            brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F),
>> +                    retype(Y, BRW_REGISTER_TYPE_F));
>> +         else
>> +            brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), Y);
>>           break;
>>        case SAMPLER_MESSAGE_ARG_U_INT:
>>           brw_MOV(&func, mrf, X);
>> @@ -2050,6 +2060,7 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
>> brw_context *brw,
>>                                               GLfloat src_x1, GLfloat
>> src_y1,
>>                                               GLfloat dst_x0, GLfloat
>> dst_y0,
>>                                               GLfloat dst_x1, GLfloat
>> dst_y1,
>> +                                             GLenum filter,
>>                                               bool mirror_x, bool
>> mirror_y)
>>  {
>>     struct gl_context *ctx = &brw->ctx;
>> @@ -2058,7 +2069,10 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
>> brw_context *brw,
>>     src.set(brw, src_mt, src_level, src_layer);
>>     dst.set(brw, dst_mt, dst_level, dst_layer);
>>
>> -   src.brw_surfaceformat = dst.brw_surfaceformat;
>> +   if (src.num_samples > 1)
>> +      src.brw_surfaceformat = dst.brw_surfaceformat;
>> +   else
>> +      dst.brw_surfaceformat = src.brw_surfaceformat;
>
>
> I thought we decided that it was a bug to ever set src.brw_surfaceformat =
> dst.brw_surfaceformat, and we should do dst.brw_surfaceformat =
> src.brw_surfaceformat always.  Am I remembering wrong?
As suggested by Ian, I modified a piglit test and checked the
behavior on proprietary NVIDIA OpenGL 4.3 drivers. NVIDIA
still uses destination format to decide the type of averaging
during resolve. During our offline discussion you mentioned
how NVIDIA's approach produce slightly better looking images
as compared to using the source buffer format for avearging the
samples. Also, OpenGL specification doesn't seem to be strict
about it. So, I decided to use an if-else to take care of averaging
format in single sample scaled blits. I don't have any strong
opinions about it and I'm willing to post a patch to use
dst.brw_surfaceformat = src.brw_surfaceformat for all cases.
Please let me know if you're in favor of fixing it.
>
> If I'm remembering right, we should probably do this fix in a separate patch
> with an explanation as to why (the reasoning, as I recall, is that even
> though we do multisample resolves at the time of the blit, the spec defines
> them as though they happen at the time of rendering, which means that the
> type of averaging we do during the resolve should depend only on whether the
> source buffer is SRGB; the destination format should be ignored).  I can't
> remember if there are any piglit tests that need to be updated to account
> for this fix.
Most of MSAA piglit tests blit to a fbo. So, this change won't
effect them. But, It might be useful to change few of them to
blit to window system framebuffer.
>
>>
>>
>>     use_wm_prog = true;
>>     memset(&wm_prog_key, 0, sizeof(wm_prog_key));
>> @@ -2129,6 +2143,9 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
>> brw_context *brw,
>>     wm_prog_key.x_scale = 2.0;
>>     wm_prog_key.y_scale = src_mt->num_samples / 2.0;
>>
>> +   /* Filter used for framebuffer blitting. */
>> +   tex_filter = filter;
>> +
>>     /* The render path must be configured to use the same number of
>> samples as
>>      * the destination buffer.
>>      */
>> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> index 518d7f5..5d01b73 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> @@ -230,9 +230,14 @@ gen7_blorp_emit_sampler_state(struct brw_context
>> *brw,
>>                        32, &sampler_offset);
>>     memset(sampler, 0, sizeof(*sampler));
>>
>> -   sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
>> +   if (params->tex_filter == GL_NEAREST) {
>> +      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
>> +      sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
>> +   } else {
>> +      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
>> +      sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
>> +   }
>>     sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
>> -   sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
>>
>>     sampler->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
>>     sampler->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
>> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> index d6643ca..de5f8f2 100644
>> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> @@ -1552,7 +1552,7 @@ intel_miptree_updownsample(struct brw_context *brw,
>>                             width, height,
>>                             dst_x0, dst_y0,
>>                             width, height,
>> -                           false, false /*mirror x, y*/);
>> +                           GL_NEAREST, false, false /*mirror x, y*/);
>>
>>     if (src->stencil_mt) {
>>        brw_blorp_blit_miptrees(brw,
>> @@ -1562,7 +1562,7 @@ intel_miptree_updownsample(struct brw_context *brw,
>>                                width, height,
>>                                dst_x0, dst_y0,
>>                                width, height,
>> -                              false, false /*mirror x, y*/);
>> +                              GL_NEAREST, false, false /*mirror x, y*/);
>>     }
>>  }
>>
>> --
>> 1.8.1.4
>>
>