[Mesa-dev] [PATCH 3/3] i965/blorp: Add support for single sample scaled blit with bilinear filter

Tue Aug 6 15:05:53 PDT 2013

On 5 August 2013 15:37, Anuj Phogat <anuj.phogat at gmail.com> wrote:

> Currently single sample scaled blits with GL_LINEAR filter fall
> back to meta path. This patch removes this limitation in BLORP engine
> and implements single sample scaled blit with bilinear filter.
> No piglit, gles3 regressions are observed with this patch. Piglit
> test case patches to verify this implementation are out on piglit
> mailing list.
>

I'm uncomfortable with the approach taken in this patch, because it doesn't
make use of the bilinear filtering capability built into the sampling
hardware.

Back when you were implementing EXT_framebuffer_multisample_blit_scaled,
there was good reason not to use the sampler's bilinear filtering
capability--because it doesn't work properly for multisampled textures.
But for scaled blitting of single-sampled textures it should work fine, and
in all likelihood it will be faster than doing manual bilinear filtering in
the shader.  Also, there's a higher risk of making mistakes if we manually
implement bilinear filtering in the shader.

I'd recommend instead using the "sample" message to read from the surface
when doing GL_LINEAR filtering.

>
> Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.h         |   7 +-
>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 157
> ++++++++++++++++++++------
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c |   4 +-
>  3 files changed, 132 insertions(+), 36 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h
> b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 49862b8..be40625 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -44,7 +44,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>                          float src_x1, float src_y1,
>                          float dst_x0, float dst_y0,
>                          float dst_x1, float dst_y1,
> -                        bool mirror_x, bool mirror_y);
> +                        GLenum filter, bool mirror_x, bool mirror_y);
>
>  bool
>  brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
> @@ -333,6 +333,9 @@ struct brw_blorp_blit_prog_key
>      */
>     float x_scale;
>     float y_scale;
> +
> +   /* True for single sample scaled blits with linear filter. */
> +   bool bilinear_filter;
>  };
>
>  class brw_blorp_blit_params : public brw_blorp_params
> @@ -347,7 +350,7 @@ public:
>                           GLfloat src_x1, GLfloat src_y1,
>                           GLfloat dst_x0, GLfloat dst_y0,
>                           GLfloat dst_x1, GLfloat dst_y1,
> -                         bool mirror_x, bool mirror_y);
> +                         GLenum filter, bool mirror_x, bool mirror_y);
>
>     virtual uint32_t get_wm_prog(struct brw_context *brw,
>                                  brw_blorp_prog_data **prog_data) const;
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> index 8c0db48..0a28026 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> @@ -133,7 +133,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>                          float src_x1, float src_y1,
>                          float dst_x0, float dst_y0,
>                          float dst_x1, float dst_y1,
> -                        bool mirror_x, bool mirror_y)
> +                        GLenum filter, bool mirror_x, bool mirror_y)
>  {
>     /* Get ready to blit.  This includes depth resolving the src and dst
>      * buffers if necessary.  Note: it's not necessary to do a color
> resolve on
> @@ -161,7 +161,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>                                  src_x1, src_y1,
>                                  dst_x0, dst_y0,
>                                  dst_x1, dst_y1,
> -                                mirror_x, mirror_y);
> +                                filter, mirror_x, mirror_y);
>     brw_blorp_exec(brw, &params);
>
>     intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level,
> dst_layer);
> @@ -173,7 +173,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
> buffer_bit,
>                struct intel_renderbuffer *dst_irb,
>                GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
>                GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
> -              bool mirror_x, bool mirror_y)
> +              GLenum filter, bool mirror_x, bool mirror_y)
>  {
>     /* Find source/dst miptrees */
>     struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
> @@ -185,7 +185,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
> buffer_bit,
>                             dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
>                             srcX0, srcY0, srcX1, srcY1,
>                             dstX0, dstY0, dstX1, dstY1,
> -                           mirror_x, mirror_y);
> +                           filter, mirror_x, mirror_y);
>
>     intel_renderbuffer_set_needs_downsample(dst_irb);
>  }
> @@ -245,14 +245,6 @@ try_blorp_blit(struct brw_context *brw,
>     fixup_mirroring(mirror_y, srcY0, srcY1);
>     fixup_mirroring(mirror_y, dstY0, dstY1);
>
> -   /* Linear filtering is not yet implemented in blorp engine. So,
> fallback
> -    * to other blit paths.
> -    */
> -   if ((srcX1 - srcX0 != dstX1 - dstX0 ||
> -        srcY1 - srcY0 != dstY1 - dstY0) &&
> -       filter == GL_LINEAR)
> -      return false;
> -
>     /* If the destination rectangle needs to be clipped or scissored, do
> so.
>      */
>     if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
> @@ -304,7 +296,7 @@ try_blorp_blit(struct brw_context *brw,
>          if (dst_irb)
>              do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>                            srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
> -                          mirror_x, mirror_y);
> +                          filter, mirror_x, mirror_y);
>        }
>        break;
>     case GL_DEPTH_BUFFER_BIT:
> @@ -316,7 +308,7 @@ try_blorp_blit(struct brw_context *brw,
>           return false;
>        do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>                      srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
> -                    mirror_x, mirror_y);
> +                    filter, mirror_x, mirror_y);
>        break;
>     case GL_STENCIL_BUFFER_BIT:
>        src_irb =
> @@ -327,7 +319,7 @@ try_blorp_blit(struct brw_context *brw,
>           return false;
>        do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>                      srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
> -                    mirror_x, mirror_y);
> +                    filter, mirror_x, mirror_y);
>        break;
>     default:
>        assert(false);
> @@ -396,7 +388,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
>                             dst_mt, dst_image->Level, dst_image->Face +
> slice,
>                             srcX0, srcY0, srcX1, srcY1,
>                             dstX0, dstY0, dstX1, dstY1,
> -                           false, mirror_y);
> +                           GL_NEAREST, false, mirror_y);
>
>     /* If we're copying to a packed depth stencil texture and the source
>      * framebuffer has separate stencil, we need to also copy the stencil
> data
> @@ -420,7 +412,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
>                                   dst_image->Face + slice,
>                                   srcX0, srcY0, srcX1, srcY1,
>                                   dstX0, dstY0, dstX1, dstY1,
> -                                 false, mirror_y);
> +                                 GL_NEAREST, false, mirror_y);
>        }
>     }
>
> @@ -637,6 +629,7 @@ private:
>     void single_to_blend();
>     void manual_blend_average(unsigned num_samples);
>     void manual_blend_bilinear(unsigned num_samples);
> +   void single_sample_bilinear_filter(void);
>     void sample(struct brw_reg dst);
>     void texel_fetch(struct brw_reg dst);
>     void mcs_fetch();
> @@ -873,15 +866,19 @@ brw_blorp_blit_program::compile(struct brw_context
> *brw,
>           decode_msaa(key->tex_samples, key->tex_layout);
>        }
>
> -      /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
> offset)).
> -       *
> -       * In other words: X, Y, and S now contain values which, when
> passed to
> -       * the texturing unit, will cause data to be read from the correct
> -       * memory location.  So we can fetch the texel now.
> -       */
> -      if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
> -         mcs_fetch();
> -      texel_fetch(texture_data[0]);
> +      if (key->blit_scaled && key->bilinear_filter)
> +         single_sample_bilinear_filter();
> +      else {
> +         /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
> offset)).
> +          *
> +          * In other words: X, Y, and S now contain values which, when
> passed to
> +          * the texturing unit, will cause data to be read from the
> correct
> +          * memory location.  So we can fetch the texel now.
> +          */
> +         if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
> +            mcs_fetch();
> +         texel_fetch(texture_data[0]);
> +      }
>     }
>
>     /* Finally, write the fetched (or blended) value to the render target
> and
> @@ -947,7 +944,7 @@ brw_blorp_blit_program::alloc_regs()
>        reg += 2;
>     }
>
> -   if (key->blit_scaled && key->blend) {
> +   if (key->blit_scaled) {
>        this->x_sample_coords = brw_vec8_grf(reg, 0);
>        reg += 2;
>        this->y_sample_coords = brw_vec8_grf(reg, 0);
> @@ -1442,6 +1439,22 @@ brw_blorp_blit_program::translate_dst_to_src()
>        brw_RNDD(&func, Yp_f, Y_f);
>        brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));
>        brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
> +   } else if (key->blit_scaled && key->bilinear_filter && !key->blend) {
> +      /* Adjust coordinates so that integers represent pixel centers
> rather
> +      * than pixel edges.
> +      */
> +      brw_ADD(&func, X_f, X_f, brw_imm_f(-0.5));
> +      brw_ADD(&func, Y_f, Y_f, brw_imm_f(-0.5));
> +
> +      /* Store the fractional parts to be used as bilinear interpolation
> +       * coefficients.
> +       */
> +      brw_FRC(&func, x_frac, X_f);
> +      brw_FRC(&func, y_frac, Y_f);
> +
> +      /* Round the float coordinates down to nearest integer */
> +      brw_MOV(&func, Xp, X_f);
> +      brw_MOV(&func, Yp, Y_f);
>     } else {
>        /* Round the float coordinates down to nearest integer by moving to
>         * UD registers.
> @@ -1765,6 +1778,74 @@
> brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
>  #undef SAMPLE
>  }
>
> +void
> +brw_blorp_blit_program::single_sample_bilinear_filter(void)
> +{
> +   /* Bilinear filtering is performed by following operations:
> +    * - Compute the colors from 2x2 pixels (vec4 c0, vec4 c1, vec4 c2,
> vec4 c3)
> +    * - linearly interpolate colors c0 and c1 in X
> +    * - linearly interpolate colors c2 and c3 in X
> +    * - linearly interpolate the results of last two operations in Y
> +    *
> +    *   result = lrp(lrp(c0, c1, x_frac), lrp(c2, c3, x_frac), y_frac)
> +    */
> +   ASSERT(s_is_zero);
> +   SWAP_XY_AND_XPYP();
> +
> +   /* Move the X1, Y1 from Float to UD registers. */
> +   brw_MOV(&func, vec1(t1), rect_grid_x1);
> +   brw_MOV(&func, vec1(t2), rect_grid_y1);
> +
> +   for (unsigned i = 0; i < 4; ++i) {
> +      assert(i < ARRAY_SIZE(texture_data));
> +
> +      /* Compute pixel coordinates */
> +      brw_ADD(&func, vec16(X), Xp, brw_imm_ud(i % 2));
> +      brw_ADD(&func, vec16(Y), Yp, brw_imm_ud(i / 2));
> +
> +      /* Clamp the X, Y texture coordinates to properly handle the
> sampling of
> +       * texels on texture edges.
> +       */
> +      clamp_tex_coords(vec16(X), vec16(Y),
> +                       brw_imm_ud(0), brw_imm_ud(0),
> +                       vec1(t1), vec1(t2));
> +
> +      /* The MCS value we fetch has to match up with the pixel that we're
> +       * sampling from. Since we sample from different pixels in each
> +       * iteration of this "for" loop, the call to mcs_fetch() should be
> +       * here inside the loop after computing the pixel coordinates.
> +       */
> +      if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
> +         mcs_fetch();
> +
> +      texel_fetch(texture_data[i]);
> +   }
> +
> +#define PIXEL(x, y) offset(texture_data[x], y)
> +   brw_set_access_mode(&func, BRW_ALIGN_16);
> +   for (int index = 3; index > 0; ) {
> +      /* Since we're doing SIMD16, 4 color channels fit into 8
registers.
> +       * Counter value of 8 in 'for' loop below is used to interpolate all
> +       * the color components.
> +       */
> +      for (int k = 0; k < 8; ++k)
> +         brw_LRP(&func,
> +                 vec8(PIXEL(index - 1, k)),
> +                 offset(x_frac, k & 1),
> +                 PIXEL(index, k),
> +                 PIXEL(index - 1, k));
> +      index -= 2;
> +   }
> +   for (int k = 0; k < 8; ++k)
> +      brw_LRP(&func,
> +              vec8(PIXEL(0, k)),
> +              offset(y_frac, k & 1),
> +              vec8(PIXEL(2, k)),
> +              vec8(PIXEL(0, k)));
> +   brw_set_access_mode(&func, BRW_ALIGN_1);
> +#undef PIXEL
> +}
> +
>  /**
>   * Emit code to look up a value in the texture using the SAMPLE message
> (which
>   * does blending of MSAA surfaces).
> @@ -2050,6 +2131,7 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
> brw_context *brw,
>                                               GLfloat src_x1, GLfloat
> src_y1,
>                                               GLfloat dst_x0, GLfloat
> dst_y0,
>                                               GLfloat dst_x1, GLfloat
> dst_y1,
> +                                             GLenum filter,
>                                               bool mirror_x, bool mirror_y)
>  {
>     struct gl_context *ctx = &brw->ctx;
> @@ -2058,7 +2140,10 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
> brw_context *brw,
>     src.set(brw, src_mt, src_level, src_layer);
>     dst.set(brw, dst_mt, dst_level, dst_layer);
>
> -   src.brw_surfaceformat = dst.brw_surfaceformat;
> +   if (src.num_samples > 1)
> +      src.brw_surfaceformat = dst.brw_surfaceformat;
> +   else
> +      dst.brw_surfaceformat = src.brw_surfaceformat;
>
>     use_wm_prog = true;
>     memset(&wm_prog_key, 0, sizeof(wm_prog_key));
> @@ -2123,11 +2208,19 @@
> brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
>        ((dst_x1 - dst_x0) == (src_x1 - src_x0) &&
>         (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true;
>
> -   /* Scaling factors used for bilinear filtering in multisample scaled
> -    * blits.
> +   /* Scaling factors used for bilinear filtering in
> single-sample/multisample
> +    * scaled blits.
>      */
> -   wm_prog_key.x_scale = 2.0;
> -   wm_prog_key.y_scale = src_mt->num_samples / 2.0;
> +   wm_prog_key.x_scale = 1.0;
> +   wm_prog_key.y_scale = 1.0;
> +   if (wm_prog_key.blit_scaled && src_mt->num_samples > 0) {
> +      wm_prog_key.x_scale = 2.0;
> +      wm_prog_key.y_scale = src_mt->num_samples / 2.0;
> +   }
> +
> +   /* bilinear filtering or not */
> +   if (filter == GL_LINEAR)
> +      wm_prog_key.bilinear_filter = true;
>
>     /* The render path must be configured to use the same number of
> samples as
>      * the destination buffer.
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index d6643ca..de5f8f2 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -1552,7 +1552,7 @@ intel_miptree_updownsample(struct brw_context *brw,
>                             width, height,
>                             dst_x0, dst_y0,
>                             width, height,
> -                           false, false /*mirror x, y*/);
> +                           GL_NEAREST, false, false /*mirror x, y*/);
>
>     if (src->stencil_mt) {
>        brw_blorp_blit_miptrees(brw,
> @@ -1562,7 +1562,7 @@ intel_miptree_updownsample(struct brw_context *brw,
>                                width, height,
>                                dst_x0, dst_y0,
>                                width, height,
> -                              false, false /*mirror x, y*/);
> +                              GL_NEAREST, false, false /*mirror x, y*/);
>     }
>  }
>
> --
> 1.8.1.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130806/d995e317/attachment-0001.html>