[Mesa-dev] [PATCH V3 3/3] i965/blorp: Add support for single sample scaled blit with bilinear filter

Anuj Phogat anuj.phogat at gmail.com
Wed Aug 14 18:28:27 PDT 2013


Currently single sample scaled blits with GL_LINEAR filter falls
back to meta path. Patch removes this limitation in BLORP engine
and implements single sample scaled blit with bilinear filter.
No piglit, gles3 regressions are observed with this patch on Ivybridge.

V2: Use "sample" message to utilize the linear filtering functionality
built in to hardware.
V3: Define a bool variable (bilinear_filter) to handle the conditions
for GL_LINEAR blits.

Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
---
One subtest of piglit test fbo-blit-stetch fails on Sandybridge with following logs:
45x79 (0, 0)-(45, 79) => 200x150 (0, 0)-(200, 150) stretch_x stretch_y linear
Probe at (97,0)
  Expected: 0.562500 0.437500 0.000000
  Observed: 0.576471 0.423529 0.000000

After reviewing the changes in this patch, it seems more like a small precision loss
is causing this failure. Using piglit_set_tolerance_for_bits(7, 7, 7, 7) in test makes
it to pass. No other piglit/gles3 regressions on SNB.

 src/mesa/drivers/dri/i965/brw_blorp.h         |  7 +-
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 96 +++++++++++++++++----------
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  4 +-
 3 files changed, 69 insertions(+), 38 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index 49862b8..dceb4fc 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -44,7 +44,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
                         float src_x1, float src_y1,
                         float dst_x0, float dst_y0,
                         float dst_x1, float dst_y1,
-                        bool mirror_x, bool mirror_y);
+                        GLenum filter, bool mirror_x, bool mirror_y);
 
 bool
 brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
@@ -333,6 +333,9 @@ struct brw_blorp_blit_prog_key
     */
    float x_scale;
    float y_scale;
+
+   /* True for blits with filter = GL_LINEAR. */
+   bool bilinear_filter;
 };
 
 class brw_blorp_blit_params : public brw_blorp_params
@@ -347,7 +350,7 @@ public:
                          GLfloat src_x1, GLfloat src_y1,
                          GLfloat dst_x0, GLfloat dst_y0,
                          GLfloat dst_x1, GLfloat dst_y1,
-                         bool mirror_x, bool mirror_y);
+                         GLenum filter, bool mirror_x, bool mirror_y);
 
    virtual uint32_t get_wm_prog(struct brw_context *brw,
                                 brw_blorp_prog_data **prog_data) const;
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 8c0db48..f07d39f 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -133,7 +133,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
                         float src_x1, float src_y1,
                         float dst_x0, float dst_y0,
                         float dst_x1, float dst_y1,
-                        bool mirror_x, bool mirror_y)
+                        GLenum filter, bool mirror_x, bool mirror_y)
 {
    /* Get ready to blit.  This includes depth resolving the src and dst
     * buffers if necessary.  Note: it's not necessary to do a color resolve on
@@ -161,7 +161,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
                                 src_x1, src_y1,
                                 dst_x0, dst_y0,
                                 dst_x1, dst_y1,
-                                mirror_x, mirror_y);
+                                filter, mirror_x, mirror_y);
    brw_blorp_exec(brw, &params);
 
    intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer);
@@ -173,7 +173,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
               struct intel_renderbuffer *dst_irb,
               GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
               GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
-              bool mirror_x, bool mirror_y)
+              GLenum filter, bool mirror_x, bool mirror_y)
 {
    /* Find source/dst miptrees */
    struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
@@ -185,7 +185,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
                            dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
                            srcX0, srcY0, srcX1, srcY1,
                            dstX0, dstY0, dstX1, dstY1,
-                           mirror_x, mirror_y);
+                           filter, mirror_x, mirror_y);
 
    intel_renderbuffer_set_needs_downsample(dst_irb);
 }
@@ -245,14 +245,6 @@ try_blorp_blit(struct brw_context *brw,
    fixup_mirroring(mirror_y, srcY0, srcY1);
    fixup_mirroring(mirror_y, dstY0, dstY1);
 
-   /* Linear filtering is not yet implemented in blorp engine. So, fallback
-    * to other blit paths.
-    */
-   if ((srcX1 - srcX0 != dstX1 - dstX0 ||
-        srcY1 - srcY0 != dstY1 - dstY0) &&
-       filter == GL_LINEAR)
-      return false;
-
    /* If the destination rectangle needs to be clipped or scissored, do so.
     */
    if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
@@ -304,7 +296,7 @@ try_blorp_blit(struct brw_context *brw,
 	 if (dst_irb)
             do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
                           srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
-                          mirror_x, mirror_y);
+                          filter, mirror_x, mirror_y);
       }
       break;
    case GL_DEPTH_BUFFER_BIT:
@@ -316,7 +308,7 @@ try_blorp_blit(struct brw_context *brw,
          return false;
       do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
                     srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
-                    mirror_x, mirror_y);
+                    filter, mirror_x, mirror_y);
       break;
    case GL_STENCIL_BUFFER_BIT:
       src_irb =
@@ -327,7 +319,7 @@ try_blorp_blit(struct brw_context *brw,
          return false;
       do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
                     srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
-                    mirror_x, mirror_y);
+                    filter, mirror_x, mirror_y);
       break;
    default:
       assert(false);
@@ -396,7 +388,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
                            dst_mt, dst_image->Level, dst_image->Face + slice,
                            srcX0, srcY0, srcX1, srcY1,
                            dstX0, dstY0, dstX1, dstY1,
-                           false, mirror_y);
+                           GL_NEAREST, false, mirror_y);
 
    /* If we're copying to a packed depth stencil texture and the source
     * framebuffer has separate stencil, we need to also copy the stencil data
@@ -420,7 +412,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
                                  dst_image->Face + slice,
                                  srcX0, srcY0, srcX1, srcY1,
                                  dstX0, dstY0, dstX1, dstY1,
-                                 false, mirror_y);
+                                 GL_NEAREST, false, mirror_y);
       }
    }
 
@@ -863,9 +855,10 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
        * the same as the configuration of the texture, then we need to adjust
        * the coordinates to compensate for the difference.
        */
-      if (tex_tiled_w != key->src_tiled_w ||
-          key->tex_samples != key->src_samples ||
-          key->tex_layout != key->src_layout) {
+      if ((tex_tiled_w != key->src_tiled_w ||
+           key->tex_samples != key->src_samples ||
+           key->tex_layout != key->src_layout) &&
+          !key->bilinear_filter) {
          encode_msaa(key->src_samples, key->src_layout);
          /* Now (X, Y, S) = detile(src_tiling, offset) */
          translate_tiling(key->src_tiled_w, tex_tiled_w);
@@ -873,15 +866,20 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
          decode_msaa(key->tex_samples, key->tex_layout);
       }
 
-      /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).
-       *
-       * In other words: X, Y, and S now contain values which, when passed to
-       * the texturing unit, will cause data to be read from the correct
-       * memory location.  So we can fetch the texel now.
-       */
-      if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
-         mcs_fetch();
-      texel_fetch(texture_data[0]);
+      if (key->bilinear_filter) {
+         sample(texture_data[0]);
+      }
+      else {
+         /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).
+          *
+          * In other words: X, Y, and S now contain values which, when passed to
+          * the texturing unit, will cause data to be read from the correct
+          * memory location.  So we can fetch the texel now.
+          */
+         if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
+            mcs_fetch();
+         texel_fetch(texture_data[0]);
+      }
    }
 
    /* Finally, write the fetched (or blended) value to the render target and
@@ -1442,14 +1440,15 @@ brw_blorp_blit_program::translate_dst_to_src()
       brw_RNDD(&func, Yp_f, Y_f);
       brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));
       brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
-   } else {
+      SWAP_XY_AND_XPYP();
+   } else if (!key->bilinear_filter) {
       /* Round the float coordinates down to nearest integer by moving to
        * UD registers.
        */
       brw_MOV(&func, Xp, X_f);
       brw_MOV(&func, Yp, Y_f);
+      SWAP_XY_AND_XPYP();
    }
-   SWAP_XY_AND_XPYP();
    brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
 }
 
@@ -1873,10 +1872,18 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
    for (int arg = 0; arg < num_args; ++arg) {
       switch (args[arg]) {
       case SAMPLER_MESSAGE_ARG_U_FLOAT:
-         brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), X);
+         if (key->bilinear_filter)
+            brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F),
+                    retype(X, BRW_REGISTER_TYPE_F));
+         else
+            brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), X);
          break;
       case SAMPLER_MESSAGE_ARG_V_FLOAT:
-         brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), Y);
+         if (key->bilinear_filter)
+            brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F),
+                    retype(Y, BRW_REGISTER_TYPE_F));
+         else
+            brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), Y);
          break;
       case SAMPLER_MESSAGE_ARG_U_INT:
          brw_MOV(&func, mrf, X);
@@ -2050,6 +2057,7 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
                                              GLfloat src_x1, GLfloat src_y1,
                                              GLfloat dst_x0, GLfloat dst_y0,
                                              GLfloat dst_x1, GLfloat dst_y1,
+                                             GLenum filter,
                                              bool mirror_x, bool mirror_y)
 {
    struct gl_context *ctx = &brw->ctx;
@@ -2058,7 +2066,24 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
    src.set(brw, src_mt, src_level, src_layer);
    dst.set(brw, dst_mt, dst_level, dst_layer);
 
-   src.brw_surfaceformat = dst.brw_surfaceformat;
+   /* Even though we do multisample resolves at the time of the blit, OpenGL
+    * specification defines them as if they happen at the time of rendering,
+    * which means that the type of averaging we do during the resolve should
+    * only depend on the source format; the destination format should be
+    * ignored. But, specification doesn't seem to be strict about it.
+    *
+    * It has been observed that mulitisample resolves produce slightly better
+    * looking images when averaging is done using destination format. NVIDIA's
+    * proprietary OpenGL driver also follow this approach. So, we choose to
+    * follow it in our driver.
+    *
+    * Following if-else block takes care of this exception made for
+    * multisampled resolves.
+    */
+   if (src.num_samples > 1)
+      src.brw_surfaceformat = dst.brw_surfaceformat;
+   else
+      dst.brw_surfaceformat = src.brw_surfaceformat;
 
    use_wm_prog = true;
    memset(&wm_prog_key, 0, sizeof(wm_prog_key));
@@ -2129,6 +2154,9 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
    wm_prog_key.x_scale = 2.0;
    wm_prog_key.y_scale = src_mt->num_samples / 2.0;
 
+   if (filter == GL_LINEAR)
+      wm_prog_key.bilinear_filter = true;
+
    /* The render path must be configured to use the same number of samples as
     * the destination buffer.
     */
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index d6643ca..de5f8f2 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1552,7 +1552,7 @@ intel_miptree_updownsample(struct brw_context *brw,
                            width, height,
                            dst_x0, dst_y0,
                            width, height,
-                           false, false /*mirror x, y*/);
+                           GL_NEAREST, false, false /*mirror x, y*/);
 
    if (src->stencil_mt) {
       brw_blorp_blit_miptrees(brw,
@@ -1562,7 +1562,7 @@ intel_miptree_updownsample(struct brw_context *brw,
                               width, height,
                               dst_x0, dst_y0,
                               width, height,
-                              false, false /*mirror x, y*/);
+                              GL_NEAREST, false, false /*mirror x, y*/);
    }
 }
 
-- 
1.8.1.4



More information about the mesa-dev mailing list