[Mesa-dev] [PATCH 3/3] i965/blorp: Add support for single sample scaled blit with bilinear filter

Mon Aug 5 15:37:01 PDT 2013

Currently single sample scaled blits with GL_LINEAR filter falls
back to meta path. Patch removes this limitation in BLORP engine
and implements single sample scaled blit with bilinear filter.
No piglit, gles3 regressions are obeserved with this patch. Piglit
test case patches to verify this implementation are out on piglit
mailing list.

Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
---
 src/mesa/drivers/dri/i965/brw_blorp.h         |   7 +-
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 157 ++++++++++++++++++++------
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |   4 +-
 3 files changed, 132 insertions(+), 36 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index 49862b8..be40625 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -44,7 +44,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
                         float src_x1, float src_y1,
                         float dst_x0, float dst_y0,
                         float dst_x1, float dst_y1,
-                        bool mirror_x, bool mirror_y);
+                        GLenum filter, bool mirror_x, bool mirror_y);
 
 bool
 brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
@@ -333,6 +333,9 @@ struct brw_blorp_blit_prog_key
     */
    float x_scale;
    float y_scale;
+
+   /* True for single sample scaled blits with linear filter. */
+   bool bilinear_filter;
 };
 
 class brw_blorp_blit_params : public brw_blorp_params
@@ -347,7 +350,7 @@ public:
                          GLfloat src_x1, GLfloat src_y1,
                          GLfloat dst_x0, GLfloat dst_y0,
                          GLfloat dst_x1, GLfloat dst_y1,
-                         bool mirror_x, bool mirror_y);
+                         GLenum filter, bool mirror_x, bool mirror_y);
 
    virtual uint32_t get_wm_prog(struct brw_context *brw,
                                 brw_blorp_prog_data **prog_data) const;
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 8c0db48..0a28026 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -133,7 +133,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
                         float src_x1, float src_y1,
                         float dst_x0, float dst_y0,
                         float dst_x1, float dst_y1,
-                        bool mirror_x, bool mirror_y)
+                        GLenum filter, bool mirror_x, bool mirror_y)
 {
    /* Get ready to blit.  This includes depth resolving the src and dst
     * buffers if necessary.  Note: it's not necessary to do a color resolve on
@@ -161,7 +161,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
                                 src_x1, src_y1,
                                 dst_x0, dst_y0,
                                 dst_x1, dst_y1,
-                                mirror_x, mirror_y);
+                                filter, mirror_x, mirror_y);
    brw_blorp_exec(brw, &params);
 
    intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer);
@@ -173,7 +173,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
               struct intel_renderbuffer *dst_irb,
               GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
               GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
-              bool mirror_x, bool mirror_y)
+              GLenum filter, bool mirror_x, bool mirror_y)
 {
    /* Find source/dst miptrees */
    struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
@@ -185,7 +185,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
                            dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
                            srcX0, srcY0, srcX1, srcY1,
                            dstX0, dstY0, dstX1, dstY1,
-                           mirror_x, mirror_y);
+                           filter, mirror_x, mirror_y);
 
    intel_renderbuffer_set_needs_downsample(dst_irb);
 }
@@ -245,14 +245,6 @@ try_blorp_blit(struct brw_context *brw,
    fixup_mirroring(mirror_y, srcY0, srcY1);
    fixup_mirroring(mirror_y, dstY0, dstY1);
 
-   /* Linear filtering is not yet implemented in blorp engine. So, fallback
-    * to other blit paths.
-    */
-   if ((srcX1 - srcX0 != dstX1 - dstX0 ||
-        srcY1 - srcY0 != dstY1 - dstY0) &&
-       filter == GL_LINEAR)
-      return false;
-
    /* If the destination rectangle needs to be clipped or scissored, do so.
     */
    if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
@@ -304,7 +296,7 @@ try_blorp_blit(struct brw_context *brw,
 	 if (dst_irb)
             do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
                           srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
-                          mirror_x, mirror_y);
+                          filter, mirror_x, mirror_y);
       }
       break;
    case GL_DEPTH_BUFFER_BIT:
@@ -316,7 +308,7 @@ try_blorp_blit(struct brw_context *brw,
          return false;
       do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
                     srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
-                    mirror_x, mirror_y);
+                    filter, mirror_x, mirror_y);
       break;
    case GL_STENCIL_BUFFER_BIT:
       src_irb =
@@ -327,7 +319,7 @@ try_blorp_blit(struct brw_context *brw,
          return false;
       do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
                     srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
-                    mirror_x, mirror_y);
+                    filter, mirror_x, mirror_y);
       break;
    default:
       assert(false);
@@ -396,7 +388,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
                            dst_mt, dst_image->Level, dst_image->Face + slice,
                            srcX0, srcY0, srcX1, srcY1,
                            dstX0, dstY0, dstX1, dstY1,
-                           false, mirror_y);
+                           GL_NEAREST, false, mirror_y);
 
    /* If we're copying to a packed depth stencil texture and the source
     * framebuffer has separate stencil, we need to also copy the stencil data
@@ -420,7 +412,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
                                  dst_image->Face + slice,
                                  srcX0, srcY0, srcX1, srcY1,
                                  dstX0, dstY0, dstX1, dstY1,
-                                 false, mirror_y);
+                                 GL_NEAREST, false, mirror_y);
       }
    }
 
@@ -637,6 +629,7 @@ private:
    void single_to_blend();
    void manual_blend_average(unsigned num_samples);
    void manual_blend_bilinear(unsigned num_samples);
+   void single_sample_bilinear_filter(void);
    void sample(struct brw_reg dst);
    void texel_fetch(struct brw_reg dst);
    void mcs_fetch();
@@ -873,15 +866,19 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
          decode_msaa(key->tex_samples, key->tex_layout);
       }
 
-      /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).
-       *
-       * In other words: X, Y, and S now contain values which, when passed to
-       * the texturing unit, will cause data to be read from the correct
-       * memory location.  So we can fetch the texel now.
-       */
-      if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
-         mcs_fetch();
-      texel_fetch(texture_data[0]);
+      if (key->blit_scaled && key->bilinear_filter)
+         single_sample_bilinear_filter();
+      else {
+         /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).
+          *
+          * In other words: X, Y, and S now contain values which, when passed to
+          * the texturing unit, will cause data to be read from the correct
+          * memory location.  So we can fetch the texel now.
+          */
+         if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
+            mcs_fetch();
+         texel_fetch(texture_data[0]);
+      }
    }
 
    /* Finally, write the fetched (or blended) value to the render target and
@@ -947,7 +944,7 @@ brw_blorp_blit_program::alloc_regs()
       reg += 2;
    }
 
-   if (key->blit_scaled && key->blend) {
+   if (key->blit_scaled) {
       this->x_sample_coords = brw_vec8_grf(reg, 0);
       reg += 2;
       this->y_sample_coords = brw_vec8_grf(reg, 0);
@@ -1442,6 +1439,22 @@ brw_blorp_blit_program::translate_dst_to_src()
       brw_RNDD(&func, Yp_f, Y_f);
       brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));
       brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
+   } else if (key->blit_scaled && key->bilinear_filter && !key->blend) {
+      /* Adjust coordinates so that integers represent pixel centers rather
+      * than pixel edges.
+      */
+      brw_ADD(&func, X_f, X_f, brw_imm_f(-0.5));
+      brw_ADD(&func, Y_f, Y_f, brw_imm_f(-0.5));
+
+      /* Store the fractional parts to be used as bilinear interpolation
+       * coefficients.
+       */
+      brw_FRC(&func, x_frac, X_f);
+      brw_FRC(&func, y_frac, Y_f);
+
+      /* Round the float coordinates down to nearest integer */
+      brw_MOV(&func, Xp, X_f);
+      brw_MOV(&func, Yp, Y_f);
    } else {
       /* Round the float coordinates down to nearest integer by moving to
        * UD registers.
@@ -1765,6 +1778,74 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
 #undef SAMPLE
 }
 
+void
+brw_blorp_blit_program::single_sample_bilinear_filter(void)
+{
+   /* Bilinear filtering is performed by following operations:
+    * - Compute the colors from 2x2 pixels (vec4 c0, vec4 c1, vec4 c2, vec4 c3)
+    * - linearly interpolate colors c0 and c1 in X
+    * - linearly interpolate colors c2 and c3 in X
+    * - linearly interpolate the results of last two operations in Y
+    *
+    *   result = lrp(lrp(c0 + c1) + lrp(c2 + c3))
+    */
+   ASSERT(s_is_zero);
+   SWAP_XY_AND_XPYP();
+
+   /* Move the X1, Y1 from Float to UD regsiters. */
+   brw_MOV(&func, vec1(t1), rect_grid_x1);
+   brw_MOV(&func, vec1(t2), rect_grid_y1);
+
+   for (unsigned i = 0; i < 4; ++i) {
+      assert(i < ARRAY_SIZE(texture_data));
+
+      /* Compute pixel coordinates */
+      brw_ADD(&func, vec16(X), Xp, brw_imm_ud(i % 2));
+      brw_ADD(&func, vec16(Y), Yp, brw_imm_ud(i / 2));
+
+      /* Clamp the X, Y texture coordinates to properly handle the sampling of
+       * texels on texture edges.
+       */
+      clamp_tex_coords(vec16(X), vec16(Y),
+                       brw_imm_ud(0), brw_imm_ud(0),
+                       vec1(t1), vec1(t2));
+
+      /* The MCS value we fetch has to match up with the pixel that we're
+       * sampling from. Since we sample from different pixels in each
+       * iteration of this "for" loop, the call to mcs_fetch() should be
+       * here inside the loop after computing the pixel coordinates.
+       */
+      if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
+         mcs_fetch();
+
+      texel_fetch(texture_data[i]);
+   }
+
+#define PIXEL(x, y) offset(texture_data[x], y)
+   brw_set_access_mode(&func, BRW_ALIGN_16);
+   for (int index = 3; index > 0; ) {
+      /* Since we're doing SIMD16, 4 color channels fits in to 8 registers.
+       * Counter value of 8 in 'for' loop below is used to interpolate all
+       * the color components.
+       */
+      for (int k = 0; k < 8; ++k)
+         brw_LRP(&func,
+                 vec8(PIXEL(index - 1, k)),
+                 offset(x_frac, k & 1),
+                 PIXEL(index, k),
+                 PIXEL(index - 1, k));
+      index -= 2;
+   }
+   for (int k = 0; k < 8; ++k)
+      brw_LRP(&func,
+              vec8(PIXEL(0, k)),
+              offset(y_frac, k & 1),
+              vec8(PIXEL(2, k)),
+              vec8(PIXEL(0, k)));
+   brw_set_access_mode(&func, BRW_ALIGN_1);
+#undef PIXEL
+}
+
 /**
  * Emit code to look up a value in the texture using the SAMPLE message (which
  * does blending of MSAA surfaces).
@@ -2050,6 +2131,7 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
                                              GLfloat src_x1, GLfloat src_y1,
                                              GLfloat dst_x0, GLfloat dst_y0,
                                              GLfloat dst_x1, GLfloat dst_y1,
+                                             GLenum filter,
                                              bool mirror_x, bool mirror_y)
 {
    struct gl_context *ctx = &brw->ctx;
@@ -2058,7 +2140,10 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
    src.set(brw, src_mt, src_level, src_layer);
    dst.set(brw, dst_mt, dst_level, dst_layer);
 
-   src.brw_surfaceformat = dst.brw_surfaceformat;
+   if (src.num_samples > 1)
+      src.brw_surfaceformat = dst.brw_surfaceformat;
+   else
+      dst.brw_surfaceformat = src.brw_surfaceformat;
 
    use_wm_prog = true;
    memset(&wm_prog_key, 0, sizeof(wm_prog_key));
@@ -2123,11 +2208,19 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
       ((dst_x1 - dst_x0) == (src_x1 - src_x0) &&
        (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true;
 
-   /* Scaling factors used for bilinear filtering in multisample scaled
-    * blits.
+   /* Scaling factors used for bilinear filtering in single-sample/multisample
+    * scaled blits.
     */
-   wm_prog_key.x_scale = 2.0;
-   wm_prog_key.y_scale = src_mt->num_samples / 2.0;
+   wm_prog_key.x_scale = 1.0;
+   wm_prog_key.y_scale = 1.0;
+   if (wm_prog_key.blit_scaled && src_mt->num_samples > 0) {
+      wm_prog_key.x_scale = 2.0;
+      wm_prog_key.y_scale = src_mt->num_samples / 2.0;
+   }
+
+   /* bilinear filtering or not */
+   if (filter == GL_LINEAR)
+      wm_prog_key.bilinear_filter = true;
 
    /* The render path must be configured to use the same number of samples as
     * the destination buffer.
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index d6643ca..de5f8f2 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1552,7 +1552,7 @@ intel_miptree_updownsample(struct brw_context *brw,
                            width, height,
                            dst_x0, dst_y0,
                            width, height,
-                           false, false /*mirror x, y*/);
+                           GL_NEAREST, false, false /*mirror x, y*/);
 
    if (src->stencil_mt) {
       brw_blorp_blit_miptrees(brw,
@@ -1562,7 +1562,7 @@ intel_miptree_updownsample(struct brw_context *brw,
                               width, height,
                               dst_x0, dst_y0,
                               width, height,
-                              false, false /*mirror x, y*/);
+                              GL_NEAREST, false, false /*mirror x, y*/);
    }
 }
 
-- 
1.8.1.4