[Mesa-dev] [PATCH 3/3] i965/blorp: Add support for single sample scaled blit with bilinear filter
Paul Berry
stereotype441 at gmail.com
Tue Aug 6 15:05:53 PDT 2013
On 5 August 2013 15:37, Anuj Phogat <anuj.phogat at gmail.com> wrote:
> Currently single sample scaled blits with GL_LINEAR filter fall
> back to the meta path. This patch removes this limitation in the BLORP
> engine and implements single sample scaled blit with bilinear filter.
> No piglit or gles3 regressions are observed with this patch. Piglit
> test case patches to verify this implementation are out on the piglit
> mailing list.
>
I'm uncomfortable with the approach taken in this patch, because it doesn't
make use of the bilinear filtering capability built into the sampling
hardware.
Back when you were implementing EXT_framebuffer_multisample_blit_scaled,
there was good reason not to use the sampler's bilinear filtering
capability--because it doesn't work properly for multisampled textures.
But for scaled blitting of single-sampled textures it should work fine, and
in all likelihood it will be faster than doing manual bilinear filtering in
the shader. Also, there's a higher risk of making mistakes if we manually
implement bilinear filtering in the shader.
I'd recommend instead using the "sample" message to read from the surface
when doing GL_LINEAR filtering.
>
> Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
> ---
> src/mesa/drivers/dri/i965/brw_blorp.h | 7 +-
> src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 157
> ++++++++++++++++++++------
> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 +-
> 3 files changed, 132 insertions(+), 36 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h
> b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 49862b8..be40625 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -44,7 +44,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
> float src_x1, float src_y1,
> float dst_x0, float dst_y0,
> float dst_x1, float dst_y1,
> - bool mirror_x, bool mirror_y);
> + GLenum filter, bool mirror_x, bool mirror_y);
>
> bool
> brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
> @@ -333,6 +333,9 @@ struct brw_blorp_blit_prog_key
> */
> float x_scale;
> float y_scale;
> +
> + /* True for single sample scaled blits with linear filter. */
> + bool bilinear_filter;
> };
>
> class brw_blorp_blit_params : public brw_blorp_params
> @@ -347,7 +350,7 @@ public:
> GLfloat src_x1, GLfloat src_y1,
> GLfloat dst_x0, GLfloat dst_y0,
> GLfloat dst_x1, GLfloat dst_y1,
> - bool mirror_x, bool mirror_y);
> + GLenum filter, bool mirror_x, bool mirror_y);
>
> virtual uint32_t get_wm_prog(struct brw_context *brw,
> brw_blorp_prog_data **prog_data) const;
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> index 8c0db48..0a28026 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> @@ -133,7 +133,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
> float src_x1, float src_y1,
> float dst_x0, float dst_y0,
> float dst_x1, float dst_y1,
> - bool mirror_x, bool mirror_y)
> + GLenum filter, bool mirror_x, bool mirror_y)
> {
> /* Get ready to blit. This includes depth resolving the src and dst
> * buffers if necessary. Note: it's not necessary to do a color
> resolve on
> @@ -161,7 +161,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
> src_x1, src_y1,
> dst_x0, dst_y0,
> dst_x1, dst_y1,
> - mirror_x, mirror_y);
> + filter, mirror_x, mirror_y);
> brw_blorp_exec(brw, &params);
>
> intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level,
> dst_layer);
> @@ -173,7 +173,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
> buffer_bit,
> struct intel_renderbuffer *dst_irb,
> GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
> GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
> - bool mirror_x, bool mirror_y)
> + GLenum filter, bool mirror_x, bool mirror_y)
> {
> /* Find source/dst miptrees */
> struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
> @@ -185,7 +185,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
> buffer_bit,
> dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
> srcX0, srcY0, srcX1, srcY1,
> dstX0, dstY0, dstX1, dstY1,
> - mirror_x, mirror_y);
> + filter, mirror_x, mirror_y);
>
> intel_renderbuffer_set_needs_downsample(dst_irb);
> }
> @@ -245,14 +245,6 @@ try_blorp_blit(struct brw_context *brw,
> fixup_mirroring(mirror_y, srcY0, srcY1);
> fixup_mirroring(mirror_y, dstY0, dstY1);
>
> - /* Linear filtering is not yet implemented in blorp engine. So,
> fallback
> - * to other blit paths.
> - */
> - if ((srcX1 - srcX0 != dstX1 - dstX0 ||
> - srcY1 - srcY0 != dstY1 - dstY0) &&
> - filter == GL_LINEAR)
> - return false;
> -
> /* If the destination rectangle needs to be clipped or scissored, do
> so.
> */
> if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
> @@ -304,7 +296,7 @@ try_blorp_blit(struct brw_context *brw,
> if (dst_irb)
> do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
> srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
> - mirror_x, mirror_y);
> + filter, mirror_x, mirror_y);
> }
> break;
> case GL_DEPTH_BUFFER_BIT:
> @@ -316,7 +308,7 @@ try_blorp_blit(struct brw_context *brw,
> return false;
> do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
> srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
> - mirror_x, mirror_y);
> + filter, mirror_x, mirror_y);
> break;
> case GL_STENCIL_BUFFER_BIT:
> src_irb =
> @@ -327,7 +319,7 @@ try_blorp_blit(struct brw_context *brw,
> return false;
> do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
> srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
> - mirror_x, mirror_y);
> + filter, mirror_x, mirror_y);
> break;
> default:
> assert(false);
> @@ -396,7 +388,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
> dst_mt, dst_image->Level, dst_image->Face +
> slice,
> srcX0, srcY0, srcX1, srcY1,
> dstX0, dstY0, dstX1, dstY1,
> - false, mirror_y);
> + GL_NEAREST, false, mirror_y);
>
> /* If we're copying to a packed depth stencil texture and the source
> * framebuffer has separate stencil, we need to also copy the stencil
> data
> @@ -420,7 +412,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
> dst_image->Face + slice,
> srcX0, srcY0, srcX1, srcY1,
> dstX0, dstY0, dstX1, dstY1,
> - false, mirror_y);
> + GL_NEAREST, false, mirror_y);
> }
> }
>
> @@ -637,6 +629,7 @@ private:
> void single_to_blend();
> void manual_blend_average(unsigned num_samples);
> void manual_blend_bilinear(unsigned num_samples);
> + void single_sample_bilinear_filter(void);
> void sample(struct brw_reg dst);
> void texel_fetch(struct brw_reg dst);
> void mcs_fetch();
> @@ -873,15 +866,19 @@ brw_blorp_blit_program::compile(struct brw_context
> *brw,
> decode_msaa(key->tex_samples, key->tex_layout);
> }
>
> - /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
> offset)).
> - *
> - * In other words: X, Y, and S now contain values which, when
> passed to
> - * the texturing unit, will cause data to be read from the correct
> - * memory location. So we can fetch the texel now.
> - */
> - if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
> - mcs_fetch();
> - texel_fetch(texture_data[0]);
> + if (key->blit_scaled && key->bilinear_filter)
> + single_sample_bilinear_filter();
> + else {
> + /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
> offset)).
> + *
> + * In other words: X, Y, and S now contain values which, when
> passed to
> + * the texturing unit, will cause data to be read from the
> correct
> + * memory location. So we can fetch the texel now.
> + */
> + if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
> + mcs_fetch();
> + texel_fetch(texture_data[0]);
> + }
> }
>
> /* Finally, write the fetched (or blended) value to the render target
> and
> @@ -947,7 +944,7 @@ brw_blorp_blit_program::alloc_regs()
> reg += 2;
> }
>
> - if (key->blit_scaled && key->blend) {
> + if (key->blit_scaled) {
> this->x_sample_coords = brw_vec8_grf(reg, 0);
> reg += 2;
> this->y_sample_coords = brw_vec8_grf(reg, 0);
> @@ -1442,6 +1439,22 @@ brw_blorp_blit_program::translate_dst_to_src()
> brw_RNDD(&func, Yp_f, Y_f);
> brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));
> brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
> + } else if (key->blit_scaled && key->bilinear_filter && !key->blend) {
> + /* Adjust coordinates so that integers represent pixel centers
> rather
> + * than pixel edges.
> + */
> + brw_ADD(&func, X_f, X_f, brw_imm_f(-0.5));
> + brw_ADD(&func, Y_f, Y_f, brw_imm_f(-0.5));
> +
> + /* Store the fractional parts to be used as bilinear interpolation
> + * coefficients.
> + */
> + brw_FRC(&func, x_frac, X_f);
> + brw_FRC(&func, y_frac, Y_f);
> +
> + /* Round the float coordinates down to nearest integer */
> + brw_MOV(&func, Xp, X_f);
> + brw_MOV(&func, Yp, Y_f);
> } else {
> /* Round the float coordinates down to nearest integer by moving to
> * UD registers.
> @@ -1765,6 +1778,74 @@
> brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
> #undef SAMPLE
> }
>
> +void
> +brw_blorp_blit_program::single_sample_bilinear_filter(void)
> +{
> + /* Bilinear filtering is performed by following operations:
> + * - Compute the colors from 2x2 pixels (vec4 c0, vec4 c1, vec4 c2,
> vec4 c3)
> + * - linearly interpolate colors c0 and c1 in X
> + * - linearly interpolate colors c2 and c3 in X
> + * - linearly interpolate the results of last two operations in Y
> + *
> + * result = lrp(lrp(c0 + c1) + lrp(c2 + c3))
> + */
> + ASSERT(s_is_zero);
> + SWAP_XY_AND_XPYP();
> +
> + /* Move the X1, Y1 from Float to UD registers. */
> + brw_MOV(&func, vec1(t1), rect_grid_x1);
> + brw_MOV(&func, vec1(t2), rect_grid_y1);
> +
> + for (unsigned i = 0; i < 4; ++i) {
> + assert(i < ARRAY_SIZE(texture_data));
> +
> + /* Compute pixel coordinates */
> + brw_ADD(&func, vec16(X), Xp, brw_imm_ud(i % 2));
> + brw_ADD(&func, vec16(Y), Yp, brw_imm_ud(i / 2));
> +
> + /* Clamp the X, Y texture coordinates to properly handle the
> sampling of
> + * texels on texture edges.
> + */
> + clamp_tex_coords(vec16(X), vec16(Y),
> + brw_imm_ud(0), brw_imm_ud(0),
> + vec1(t1), vec1(t2));
> +
> + /* The MCS value we fetch has to match up with the pixel that we're
> + * sampling from. Since we sample from different pixels in each
> + * iteration of this "for" loop, the call to mcs_fetch() should be
> + * here inside the loop after computing the pixel coordinates.
> + */
> + if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
> + mcs_fetch();
> +
> + texel_fetch(texture_data[i]);
> + }
> +
> +#define PIXEL(x, y) offset(texture_data[x], y)
> + brw_set_access_mode(&func, BRW_ALIGN_16);
> + for (int index = 3; index > 0; ) {
> + /* Since we're doing SIMD16, 4 color channels fits in to 8
> registers.
> + * Counter value of 8 in 'for' loop below is used to interpolate all
> + * the color components.
> + */
> + for (int k = 0; k < 8; ++k)
> + brw_LRP(&func,
> + vec8(PIXEL(index - 1, k)),
> + offset(x_frac, k & 1),
> + PIXEL(index, k),
> + PIXEL(index - 1, k));
> + index -= 2;
> + }
> + for (int k = 0; k < 8; ++k)
> + brw_LRP(&func,
> + vec8(PIXEL(0, k)),
> + offset(y_frac, k & 1),
> + vec8(PIXEL(2, k)),
> + vec8(PIXEL(0, k)));
> + brw_set_access_mode(&func, BRW_ALIGN_1);
> +#undef PIXEL
> +}
> +
> /**
> * Emit code to look up a value in the texture using the SAMPLE message
> (which
> * does blending of MSAA surfaces).
> @@ -2050,6 +2131,7 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
> brw_context *brw,
> GLfloat src_x1, GLfloat
> src_y1,
> GLfloat dst_x0, GLfloat
> dst_y0,
> GLfloat dst_x1, GLfloat
> dst_y1,
> + GLenum filter,
> bool mirror_x, bool mirror_y)
> {
> struct gl_context *ctx = &brw->ctx;
> @@ -2058,7 +2140,10 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
> brw_context *brw,
> src.set(brw, src_mt, src_level, src_layer);
> dst.set(brw, dst_mt, dst_level, dst_layer);
>
> - src.brw_surfaceformat = dst.brw_surfaceformat;
> + if (src.num_samples > 1)
> + src.brw_surfaceformat = dst.brw_surfaceformat;
> + else
> + dst.brw_surfaceformat = src.brw_surfaceformat;
>
> use_wm_prog = true;
> memset(&wm_prog_key, 0, sizeof(wm_prog_key));
> @@ -2123,11 +2208,19 @@
> brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
> ((dst_x1 - dst_x0) == (src_x1 - src_x0) &&
> (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true;
>
> - /* Scaling factors used for bilinear filtering in multisample scaled
> - * blits.
> + /* Scaling factors used for bilinear filtering in
> single-sample/multisample
> + * scaled blits.
> */
> - wm_prog_key.x_scale = 2.0;
> - wm_prog_key.y_scale = src_mt->num_samples / 2.0;
> + wm_prog_key.x_scale = 1.0;
> + wm_prog_key.y_scale = 1.0;
> + if (wm_prog_key.blit_scaled && src_mt->num_samples > 0) {
> + wm_prog_key.x_scale = 2.0;
> + wm_prog_key.y_scale = src_mt->num_samples / 2.0;
> + }
> +
> + /* bilinear filtering or not */
> + if (filter == GL_LINEAR)
> + wm_prog_key.bilinear_filter = true;
>
> /* The render path must be configured to use the same number of
> samples as
> * the destination buffer.
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index d6643ca..de5f8f2 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -1552,7 +1552,7 @@ intel_miptree_updownsample(struct brw_context *brw,
> width, height,
> dst_x0, dst_y0,
> width, height,
> - false, false /*mirror x, y*/);
> + GL_NEAREST, false, false /*mirror x, y*/);
>
> if (src->stencil_mt) {
> brw_blorp_blit_miptrees(brw,
> @@ -1562,7 +1562,7 @@ intel_miptree_updownsample(struct brw_context *brw,
> width, height,
> dst_x0, dst_y0,
> width, height,
> - false, false /*mirror x, y*/);
> + GL_NEAREST, false, false /*mirror x, y*/);
> }
> }
>
> --
> 1.8.1.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130806/d995e317/attachment-0001.html>
More information about the mesa-dev
mailing list