[Mesa-dev] [PATCH 3/3] i965/blorp: Add support for single sample scaled blit with bilinear filter
Anuj Phogat
anuj.phogat at gmail.com
Wed Aug 7 09:31:25 PDT 2013
On Tue, Aug 6, 2013 at 3:05 PM, Paul Berry <stereotype441 at gmail.com> wrote:
> On 5 August 2013 15:37, Anuj Phogat <anuj.phogat at gmail.com> wrote:
>
>> Currently single sample scaled blits with GL_LINEAR filter falls
>> back to meta path. Patch removes this limitation in BLORP engine
>> and implements single sample scaled blit with bilinear filter.
>> No piglit, gles3 regressions are obeserved with this patch. Piglit
>> test case patches to verify this implementation are out on piglit
>> mailing list.
>>
>
> I'm uncomfortable with the approach taken in this patch, because it
> doesn't make use of the bilinear filtering capability built into the
> sampling hardware.
>
> Back when you were implementing EXT_framebuffer_multisample_blit_scaled,
> there was good reason not to use the sampler's bilinear filtering
> capability--because it doesn't work properly for multisampled textures.
> But for scaled blitting of single-sampled textures it should work fine, and
> in all likelihood it will be faster than doing manual bilinear filtering in
> the shader. Also, there's a higher risk of making mistakes if we manually
> implement bilinear filtering in the shader.
>
> I'd recommend instead using the "sample" message to read from the surface
> when doing GL_LINEAR filtering.
>
> Yes, It'll be more efficient to use inbuilt support for bilinear filtering
in hardware. I'll modify my patch
to use "sample" message.
>
>
>
>>
>> Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
>> ---
>> src/mesa/drivers/dri/i965/brw_blorp.h | 7 +-
>> src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 157
>> ++++++++++++++++++++------
>> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 +-
>> 3 files changed, 132 insertions(+), 36 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h
>> b/src/mesa/drivers/dri/i965/brw_blorp.h
>> index 49862b8..be40625 100644
>> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
>> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
>> @@ -44,7 +44,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>> float src_x1, float src_y1,
>> float dst_x0, float dst_y0,
>> float dst_x1, float dst_y1,
>> - bool mirror_x, bool mirror_y);
>> + GLenum filter, bool mirror_x, bool mirror_y);
>>
>> bool
>> brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
>> @@ -333,6 +333,9 @@ struct brw_blorp_blit_prog_key
>> */
>> float x_scale;
>> float y_scale;
>> +
>> + /* True for single sample scaled blits with linear filter. */
>> + bool bilinear_filter;
>> };
>>
>> class brw_blorp_blit_params : public brw_blorp_params
>> @@ -347,7 +350,7 @@ public:
>> GLfloat src_x1, GLfloat src_y1,
>> GLfloat dst_x0, GLfloat dst_y0,
>> GLfloat dst_x1, GLfloat dst_y1,
>> - bool mirror_x, bool mirror_y);
>> + GLenum filter, bool mirror_x, bool mirror_y);
>>
>> virtual uint32_t get_wm_prog(struct brw_context *brw,
>> brw_blorp_prog_data **prog_data) const;
>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> index 8c0db48..0a28026 100644
>> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> @@ -133,7 +133,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>> float src_x1, float src_y1,
>> float dst_x0, float dst_y0,
>> float dst_x1, float dst_y1,
>> - bool mirror_x, bool mirror_y)
>> + GLenum filter, bool mirror_x, bool mirror_y)
>> {
>> /* Get ready to blit. This includes depth resolving the src and dst
>> * buffers if necessary. Note: it's not necessary to do a color
>> resolve on
>> @@ -161,7 +161,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>> src_x1, src_y1,
>> dst_x0, dst_y0,
>> dst_x1, dst_y1,
>> - mirror_x, mirror_y);
>> + filter, mirror_x, mirror_y);
>> brw_blorp_exec(brw, ¶ms);
>>
>> intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level,
>> dst_layer);
>> @@ -173,7 +173,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
>> buffer_bit,
>> struct intel_renderbuffer *dst_irb,
>> GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
>> GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
>> - bool mirror_x, bool mirror_y)
>> + GLenum filter, bool mirror_x, bool mirror_y)
>> {
>> /* Find source/dst miptrees */
>> struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
>> @@ -185,7 +185,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
>> buffer_bit,
>> dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
>> srcX0, srcY0, srcX1, srcY1,
>> dstX0, dstY0, dstX1, dstY1,
>> - mirror_x, mirror_y);
>> + filter, mirror_x, mirror_y);
>>
>> intel_renderbuffer_set_needs_downsample(dst_irb);
>> }
>> @@ -245,14 +245,6 @@ try_blorp_blit(struct brw_context *brw,
>> fixup_mirroring(mirror_y, srcY0, srcY1);
>> fixup_mirroring(mirror_y, dstY0, dstY1);
>>
>> - /* Linear filtering is not yet implemented in blorp engine. So,
>> fallback
>> - * to other blit paths.
>> - */
>> - if ((srcX1 - srcX0 != dstX1 - dstX0 ||
>> - srcY1 - srcY0 != dstY1 - dstY0) &&
>> - filter == GL_LINEAR)
>> - return false;
>> -
>> /* If the destination rectangle needs to be clipped or scissored, do
>> so.
>> */
>> if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
>> @@ -304,7 +296,7 @@ try_blorp_blit(struct brw_context *brw,
>> if (dst_irb)
>> do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0,
>> srcY0,
>> srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>> - mirror_x, mirror_y);
>> + filter, mirror_x, mirror_y);
>> }
>> break;
>> case GL_DEPTH_BUFFER_BIT:
>> @@ -316,7 +308,7 @@ try_blorp_blit(struct brw_context *brw,
>> return false;
>> do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>> srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>> - mirror_x, mirror_y);
>> + filter, mirror_x, mirror_y);
>> break;
>> case GL_STENCIL_BUFFER_BIT:
>> src_irb =
>> @@ -327,7 +319,7 @@ try_blorp_blit(struct brw_context *brw,
>> return false;
>> do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>> srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>> - mirror_x, mirror_y);
>> + filter, mirror_x, mirror_y);
>> break;
>> default:
>> assert(false);
>> @@ -396,7 +388,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
>> dst_mt, dst_image->Level, dst_image->Face +
>> slice,
>> srcX0, srcY0, srcX1, srcY1,
>> dstX0, dstY0, dstX1, dstY1,
>> - false, mirror_y);
>> + GL_NEAREST, false, mirror_y);
>>
>> /* If we're copying to a packed depth stencil texture and the source
>> * framebuffer has separate stencil, we need to also copy the stencil
>> data
>> @@ -420,7 +412,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
>> dst_image->Face + slice,
>> srcX0, srcY0, srcX1, srcY1,
>> dstX0, dstY0, dstX1, dstY1,
>> - false, mirror_y);
>> + GL_NEAREST, false, mirror_y);
>> }
>> }
>>
>> @@ -637,6 +629,7 @@ private:
>> void single_to_blend();
>> void manual_blend_average(unsigned num_samples);
>> void manual_blend_bilinear(unsigned num_samples);
>> + void single_sample_bilinear_filter(void);
>> void sample(struct brw_reg dst);
>> void texel_fetch(struct brw_reg dst);
>> void mcs_fetch();
>> @@ -873,15 +866,19 @@ brw_blorp_blit_program::compile(struct brw_context
>> *brw,
>> decode_msaa(key->tex_samples, key->tex_layout);
>> }
>>
>> - /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
>> offset)).
>> - *
>> - * In other words: X, Y, and S now contain values which, when
>> passed to
>> - * the texturing unit, will cause data to be read from the correct
>> - * memory location. So we can fetch the texel now.
>> - */
>> - if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
>> - mcs_fetch();
>> - texel_fetch(texture_data[0]);
>> + if (key->blit_scaled && key->bilinear_filter)
>> + single_sample_bilinear_filter();
>> + else {
>> + /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
>> offset)).
>> + *
>> + * In other words: X, Y, and S now contain values which, when
>> passed to
>> + * the texturing unit, will cause data to be read from the
>> correct
>> + * memory location. So we can fetch the texel now.
>> + */
>> + if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
>> + mcs_fetch();
>> + texel_fetch(texture_data[0]);
>> + }
>> }
>>
>> /* Finally, write the fetched (or blended) value to the render target
>> and
>> @@ -947,7 +944,7 @@ brw_blorp_blit_program::alloc_regs()
>> reg += 2;
>> }
>>
>> - if (key->blit_scaled && key->blend) {
>> + if (key->blit_scaled) {
>> this->x_sample_coords = brw_vec8_grf(reg, 0);
>> reg += 2;
>> this->y_sample_coords = brw_vec8_grf(reg, 0);
>> @@ -1442,6 +1439,22 @@ brw_blorp_blit_program::translate_dst_to_src()
>> brw_RNDD(&func, Yp_f, Y_f);
>> brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));
>> brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
>> + } else if (key->blit_scaled && key->bilinear_filter && !key->blend) {
>> + /* Adjust coordinates so that integers represent pixel centers
>> rather
>> + * than pixel edges.
>> + */
>> + brw_ADD(&func, X_f, X_f, brw_imm_f(-0.5));
>> + brw_ADD(&func, Y_f, Y_f, brw_imm_f(-0.5));
>> +
>> + /* Store the fractional parts to be used as bilinear interpolation
>> + * coefficients.
>> + */
>> + brw_FRC(&func, x_frac, X_f);
>> + brw_FRC(&func, y_frac, Y_f);
>> +
>> + /* Round the float coordinates down to nearest integer */
>> + brw_MOV(&func, Xp, X_f);
>> + brw_MOV(&func, Yp, Y_f);
>> } else {
>> /* Round the float coordinates down to nearest integer by moving to
>> * UD registers.
>> @@ -1765,6 +1778,74 @@
>> brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
>> #undef SAMPLE
>> }
>>
>> +void
>> +brw_blorp_blit_program::single_sample_bilinear_filter(void)
>> +{
>> + /* Bilinear filtering is performed by following operations:
>> + * - Compute the colors from 2x2 pixels (vec4 c0, vec4 c1, vec4 c2,
>> vec4 c3)
>> + * - linearly interpolate colors c0 and c1 in X
>> + * - linearly interpolate colors c2 and c3 in X
>> + * - linearly interpolate the results of last two operations in Y
>> + *
>> + * result = lrp(lrp(c0 + c1) + lrp(c2 + c3))
>> + */
>> + ASSERT(s_is_zero);
>> + SWAP_XY_AND_XPYP();
>> +
>> + /* Move the X1, Y1 from Float to UD regsiters. */
>> + brw_MOV(&func, vec1(t1), rect_grid_x1);
>> + brw_MOV(&func, vec1(t2), rect_grid_y1);
>> +
>> + for (unsigned i = 0; i < 4; ++i) {
>> + assert(i < ARRAY_SIZE(texture_data));
>> +
>> + /* Compute pixel coordinates */
>> + brw_ADD(&func, vec16(X), Xp, brw_imm_ud(i % 2));
>> + brw_ADD(&func, vec16(Y), Yp, brw_imm_ud(i / 2));
>> +
>> + /* Clamp the X, Y texture coordinates to properly handle the
>> sampling of
>> + * texels on texture edges.
>> + */
>> + clamp_tex_coords(vec16(X), vec16(Y),
>> + brw_imm_ud(0), brw_imm_ud(0),
>> + vec1(t1), vec1(t2));
>> +
>> + /* The MCS value we fetch has to match up with the pixel that we're
>> + * sampling from. Since we sample from different pixels in each
>> + * iteration of this "for" loop, the call to mcs_fetch() should be
>> + * here inside the loop after computing the pixel coordinates.
>> + */
>> + if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
>> + mcs_fetch();
>> +
>> + texel_fetch(texture_data[i]);
>> + }
>> +
>> +#define PIXEL(x, y) offset(texture_data[x], y)
>> + brw_set_access_mode(&func, BRW_ALIGN_16);
>> + for (int index = 3; index > 0; ) {
>> + /* Since we're doing SIMD16, 4 color channels fits in to 8
>> registers.
>> + * Counter value of 8 in 'for' loop below is used to interpolate
>> all
>> + * the color components.
>> + */
>> + for (int k = 0; k < 8; ++k)
>> + brw_LRP(&func,
>> + vec8(PIXEL(index - 1, k)),
>> + offset(x_frac, k & 1),
>> + PIXEL(index, k),
>> + PIXEL(index - 1, k));
>> + index -= 2;
>> + }
>> + for (int k = 0; k < 8; ++k)
>> + brw_LRP(&func,
>> + vec8(PIXEL(0, k)),
>> + offset(y_frac, k & 1),
>> + vec8(PIXEL(2, k)),
>> + vec8(PIXEL(0, k)));
>> + brw_set_access_mode(&func, BRW_ALIGN_1);
>> +#undef PIXEL
>> +}
>> +
>> /**
>> * Emit code to look up a value in the texture using the SAMPLE message
>> (which
>> * does blending of MSAA surfaces).
>> @@ -2050,6 +2131,7 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
>> brw_context *brw,
>> GLfloat src_x1, GLfloat
>> src_y1,
>> GLfloat dst_x0, GLfloat
>> dst_y0,
>> GLfloat dst_x1, GLfloat
>> dst_y1,
>> + GLenum filter,
>> bool mirror_x, bool
>> mirror_y)
>> {
>> struct gl_context *ctx = &brw->ctx;
>> @@ -2058,7 +2140,10 @@
>> brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
>> src.set(brw, src_mt, src_level, src_layer);
>> dst.set(brw, dst_mt, dst_level, dst_layer);
>>
>> - src.brw_surfaceformat = dst.brw_surfaceformat;
>> + if (src.num_samples > 1)
>> + src.brw_surfaceformat = dst.brw_surfaceformat;
>> + else
>> + dst.brw_surfaceformat = src.brw_surfaceformat;
>>
>> use_wm_prog = true;
>> memset(&wm_prog_key, 0, sizeof(wm_prog_key));
>> @@ -2123,11 +2208,19 @@
>> brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
>> ((dst_x1 - dst_x0) == (src_x1 - src_x0) &&
>> (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true;
>>
>> - /* Scaling factors used for bilinear filtering in multisample scaled
>> - * blits.
>> + /* Scaling factors used for bilinear filtering in
>> single-sample/multisample
>> + * scaled blits.
>> */
>> - wm_prog_key.x_scale = 2.0;
>> - wm_prog_key.y_scale = src_mt->num_samples / 2.0;
>> + wm_prog_key.x_scale = 1.0;
>> + wm_prog_key.y_scale = 1.0;
>> + if (wm_prog_key.blit_scaled && src_mt->num_samples > 0) {
>> + wm_prog_key.x_scale = 2.0;
>> + wm_prog_key.y_scale = src_mt->num_samples / 2.0;
>> + }
>> +
>> + /* bilinear filtering or not */
>> + if (filter == GL_LINEAR)
>> + wm_prog_key.bilinear_filter = true;
>>
>> /* The render path must be configured to use the same number of
>> samples as
>> * the destination buffer.
>> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> index d6643ca..de5f8f2 100644
>> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> @@ -1552,7 +1552,7 @@ intel_miptree_updownsample(struct brw_context *brw,
>> width, height,
>> dst_x0, dst_y0,
>> width, height,
>> - false, false /*mirror x, y*/);
>> + GL_NEAREST, false, false /*mirror x, y*/);
>>
>> if (src->stencil_mt) {
>> brw_blorp_blit_miptrees(brw,
>> @@ -1562,7 +1562,7 @@ intel_miptree_updownsample(struct brw_context *brw,
>> width, height,
>> dst_x0, dst_y0,
>> width, height,
>> - false, false /*mirror x, y*/);
>> + GL_NEAREST, false, false /*mirror x, y*/);
>> }
>> }
>>
>> --
>> 1.8.1.4
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130807/0ed64dc0/attachment-0001.html>
More information about the mesa-dev
mailing list