<div dir="ltr">On 5 August 2013 15:37, Anuj Phogat <span dir="ltr">&lt;<a href="mailto:anuj.phogat@gmail.com" target="_blank">anuj.phogat@gmail.com</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote">
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Currently single sample scaled blits with GL_LINEAR filter fall<br>
back to meta path. Patch removes this limitation in BLORP engine<br>
and implements single sample scaled blit with bilinear filter.<br>
No piglit or gles3 regressions are observed with this patch. Piglit<br>
test case patches to verify this implementation are out on piglit<br>
mailing list.<br></blockquote><div><br><div>I'm uncomfortable with the approach taken in this patch,
because it doesn't make use of the bilinear filtering capability built
into the sampling hardware.<br><br>Back when you were implementing
EXT_framebuffer_multisample_blit_scaled, there was good reason not to
use the sampler's bilinear filtering capability--because it doesn't work
properly for multisampled textures. But for scaled blitting of
single-sampled textures it should work fine, and in all likelihood it
will be faster than doing manual bilinear filtering in the shader.
Also, there's a higher risk of making mistakes if we manually implement
bilinear filtering in the shader.<br><br></div><div>I'd recommend instead using the "sample" message to read from the surface when doing GL_LINEAR filtering.<br></div><div><br></div><br> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
Signed-off-by: Anuj Phogat &lt;<a href="mailto:anuj.phogat@gmail.com">anuj.phogat@gmail.com</a>&gt;<br>
---<br>
src/mesa/drivers/dri/i965/brw_blorp.h | 7 +-<br>
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 157 ++++++++++++++++++++------<br>
src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 +-<br>
3 files changed, 132 insertions(+), 36 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h<br>
index 49862b8..be40625 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_blorp.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h<br>
@@ -44,7 +44,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,<br>
float src_x1, float src_y1,<br>
float dst_x0, float dst_y0,<br>
float dst_x1, float dst_y1,<br>
- bool mirror_x, bool mirror_y);<br>
+ GLenum filter, bool mirror_x, bool mirror_y);<br>
<br>
bool<br>
brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,<br>
@@ -333,6 +333,9 @@ struct brw_blorp_blit_prog_key<br>
*/<br>
float x_scale;<br>
float y_scale;<br>
+<br>
+ /* True for single sample scaled blits with linear filter. */<br>
+ bool bilinear_filter;<br>
};<br>
<br>
class brw_blorp_blit_params : public brw_blorp_params<br>
@@ -347,7 +350,7 @@ public:<br>
GLfloat src_x1, GLfloat src_y1,<br>
GLfloat dst_x0, GLfloat dst_y0,<br>
GLfloat dst_x1, GLfloat dst_y1,<br>
- bool mirror_x, bool mirror_y);<br>
+ GLenum filter, bool mirror_x, bool mirror_y);<br>
<br>
virtual uint32_t get_wm_prog(struct brw_context *brw,<br>
brw_blorp_prog_data **prog_data) const;<br>
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp<br>
index 8c0db48..0a28026 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp<br>
@@ -133,7 +133,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,<br>
float src_x1, float src_y1,<br>
float dst_x0, float dst_y0,<br>
float dst_x1, float dst_y1,<br>
- bool mirror_x, bool mirror_y)<br>
+ GLenum filter, bool mirror_x, bool mirror_y)<br>
{<br>
/* Get ready to blit. This includes depth resolving the src and dst<br>
* buffers if necessary. Note: it's not necessary to do a color resolve on<br>
@@ -161,7 +161,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,<br>
src_x1, src_y1,<br>
dst_x0, dst_y0,<br>
dst_x1, dst_y1,<br>
- mirror_x, mirror_y);<br>
+ filter, mirror_x, mirror_y);<br>
brw_blorp_exec(brw, &amp;params);<br>
<br>
intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer);<br>
@@ -173,7 +173,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,<br>
struct intel_renderbuffer *dst_irb,<br>
GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,<br>
GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,<br>
- bool mirror_x, bool mirror_y)<br>
+ GLenum filter, bool mirror_x, bool mirror_y)<br>
{<br>
/* Find source/dst miptrees */<br>
struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);<br>
@@ -185,7 +185,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,<br>
dst_mt, dst_irb->mt_level, dst_irb->mt_layer,<br>
srcX0, srcY0, srcX1, srcY1,<br>
dstX0, dstY0, dstX1, dstY1,<br>
- mirror_x, mirror_y);<br>
+ filter, mirror_x, mirror_y);<br>
<br>
intel_renderbuffer_set_needs_downsample(dst_irb);<br>
}<br>
@@ -245,14 +245,6 @@ try_blorp_blit(struct brw_context *brw,<br>
fixup_mirroring(mirror_y, srcY0, srcY1);<br>
fixup_mirroring(mirror_y, dstY0, dstY1);<br>
<br>
- /* Linear filtering is not yet implemented in blorp engine. So, fallback<br>
- * to other blit paths.<br>
- */<br>
- if ((srcX1 - srcX0 != dstX1 - dstX0 ||<br>
- srcY1 - srcY0 != dstY1 - dstY0) &&<br>
- filter == GL_LINEAR)<br>
- return false;<br>
-<br>
/* If the destination rectangle needs to be clipped or scissored, do so.<br>
*/<br>
if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,<br>
@@ -304,7 +296,7 @@ try_blorp_blit(struct brw_context *brw,<br>
if (dst_irb)<br>
do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,<br>
srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,<br>
- mirror_x, mirror_y);<br>
+ filter, mirror_x, mirror_y);<br>
}<br>
break;<br>
case GL_DEPTH_BUFFER_BIT:<br>
@@ -316,7 +308,7 @@ try_blorp_blit(struct brw_context *brw,<br>
return false;<br>
do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,<br>
srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,<br>
- mirror_x, mirror_y);<br>
+ filter, mirror_x, mirror_y);<br>
break;<br>
case GL_STENCIL_BUFFER_BIT:<br>
src_irb =<br>
@@ -327,7 +319,7 @@ try_blorp_blit(struct brw_context *brw,<br>
return false;<br>
do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,<br>
srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,<br>
- mirror_x, mirror_y);<br>
+ filter, mirror_x, mirror_y);<br>
break;<br>
default:<br>
assert(false);<br>
@@ -396,7 +388,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,<br>
dst_mt, dst_image->Level, dst_image->Face + slice,<br>
srcX0, srcY0, srcX1, srcY1,<br>
dstX0, dstY0, dstX1, dstY1,<br>
- false, mirror_y);<br>
+ GL_NEAREST, false, mirror_y);<br>
<br>
/* If we're copying to a packed depth stencil texture and the source<br>
* framebuffer has separate stencil, we need to also copy the stencil data<br>
@@ -420,7 +412,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,<br>
dst_image->Face + slice,<br>
srcX0, srcY0, srcX1, srcY1,<br>
dstX0, dstY0, dstX1, dstY1,<br>
- false, mirror_y);<br>
+ GL_NEAREST, false, mirror_y);<br>
}<br>
}<br>
<br>
@@ -637,6 +629,7 @@ private:<br>
void single_to_blend();<br>
void manual_blend_average(unsigned num_samples);<br>
void manual_blend_bilinear(unsigned num_samples);<br>
+ void single_sample_bilinear_filter(void);<br>
void sample(struct brw_reg dst);<br>
void texel_fetch(struct brw_reg dst);<br>
void mcs_fetch();<br>
@@ -873,15 +866,19 @@ brw_blorp_blit_program::compile(struct brw_context *brw,<br>
decode_msaa(key->tex_samples, key->tex_layout);<br>
}<br>
<br>
- /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).<br>
- *<br>
- * In other words: X, Y, and S now contain values which, when passed to<br>
- * the texturing unit, will cause data to be read from the correct<br>
- * memory location. So we can fetch the texel now.<br>
- */<br>
- if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)<br>
- mcs_fetch();<br>
- texel_fetch(texture_data[0]);<br>
+ if (key->blit_scaled && key->bilinear_filter)<br>
+ single_sample_bilinear_filter();<br>
+ else {<br>
+ /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).<br>
+ *<br>
+ * In other words: X, Y, and S now contain values which, when passed to<br>
+ * the texturing unit, will cause data to be read from the correct<br>
+ * memory location. So we can fetch the texel now.<br>
+ */<br>
+ if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)<br>
+ mcs_fetch();<br>
+ texel_fetch(texture_data[0]);<br>
+ }<br>
}<br>
<br>
/* Finally, write the fetched (or blended) value to the render target and<br>
@@ -947,7 +944,7 @@ brw_blorp_blit_program::alloc_regs()<br>
reg += 2;<br>
}<br>
<br>
- if (key->blit_scaled && key->blend) {<br>
+ if (key->blit_scaled) {<br>
this->x_sample_coords = brw_vec8_grf(reg, 0);<br>
reg += 2;<br>
this->y_sample_coords = brw_vec8_grf(reg, 0);<br>
@@ -1442,6 +1439,22 @@ brw_blorp_blit_program::translate_dst_to_src()<br>
brw_RNDD(&func, Yp_f, Y_f);<br>
brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));<br>
brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));<br>
+ } else if (key->blit_scaled && key->bilinear_filter && !key->blend) {<br>
+ /* Adjust coordinates so that integers represent pixel centers rather<br>
+ * than pixel edges.<br>
+ */<br>
+ brw_ADD(&func, X_f, X_f, brw_imm_f(-0.5));<br>
+ brw_ADD(&func, Y_f, Y_f, brw_imm_f(-0.5));<br>
+<br>
+ /* Store the fractional parts to be used as bilinear interpolation<br>
+ * coefficients.<br>
+ */<br>
+ brw_FRC(&func, x_frac, X_f);<br>
+ brw_FRC(&func, y_frac, Y_f);<br>
+<br>
+ /* Round the float coordinates down to nearest integer */<br>
+ brw_MOV(&func, Xp, X_f);<br>
+ brw_MOV(&func, Yp, Y_f);<br>
} else {<br>
/* Round the float coordinates down to nearest integer by moving to<br>
* UD registers.<br>
@@ -1765,6 +1778,74 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)<br>
#undef SAMPLE<br>
}<br>
<br>
+void<br>
+brw_blorp_blit_program::single_sample_bilinear_filter(void)<br>
+{<br>
+ /* Bilinear filtering is performed by following operations:<br>
+ * - Compute the colors from 2x2 pixels (vec4 c0, vec4 c1, vec4 c2, vec4 c3)<br>
+ * - linearly interpolate colors c0 and c1 in X<br>
+ * - linearly interpolate colors c2 and c3 in X<br>
+ * - linearly interpolate the results of last two operations in Y<br>
+ *<br>
+ * result = lrp(lrp(c0, c1, x_frac), lrp(c2, c3, x_frac), y_frac)<br>
+ */<br>
+ ASSERT(s_is_zero);<br>
+ SWAP_XY_AND_XPYP();<br>
+<br>
+ /* Move the X1, Y1 from Float to UD registers. */<br>
+ brw_MOV(&func, vec1(t1), rect_grid_x1);<br>
+ brw_MOV(&func, vec1(t2), rect_grid_y1);<br>
+<br>
+ for (unsigned i = 0; i < 4; ++i) {<br>
+ assert(i < ARRAY_SIZE(texture_data));<br>
+<br>
+ /* Compute pixel coordinates */<br>
+ brw_ADD(&func, vec16(X), Xp, brw_imm_ud(i % 2));<br>
+ brw_ADD(&func, vec16(Y), Yp, brw_imm_ud(i / 2));<br>
+<br>
+ /* Clamp the X, Y texture coordinates to properly handle the sampling of<br>
+ * texels on texture edges.<br>
+ */<br>
+ clamp_tex_coords(vec16(X), vec16(Y),<br>
+ brw_imm_ud(0), brw_imm_ud(0),<br>
+ vec1(t1), vec1(t2));<br>
+<br>
+ /* The MCS value we fetch has to match up with the pixel that we're<br>
+ * sampling from. Since we sample from different pixels in each<br>
+ * iteration of this "for" loop, the call to mcs_fetch() should be<br>
+ * here inside the loop after computing the pixel coordinates.<br>
+ */<br>
+ if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)<br>
+ mcs_fetch();<br>
+<br>
+ texel_fetch(texture_data[i]);<br>
+ }<br>
+<br>
+#define PIXEL(x, y) offset(texture_data[x], y)<br>
+ brw_set_access_mode(&func, BRW_ALIGN_16);<br>
+ for (int index = 3; index > 0; ) {<br>
+ /* Since we're doing SIMD16, 4 color channels fit into 8 registers.<br>
+ * Counter value of 8 in 'for' loop below is used to interpolate all<br>
+ * the color components.<br>
+ */<br>
+ for (int k = 0; k < 8; ++k)<br>
+ brw_LRP(&func,<br>
+ vec8(PIXEL(index - 1, k)),<br>
+ offset(x_frac, k & 1),<br>
+ PIXEL(index, k),<br>
+ PIXEL(index - 1, k));<br>
+ index -= 2;<br>
+ }<br>
+ for (int k = 0; k < 8; ++k)<br>
+ brw_LRP(&func,<br>
+ vec8(PIXEL(0, k)),<br>
+ offset(y_frac, k & 1),<br>
+ vec8(PIXEL(2, k)),<br>
+ vec8(PIXEL(0, k)));<br>
+ brw_set_access_mode(&func, BRW_ALIGN_1);<br>
+#undef PIXEL<br>
+}<br>
+<br>
/**<br>
* Emit code to look up a value in the texture using the SAMPLE message (which<br>
* does blending of MSAA surfaces).<br>
@@ -2050,6 +2131,7 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,<br>
GLfloat src_x1, GLfloat src_y1,<br>
GLfloat dst_x0, GLfloat dst_y0,<br>
GLfloat dst_x1, GLfloat dst_y1,<br>
+ GLenum filter,<br>
bool mirror_x, bool mirror_y)<br>
{<br>
struct gl_context *ctx = &brw->ctx;<br>
@@ -2058,7 +2140,10 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,<br>
src.set(brw, src_mt, src_level, src_layer);<br>
dst.set(brw, dst_mt, dst_level, dst_layer);<br>
<br>
- src.brw_surfaceformat = dst.brw_surfaceformat;<br>
+ if (src.num_samples > 1)<br>
+ src.brw_surfaceformat = dst.brw_surfaceformat;<br>
+ else<br>
+ dst.brw_surfaceformat = src.brw_surfaceformat;<br>
<br>
use_wm_prog = true;<br>
memset(&wm_prog_key, 0, sizeof(wm_prog_key));<br>
@@ -2123,11 +2208,19 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,<br>
((dst_x1 - dst_x0) == (src_x1 - src_x0) &&<br>
(dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true;<br>
<br>
- /* Scaling factors used for bilinear filtering in multisample scaled<br>
- * blits.<br>
+ /* Scaling factors used for bilinear filtering in single-sample/multisample<br>
+ * scaled blits.<br>
*/<br>
- wm_prog_key.x_scale = 2.0;<br>
- wm_prog_key.y_scale = src_mt->num_samples / 2.0;<br>
+ wm_prog_key.x_scale = 1.0;<br>
+ wm_prog_key.y_scale = 1.0;<br>
+ if (wm_prog_key.blit_scaled && src_mt->num_samples > 0) {<br>
+ wm_prog_key.x_scale = 2.0;<br>
+ wm_prog_key.y_scale = src_mt->num_samples / 2.0;<br>
+ }<br>
+<br>
+ /* bilinear filtering or not */<br>
+ if (filter == GL_LINEAR)<br>
+ wm_prog_key.bilinear_filter = true;<br>
<br>
/* The render path must be configured to use the same number of samples as<br>
* the destination buffer.<br>
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c<br>
index d6643ca..de5f8f2 100644<br>
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c<br>
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c<br>
@@ -1552,7 +1552,7 @@ intel_miptree_updownsample(struct brw_context *brw,<br>
width, height,<br>
dst_x0, dst_y0,<br>
width, height,<br>
- false, false /*mirror x, y*/);<br>
+ GL_NEAREST, false, false /*mirror x, y*/);<br>
<br>
if (src->stencil_mt) {<br>
brw_blorp_blit_miptrees(brw,<br>
@@ -1562,7 +1562,7 @@ intel_miptree_updownsample(struct brw_context *brw,<br>
width, height,<br>
dst_x0, dst_y0,<br>
width, height,<br>
- false, false /*mirror x, y*/);<br>
+ GL_NEAREST, false, false /*mirror x, y*/);<br>
}<br>
}<br>
<span class=""><font color="#888888"><br>
--<br>
1.8.1.4<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>