[Mesa-dev] [PATCH V3 1/2] i965/blorp: Add bilinear filtering of samples for multisample scaled blits
Anuj Phogat
anuj.phogat at gmail.com
Wed Jun 19 19:45:30 PDT 2013
Current implementation of ext_framebuffer_multisample_blit_scaled in
i965/blorp uses nearest filtering for multisample scaled blits. Using
nearest filtering produces blocky artifacts and negates the benefits
of MSAA. That is the reason why extension was not enabled on i965.
This patch implements the bilinear filtering of samples in blorp engine.
Images generated with this patch are free from blocky artifacts and show
big improvement in visual quality.
Observed no piglit and gles3 regressions.
V3:
- Algorithm used for filtering assumes a rectangular grid of samples
roughly corresponding to sample locations.
- Test the boundary conditions on the edges of texture.
Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
---
src/mesa/drivers/dri/i965/brw_blorp.h | 11 ++
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 257 +++++++++++++++++++++++++--
2 files changed, 258 insertions(+), 10 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index ffc27cc..0a15b89 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -319,6 +319,17 @@ struct brw_blorp_blit_prog_key
* than one sample per pixel.
*/
bool persample_msaa_dispatch;
+
+ /* True for scaled blitting. */
+ bool blit_scaled;
+
+ /* Source rectangle dimensions. Used to test boundary conditions in shader
+ * program.
+ */
+ float src_x0;
+ float src_y0;
+ float src_x1;
+ float src_y1;
};
class brw_blorp_blit_params : public brw_blorp_params
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 8694128..e99c9df 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -622,7 +622,8 @@ private:
void kill_if_outside_dst_rect();
void translate_dst_to_src();
void single_to_blend();
- void manual_blend(unsigned num_samples);
+ void manual_blend_average(unsigned num_samples);
+ void manual_blend_linear(unsigned num_samples);
void sample(struct brw_reg dst);
void texel_fetch(struct brw_reg dst);
void mcs_fetch();
@@ -676,6 +677,16 @@ private:
*/
struct brw_reg y_coords[2];
+ /* X, Y coordinates of the pixel from which we need to fetch the specific
+ * sample. These are used for multisample scaled blitting.
+ */
+ struct brw_reg x_sample_coords;
+ struct brw_reg y_sample_coords;
+
+ /* Store the values to use to interpolate in x and y directions */
+ struct brw_reg x_lerp;
+ struct brw_reg y_lerp;
+
/* Which element of x_coords and y_coords is currently in use.
*/
int xy_coord_index;
@@ -814,15 +825,17 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
* that we want to texture from. Exception: if we are blending, then S is
* irrelevant, because we are going to fetch all samples.
*/
- if (key->blend) {
+ if (key->blend && !key->blit_scaled) {
if (brw->intel.gen == 6) {
/* Gen6 hardware an automatically blend using the SAMPLE message */
single_to_blend();
sample(texture_data[0]);
} else {
/* Gen7+ hardware doesn't automaticaly blend. */
- manual_blend(key->src_samples);
+ manual_blend_average(key->src_samples);
}
+ } else if(key->blend && key->blit_scaled) {
+ manual_blend_linear(key->src_samples);
} else {
/* We aren't blending, which means we just want to fetch a single sample
* from the source surface. The address that we want to fetch from is
@@ -913,6 +926,18 @@ brw_blorp_blit_program::alloc_regs()
= retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
reg += 2;
}
+
+ if (key->blit_scaled && key->blend) {
+ this->x_sample_coords = brw_vec8_grf(reg, 0);
+ reg += 2;
+ this->y_sample_coords = brw_vec8_grf(reg, 0);
+ reg += 2;
+ this->x_lerp = brw_vec8_grf(reg, 0);
+ reg += 2;
+ this->y_lerp = brw_vec8_grf(reg, 0);
+ reg += 2;
+ }
+
this->xy_coord_index = 0;
this->sample_index
= retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
@@ -1368,11 +1393,82 @@ brw_blorp_blit_program::translate_dst_to_src()
brw_MUL(&func, Y_f, Yp_f, y_transform.multiplier);
brw_ADD(&func, X_f, X_f, x_transform.offset);
brw_ADD(&func, Y_f, Y_f, y_transform.offset);
- /* Round the float coordinates down to nearest integer by moving to
- * UD registers.
- */
- brw_MOV(&func, Xp, X_f);
- brw_MOV(&func, Yp, Y_f);
+ if (key->blit_scaled && key->blend) {
+ float x_scale = 2.0;
+ float y_scale = key->src_samples / 2.0;
+ /* Translate coordinates to lay out the samples in a rectangular grid
+ * roughly corresponding to sample locations.
+ */
+ brw_ADD(&func, X_f, X_f, brw_imm_f(-0.25));
+ brw_ADD(&func, Y_f, Y_f, brw_imm_f(-1.0 / key->src_samples));
+ brw_MUL(&func, X_f, X_f, brw_imm_f(x_scale));
+ brw_MUL(&func, Y_f, Y_f, brw_imm_f(y_scale));
+
+ /* Test boundary conditions to properly handle the sampling of texels on
+ * texture edges.
+ */
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_L,
+ X_f, brw_imm_f(2.0 * key->src_x0));
+ brw_IF(&func, BRW_EXECUTE_16);
+ {
+ /* Left Edge in X */
+ brw_MOV(&func, x_lerp, brw_imm_f(1.0));
+ }
+ brw_ELSE(&func);
+ {
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_GE,
+ X_f, brw_imm_f(2.0 * (key->src_x1 - 0.50)));
+ brw_IF(&func, BRW_EXECUTE_16);
+ {
+ /* Right Edge in X */
+ brw_MOV(&func, x_lerp, brw_imm_f(0.0));
+ }
+ brw_ELSE(&func);
+ {
+ /* Store the fractional part to be used as color interpolator */
+ brw_FRC(&func, x_lerp, X_f);
+ }
+ brw_ENDIF(&func);
+ }
+ brw_ENDIF(&func);
+
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_L,
+ Y_f, brw_imm_f((y_scale) * key->src_y0));
+ brw_IF(&func, BRW_EXECUTE_16);
+ {
+ /* Bottom Edge in Y */
+ brw_MOV(&func, y_lerp, brw_imm_f(1.0));
+ }
+ brw_ELSE(&func);
+ {
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_GE, Y_f,
+ brw_imm_f((y_scale) * (key->src_y1 - 1 / y_scale)));
+ brw_IF(&func, BRW_EXECUTE_16);
+ {
+ /* Top Edge in Y */
+ brw_MOV(&func, y_lerp, brw_imm_f(0.0));
+ }
+ brw_ELSE(&func);
+ {
+ /* Store the fractional part to be used as color interpolator */
+ brw_FRC(&func, y_lerp, Y_f);
+ }
+ brw_ENDIF(&func);
+ }
+ brw_ENDIF(&func);
+
+ /* Round the float coordinates down to nearest integer */
+ brw_RNDD(&func, Xp_f, X_f);
+ brw_RNDD(&func, Yp_f, Y_f);
+ brw_MUL(&func, X_f, Xp_f, brw_imm_f(0.5));
+ brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / y_scale));
+ } else {
+ /* Round the float coordinates down to nearest integer by moving to
+ * UD registers.
+ */
+ brw_MOV(&func, Xp, X_f);
+ brw_MOV(&func, Yp, Y_f);
+ }
SWAP_XY_AND_XPYP();
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
@@ -1418,7 +1514,7 @@ inline int count_trailing_one_bits(unsigned value)
void
-brw_blorp_blit_program::manual_blend(unsigned num_samples)
+brw_blorp_blit_program::manual_blend_average(unsigned num_samples)
{
if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
mcs_fetch();
@@ -1523,6 +1619,135 @@ brw_blorp_blit_program::manual_blend(unsigned num_samples)
brw_ENDIF(&func);
}
+void
+brw_blorp_blit_program::manual_blend_linear(unsigned num_samples)
+{
+ if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
+ mcs_fetch();
+
+ /* We do this computation by performing the following operations:
+ *
+ * In case of 4x, 8x MSAA:
+ * - Compute the pixel coordinates and sample numbers (a, b, c, d)
+ * which are later used for interpolation
+ * - linearly interpolate samples a and b in X
+ * - linearly interpolate samples c and d in X
+ * - linearly interpolate the results of last two operations in Y
+ *
+ * result = lrp(lrp(a + b) + lrp(c + d))
+ */
+ struct brw_reg Xp_f = retype(Xp, BRW_REGISTER_TYPE_F);
+ struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
+ struct brw_reg t1_f = retype(t1, BRW_REGISTER_TYPE_F);
+ struct brw_reg t2_f = retype(t2, BRW_REGISTER_TYPE_F);
+
+ for (unsigned i = 0; i < 4; ++i) {
+ assert(i < ARRAY_SIZE(texture_data));
+ s_is_zero = false;
+
+ /* Compute pixel coordinates */
+ brw_ADD(&func, vec16(x_sample_coords), Xp_f,
+ brw_imm_f((float)(i & 0x1) * 0.5));
+ brw_ADD(&func, vec16(y_sample_coords), Yp_f,
+ brw_imm_f((float)(i & 0x2) * (1.0 / num_samples)));
+ brw_MOV(&func, vec16(X), x_sample_coords);
+ brw_MOV(&func, vec16(Y), y_sample_coords);
+
+ /* Compute sample index. In case of 4x MSAA, sample index is
+ * equal to sample number.
+ */
+ brw_FRC(&func, vec16(t1_f), x_sample_coords);
+ brw_FRC(&func, vec16(t2_f), y_sample_coords);
+ brw_MUL(&func, vec16(t1_f), t1_f, brw_imm_f(2.0));
+ brw_MUL(&func, vec16(t2_f), t2_f, brw_imm_f(num_samples));
+ brw_ADD(&func, vec16(t1_f), t1_f, t2_f);
+ brw_MOV(&func, vec16(S), t1_f);
+
+ if (num_samples == 8) {
+ /* Map the sample index to a sample number */
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_L,
+ S, brw_imm_d(4));
+ brw_IF(&func, BRW_EXECUTE_16);
+ {
+ brw_MOV(&func, vec16(t2), brw_imm_d(5));
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
+ S, brw_imm_d(1));
+ brw_MOV(&func, vec16(t2), brw_imm_d(2));
+ brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
+ S, brw_imm_d(2));
+ brw_MOV(&func, vec16(t2), brw_imm_d(4));
+ brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
+ S, brw_imm_d(3));
+ brw_MOV(&func, vec16(t2), brw_imm_d(6));
+ brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
+ }
+ brw_ELSE(&func);
+ {
+ brw_MOV(&func, vec16(t2), brw_imm_d(0));
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
+ S, brw_imm_d(5));
+ brw_MOV(&func, vec16(t2), brw_imm_d(3));
+ brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
+ S, brw_imm_d(6));
+ brw_MOV(&func, vec16(t2), brw_imm_d(7));
+ brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
+ S, brw_imm_d(7));
+ brw_MOV(&func, vec16(t2), brw_imm_d(1));
+ brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
+ }
+ brw_ENDIF(&func);
+ brw_MOV(&func, vec16(S), t2);
+ }
+ texel_fetch(texture_data[i]);
+
+ if (i == 0 && key->tex_layout == INTEL_MSAA_LAYOUT_CMS) {
+ /* The Ivy Bridge PRM, Vol4 Part1 p27 (Multisample Control Surface)
+ * suggests an optimization:
+ *
+ * "A simple optimization with probable large return in
+ * performance is to compare the MCS value to zero (indicating
+ * all samples are on sample slice 0), and sample only from
+ * sample slice 0 using ld2dss if MCS is zero."
+ *
+ * Note that in the case where the MCS value is zero, sampling from
+ * sample slice 0 using ld2dss and sampling from sample 0 using
+ * ld2dms are equivalent (since all samples are on sample slice 0).
+ * Since we have already sampled from sample 0, all we need to do is
+ * skip the remaining fetches and averaging if MCS is zero.
+ */
+ brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ mcs_data, brw_imm_ud(0));
+ brw_IF(&func, BRW_EXECUTE_16);
+ }
+ }
+
+#define SAMPLE(x, y) offset(texture_data[x], y)
+ brw_set_access_mode(&func, BRW_ALIGN_16);
+ for (int index = 3; index > 0; ) {
+ for (int k = 0; k < 8; ++k)
+ brw_LRP(&func,
+ vec8(SAMPLE(index - 1, k)),
+ offset(x_lerp, k & 1),
+ SAMPLE(index, k),
+ SAMPLE(index - 1, k));
+ index -= 2;
+ }
+ for (int k = 0; k < 8; ++k)
+ brw_LRP(&func,
+ vec8(SAMPLE(0, k)),
+ offset(y_lerp, k & 1),
+ vec8(SAMPLE(2, k)),
+ vec8(SAMPLE(0, k)));
+ brw_set_access_mode(&func, BRW_ALIGN_1);
+#undef SAMPLE
+ if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
+ brw_ENDIF(&func);
+}
+
/**
* Emit code to look up a value in the texture using the SAMPLE message (which
* does blending of MSAA surfaces).
@@ -1535,7 +1760,8 @@ brw_blorp_blit_program::sample(struct brw_reg dst)
SAMPLER_MESSAGE_ARG_V_FLOAT
};
- texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE, args, ARRAY_SIZE(args));
+ texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE, args,
+ ARRAY_SIZE(args));
}
/**
@@ -1872,6 +2098,17 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
wm_prog_key.persample_msaa_dispatch = true;
}
+ /* Scaled blitting or not. */
+ wm_prog_key.blit_scaled =
+ ((dst_x1 - dst_x0) == (src_x1 - src_x0) &&
+ (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true;
+
+ /* Source rectangle dimensions */
+ wm_prog_key.src_x0 = src_x0;
+ wm_prog_key.src_y0 = src_y0;
+ wm_prog_key.src_x1 = src_x1;
+ wm_prog_key.src_y1 = src_y1;
+
/* The render path must be configured to use the same number of samples as
* the destination buffer.
*/
--
1.8.1.4
More information about the mesa-dev
mailing list