[Mesa-dev] [PATCH 2/2] i965/blorp: use BRW_COMPRESSION_2NDHALF for second half LPR
Topi Pohjolainen
topi.pohjolainen at intel.com
Tue Jan 21 08:07:52 PST 2014
No known bugs fixed but this is now in line with fs-generator.
No regresssions on IVB.
CC: Paul Berry <stereotype441 at gmail.com>
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 46 ++++++++++++++--------
.../drivers/dri/i965/test_blorp_blit_eu_gen.cpp | 24 +++++------
2 files changed, 42 insertions(+), 28 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index e2c2562..3b92c56 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -658,6 +658,11 @@ private:
const sampler_message_arg *args, int num_args);
void render_target_write();
+ void emit_lrp(const struct brw_reg &dst,
+ const struct brw_reg &src1,
+ const struct brw_reg &src2,
+ const struct brw_reg &src3);
+
/**
* Base-2 logarithm of the maximum number of samples that can be blended.
*/
@@ -1656,6 +1661,21 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples)
}
void
+brw_blorp_blit_program::emit_lrp(const struct brw_reg &dst,
+ const struct brw_reg &src1,
+ const struct brw_reg &src2,
+ const struct brw_reg &src3)
+{
+ brw_set_access_mode(&func, BRW_ALIGN_16);
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
+ brw_LRP(&func, dst, src1, src2, src3);
+ brw_set_compression_control(&func, BRW_COMPRESSION_2NDHALF);
+ brw_LRP(&func, sechalf(dst), sechalf(src1), sechalf(src2), sechalf(src3));
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
+ brw_set_access_mode(&func, BRW_ALIGN_1);
+}
+
+void
brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
{
/* We do this computation by performing the following operations:
@@ -1768,29 +1788,23 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
}
#define SAMPLE(x, y) offset(texture_data[x], y)
- brw_set_access_mode(&func, BRW_ALIGN_16);
- brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
for (int index = 3; index > 0; ) {
/* Since we're doing SIMD16, 4 color channels fits in to 8 registers.
* Counter value of 8 in 'for' loop below is used to interpolate all
* the color components.
*/
- for (int k = 0; k < 8; ++k)
- brw_LRP(&func,
- vec8(SAMPLE(index - 1, k)),
- offset(x_frac, k & 1),
- SAMPLE(index, k),
- SAMPLE(index - 1, k));
+ for (int k = 0; k < 8; k += 2)
+ emit_lrp(vec8(SAMPLE(index - 1, k)),
+ x_frac,
+ vec8(SAMPLE(index, k)),
+ vec8(SAMPLE(index - 1, k)));
index -= 2;
}
- for (int k = 0; k < 8; ++k)
- brw_LRP(&func,
- vec8(SAMPLE(0, k)),
- offset(y_frac, k & 1),
- vec8(SAMPLE(2, k)),
- vec8(SAMPLE(0, k)));
- brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
- brw_set_access_mode(&func, BRW_ALIGN_1);
+ for (int k = 0; k < 8; k += 2)
+ emit_lrp(vec8(SAMPLE(0, k)),
+ y_frac,
+ vec8(SAMPLE(2, k)),
+ vec8(SAMPLE(0, k)));
#undef SAMPLE
}
diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index 208959a..9f9e0c2 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -264,29 +264,29 @@ test_gen7_blend_scaled_msaa_8(struct brw_context *brw)
"0x000009f0: send(16) g28<1>UW g114<8,8,1>F\n"
" sampler (0, 0, 31, 2) mlen 6 rlen 8 { align1 WE_normal 1H };\n"
"0x00000a00: lrp(8) g20<1>F g56<4,1,1>F g28<4,1,1>F g20<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000a10: lrp(8) g21<1>F g57<4,1,1>F g29<4,1,1>F g21<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000a10: lrp(8) g21<1>F g57<4,1,1>F g29<4,1,1>F g21<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000a20: lrp(8) g22<1>F g56<4,1,1>F g30<4,1,1>F g22<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000a30: lrp(8) g23<1>F g57<4,1,1>F g31<4,1,1>F g23<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000a30: lrp(8) g23<1>F g57<4,1,1>F g31<4,1,1>F g23<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000a40: lrp(8) g24<1>F g56<4,1,1>F g32<4,1,1>F g24<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000a50: lrp(8) g25<1>F g57<4,1,1>F g33<4,1,1>F g25<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000a50: lrp(8) g25<1>F g57<4,1,1>F g33<4,1,1>F g25<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000a60: lrp(8) g26<1>F g56<4,1,1>F g34<4,1,1>F g26<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000a70: lrp(8) g27<1>F g57<4,1,1>F g35<4,1,1>F g27<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000a70: lrp(8) g27<1>F g57<4,1,1>F g35<4,1,1>F g27<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000a80: lrp(8) g4<1>F g56<4,1,1>F g12<4,1,1>F g4<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000a90: lrp(8) g5<1>F g57<4,1,1>F g13<4,1,1>F g5<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000a90: lrp(8) g5<1>F g57<4,1,1>F g13<4,1,1>F g5<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000aa0: lrp(8) g6<1>F g56<4,1,1>F g14<4,1,1>F g6<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000ab0: lrp(8) g7<1>F g57<4,1,1>F g15<4,1,1>F g7<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000ab0: lrp(8) g7<1>F g57<4,1,1>F g15<4,1,1>F g7<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000ac0: lrp(8) g8<1>F g56<4,1,1>F g16<4,1,1>F g8<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000ad0: lrp(8) g9<1>F g57<4,1,1>F g17<4,1,1>F g9<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000ad0: lrp(8) g9<1>F g57<4,1,1>F g17<4,1,1>F g9<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000ae0: lrp(8) g10<1>F g56<4,1,1>F g18<4,1,1>F g10<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000af0: lrp(8) g11<1>F g57<4,1,1>F g19<4,1,1>F g11<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000af0: lrp(8) g11<1>F g57<4,1,1>F g19<4,1,1>F g11<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000b00: lrp(8) g4<1>F g58<4,1,1>F g20<4,1,1>F g4<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000b10: lrp(8) g5<1>F g59<4,1,1>F g21<4,1,1>F g5<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000b10: lrp(8) g5<1>F g59<4,1,1>F g21<4,1,1>F g5<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000b20: lrp(8) g6<1>F g58<4,1,1>F g22<4,1,1>F g6<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000b30: lrp(8) g7<1>F g59<4,1,1>F g23<4,1,1>F g7<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000b30: lrp(8) g7<1>F g59<4,1,1>F g23<4,1,1>F g7<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000b40: lrp(8) g8<1>F g58<4,1,1>F g24<4,1,1>F g8<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000b50: lrp(8) g9<1>F g59<4,1,1>F g25<4,1,1>F g9<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000b50: lrp(8) g9<1>F g59<4,1,1>F g25<4,1,1>F g9<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000b60: lrp(8) g10<1>F g58<4,1,1>F g26<4,1,1>F g10<4,1,1>F { align16 WE_normal 1Q };\n"
- "0x00000b70: lrp(8) g11<1>F g59<4,1,1>F g27<4,1,1>F g11<4,1,1>F { align16 WE_normal 1Q };\n"
+ "0x00000b70: lrp(8) g11<1>F g59<4,1,1>F g27<4,1,1>F g11<4,1,1>F { align16 WE_normal 2Q };\n"
"0x00000b80: mov(16) g114<1>F g4<8,8,1>F { align1 WE_normal 1H };\n"
"0x00000b90: mov(16) g116<1>F g6<8,8,1>F { align1 WE_normal 1H };\n"
"0x00000ba0: mov(16) g118<1>F g8<8,8,1>F { align1 WE_normal 1H };\n"
--
1.8.3.1
More information about the mesa-dev
mailing list