[Mesa-dev] [PATCHv3 3/3] i965/gen7: emulate SIMD16 sample_d with dual SIMD8 sample_d
Chia-I Wu
olvaffe at gmail.com
Wed Oct 16 08:58:11 CEST 2013
From: Chia-I Wu <olv at lunarg.com>
Add fs_visitor::emit_dual_texture_gen7 that emulates SIMD16 sample_d with dual
SIMD8 sample_d on gen7+.
No piglit quick.tests regression on Ivy Bridge and Haswell.
Improved Xonotic with Ultra effects by 6.0209% +/- 0.396586% (N=11) on
Haswell.
v2: no change
v3: reworked because of texture-from-GRF changes
Signed-off-by: Chia-I Wu <olv at lunarg.com>
---
src/mesa/drivers/dri/i965/brw_fs.h | 3 +
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 117 ++++++++++++++++++++++++++-
2 files changed, 118 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index c2ba351..05bf39e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -347,6 +347,9 @@ public:
fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
fs_reg sample_index, int sampler);
+ void emit_dual_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
+ fs_reg sample_index, int sampler);
fs_inst *emit_texture(ir_texture *ir, fs_reg dst, fs_reg payload, int mlen,
bool header_present, int regs_written, int sampler);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index d164b04..19e3f1e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1394,6 +1394,114 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
return emit_texture(ir, dst, payload, mlen, header_present, 4, sampler);
}
+/* Emulate a SIMD16 sampler message with dual SIMD8 sampler messages. For
+ * now, and for practical reasons, only ir_txd is supported.
+ *
+ * Any other opcode, or a dispatch width other than 16, is forwarded
+ * unchanged to emit_texture_gen7().  Otherwise two messages are emitted,
+ * the first under push_force_uncompressed() and the second under
+ * push_force_sechalf(); the second one reads coordinate, shadow_c, lod and
+ * lod2 with their sechalf flag set.  Both messages write into a temporary
+ * (simd8_dst) whose components are then copied into dst, each message
+ * filling the half of dst selected by its sechalf flag.
+ *
+ * Nothing is returned; on an over-long payload fail() is called and no
+ * further message is emitted.
+ */
+void
+fs_visitor::emit_dual_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ fs_reg shadow_c, fs_reg lod, fs_reg lod2,
+ fs_reg sample_index, int sampler)
+{
+ /* no need to emit dual SIMD8 messages */
+ if (dispatch_width != 16 || ir->op != ir_txd) {
+ emit_texture_gen7(ir, dst, coordinate, shadow_c,
+ lod, lod2, sample_index, sampler);
+ return;
+ }
+
+ fs_reg simd8_dst = fs_reg(GRF, virtual_grf_alloc(4),
+ brw_type_for_base_type(ir->type)); /* receives both SIMD8 responses */
+
+#define ADVANCE_HALF(reg) \
+ do { reg.reg_offset += reg.sechalf; reg.sechalf = !reg.sechalf; } while (0)
+
+ for (int msg = 0; msg < 2; msg++) {
+ bool header_present = false;
+ fs_reg payload = fs_reg(this, glsl_type::float_type);
+ fs_reg next = payload; /* write cursor into the payload, moved by ADVANCE_HALF */
+
+ if (msg == 0)
+ push_force_uncompressed();
+ else
+ push_force_sechalf();
+
+ /* only txd is supported for now */
+ assert(ir->op == ir_txd);
+
+ if (ir->offset) {
+ /* Need the header to put texture offsets in; only a slot is
+ * reserved here, nothing is written to it in this function.
+ */
+ header_present = true;
+ ADVANCE_HALF(next);
+ }
+
+ if (ir->shadow_comparitor) {
+ emit(MOV(next, shadow_c));
+ ADVANCE_HALF(next);
+ }
+
+ /* Load dPdx and the coordinate together:
+ * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
+ *
+ * Each entry takes one ADVANCE_HALF step of the payload cursor; the
+ * sources are local copies so that stepping their reg_offset does not
+ * disturb the parameters used by the other message.
+ */
+ fs_reg coord = coordinate, ddx = lod, ddy = lod2;
+ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ emit(MOV(next, coord));
+ coord.reg_offset++;
+ ADVANCE_HALF(next);
+
+ /* For cube map array, the coordinate is (u,v,r,ai) but there are
+ * only derivatives for (u, v, r).
+ */
+ if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
+ emit(MOV(next, ddx));
+ ddx.reg_offset++;
+ ADVANCE_HALF(next);
+
+ emit(MOV(next, ddy));
+ ddy.reg_offset++;
+ ADVANCE_HALF(next);
+ }
+ }
+
+ int mlen = next.reg_offset * 2 + next.sechalf; /* number of ADVANCE_HALF steps taken */
+ if (mlen > 11) {
+ fail("Message length >11 disallowed by hardware\n");
+ break; /* NOTE(review): exits after push_force_*() without the matching pop -- confirm this is harmless once fail() was called */
+ }
+
+ /* Message length is mlen and response length is 4. In vgrf, that means
+ * (mlen + 1) / 2 registers for payload and 2 registers for writeback.
+ */
+ virtual_grf_sizes[payload.reg] = (mlen + 1) / 2;
+ emit_texture(ir, simd8_dst, payload, mlen, header_present, 2, sampler);
+
+ fs_reg d = dst, s = simd8_dst;
+ d.sechalf = (msg == 1);
+
+ /* swizzle the result to match SIMD16 writeback: the source steps
+ * through simd8_dst one half at a time, while the destination advances
+ * a whole reg_offset per component and writes the half selected by
+ * d.sechalf.
+ */
+ for (int i = 0; i < 4; i++) {
+ emit(MOV(d, s));
+ d.reg_offset++;
+ ADVANCE_HALF(s);
+ }
+
+ if (msg == 0) {
+ pop_force_uncompressed();
+
+ /* prepare for the second message: move the temporary past the first
+ * response and flag every source operand to use its second half
+ */
+ simd8_dst.reg_offset += 2;
+ coordinate.sechalf = true;
+ shadow_c.sechalf = true;
+ lod.sechalf = true;
+ lod2.sechalf = true;
+ sample_index.sechalf = true;
+ } else {
+ pop_force_sechalf();
+ }
+ }
+
+#undef ADVANCE_HALF
+}
+
fs_reg
fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
bool is_rect, int sampler, int texunit)
@@ -1586,8 +1694,13 @@ fs_visitor::visit(ir_texture *ir)
fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
if (brw->gen >= 7) {
- emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
- lod, lod2, sample_index, sampler);
+ if (dispatch_width == 16 && ir->op == ir_txd) {
+ emit_dual_texture_gen7(ir, dst, coordinate, shadow_comparitor,
+ lod, lod2, sample_index, sampler);
+ } else {
+ emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
+ lod, lod2, sample_index, sampler);
+ }
} else if (brw->gen >= 5) {
emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
lod, lod2, sample_index, sampler);
--
1.8.3.1
More information about the mesa-dev
mailing list