[Mesa-dev] [PATCH RFC 6/6] i965/gen7: emulate SIMD16 sample_d with dual SIMD8 sample_d
Chia-I Wu
olvaffe at gmail.com
Mon Sep 30 01:27:50 PDT 2013
From: Chia-I Wu <olv at lunarg.com>
Add fs_visitor::emit_dual_texture_gen7, which emulates SIMD16 sample_d with two
SIMD8 sample_d messages on gen7+. Fix fs_generator::generate_tex to send SIMD8
messages when force_uncompressed or force_sechalf is set.
No piglit quick.tests regression on Ivy Bridge and Haswell.
With this change, I am seeing 6.76479% +/- 0.619064% (at 95.0% confidence)
improvement on Xonotic with Ultra effects.
Signed-off-by: Chia-I Wu <olv at lunarg.com>
---
src/mesa/drivers/dri/i965/brw_fs.h | 3 +
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 137 ++++++++++++++++++++++++++-
2 files changed, 138 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index c161e7d..82a0a7d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -335,6 +335,9 @@ public:
fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
fs_reg sample_index, int sampler);
+ void emit_dual_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
+ fs_reg sample_index, int sampler);
fs_inst *emit_texture(ir_texture *ir, fs_reg dst, int base_mrf, int mlen,
bool header_present, int regs_written, int sampler);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 6435a17..b9f97b6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1334,6 +1334,133 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
return emit_texture(ir, dst, base_mrf, mlen, header_present, 4, sampler);
}
+/* Emulate a SIMD16 sampler message with dual SIMD8 sampler messages. For
+ * now, and for practical reasons, only ir_txd is supported.
+ */
+void
+fs_visitor::emit_dual_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ fs_reg shadow_c, fs_reg lod, fs_reg lod2,
+ fs_reg sample_index, int sampler)
+{
+ /* not SIMD16 txd: fall back to a single emit_texture_gen7() message */
+ if (dispatch_width != 16 || ir->op != ir_txd) {
+ emit_texture_gen7(ir, dst, coordinate, shadow_c,
+ lod, lod2, sample_index, sampler);
+ return;
+ }
+
+ const int reg_width = 1;
+ int mlen = 0;
+ int base_mrf = 2;
+ bool header_present = false;
+ fs_reg temp = fs_reg(GRF, virtual_grf_alloc(4),
+ brw_type_for_base_type(ir->type));
+
+ emit(FS_OPCODE_OVERWRITE_DST, dst);
+ emit(FS_OPCODE_OVERWRITE_DST, temp);
+
+ for (int msg = 0; msg < 2; msg++) {
+ if (msg == 0)
+ push_force_uncompressed();
+ else
+ push_force_sechalf();
+
+ /* already guaranteed by the early return above; only txd is supported */
+ assert(ir->op == ir_txd);
+
+ if (ir->offset) {
+ /* The offsets set up by the ir_texture visitor are in the
+ * m1 header, so we can't go headerless.
+ */
+ header_present = true;
+ mlen++;
+ base_mrf--; /* NOTE(review): runs for both messages and base_mrf is never reset, so it is decremented twice -- confirm intended */
+ }
+
+ if (ir->shadow_comparitor) {
+ emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
+ mlen += reg_width;
+ }
+
+ /* Interleave each coordinate component with its dPdx and dPdy component:
+ * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
+ */
+ fs_reg coord = coordinate, ddx = lod, ddy = lod2;
+ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ emit(MOV(fs_reg(MRF, base_mrf + mlen), coord));
+ coord.reg_offset++;
+ mlen += reg_width;
+
+ /* For cube map array, the coordinate is (u,v,r,ai) but there are
+ * only derivatives for (u, v, r).
+ */
+ if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
+ emit(MOV(fs_reg(MRF, base_mrf + mlen), ddx));
+ ddx.reg_offset++;
+ mlen += reg_width;
+
+ emit(MOV(fs_reg(MRF, base_mrf + mlen), ddy));
+ ddy.reg_offset++;
+ mlen += reg_width;
+ }
+ }
+
+ if (mlen > 11) {
+ fail("Message length >11 disallowed by hardware\n");
+ break; /* NOTE(review): leaves the loop before the matching pop_force_*() below -- confirm this is harmless after fail() */
+ }
+
+ /* response length is 4, which is 2 vgrfs (the regs_written argument) */
+ emit_texture(ir, temp, base_mrf, mlen, header_present, 2, sampler);
+
+ if (msg == 0) {
+ /* move 4 components from temp to dst; s is half (i % 2) of temp reg i / 2 */
+ for (int i = 0; i < 4; i++) {
+ fs_reg d = dst;
+ d.reg_offset += i;
+
+ fs_reg s = temp;
+ s.reg_offset += i / 2;
+ s.sechalf = (i % 2);
+
+ emit(MOV(d, s));
+ }
+
+ pop_force_uncompressed();
+
+ /* use non-overlapping MRF range if possible */
+ if (base_mrf + mlen * 2 < BRW_MAX_MRF)
+ base_mrf += mlen;
+
+ mlen = 0;
+
+ temp.reg_offset += 2; /* second message lands in the next two regs of temp */
+
+ coordinate.sechalf = true; /* second message reads the sechalf of every operand */
+ shadow_c.sechalf = true;
+ lod.sechalf = true;
+ lod2.sechalf = true;
+ sample_index.sechalf = true;
+ }
+ else {
+ /* same move as for msg 0, but into the sechalf of each dst register */
+ for (int i = 0; i < 4; i++) {
+ fs_reg d = dst;
+ d.reg_offset += i;
+ d.sechalf = true;
+
+ fs_reg s = temp;
+ s.reg_offset += i / 2;
+ s.sechalf = (i % 2);
+
+ emit(MOV(d, s));
+ }
+
+ pop_force_sechalf();
+ }
+ }
+}
+
fs_reg
fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
bool is_rect, int sampler, int texunit)
@@ -1503,8 +1630,14 @@ fs_visitor::visit(ir_texture *ir)
fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
if (brw->gen >= 7) {
- emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
- lod, lod2, sample_index, sampler);
+ if (dispatch_width == 16 && ir->op == ir_txd) {
+ emit_dual_texture_gen7(ir, dst, coordinate, shadow_comparitor,
+ lod, lod2, sample_index, sampler);
+ }
+ else {
+ emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
+ lod, lod2, sample_index, sampler);
+ }
} else if (brw->gen >= 5) {
emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
lod, lod2, sample_index, sampler);
--
1.8.3.1
More information about the mesa-dev
mailing list