[Mesa-dev] [PATCH 09/11] i965/fs: Don't emit a header on gen5+ sample messages unless required.

Eric Anholt eric at anholt.net
Thu May 12 21:05:28 PDT 2011


Improves glbenchmark egypt performance 0.6% +/- 0.4% (n=6).
---
 src/mesa/drivers/dri/i965/brw_fs.cpp |   26 +++++++++++++++++++-------
 1 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 126081b..d9ecc6f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1283,6 +1283,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    }
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
+   inst->header_present = true;
 
    if (simd16) {
       for (int i = 0; i < 4; i++) {
@@ -1307,9 +1308,19 @@ fs_inst *
 fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 			      int sampler)
 {
-   int mlen = 1; /* g0 header always present. */
-   int base_mrf = 1;
+   int mlen = 0;
+   int base_mrf = 2;
    int reg_width = c->dispatch_width / 8;
+   bool header_present = false;
+
+   if (ir->offset) {
+      /* The offsets set up by the ir_texture visitor are in the
+       * m1 header, so we can't go headerless.
+       */
+      header_present = true;
+      mlen++;
+      base_mrf--;
+   }
 
    for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width),
@@ -1319,7 +1330,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    mlen += ir->coordinate->type->vector_elements * reg_width;
 
    if (ir->shadow_comparitor) {
-      mlen = MAX2(mlen, 1 + 4 * reg_width);
+      mlen = MAX2(mlen, header_present + 4 * reg_width);
 
       ir->shadow_comparitor->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
@@ -1333,7 +1344,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       break;
    case ir_txb:
       ir->lod_info.bias->accept(this);
-      mlen = MAX2(mlen, 1 + 4 * reg_width);
+      mlen = MAX2(mlen, header_present + 4 * reg_width);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
 
@@ -1342,7 +1353,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       break;
    case ir_txl:
       ir->lod_info.lod->accept(this);
-      mlen = MAX2(mlen, 1 + 4 * reg_width);
+      mlen = MAX2(mlen, header_present + 4 * reg_width);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
 
@@ -1355,6 +1366,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    }
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
+   inst->header_present = header_present;
 
    if (mlen > 11) {
       fail("Message length >11 disallowed by hardware\n");
@@ -1472,7 +1484,7 @@ fs_visitor::visit(ir_texture *ir)
    /* If there's an offset, we already set up m1.  To avoid the implied move,
     * use the null register.  Otherwise, we want an implied move from g0.
     */
-   if (ir->offset != NULL)
+   if (ir->offset != NULL || !inst->header_present)
       inst->src[0] = fs_reg(brw_null_reg());
    else
       inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
@@ -2539,7 +2551,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	      rlen,
 	      inst->mlen,
 	      0,
-	      1,
+	      inst->header_present,
 	      simd_mode);
 }
 
-- 
1.7.5.1



More information about the mesa-dev mailing list