Mesa (9.1): i965/fs: Bake regs_written into the IR instead of recomputing it later.

Kenneth Graunke kwg at kemper.freedesktop.org
Wed May 29 22:18:17 UTC 2013


Module: Mesa
Branch: 9.1
Commit: 887aaa9b4d06a6a94692d8a424a3637b91567569
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=887aaa9b4d06a6a94692d8a424a3637b91567569

Author: Eric Anholt <eric at anholt.net>
Date:   Mon Mar 18 11:30:57 2013 -0700

i965/fs: Bake regs_written into the IR instead of recomputing it later.

For sampler messages, the number of registers written depends on the
target gen, and for gen4 SIMD16-sampler-on-SIMD8-execution we were
returning 4 instead of the 8 we should have.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
(cherry picked from commit 3cf69b228404791cf15231321b6a18b5701be0a6)

Conflicts:
(We didn't pick over the varying-index changes, so
 FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 returns 1 reg, not 4.)
	src/mesa/drivers/dri/i965/brw_fs.cpp

---

 src/mesa/drivers/dri/i965/brw_fs.cpp               |   29 +++++++-------------
 src/mesa/drivers/dri/i965/brw_fs.h                 |    2 +-
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp           |    6 ++--
 .../drivers/dri/i965/brw_fs_live_variables.cpp     |    2 +-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |    8 +++---
 .../dri/i965/brw_fs_schedule_instructions.cpp      |    6 ++--
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       |    7 +++-
 7 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3c3b3a1..45de480 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -60,6 +60,9 @@ fs_inst::init()
    this->src[0] = reg_undef;
    this->src[1] = reg_undef;
    this->src[2] = reg_undef;
+
+   /* This will be the case for almost all instructions. */
+   this->regs_written = 1;
 }
 
 fs_inst::fs_inst()
@@ -233,6 +236,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
    if (intel->gen >= 7) {
       inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
                                   dst, surf_index, offset);
+      inst->regs_written = 1;
       instructions.push_tail(inst);
    } else {
       int base_mrf = 13;
@@ -302,26 +306,13 @@ fs_inst::equals(fs_inst *inst)
            offset == inst->offset);
 }
 
-int
-fs_inst::regs_written()
-{
-   if (is_tex())
-      return 4;
-
-   /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
-    * but we don't currently use them...nor do we have an opcode for them.
-    */
-
-   return 1;
-}
-
 bool
 fs_inst::overwrites_reg(const fs_reg &reg)
 {
    return (reg.file == dst.file &&
            reg.reg == dst.reg &&
            reg.reg_offset >= dst.reg_offset  &&
-           reg.reg_offset < dst.reg_offset + regs_written());
+           reg.reg_offset < dst.reg_offset + regs_written);
 }
 
 bool
@@ -1368,7 +1359,7 @@ fs_visitor::split_virtual_grfs()
       /* If there's a SEND message that requires contiguous destination
        * registers, no splitting is allowed.
        */
-      if (inst->regs_written() > 1) {
+      if (inst->regs_written > 1) {
 	 split_grf[inst->dst.reg] = false;
       }
    }
@@ -2094,7 +2085,7 @@ fs_visitor::compute_to_mrf()
             /* Things returning more than one register would need us to
              * understand coalescing out more than one MOV at a time.
              */
-            if (scan_inst->regs_written() > 1)
+            if (scan_inst->regs_written > 1)
                break;
 
 	    /* SEND instructions can't have MRF as a destination. */
@@ -2311,7 +2302,7 @@ void
 fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
 {
    int reg_size = dispatch_width / 8;
-   int write_len = inst->regs_written() * reg_size;
+   int write_len = inst->regs_written * reg_size;
    int first_write_grf = inst->dst.reg;
    bool needs_dep[BRW_MAX_MRF];
    assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2351,7 +2342,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
        * dependency has more latency than a MOV.
        */
       if (scan_inst->dst.file == GRF) {
-         for (int i = 0; i < scan_inst->regs_written(); i++) {
+         for (int i = 0; i < scan_inst->regs_written; i++) {
             int reg = scan_inst->dst.reg + i * reg_size;
 
             if (reg >= first_write_grf &&
@@ -2390,7 +2381,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
 void
 fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
 {
-   int write_len = inst->regs_written() * dispatch_width / 8;
+   int write_len = inst->regs_written * dispatch_width / 8;
    int first_write_grf = inst->dst.reg;
    bool needs_dep[BRW_MAX_MRF];
    assert(write_len < (int)sizeof(needs_dep) - 1);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index c776c77..36fd0f2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -174,7 +174,6 @@ public:
            fs_reg src0, fs_reg src1,fs_reg src2);
 
    bool equals(fs_inst *inst);
-   int regs_written();
    bool overwrites_reg(const fs_reg &reg);
    bool is_tex();
    bool is_math();
@@ -192,6 +191,7 @@ public:
    uint8_t flag_subreg;
 
    int mlen; /**< SEND message length */
+   int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */
    int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
    uint32_t texture_offset; /**< Texture offset bitfield */
    int sampler;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index ebdfa27..52b7fce 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -127,7 +127,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 	     */
 	    bool no_existing_temp = entry->tmp.file == BAD_FILE;
 	    if (no_existing_temp) {
-               int written = entry->generator->regs_written();
+               int written = entry->generator->regs_written;
 
                fs_reg orig_dst = entry->generator->dst;
                fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
@@ -147,8 +147,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 	    }
 
 	    /* dest <- temp */
-            int written = inst->regs_written();
-            assert(written == entry->generator->regs_written());
+            int written = inst->regs_written;
+            assert(written == entry->generator->regs_written);
             assert(inst->dst.type == entry->tmp.type);
             fs_reg dst = inst->dst;
             fs_reg tmp = entry->tmp;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 4c7991d..1b7f478 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -77,7 +77,7 @@ fs_live_variables::setup_def_use()
 	  * variable, and thus qualify for being in def[].
 	  */
 	 if (inst->dst.file == GRF &&
-	     inst->regs_written() == v->virtual_grf_sizes[inst->dst.reg] &&
+	     inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
 	     !inst->predicate &&
 	     !inst->force_uncompressed &&
 	     !inst->force_sechalf) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index d1147f5..2ac7215 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -549,7 +549,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
       }
 
       if (inst->dst.file == GRF) {
-	 spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale;
+	 spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
 
          if (inst->dst.smear >= 0) {
             no_spill[inst->dst.reg] = true;
@@ -618,7 +618,7 @@ fs_visitor::spill_reg(int spill_reg)
 	  inst->dst.reg == spill_reg) {
          int subset_spill_offset = (spill_offset +
                                     REG_SIZE * inst->dst.reg_offset);
-         inst->dst.reg = virtual_grf_alloc(inst->regs_written());
+         inst->dst.reg = virtual_grf_alloc(inst->regs_written);
          inst->dst.reg_offset = 0;
 
 	 /* If our write is going to affect just part of the
@@ -627,7 +627,7 @@ fs_visitor::spill_reg(int spill_reg)
 	  */
 	 if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
             fs_reg unspill_reg = inst->dst;
-            for (int chan = 0; chan < inst->regs_written(); chan++) {
+            for (int chan = 0; chan < inst->regs_written; chan++) {
                emit_unspill(inst, unspill_reg,
                             subset_spill_offset + REG_SIZE * chan);
                unspill_reg.reg_offset++;
@@ -640,7 +640,7 @@ fs_visitor::spill_reg(int spill_reg)
 	 spill_src.negate = false;
 	 spill_src.smear = -1;
 
-	 for (int chan = 0; chan < inst->regs_written(); chan++) {
+	 for (int chan = 0; chan < inst->regs_written; chan++) {
 	    fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
 						       reg_null_f, spill_src);
 	    spill_src.reg_offset++;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index c125928..0d68e3d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -510,7 +510,7 @@ instruction_scheduler::calculate_deps()
       /* write-after-write deps. */
       if (inst->dst.file == GRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++) {
+            for (int r = 0; r < inst->regs_written * reg_width; r++) {
                add_dep(last_grf_write[inst->dst.reg + r], n);
                last_grf_write[inst->dst.reg + r] = n;
             }
@@ -617,7 +617,7 @@ instruction_scheduler::calculate_deps()
        */
       if (inst->dst.file == GRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++)
+            for (int r = 0; r < inst->regs_written * reg_width; r++)
                last_grf_write[inst->dst.reg + r] = n;
          } else {
             last_grf_write[inst->dst.reg] = n;
@@ -716,7 +716,7 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
             schedule_node *n = (schedule_node *)node;
 
             chosen = n;
-            if (chosen->inst->regs_written() <= 1)
+            if (chosen->inst->regs_written <= 1)
                break;
          }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 007c8ef..f5ca48e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -237,7 +237,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
     * src, generate a saturated MOV
     */
    fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
-   if (!modify || modify->regs_written() != 1) {
+   if (!modify || modify->regs_written != 1) {
       this->result = fs_reg(this, ir->type);
       fs_inst *inst = emit(MOV(this->result, src));
       inst->saturate = true;
@@ -717,7 +717,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
    /* If last_rhs_inst wrote a different number of components than our LHS,
     * we can't safely rewrite it.
     */
-   if (virtual_grf_sizes[dst.reg] != modify->regs_written())
+   if (virtual_grf_sizes[dst.reg] != modify->regs_written)
       return false;
 
    /* Success!  Rewrite the instruction. */
@@ -917,6 +917,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
    inst->header_present = true;
+   inst->regs_written = simd16 ? 8 : 4;
 
    if (simd16) {
       for (int i = 0; i < 4; i++) {
@@ -1046,6 +1047,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
    inst->header_present = header_present;
+   inst->regs_written = 4;
 
    if (mlen > 11) {
       fail("Message length >11 disallowed by hardware\n");
@@ -1176,6 +1178,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
    inst->header_present = header_present;
+   inst->regs_written = 4;
 
    if (mlen > 11) {
       fail("Message length >11 disallowed by hardware\n");




More information about the mesa-commit mailing list