[Mesa-dev] [PATCH 09/41] SQUASH: i965/fs: Change regs_written to be actual hardware registers

Jason Ekstrand jason at jlekstrand.net
Sat Sep 20 10:22:58 PDT 2014


---
 src/mesa/drivers/dri/i965/brw_fs.cpp               | 27 +++++++++++++++++-----
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       | 19 ++++++++-------
 .../drivers/dri/i965/brw_schedule_instructions.cpp |  7 +++---
 3 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 33214fd..9656081 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -65,7 +65,23 @@ fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources)
    this->conditional_mod = BRW_CONDITIONAL_NONE;
 
    /* This will be the case for almost all instructions. */
-   this->regs_written = 1;
+   switch (dst.file) {
+   case GRF:
+   case HW_REG:
+   case MRF:
+      this->regs_written = (dst.width * dst.stride * type_sz(dst.type) + 31) / 32;
+      break;
+   case BAD_FILE:
+      this->regs_written = 0;
+      break;
+   case IMM:
+   case UNIFORM:
+      unreachable("Invalid destination register file");
+      break;
+   default:
+      unreachable("Invalid register file");
+      break;
+   }
 
    this->writes_accumulator = false;
 }
@@ -2346,7 +2362,7 @@ fs_visitor::compute_to_mrf()
             /* Things returning more than one register would need us to
              * understand coalescing out more than one MOV at a time.
              */
-            if (scan_inst->regs_written > 1)
+            if (scan_inst->regs_written > scan_inst->dst.width / 8)
                break;
 
 	    /* SEND instructions can't have MRF as a destination. */
@@ -2605,8 +2621,7 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
 void
 fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
 {
-   int reg_size = dispatch_width / 8;
-   int write_len = inst->regs_written * reg_size;
+   int write_len = inst->regs_written;
    int first_write_grf = inst->dst.reg;
    bool needs_dep[BRW_MAX_MRF];
    assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2648,7 +2663,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
        */
       if (scan_inst->dst.file == GRF) {
          for (int i = 0; i < scan_inst->regs_written; i++) {
-            int reg = scan_inst->dst.reg + i * reg_size;
+            int reg = scan_inst->dst.reg + i;
 
             if (reg >= first_write_grf &&
                 reg < first_write_grf + write_len &&
@@ -2686,7 +2701,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
 void
 fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
 {
-   int write_len = inst->regs_written * dispatch_width / 8;
+   int write_len = inst->regs_written;
    int first_write_grf = inst->dst.reg;
    bool needs_dep[BRW_MAX_MRF];
    assert(write_len < (int)sizeof(needs_dep) - 1);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 35dc318..dae8f25 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -285,7 +285,8 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
     * src, just set the saturate flag instead of emmitting a separate mov.
     */
    fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
-   if (modify && modify->regs_written == 1 && modify->can_do_saturate()) {
+   if (modify && modify->regs_written == modify->dst.width / 8 &&
+       modify->can_do_saturate()) {
       modify->saturate = true;
       this->result = src;
       return true;
@@ -1429,7 +1430,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
    inst->header_present = header_present;
-   inst->regs_written = 4;
+   inst->regs_written = 4 * reg_width;
 
    if (mlen > MAX_SAMPLER_MESSAGE_SIZE) {
       fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
@@ -1645,7 +1646,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    else
       inst->mlen = length * reg_width;
    inst->header_present = header_present;
-   inst->regs_written = 4;
+   inst->regs_written = 4 * reg_width;
 
    if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
       fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
@@ -1793,9 +1794,10 @@ fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, fs_reg sampler)
    inst->base_mrf = -1;
    inst->mlen = length * reg_width;
    inst->header_present = false;
-   inst->regs_written = 4; /* we only care about one reg of response,
-                            * but the sampler always writes 4/8
-                            */
+   inst->regs_written = 4 * reg_width; /* we only care about one reg of
+                                        * response, but the sampler always
+                                        * writes 4/8
+                                        */
 
    return dest;
 }
@@ -1966,14 +1968,15 @@ fs_visitor::visit(ir_texture *ir)
          emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
 
          fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
-         for (int i = 0; i < inst->regs_written; i++) {
+         int components = inst->regs_written / (dst.width / 8);
+         for (int i = 0; i < components; i++) {
             if (i == 2) {
                fixed_payload[i] = fixed_depth;
             } else {
                fixed_payload[i] = offset(dst, i);
             }
          }
-         emit(LOAD_PAYLOAD(dst, fixed_payload, inst->regs_written));
+         emit(LOAD_PAYLOAD(dst, fixed_payload, components));
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 86d16ab..a9fa6eb 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -834,7 +834,7 @@ fs_instruction_scheduler::calculate_deps()
       /* write-after-write deps. */
       if (inst->dst.file == GRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written * reg_width; r++) {
+            for (int r = 0; r < inst->regs_written; r++) {
                add_dep(last_grf_write[inst->dst.reg + r], n);
                last_grf_write[inst->dst.reg + r] = n;
             }
@@ -964,7 +964,7 @@ fs_instruction_scheduler::calculate_deps()
        */
       if (inst->dst.file == GRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written * reg_width; r++)
+            for (int r = 0; r < inst->regs_written; r++)
                last_grf_write[inst->dst.reg + r] = n;
          } else {
             for (int r = 0; r < inst->regs_written; r++) {
@@ -1287,7 +1287,8 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
                 * single-result send is probably actually reducing register
                 * pressure.
                 */
-               if (inst->regs_written <= 1 && chosen_inst->regs_written > 1) {
+               if (inst->regs_written <= inst->dst.width / 8 &&
+                   chosen_inst->regs_written > chosen_inst->dst.width / 8) {
                   chosen = n;
                   continue;
                } else if (inst->regs_written > chosen_inst->regs_written) {
-- 
2.1.0



More information about the mesa-dev mailing list