[Mesa-dev] [PATCH 3/4] i965/fs: Make emit_shader_time_end() insert before EOT.

Kenneth Graunke kenneth at whitecape.org
Fri Feb 27 00:06:01 PST 2015


Previously, we emitted the shader-time epilogue from emit_fb_writes(),
during the middle of looping through color regions (or emit_urb_writes
for the VS).  This is duplicated several times and rather awkward.

I need to fix a bug in our FB write handling, and it will be a lot
easier if we move emit_shader_time_end() out of there.

Now, we simply emit FB writes/URB writes, and subsequently have
emit_shader_time_end() insert instructions before the final SEND with
EOT.  Not only is this simpler, it's actually a slight improvement:
we now include the MOVs to set up the final FB write payload in our
shader-time measurements.

Note that INTEL_DEBUG=shader_time only exists on Gen7+, and uses
send-from-GRF.  (In the past, we might have hit trouble where both
attempt to use MRFs for messages; that's not a problem now.)

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         | 29 ++++++++++++++++++----------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 13 -------------
 2 files changed, 19 insertions(+), 23 deletions(-)

I verified that INTEL_DEBUG=shader_time produced equivalent results on
anholt's openarena timedemo before and after this patch.

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index faa6f3f..2fd7f46 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -754,7 +754,12 @@ fs_visitor::emit_shader_time_end()
       unreachable("fs_visitor::emit_shader_time_end missing code");
    }
 
-   fs_inst *tm_read = emit(timestamp_read());
+   /* Insert our code just before the final SEND with EOT. */
+   exec_node *end = this->instructions.get_tail();
+   assert(end && ((fs_inst *) end)->eot);
+
+   fs_inst *tm_read = timestamp_read();
+   end->insert_before(tm_read);
    fs_reg shader_end_time = tm_read->dst;
 
    /* Check that there weren't any timestamp reset events (assuming these
@@ -762,26 +767,27 @@ fs_visitor::emit_shader_time_end()
     */
    fs_reg reset = shader_end_time;
    reset.set_smear(2);
-   fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u)));
+   fs_inst *test = AND(reg_null_d, reset, fs_reg(1u));
    test->conditional_mod = BRW_CONDITIONAL_Z;
-   emit(IF(BRW_PREDICATE_NORMAL));
+   end->insert_before(test);
+   end->insert_before(IF(BRW_PREDICATE_NORMAL));
 
    fs_reg start = shader_start_time;
    start.negate = true;
    fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
-   emit(ADD(diff, start, shader_end_time));
+   end->insert_before(ADD(diff, start, shader_end_time));
 
    /* If there were no instructions between the two timestamp gets, the diff
     * is 2 cycles.  Remove that overhead, so I can forget about that when
     * trying to determine the time taken for single instructions.
     */
-   emit(ADD(diff, diff, fs_reg(-2u)));
+   end->insert_before(ADD(diff, diff, fs_reg(-2u)));
 
-   emit(SHADER_TIME_ADD(type, diff));
-   emit(SHADER_TIME_ADD(written_type, fs_reg(1u)));
-   emit(BRW_OPCODE_ELSE);
-   emit(SHADER_TIME_ADD(reset_type, fs_reg(1u)));
-   emit(BRW_OPCODE_ENDIF);
+   end->insert_before(SHADER_TIME_ADD(type, diff));
+   end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u)));
+   end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width));
+   end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u)));
+   end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width));
 }
 
 fs_inst *
@@ -3913,6 +3919,9 @@ fs_visitor::run_fs()
 
       emit_fb_writes();
 
+      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+         emit_shader_time_end();
+
       calculate_cfg();
 
       optimize();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 13a3bf2..77e1103 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3648,9 +3648,6 @@ fs_visitor::emit_fb_writes()
 
    fs_inst *inst;
    if (do_dual_src) {
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-         emit_shader_time_end();
-
       this->current_annotation = ralloc_asprintf(this->mem_ctx,
 						 "FB dual-source write");
       inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
@@ -3666,19 +3663,12 @@ fs_visitor::emit_fb_writes()
          if (brw->gen >= 6 && key->replicate_alpha && target != 0)
             src0_alpha = offset(outputs[0], 3);
 
-         if (target == key->nr_color_regions - 1 &&
-             (INTEL_DEBUG & DEBUG_SHADER_TIME))
-            emit_shader_time_end();
-
          inst = emit_single_fb_write(this->outputs[target], reg_undef,
                                      src0_alpha,
                                      this->output_components[target]);
          inst->target = target;
       }
    } else {
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-         emit_shader_time_end();
-
       /* Even if there's no color buffers enabled, we still need to send
        * alpha out the pipeline to our null renderbuffer to support
        * alpha-testing, alpha-to-coverage, and so on.
@@ -3889,9 +3879,6 @@ fs_visitor::emit_urb_writes()
       if (length == 8 || last)
          flush = true;
       if (flush) {
-         if (last && (INTEL_DEBUG & DEBUG_SHADER_TIME))
-            emit_shader_time_end();
-
          fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
          fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
                                  BRW_REGISTER_TYPE_F);
-- 
2.2.2



More information about the mesa-dev mailing list