[Mesa-dev] [PATCH 3/8] i965/fs: Switch shader_time writes to using GRFs.

Eric Anholt eric at anholt.net
Tue Mar 19 17:05:58 PDT 2013


This avoids conflicts between shader_time and FB writes, so we can include
more of the program in our profiling.  The downside is that more of the
message setup is hidden from the optimizer, which doesn't have a way to
handle multi-register sends from GRFs.
---
 src/mesa/drivers/dri/i965/brw_eu.h          |    2 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c     |    6 ++---
 src/mesa/drivers/dri/i965/brw_fs.cpp        |   29 +++++++++++----------
 src/mesa/drivers/dri/i965/brw_fs.h          |    5 ++++
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp   |   37 ++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |    3 ++-
 6 files changed, 63 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 60ce231..6ef1f83 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -305,7 +305,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
 				   GLuint offset);
 
 void brw_shader_time_add(struct brw_compile *p,
-                         int mrf,
+                         struct brw_reg payload,
                          uint32_t surf_index);
 
 /* If/else/endif.  Works by manipulating the execution flags on each
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 992e784..19a3577 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2435,7 +2435,7 @@ brw_svb_write(struct brw_compile *p,
  * messages.
  */
 void brw_shader_time_add(struct brw_compile *p,
-                         int base_mrf,
+                         struct brw_reg payload,
                          uint32_t surf_index)
 {
    struct intel_context *intel = &p->brw->intel;
@@ -2452,8 +2452,8 @@ void brw_shader_time_add(struct brw_compile *p,
     */
    brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                       BRW_ARF_NULL, 0));
-   brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
-                                      base_mrf, 0));
+   brw_set_src0(p, send, brw_vec1_reg(payload.file,
+                                      payload.nr, 0));
 
    uint32_t sfid, msg_type;
    if (intel->is_haswell) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7817655..682c3d3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -377,6 +377,7 @@ bool
 fs_inst::is_send_from_grf()
 {
    return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
+           opcode == SHADER_OPCODE_SHADER_TIME_ADD ||
            (opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
             src[1].file == GRF));
 }
@@ -607,19 +608,16 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
 {
    int shader_time_index = brw_get_shader_time_index(brw, prog, &fp->Base,
                                                      type);
-   int base_mrf = 6;
+   fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
 
-   fs_reg offset_mrf = fs_reg(MRF, base_mrf);
-   offset_mrf.type = BRW_REGISTER_TYPE_UD;
-   emit(MOV(offset_mrf, fs_reg(shader_time_index * SHADER_TIME_STRIDE)));
-
-   fs_reg time_mrf = fs_reg(MRF, base_mrf + 1);
-   time_mrf.type = BRW_REGISTER_TYPE_UD;
-   emit(MOV(time_mrf, value));
+   fs_reg payload;
+   if (dispatch_width == 8)
+      payload = fs_reg(this, glsl_type::uvec2_type);
+   else
+      payload = fs_reg(this, glsl_type::uint_type);
 
-   fs_inst *inst = emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD));
-   inst->base_mrf = base_mrf;
-   inst->mlen = 2;
+   emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
+                fs_reg(), payload, offset, value));
 }
 
 void
@@ -735,8 +733,6 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case SHADER_OPCODE_TXL:
    case SHADER_OPCODE_TXS:
       return 1;
-   case SHADER_OPCODE_SHADER_TIME_ADD:
-      return 0;
    case FS_OPCODE_FB_WRITE:
       return 2;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
@@ -1359,6 +1355,13 @@ fs_visitor::split_virtual_grfs()
       if (inst->regs_written() > 1) {
 	 split_grf[inst->dst.reg] = false;
       }
+
+      /* If we're sending from a GRF, don't split it, on the assumption that
+       * the send is reading the whole thing.
+       */
+      if (inst->is_send_from_grf()) {
+         split_grf[inst->src[0].reg] = false;
+      }
    }
 
    /* Allocate new space for split regs.  Note that the virtual
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 1fabec4..d9d17a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -559,6 +559,11 @@ private:
                                         struct brw_reg dst,
                                         struct brw_reg src);
 
+   void generate_shader_time_add(fs_inst *inst,
+                                 struct brw_reg payload,
+                                 struct brw_reg offset,
+                                 struct brw_reg value);
+
    void patch_discard_jumps_to_fb_writes();
 
    struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 712fef6..5db481c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -972,6 +972,41 @@ fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
 }
 
 void
+fs_generator::generate_shader_time_add(fs_inst *inst,
+                                       struct brw_reg payload,
+                                       struct brw_reg offset,
+                                       struct brw_reg value)
+{
+   assert(intel->gen >= 7);
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, true);
+
+   assert(payload.file == BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
+                                          offset.type);
+   struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
+                                         value.type);
+
+   assert(offset.file == BRW_IMMEDIATE_VALUE);
+   if (value.file == BRW_GENERAL_REGISTER_FILE) {
+      value.width = BRW_WIDTH_1;
+      value.hstride = BRW_HORIZONTAL_STRIDE_0;
+      value.vstride = BRW_VERTICAL_STRIDE_0;
+   } else {
+      assert(value.file == BRW_IMMEDIATE_VALUE);
+   }
+
+   /* Trying to deal with setup of the params from the IR is crazy in the FS8
+    * case, and we don't really care about squeezing every bit of performance
+    * out of this path, so we just emit the MOVs from here.
+    */
+   brw_MOV(p, payload_offset, offset);
+   brw_MOV(p, payload_value, value);
+   brw_shader_time_add(p, payload, SURF_INDEX_WM_SHADER_TIME);
+   brw_pop_insn_state(p);
+}
+
+void
 fs_generator::generate_code(exec_list *instructions)
 {
    int last_native_insn_offset = p->next_insn_offset;
@@ -1291,7 +1326,7 @@ fs_generator::generate_code(exec_list *instructions)
          break;
 
       case SHADER_OPCODE_SHADER_TIME_ADD:
-         brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME);
+         generate_shader_time_add(inst, src[0], src[1], src[2]);
          break;
 
       case FS_OPCODE_SET_SIMD4X2_OFFSET:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index b73711c..54f3efd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -674,7 +674,8 @@ vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
       break;
 
    case SHADER_OPCODE_SHADER_TIME_ADD:
-      brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_VS_SHADER_TIME);
+      brw_shader_time_add(p, brw_message_reg(inst->base_mrf),
+                          SURF_INDEX_VS_SHADER_TIME);
       break;
 
    default:
-- 
1.7.10.4



More information about the mesa-dev mailing list