Mesa (master): i965/fs: Set up gen7 UBO loads as sends from GRFs.

Eric Anholt anholt at kemper.freedesktop.org
Fri Dec 14 23:25:13 UTC 2012


Module: Mesa
Branch: master
Commit: 461a29783a28e579a9a5a236e5f47ffb6d18a328
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=461a29783a28e579a9a5a236e5f47ffb6d18a328

Author: Eric Anholt <eric at anholt.net>
Date:   Wed Dec  5 00:06:30 2012 -0800

i965/fs: Set up gen7 UBO loads as sends from GRFs.

This gives the instruction scheduler a chance to schedule between the
loads, whereas before it was restricted due to the dependencies between
the MRFs for setting them up.

For one shader in gles3conform, it goes from getting stuck in register
allocation for as long as anybody's bothered to leave it running down
to 23 seconds, thanks to the LIFO scheduling.

Acked-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_defines.h      |    2 +
 src/mesa/drivers/dri/i965/brw_fs.cpp         |    4 +-
 src/mesa/drivers/dri/i965/brw_fs.h           |    8 +++
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp    |   75 ++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |   32 +++++++++--
 5 files changed, 114 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 40571a4..ab206d1 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -676,10 +676,12 @@ enum opcode {
    FS_OPCODE_SPILL,
    FS_OPCODE_UNSPILL,
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+   FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
    FS_OPCODE_DISCARD_JUMP,
+   FS_OPCODE_SET_GLOBAL_OFFSET,
 
    VS_OPCODE_URB_WRITE,
    VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9a18410..8312811 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -330,7 +330,9 @@ fs_inst::is_math()
 bool
 fs_inst::is_send_from_grf()
 {
-   return opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
+   return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
+           (opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
+            src[1].file == GRF));
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b75314c..8725712 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -529,6 +529,10 @@ private:
    void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
                                             struct brw_reg index,
                                             struct brw_reg offset);
+   void generate_uniform_pull_constant_load_gen7(fs_inst *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg surf_index,
+                                                 struct brw_reg offset);
    void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst,
                                             struct brw_reg index);
    void generate_varying_pull_constant_load_gen7(fs_inst *inst,
@@ -536,6 +540,10 @@ private:
                                                  struct brw_reg index,
                                                  struct brw_reg offset);
    void generate_mov_dispatch_to_flags(fs_inst *inst);
+   void generate_set_global_offset(fs_inst *inst,
+                                   struct brw_reg dst,
+                                   struct brw_reg src,
+                                   struct brw_reg offset);
    void generate_discard_jump(fs_inst *inst);
 
    void patch_discard_jumps_to_fb_writes();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 9a89141..63f09fe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -666,6 +666,44 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
 }
 
 void
+fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
+                                                       struct brw_reg dst,
+                                                       struct brw_reg index,
+                                                       struct brw_reg offset)
+{
+   assert(inst->mlen == 0); /* no MRF message length is expected here */
+   /* Emits one SEND: a 2-OWord block read of surface "index" into dst. */
+   assert(index.file == BRW_IMMEDIATE_VALUE &&
+	  index.type == BRW_REGISTER_TYPE_UD);
+   uint32_t surf_index = index.dw1.ud; /* surface index is the UD immediate */
+
+   assert(offset.file == BRW_GENERAL_REGISTER_FILE); /* payload is a GRF */
+
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_pop_insn_state(p); /* restore the state saved by the push above */
+
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, offset); /* "offset" is the SEND's source operand */
+   if (intel->gen < 6) /* NOTE(review): looks dead; visitor emits this op for gen >= 7 */
+      send->header.destreg__conditionalmod = inst->base_mrf;
+
+   uint32_t msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
+   uint32_t msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
+   bool header_present = true;
+   brw_set_dp_read_message(p, send,
+                           surf_index,
+                           msg_control,
+                           msg_type,
+                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                           1, /* presumably the message length -- confirm */
+                           header_present,
+                           1); /* presumably the response length -- confirm */
+}
+
+void
 fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
                                                   struct brw_reg dst,
                                                   struct brw_reg index)
@@ -852,6 +890,35 @@ brw_reg_from_fs_reg(fs_reg *reg)
    return brw_reg;
 }
 
+/**
+ * Writes an immediate into subregister 2 of a vgrf for gen7+ message setup
+ * (dword 2 counting from zero for a 32-bit value type, i.e. the third dword).
+ * For setting up gen7 messages in VGRFs, we need to be able to set that
+ * dword for some payloads where in the MRF world we'd have just used
+ * brw_message_reg().  We don't want to bake it into the send message's code
+ * generation because that means we don't get a chance to schedule the
+ * instructions.
+ */
+void
+fs_generator::generate_set_global_offset(fs_inst *inst,
+                                         struct brw_reg dst,
+                                         struct brw_reg src,
+                                         struct brw_reg value)
+{
+   /* src must match dst: the MOV below only writes one subregister, and the
+    * matching source is what exposes to the optimization passes that dst is
+    * not completely overwritten (brw_fs.h names "value" as "offset").
+    */
+   assert(src.file == dst.file && src.nr == dst.nr);
+   assert(value.file == BRW_IMMEDIATE_VALUE);
+   /* Emit the MOV with compression and masking off, then restore state. */
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 2), value.type), value);
+   brw_pop_insn_state(p);
+}
+
 void
 fs_generator::generate_code(exec_list *instructions)
 {
@@ -1127,6 +1194,10 @@ fs_generator::generate_code(exec_list *instructions)
 	 generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
 	 break;
 
+      case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+	 generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+	 break;
+
       case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
 	 generate_varying_pull_constant_load(inst, dst, src[0]);
 	 break;
@@ -1151,6 +1222,10 @@ fs_generator::generate_code(exec_list *instructions)
          brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME);
          break;
 
+      case FS_OPCODE_SET_GLOBAL_OFFSET:
+         generate_set_global_offset(inst, dst, src[0], src[1]);
+         break;
+
       default:
 	 if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
 	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index ccf905e..6a39f98 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -581,12 +581,32 @@ fs_visitor::visit(ir_expression *ir)
       if (const_offset) {
          fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
          packed_consts.type = result.type;
-         fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                                      packed_consts,
-                                      surf_index,
-                                      fs_reg(const_offset->value.u[0])));
-         pull->base_mrf = 14;
-         pull->mlen = 1;
+
+         if (intel->gen >= 7) {
+            fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] / 16);
+            fs_reg payload = fs_reg(this, glsl_type::uint_type);
+            struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
+                                       BRW_REGISTER_TYPE_UD);
+            fs_inst *setup = emit(MOV(payload, fs_reg(g0)));
+            setup->force_writemask_all = true;
+            /* We don't need the second half of this vgrf to be filled with g1
+             * in the 16-wide case, but if we use force_uncompressed then live
+             * variable analysis won't consider this a def!
+             */
+
+            emit(FS_OPCODE_SET_GLOBAL_OFFSET, payload,
+                 payload, const_offset_reg);
+            emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, packed_consts,
+                 surf_index, payload);
+         } else {
+            fs_reg const_offset_reg = fs_reg(const_offset->value.u[0]);
+            fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+                                         packed_consts,
+                                         surf_index,
+                                         const_offset_reg));
+            pull->base_mrf = 14;
+            pull->mlen = 1;
+         }
 
          packed_consts.smear = const_offset->value.u[0] % 16 / 4;
          for (int i = 0; i < ir->type->vector_elements; i++) {




More information about the mesa-commit mailing list