[Mesa-dev] [PATCH 2/2] i965/vs: Use GRFs for pull constant offsets on gen7.

Eric Anholt eric at anholt.net
Fri Apr 5 11:11:29 PDT 2013


This allows the computation of the offset to get written directly into the
message source.

shader-db results:
total instructions in shared programs: 3308390 -> 3283025 (-0.77%)
instructions in affected programs:     442998 -> 417633 (-5.73%)

No difference in GLB2.7 low res (n=9).
---
 src/mesa/drivers/dri/i965/brw_defines.h        |    1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp       |    2 ++
 src/mesa/drivers/dri/i965/brw_vec4.cpp         |    8 ++++-
 src/mesa/drivers/dri/i965/brw_vec4.h           |    4 +++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |   45 +++++++++++++++---------
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   18 +++++++---
 6 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 3d07c36..a13f9dc 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -739,6 +739,7 @@ enum opcode {
    VS_OPCODE_SCRATCH_READ,
    VS_OPCODE_SCRATCH_WRITE,
    VS_OPCODE_PULL_CONSTANT_LOAD,
+   VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
 };
 
 #define BRW_PREDICATE_NONE             0
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1a52039..b3bd1b9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -498,6 +498,8 @@ brw_instruction_name(enum opcode op)
       return "scratch_write";
    case VS_OPCODE_PULL_CONSTANT_LOAD:
       return "pull_constant_load";
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      return "pull_constant_load_gen7";
 
    default:
       /* Yes, this leaks.  It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e470ac8..67dd17a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -230,7 +230,13 @@ vec4_instruction::is_math()
 bool
 vec4_instruction::is_send_from_grf()
 {
-   return opcode == SHADER_OPCODE_SHADER_TIME_ADD;
+   switch (opcode) {
+   case SHADER_OPCODE_SHADER_TIME_ADD:
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      return true;
+   default:
+      return false;
+   }
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 8f130e1..e286925 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -544,6 +544,10 @@ private:
 				    struct brw_reg dst,
 				    struct brw_reg index,
 				    struct brw_reg offset);
+   void generate_pull_constant_load_gen7(vec4_instruction *inst,
+                                         struct brw_reg dst,
+                                         struct brw_reg surf_index,
+                                         struct brw_reg offset);
 
    struct brw_context *brw;
    struct intel_context *intel;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index e378f7f..963901c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -558,27 +558,11 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                                             struct brw_reg index,
                                             struct brw_reg offset)
 {
+   assert(intel->gen <= 7);
    assert(index.file == BRW_IMMEDIATE_VALUE &&
 	  index.type == BRW_REGISTER_TYPE_UD);
    uint32_t surf_index = index.dw1.ud;
 
-   if (intel->gen == 7) {
-      gen6_resolve_implied_move(p, &offset, inst->base_mrf);
-      brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
-      brw_set_dest(p, insn, dst);
-      brw_set_src0(p, insn, offset);
-      brw_set_sampler_message(p, insn,
-                              surf_index,
-                              0, /* LD message ignores sampler unit */
-                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
-                              1, /* rlen */
-                              1, /* mlen */
-                              false, /* no header */
-                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-                              0);
-      return;
-   }
-
    struct brw_reg header = brw_vec8_grf(0, 0);
 
    gen6_resolve_implied_move(p, &header, inst->base_mrf);
@@ -614,6 +598,29 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
 }
 
 void
+vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg surf_index,
+                                                 struct brw_reg offset)
+{
+   assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
+	  surf_index.type == BRW_REGISTER_TYPE_UD);
+
+   brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, insn, dst);
+   brw_set_src0(p, insn, offset);
+   brw_set_sampler_message(p, insn,
+                           surf_index.dw1.ud,
+                           0, /* LD message ignores sampler unit */
+                           GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
+                           1, /* rlen */
+                           1, /* mlen */
+                           false, /* no header */
+                           BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+                           0);
+}
+
+void
 vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
                                         struct brw_reg dst,
                                         struct brw_reg *src)
@@ -673,6 +680,10 @@ vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
       generate_pull_constant_load(inst, dst, src[0], src[1]);
       break;
 
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+      break;
+
    case SHADER_OPCODE_SHADER_TIME_ADD:
       brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8bd2fd8..246a373 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2882,10 +2882,20 @@ vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
    src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
    vec4_instruction *load;
 
-   load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
-					temp, index, offset);
-   load->base_mrf = 14;
-   load->mlen = 1;
+   if (intel->gen >= 7) {
+      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+      grf_offset.type = offset.type;
+      emit_before(inst, MOV(grf_offset, offset));
+
+      load = new(mem_ctx) vec4_instruction(this,
+                                           VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+                                           temp, index, src_reg(grf_offset));
+   } else {
+      load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
+                                           temp, index, offset);
+      load->base_mrf = 14;
+      load->mlen = 1;
+   }
    emit_before(inst, load);
 }
 
-- 
1.7.10.4



More information about the mesa-dev mailing list