[Mesa-dev] [PATCH 06/12] i965: Add optimization pass to let us use the replicate data message

Kristian Høgsberg krh at bitplanet.net
Mon Aug 11 17:29:36 PDT 2014


The data port has a SIMD16 'replicate data' message, which lets us write
the same color for all 16 pixels by sending the four floats in the
lower half of a register instead of sending 4 times 16 identical
component values in 8 registers.

The message comes with a lot of restrictions and could be made generally
useful by recognizing when those restriction are satisfied.  For now,
this lets us enable the optimization when we know it's safe, but we don't
enable it by default.  The optimization works for simple color clear shaders
only, but does recognized and support multiple render targets.

Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_context.h         |  1 +
 src/mesa/drivers/dri/i965/brw_defines.h         |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp            | 56 +++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h              |  4 ++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp  |  5 ++-
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp |  5 ++-
 6 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7de9b64..6ab7713 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1033,6 +1033,7 @@ struct brw_context
    bool has_negative_rhw_bug;
    bool has_pln;
    bool no_simd8;
+   bool use_rep_send;
 
    /**
     * Some versions of Gen hardware don't do centroid interpolation correctly
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index a519629..194d35f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -850,6 +850,7 @@ enum opcode {
     */
    FS_OPCODE_FB_WRITE = 128,
    FS_OPCODE_BLORP_FB_WRITE,
+   FS_OPCODE_REP_FB_WRITE,
    SHADER_OPCODE_RCP,
    SHADER_OPCODE_RSQ,
    SHADER_OPCODE_SQRT,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 061c32d..640e222 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2215,6 +2215,59 @@ fs_visitor::compute_to_mrf()
    return progress;
 }
 
+bool
+fs_visitor::try_rep_send()
+{
+   int i, count, step = dispatch_width / 8;
+   fs_inst *start = NULL;
+
+   count = 0;
+   foreach_in_list_safe(fs_inst, inst, &this->instructions) {
+      if (count == 0)
+         start = inst;
+      if (inst->opcode == BRW_OPCODE_MOV &&
+	  inst->dst.file == MRF &&
+          inst->dst.reg == start->dst.reg + step * count &&
+          inst->src[0].file == HW_REG &&
+          inst->src[0].reg_offset == start->src[0].reg_offset + count) {
+         if (count == 0)
+            start = inst;
+         count++;
+      }
+
+      if (inst->opcode == FS_OPCODE_FB_WRITE &&
+          count == 4 &&
+          (inst->base_mrf == start->dst.reg ||
+           (inst->base_mrf + 2 == start->dst.reg && inst->header_present))) {
+         fs_inst *mov = MOV(start->dst, start->src[0]);
+
+         mov->dst.fixed_hw_reg =
+            brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE,
+                         start->dst.reg, 0);
+         mov->dst.file = HW_REG;
+         mov->dst.type = mov->dst.fixed_hw_reg.type;
+
+         mov->src[0].fixed_hw_reg =
+            brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+         mov->src[0].file = HW_REG;
+         mov->src[0].type = mov->src[0].fixed_hw_reg.type;
+         mov->force_writemask_all = true;
+         mov->dst.type = BRW_REGISTER_TYPE_F;
+
+         start->insert_before(mov);
+
+         for (i = 0; i < 4; i++)
+            mov->next->remove();
+
+         inst->opcode = FS_OPCODE_REP_FB_WRITE;
+         inst->mlen -= 4 * step - 1;
+         count = 0;
+      }
+   }
+
+   return true;
+}
+
 /**
  * Walks through basic blocks, looking for repeated MRF writes and
  * removing the later ones.
@@ -3157,6 +3210,9 @@ fs_visitor::run()
       prog_data->total_scratch = brw_get_scratch_size(last_scratch);
    }
 
+   if (brw->use_rep_send)
+      try_rep_send();
+
    if (dispatch_width == 8)
       prog_data->reg_blocks = brw_register_blocks(grf_used);
    else
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index dfb13ea..884a963 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -355,6 +355,8 @@ public:
    void lower_uniform_pull_constant_loads();
    bool lower_load_payload();
 
+   bool try_rep_send();
+
    void push_force_uncompressed();
    void pop_force_uncompressed();
 
@@ -591,6 +593,7 @@ private:
                       GLuint nr);
    void generate_fb_write(fs_inst *inst);
    void generate_blorp_fb_write(fs_inst *inst);
+   void generate_rep_fb_write(fs_inst *inst);
    void generate_pixel_xy(struct brw_reg dst, bool is_x);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
 			 struct brw_reg *src);
@@ -715,6 +718,7 @@ public:
 private:
    void generate_code(exec_list *instructions);
    void generate_fb_write(fs_inst *inst);
+   void generate_rep_fb_write(fs_inst *inst);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
                          struct brw_reg *src);
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index cec2e82..ca413c9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -115,7 +115,9 @@ fs_generator::fire_fb_write(fs_inst *inst,
       brw_pop_insn_state(p);
    }
 
-   if (this->dual_source_output)
+   if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
+   else if (this->dual_source_output)
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
    else if (dispatch_width == 16)
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
@@ -1692,6 +1694,7 @@ fs_generator::generate_code(exec_list *instructions)
 	 generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]);
 	 break;
 
+      case FS_OPCODE_REP_FB_WRITE:
       case FS_OPCODE_FB_WRITE:
 	 generate_fb_write(inst);
 	 break;
diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
index f4aae67..1e7ab1e 100644
--- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
@@ -107,7 +107,9 @@ gen8_fs_generator::generate_fb_write(fs_inst *ir)
     */
    uint32_t msg_type;
    /* Set the Message Type */
-   if (this->dual_source_output)
+   if (ir->opcode == FS_OPCODE_REP_FB_WRITE)
+      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
+   else if (this->dual_source_output)
       msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
    else if (dispatch_width == 16)
       msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
@@ -1177,6 +1179,7 @@ gen8_fs_generator::generate_code(exec_list *instructions)
          generate_varying_pull_constant_load(ir, dst, src[0], src[1]);
          break;
 
+      case FS_OPCODE_REP_FB_WRITE:
       case FS_OPCODE_FB_WRITE:
          generate_fb_write(ir);
          break;
-- 
2.0.0



More information about the mesa-dev mailing list