[Mesa-dev] [PATCH 02/10] i965: Replace guess_execution_size with something simpler.

Matt Turner mattst88 at gmail.com
Tue Apr 14 16:15:39 PDT 2015


guess_execution_size() does two things:

   1. Cope with small destination registers.
   2. Cope with SIMD8 vs SIMD16 mode.

This patch replaces the first with a simple if block in brw_set_dest: if
the destination register width is less than 8, you probably want the
execution size to match.  (I didn't put this in the 3src block because
it doesn't seem to matter.)

Since only the FS compiler cares about SIMD16 mode, it's easy to just
set the default execution size there.

This pattern has already been proven in the Gen8+ generator, but we
didn't port it back to the existing generator when we combined the two.

This is based on a patch from Ken from about a year ago. I've rebased it
and fixed a few bugs.
---
 src/mesa/drivers/dri/i965/brw_eu.c             |  7 ++++++
 src/mesa/drivers/dri/i965/brw_eu.h             |  1 +
 src/mesa/drivers/dri/i965/brw_eu_emit.c        | 32 ++++++++------------------
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 26 +++++++++++++++++----
 4 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 146202b..c21d14d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -110,6 +110,12 @@ brw_swap_cmod(uint32_t cmod)
    }
 }
 
+void
+brw_set_default_exec_size(struct brw_compile *p, unsigned value)
+{
+   brw_inst_set_exec_size(p->brw, p->current, value);
+}
+
 void brw_set_default_predicate_control( struct brw_compile *p, unsigned pc )
 {
    brw_inst_set_pred_control(p->brw, p->current, pc);
@@ -228,6 +234,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx)
 
    /* Some defaults?
     */
+   brw_set_default_exec_size(p, BRW_EXECUTE_8);
    brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
    brw_set_default_saturate(p, 0);
    brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index f8fd155..31c1492 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -99,6 +99,7 @@ struct brw_compile {
 
 void brw_pop_insn_state( struct brw_compile *p );
 void brw_push_insn_state( struct brw_compile *p );
+void brw_set_default_exec_size(struct brw_compile *p, unsigned value);
 void brw_set_default_mask_control( struct brw_compile *p, unsigned value );
 void brw_set_default_saturate( struct brw_compile *p, bool enable );
 void brw_set_default_access_mode( struct brw_compile *p, unsigned access_mode );
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 1fe9e7b..706b66b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -36,25 +36,6 @@
 
 #include "util/ralloc.h"
 
-/***********************************************************************
- * Internal helper for constructing instructions
- */
-
-static void guess_execution_size(struct brw_compile *p,
-				 brw_inst *insn,
-				 struct brw_reg reg)
-{
-   const struct brw_context *brw = p->brw;
-
-   if (reg.width == BRW_WIDTH_8 && p->compressed) {
-      brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_16);
-   } else {
-      /* Register width definitions are compatible with BRW_EXECUTE_* enums. */
-      brw_inst_set_exec_size(brw, insn, reg.width);
-   }
-}
-
-
 /**
  * Prior to Sandybridge, the SEND instruction accepted non-MRF source
  * registers, implicitly moving the operand to a message register.
@@ -76,6 +57,7 @@ gen6_resolve_implied_move(struct brw_compile *p,
 
    if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
       brw_push_insn_state(p);
+      brw_set_default_exec_size(p, BRW_EXECUTE_8);
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
       brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
@@ -215,10 +197,12 @@ brw_set_dest(struct brw_compile *p, brw_inst *inst, struct brw_reg dest)
       }
    }
 
-   /* NEW: Set the execution size based on dest.width and
-    * inst->compression_control:
+   /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
+    * or 16 (SIMD16), as that's normally correct.  However, when dealing with
+    * small registers, we automatically reduce it to match the register size.
     */
-   guess_execution_size(p, inst, dest);
+   if (dest.width < BRW_EXECUTE_8)
+      brw_inst_set_exec_size(brw, inst, dest.width);
 }
 
 extern int reg_type_size[];
@@ -874,7 +858,6 @@ brw_alu3(struct brw_compile *p, unsigned opcode, struct brw_reg dest,
    brw_inst_set_3src_dst_reg_nr(brw, inst, dest.nr);
    brw_inst_set_3src_dst_subreg_nr(brw, inst, dest.subnr / 16);
    brw_inst_set_3src_dst_writemask(brw, inst, dest.dw1.bits.writemask);
-   guess_execution_size(p, inst, dest);
 
    assert(src0.file == BRW_GENERAL_REGISTER_FILE);
    assert(src0.address_mode == BRW_ADDRESS_DIRECT);
@@ -2015,6 +1998,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
     */
    {
       brw_push_insn_state(p);
+      brw_set_default_exec_size(p, BRW_EXECUTE_8);
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
 
@@ -2135,6 +2119,7 @@ brw_oword_block_read_scratch(struct brw_compile *p,
 
    {
       brw_push_insn_state(p);
+      brw_set_default_exec_size(p, BRW_EXECUTE_8);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
@@ -2228,6 +2213,7 @@ void brw_oword_block_read(struct brw_compile *p,
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
    brw_push_insn_state(p);
+   brw_set_default_exec_size(p, BRW_EXECUTE_8);
    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 78925d7..a62ba03 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -207,6 +207,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
 
    if (brw->gen < 6) {
       brw_push_insn_state(p);
+      brw_set_default_exec_size(p, BRW_EXECUTE_8);
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
@@ -287,11 +288,13 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
       }
 
       if (brw->gen >= 6) {
+         brw_push_insn_state(p);
+         brw_set_default_exec_size(p, BRW_EXECUTE_16);
 	 brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 	 brw_MOV(p,
 		 retype(payload, BRW_REGISTER_TYPE_UD),
 		 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-	 brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+         brw_pop_insn_state(p);
 
          if (inst->target > 0 && key->replicate_alpha) {
             /* Set "Source0 Alpha Present to RenderTarget" bit in message
@@ -448,12 +451,14 @@ fs_generator::generate_math_gen6(fs_inst *inst,
    if (dispatch_width == 8) {
       gen6_math(p, dst, op, src0, src1);
    } else if (dispatch_width == 16) {
+      brw_push_insn_state(p);
+      brw_set_default_exec_size(p, BRW_EXECUTE_8);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
       gen6_math(p, firsthalf(dst), op, firsthalf(src0), firsthalf(src1));
       brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
       gen6_math(p, sechalf(dst), op, sechalf(src0),
                 binop ? sechalf(src1) : brw_null_reg());
-      brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+      brw_pop_insn_state(p);
    }
 }
 
@@ -472,6 +477,7 @@ fs_generator::generate_math_gen4(fs_inst *inst,
                 inst->base_mrf, src,
                 BRW_MATH_PRECISION_FULL);
    } else if (dispatch_width == 16) {
+      brw_set_default_exec_size(p, BRW_EXECUTE_8);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
       gen4_math(p, firsthalf(dst),
 	        op,
@@ -712,6 +718,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
          }
 
          brw_push_insn_state(p);
+         brw_set_default_exec_size(p, BRW_EXECUTE_8);
          brw_set_default_mask_control(p, BRW_MASK_DISABLE);
          brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
          /* Explicitly set up the message header by copying g0 to the MRF. */
@@ -1322,6 +1329,7 @@ fs_generator::generate_set_simd4x2_offset(fs_inst *inst,
    assert(value.file == BRW_IMMEDIATE_VALUE);
 
    brw_push_insn_state(p);
+   brw_set_default_exec_size(p, BRW_EXECUTE_8);
    brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
    brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
@@ -1347,8 +1355,6 @@ fs_generator::generate_set_omask(fs_inst *inst,
    assert(stride_8_8_1 || stride_0_1_0);
    assert(dst.type == BRW_REGISTER_TYPE_UW);
 
-   if (dispatch_width == 16)
-      dst = vec16(dst);
    brw_push_insn_state(p);
    brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
@@ -1376,6 +1382,7 @@ fs_generator::generate_set_sample_id(fs_inst *inst,
           src0.type == BRW_REGISTER_TYPE_UD);
 
    brw_push_insn_state(p);
+   brw_set_default_exec_size(p, BRW_EXECUTE_8);
    brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
    struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW);
@@ -1596,6 +1603,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       brw_set_default_mask_control(p, inst->force_writemask_all);
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
 
+      if (dispatch_width == 16 && !inst->force_uncompressed)
+         brw_set_default_exec_size(p, BRW_EXECUTE_16);
+      else
+         brw_set_default_exec_size(p, BRW_EXECUTE_8);
+
       switch (inst->exec_size) {
       case 1:
       case 2:
@@ -1642,6 +1654,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          assert(brw->gen >= 6);
 	 brw_set_default_access_mode(p, BRW_ALIGN_16);
          if (dispatch_width == 16 && !brw_supports_simd16_3src(brw)) {
+            brw_set_default_exec_size(p, BRW_EXECUTE_8);
 	    brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
             brw_inst *f = brw_MAD(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
 	    brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
@@ -1663,6 +1676,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          assert(brw->gen >= 6);
 	 brw_set_default_access_mode(p, BRW_ALIGN_16);
          if (dispatch_width == 16 && !brw_supports_simd16_3src(brw)) {
+            brw_set_default_exec_size(p, BRW_EXECUTE_8);
 	    brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
             brw_inst *f = brw_LRP(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
 	    brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
@@ -1736,6 +1750,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
           */
          if (dispatch_width == 16 && brw->gen == 7 && !brw->is_haswell) {
             if (dst.file == BRW_GENERAL_REGISTER_FILE) {
+               brw_set_default_exec_size(p, BRW_EXECUTE_8);
                brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
                brw_CMP(p, firsthalf(dst), inst->conditional_mod,
                           firsthalf(src[0]), firsthalf(src[1]));
@@ -1800,6 +1815,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          assert(brw->gen >= 7);
          brw_set_default_access_mode(p, BRW_ALIGN_16);
          if (dispatch_width == 16 && !brw_supports_simd16_3src(brw)) {
+            brw_set_default_exec_size(p, BRW_EXECUTE_8);
             brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
             brw_BFE(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
             brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
@@ -1819,6 +1835,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
           *    "Force BFI instructions to be executed always in SIMD8."
           */
          if (dispatch_width == 16 && brw->is_haswell) {
+            brw_set_default_exec_size(p, BRW_EXECUTE_8);
             brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
             brw_BFI1(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]));
             brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
@@ -1841,6 +1858,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
           */
          if (dispatch_width == 16 &&
              (brw->is_haswell || !brw_supports_simd16_3src(brw))) {
+            brw_set_default_exec_size(p, BRW_EXECUTE_8);
             brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
             brw_BFI2(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2]));
             brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
-- 
2.0.5



More information about the mesa-dev mailing list