[Mesa-dev] [PATCH v4 14/28] i965/vec4: split DF instructions and later double its execsize in IVB/BYT

Samuel Iglesias Gonsálvez siglesias at igalia.com
Mon Mar 20 09:17:11 UTC 2017


We need to split DF instructions in two on IVB/BYT as it needs an
execsize 8 to process 4 DF values (one GRF in total).

v2:
- Rename helper and make it static inline function (Matt).
- Fix indention and add braces (Matt).

v3:
- Don't edit IR instruction when doubling exec_size (Curro)
- Add comment into the code (Curro).
- Manage ARF registers like the others (Curro)

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/intel/compiler/brw_ir_vec4.h          | 14 ++++++++++++++
 src/intel/compiler/brw_vec4.cpp           | 10 ++++++++++
 src/intel/compiler/brw_vec4_generator.cpp | 17 +++++++++++++++--
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_ir_vec4.h b/src/intel/compiler/brw_ir_vec4.h
index bd026eb2aeb..6130950c72f 100644
--- a/src/intel/compiler/brw_ir_vec4.h
+++ b/src/intel/compiler/brw_ir_vec4.h
@@ -404,6 +404,20 @@ regs_read(const vec4_instruction *inst, unsigned i)
                        reg_size);
 }
 
+static inline unsigned
+get_exec_type_size(const vec4_instruction *inst)
+{
+   unsigned exec_type_size = 0;
+
+   for (int i = 0; i < 3; i++) {
+      if (inst->src[i].type != BAD_FILE) {
+         exec_type_size = MAX2(exec_type_size, type_sz(inst->src[i].type));
+      }
+   }
+
+   return exec_type_size;
+}
+
 } /* namespace brw */
 
 #endif
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index d7c09093032..a213bbb4707 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -2093,6 +2093,16 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
       if (inst->opcode == BRW_OPCODE_SEL && type_sz(inst->dst.type) == 8)
          lowered_width = MIN2(lowered_width, 4);
 
+      /* IvyBridge can manage a maximum of 4 DFs per SIMD4x2 instruction,
+       * as we need to duplicate the instruction's exec_size at generation time
+       * to work with doubles. No matter if it has force_writemask_all enabled
+       * or disabled (the latter is affected by compressed instruction bug in
+       * gen7, which is another reason to force this limit).
+       */
+      if (devinfo->gen == 7 && !devinfo->is_haswell &&
+          (get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8))
+         lowered_width = MIN2(lowered_width, 4);
+
       /* HSW PRM, 3D Media GPGPU Engine, Region Alignment Rules for Direct
        * Register Addressing:
        *
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index d3192ab7db3..0d70690b057 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1522,14 +1522,27 @@ generate_code(struct brw_codegen *p,
       brw_set_default_saturate(p, inst->saturate);
       brw_set_default_mask_control(p, inst->force_writemask_all);
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
-      brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
 
-      assert(inst->group % inst->exec_size == 0);
+      bool is_ivb_df = devinfo->gen == 7 &&
+         !devinfo->is_haswell &&
+         (get_exec_type_size(inst) == 8 ||
+          inst->dst.type == BRW_REGISTER_TYPE_DF);
+
+      assert(inst->group % inst->exec_size == 0 ||
+             is_ivb_df);
+
       assert(inst->group % 8 == 0 ||
              inst->dst.type == BRW_REGISTER_TYPE_DF ||
              inst->src[0].type == BRW_REGISTER_TYPE_DF ||
              inst->src[1].type == BRW_REGISTER_TYPE_DF ||
              inst->src[2].type == BRW_REGISTER_TYPE_DF);
+
+      unsigned exec_size = inst->exec_size;
+      if (is_ivb_df && inst->exec_size < 8)
+         exec_size *= 2;
+
+      brw_set_default_exec_size(p, cvt(exec_size) - 1);
+
       if (!inst->force_writemask_all)
          brw_set_default_group(p, inst->group);
 
-- 
2.11.0



More information about the mesa-dev mailing list