[Mesa-dev] [PATCH 12/22] i965/vec4: split DF instructions and later duplicate its execsize in IVB/VLV

Samuel Iglesias Gonsálvez siglesias at igalia.com
Thu Jan 5 13:07:32 UTC 2017


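On IVB/VLV, an instruction needs an execsize of 8 to process 4 DF values
(one GRF in total). We therefore split DF instructions in two during SIMD
width lowering and later double the execsize of each half in the generator.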

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/mesa/drivers/dri/i965/brw_ir_vec4.h          |  1 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp           | 20 +++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 +++++++++++++--
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 57fc6be..9291be0 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -291,6 +291,7 @@ public:
    bool can_do_writemask(const struct gen_device_info *devinfo);
    bool can_change_types() const;
    bool has_source_and_destination_hazard() const;
+   unsigned exec_data_size() const;
 
    bool is_align1_partial_write()
    {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index b5e846d..6ba85d7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -301,6 +301,19 @@ vec4_instruction::can_change_types() const
             !src[1].abs && !src[1].negate));
 }
 
+unsigned
+vec4_instruction::exec_data_size() const
+{
+   /* Returns the largest type_sz() among the sources in use (0 if none). */
+   unsigned exec_data_size = 0;
+   for (int i = 0; i < 3; i++) {
+      /* BAD_FILE is a register-file value, so test .file rather than .type. */
+      if (this->src[i].file != BAD_FILE)
+         exec_data_size = MAX2(exec_data_size, type_sz(this->src[i].type));
+   }
+   return exec_data_size;
+}
+
 /**
  * Returns how many MRFs an opcode will write over.
  *
@@ -2087,6 +2100,10 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
       if (inst->opcode == BRW_OPCODE_SEL && type_sz(inst->dst.type) == 8)
          lowered_width = MIN2(lowered_width, 4);
 
+      if (devinfo->gen == 7 && !devinfo->is_haswell &&
+          (inst->exec_data_size() == 8 || type_sz(inst->dst.type) == 8))
+         lowered_width = MIN2(lowered_width, 4);
+
       /* HSW PRM, 3D Media GPGPU Engine, Region Alignment Rules for Direct
        * Register Addressing:
        *
@@ -2194,7 +2211,8 @@ vec4_visitor::lower_simd_width()
                inst->insert_before(block, copy);
             }
          } else {
-            dst = horiz_offset(inst->dst, channel_offset);
+            if (inst->dst.file != ARF)
+               dst = horiz_offset(inst->dst, channel_offset);
          }
          linst->dst = dst;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 3d688cf..0eaa91b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1511,14 +1511,25 @@ generate_code(struct brw_codegen *p,
       brw_set_default_saturate(p, inst->saturate);
       brw_set_default_mask_control(p, inst->force_writemask_all);
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
-      brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
 
-      assert(inst->group % inst->exec_size == 0);
+      bool is_ivb_df = devinfo->gen == 7 &&
+         !devinfo->is_haswell &&
+         (inst->exec_data_size() == 8 ||
+          inst->dst.type == BRW_REGISTER_TYPE_DF);
+
+      assert(inst->group % inst->exec_size == 0 ||
+             is_ivb_df);
+
       assert(inst->group % 8 == 0 ||
              inst->dst.type == BRW_REGISTER_TYPE_DF ||
              inst->src[0].type == BRW_REGISTER_TYPE_DF ||
              inst->src[1].type == BRW_REGISTER_TYPE_DF ||
              inst->src[2].type == BRW_REGISTER_TYPE_DF);
+
+      if (is_ivb_df && inst->exec_size < 8)
+         inst->exec_size *= 2;
+      brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
+
       if (!inst->force_writemask_all)
          brw_set_default_group(p, inst->group);
 
-- 
2.9.3



More information about the mesa-dev mailing list