[Mesa-dev] [PATCH v4 07/28] i965/fs: generalize the legalization d2x pass

Mon Mar 20 09:17:04 UTC 2017

Generalize it to lower any unsupported narrower conversion.

v2 (Curro):
- Add supports_type_conversion()
- Reuse existing intruction instead of cloning it.
- Generalize d2x to narrower and equal size conversions.

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/intel/compiler/brw_fs.cpp           | 11 ++--
 src/intel/compiler/brw_fs_lower_d2x.cpp | 97 ++++++++++++++++++++++++---------
 2 files changed, 77 insertions(+), 31 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 6da23b17107..8612a83805d 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5694,11 +5694,6 @@ fs_visitor::optimize()
       OPT(dead_code_eliminate);
    }
 
-   if (OPT(lower_d2x)) {
-      OPT(opt_copy_propagation);
-      OPT(dead_code_eliminate);
-   }
-
    OPT(lower_simd_width);
 
    /* After SIMD lowering just in case we had to unroll the EOT send. */
@@ -5745,6 +5740,12 @@ fs_visitor::optimize()
       OPT(dead_code_eliminate);
    }
 
+   if (OPT(lower_d2x)) {
+      OPT(opt_copy_propagation);
+      OPT(dead_code_eliminate);
+      OPT(lower_simd_width);
+   }
+
    lower_uniform_pull_constant_loads();
 
    validate();
diff --git a/src/intel/compiler/brw_fs_lower_d2x.cpp b/src/intel/compiler/brw_fs_lower_d2x.cpp
index a2db1154615..fa25d313dcd 100644
--- a/src/intel/compiler/brw_fs_lower_d2x.cpp
+++ b/src/intel/compiler/brw_fs_lower_d2x.cpp
@@ -27,47 +27,92 @@
 
 using namespace brw;
 
+static bool
+supports_type_conversion(fs_inst *inst) {
+   switch(inst->opcode) {
+   case BRW_OPCODE_MOV:
+   case SHADER_OPCODE_MOV_INDIRECT:
+      return true;
+   case BRW_OPCODE_SEL:
+      return false;
+   default:
+      /* FIXME: We assume the opcodes don't explicitly mentioned
+       * before just work fine with arbitrary conversions.
+       */
+      return true;
+   }
+}
+
 bool
 fs_visitor::lower_d2x()
 {
    bool progress = false;
 
    foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
-      if (inst->opcode != BRW_OPCODE_MOV)
-         continue;
+      bool inst_support_conversion = supports_type_conversion(inst);
+      bool supported_conversion =
+         inst_support_conversion &&
+         (get_exec_type_size(inst) != 8 ||
+          type_sz(inst->dst.type) > 4 ||
+          type_sz(inst->dst.type) >= get_exec_type_size(inst));
 
-      if (inst->dst.type != BRW_REGISTER_TYPE_F &&
-          inst->dst.type != BRW_REGISTER_TYPE_D &&
-          inst->dst.type != BRW_REGISTER_TYPE_UD)
+      /* If the conversion is supported or there is no conversion then
+       * do nothing.
+       */
+      if (supported_conversion ||
+          (!inst_support_conversion && inst->dst.type == inst->src[0].type) ||
+          inst->dst.file == BAD_FILE || inst->src[0].file == BAD_FILE)
          continue;
 
-      if (inst->src[0].type != BRW_REGISTER_TYPE_DF &&
-          inst->src[0].type != BRW_REGISTER_TYPE_UQ &&
-          inst->src[0].type != BRW_REGISTER_TYPE_Q)
-         continue;
+      /* This pass only supports conversion to narrower or equal size types. */
+      if (get_exec_type_size(inst) < type_sz(inst->dst.type))
+          continue;
 
-      assert(inst->dst.file == VGRF);
       assert(inst->saturate == false);
-      fs_reg dst = inst->dst;
 
       const fs_builder ibld(this, block, inst);
+      fs_reg dst = inst->dst;
 
-      /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
-       * Single Precision Float":
-       *
-       *    The upper Dword of every Qword will be written with undefined
-       *    value when converting DF to F.
-       *
-       * So we need to allocate a temporary that's two registers, and then do
-       * a strided MOV to get the lower DWord of every Qword that has the
-       * result.
-       */
-      fs_reg temp = ibld.vgrf(inst->src[0].type, 1);
-      fs_reg strided_temp = subscript(temp, inst->dst.type, 0);
-      ibld.MOV(strided_temp, inst->src[0]);
-      ibld.MOV(dst, strided_temp);
+      if (inst_support_conversion && !supported_conversion) {
+         /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
+          * Single Precision Float":
+          *
+          *    The upper Dword of every Qword will be written with undefined
+          *    value when converting DF to F.
+          *
+          * So we need to allocate a temporary that's two registers, and then do
+          * a strided MOV to get the lower DWord of every Qword that has the
+          * result.
+          */
+         fs_reg temp = ibld.vgrf(inst->src[0].type, 1);
+         fs_reg strided_temp = subscript(temp, dst.type, 0);
+
+         /* We clone the original instruction as we are going to modify it
+          * and emit another one after it.
+          */
+         inst->dst = strided_temp;
+         /* As it is an strided destination, we write n-times more being n the
+          * size difference between source and destination types. Update
+          * size_written with the new destination.
+          */
+         inst->size_written = inst->dst.component_size(inst->exec_size);
+         ibld.at(block, inst->next).MOV(dst, strided_temp);
+      } else {
+         fs_reg temp0 = ibld.vgrf(inst->src[0].type, 1);
+         fs_reg temp1 = ibld.vgrf(inst->src[0].type, 1);
+         fs_reg strided_temp1 = subscript(temp1, dst.type, 0);
+
+         inst->dst = temp0;
+         /* As it is an strided destination, we write n-times more being n the
+          * size difference between source and destination types. Update
+          * size_written with the new destination.
+          */
+         inst->size_written = inst->dst.component_size(inst->exec_size);
 
-      inst->remove(block);
+         /* Now, do the conversion to original destination's type. */
+         fs_inst *mov = ibld.at(block, inst->next).MOV(strided_temp1, temp0);
+         ibld.at(block, mov->next).MOV(dst, strided_temp1);
+      }
       progress = true;
    }
 
-- 
2.11.0