[Mesa-dev] [PATCH v4.1 17.5/28] i965/vec4: split VEC4_OPCODE_FROM_DOUBLE into one opcode per destination's type

Fri Mar 24 11:15:18 UTC 2017

This way we can set the destination type to double for all of these new
opcodes, avoiding the optimizer confusion that was happening before. The
generator recovers the actual 32-bit destination type (F, D or UD) from the
opcode itself.

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---

This patch is going to be placed just after patch 17.
Please discard patch 19.

 src/intel/compiler/brw_eu_defines.h              |  4 +++-
 src/intel/compiler/brw_shader.cpp                |  8 ++++++--
 src/intel/compiler/brw_vec4.cpp                  | 12 +++++++++---
 src/intel/compiler/brw_vec4_copy_propagation.cpp |  4 +++-
 src/intel/compiler/brw_vec4_generator.cpp        | 23 +++++++++++++++++++++--
 src/intel/compiler/brw_vec4_nir.cpp              | 24 ++++++++++++++++++------
 src/intel/compiler/brw_vec4_reg_allocate.cpp     |  4 +++-
 7 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index f0b0d5c2a06..13a70f6f6a1 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -447,7 +447,9 @@ enum opcode {
    VEC4_OPCODE_MOV_BYTES,
    VEC4_OPCODE_PACK_BYTES,
    VEC4_OPCODE_UNPACK_UNIFORM,
-   VEC4_OPCODE_FROM_DOUBLE,
+   VEC4_OPCODE_DOUBLE_TO_F32,
+   VEC4_OPCODE_DOUBLE_TO_D32,
+   VEC4_OPCODE_DOUBLE_TO_U32,
    VEC4_OPCODE_TO_DOUBLE,
    VEC4_OPCODE_PICK_LOW_32BIT,
    VEC4_OPCODE_PICK_HIGH_32BIT,
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index bfaa5e7bfe2..4d9d3990a63 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -317,8 +317,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
       return "pack_bytes";
    case VEC4_OPCODE_UNPACK_UNIFORM:
       return "unpack_uniform";
-   case VEC4_OPCODE_FROM_DOUBLE:
-      return "double_to_single";
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+      return "double_to_f32";
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+      return "double_to_d32";
+   case VEC4_OPCODE_DOUBLE_TO_U32:
+      return "double_to_u32";
    case VEC4_OPCODE_TO_DOUBLE:
       return "single_to_double";
    case VEC4_OPCODE_PICK_LOW_32BIT:
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index b26f8035811..36855a8cff6 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -260,7 +260,9 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
 {
    switch (opcode) {
    case SHADER_OPCODE_GEN4_SCRATCH_READ:
-   case VEC4_OPCODE_FROM_DOUBLE:
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
    case VEC4_OPCODE_TO_DOUBLE:
    case VEC4_OPCODE_PICK_LOW_32BIT:
    case VEC4_OPCODE_PICK_HIGH_32BIT:
@@ -521,7 +523,9 @@ vec4_visitor::opt_reduce_swizzle()
          break;
 
       case VEC4_OPCODE_TO_DOUBLE:
-      case VEC4_OPCODE_FROM_DOUBLE:
+      case VEC4_OPCODE_DOUBLE_TO_F32:
+      case VEC4_OPCODE_DOUBLE_TO_D32:
+      case VEC4_OPCODE_DOUBLE_TO_U32:
       case VEC4_OPCODE_PICK_LOW_32BIT:
       case VEC4_OPCODE_PICK_HIGH_32BIT:
       case VEC4_OPCODE_SET_LOW_32BIT:
@@ -2255,7 +2259,9 @@ static bool
 is_align1_df(vec4_instruction *inst)
 {
    switch (inst->opcode) {
-   case VEC4_OPCODE_FROM_DOUBLE:
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
    case VEC4_OPCODE_TO_DOUBLE:
    case VEC4_OPCODE_PICK_LOW_32BIT:
    case VEC4_OPCODE_PICK_HIGH_32BIT:
diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp
index e7f6f93f8bd..c1ae32a2936 100644
--- a/src/intel/compiler/brw_vec4_copy_propagation.cpp
+++ b/src/intel/compiler/brw_vec4_copy_propagation.cpp
@@ -293,7 +293,9 @@ static bool
 is_align1_opcode(unsigned opcode)
 {
    switch (opcode) {
-   case VEC4_OPCODE_FROM_DOUBLE:
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
    case VEC4_OPCODE_TO_DOUBLE:
    case VEC4_OPCODE_PICK_LOW_32BIT:
    case VEC4_OPCODE_PICK_HIGH_32BIT:
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index d0fd694901f..26de5c12e8e 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1944,9 +1944,28 @@ generate_code(struct brw_codegen *p,
          break;
       }
 
-      case VEC4_OPCODE_FROM_DOUBLE: {
+      case VEC4_OPCODE_DOUBLE_TO_F32:
+      case VEC4_OPCODE_DOUBLE_TO_D32:
+      case VEC4_OPCODE_DOUBLE_TO_U32: {
          assert(type_sz(src[0].type) == 8);
-         assert(type_sz(dst.type) == 4);
+         assert(type_sz(dst.type) == 8);
+
+         brw_reg_type dst_type;
+
+         switch (inst->opcode) {
+         case VEC4_OPCODE_DOUBLE_TO_F32:
+            dst_type = BRW_REGISTER_TYPE_F;
+            break;
+         case VEC4_OPCODE_DOUBLE_TO_D32:
+            dst_type = BRW_REGISTER_TYPE_D;
+            break;
+         case VEC4_OPCODE_DOUBLE_TO_U32:
+            dst_type = BRW_REGISTER_TYPE_UD;
+            break;
+         default:
+            unreachable("Not supported conversion");
+         }
+         dst = retype(dst, dst_type);
 
          brw_set_default_access_mode(p, BRW_ALIGN_1);
 
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 45d2c9f4a93..a4257e45b60 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -1183,16 +1183,28 @@ vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src,
       return;
    }
 
+   enum opcode op;
+   switch (dst.type) {
+   case BRW_REGISTER_TYPE_D:
+      op = VEC4_OPCODE_DOUBLE_TO_D32;
+      break;
+   case BRW_REGISTER_TYPE_UD:
+      op = VEC4_OPCODE_DOUBLE_TO_U32;
+      break;
+   case BRW_REGISTER_TYPE_F:
+      op = VEC4_OPCODE_DOUBLE_TO_F32;
+      break;
+   default:
+      unreachable("Unknown conversion");
+   }
+
    dst_reg temp = dst_reg(this, glsl_type::dvec4_type);
    emit(MOV(temp, src));
-
    dst_reg temp2 = dst_reg(this, glsl_type::dvec4_type);
-   temp2 = retype(temp2, dst.type);
-   emit(VEC4_OPCODE_FROM_DOUBLE, temp2, src_reg(temp))
-      ->size_written = 2 * REG_SIZE;
+   emit(op, temp2, src_reg(temp));
 
-   emit(VEC4_OPCODE_PICK_LOW_32BIT, temp2, src_reg(retype(temp2, BRW_REGISTER_TYPE_DF)));
-   vec4_instruction *inst = emit(MOV(dst, src_reg(temp2)));
+   emit(VEC4_OPCODE_PICK_LOW_32BIT, retype(temp2, dst.type), src_reg(temp2));
+   vec4_instruction *inst = emit(MOV(dst, src_reg(retype(temp2, dst.type))));
    inst->saturate = saturate;
 }
 
diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp
index e3b46cc2f7f..f46aca8f49d 100644
--- a/src/intel/compiler/brw_vec4_reg_allocate.cpp
+++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp
@@ -456,7 +456,9 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
           * dst we see a 32-bit destination and emit a scratch write that
           * allocates a single spill register.
           */
-         if (inst->opcode == VEC4_OPCODE_FROM_DOUBLE)
+         if (inst->opcode == VEC4_OPCODE_DOUBLE_TO_F32 ||
+             inst->opcode == VEC4_OPCODE_DOUBLE_TO_D32 ||
+             inst->opcode == VEC4_OPCODE_DOUBLE_TO_U32)
             no_spill[inst->dst.nr] = true;
 
          /* We can't spill registers that mix 32-bit and 64-bit access (that
-- 
2.11.0