Mesa (master): intel/compiler: don't use byte operands for src1 on ICL

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sat Jun 29 13:04:54 UTC 2019


Module: Mesa
Branch: master
Commit: 5847de6e9afe12bd29ad694a76860a0575ab4747
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5847de6e9afe12bd29ad694a76860a0575ab4747

Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date:   Wed Jun 19 05:09:35 2019 -0700

intel/compiler: don't use byte operands for src1 on ICL

The simulator complains about using byte operands, we also have
documentation telling us.

Note that add operations on bytes seems to work fine on HW (like ADD).
Using dwords operands with CMP & SEL fixes the following tests :

   dEQP-VK.spirv_assembly.type.vec*.i8.*

v2: Drop the GLK changes (Matt)
    Add validator tests (Matt)

v3: Drop GLK ref (Matt)
    Don't mix float/integer in MAD (Matt)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Rafael Antognolli <rafael.antognolli at intel.com> (v1)
Reviewed-by: Matt Turner <mattst88 at gmail.com>
BSpec: 3017
Cc: <mesa-stable at lists.freedesktop.org>

---

 src/intel/compiler/brw_eu_validate.c    |  42 ++++++++---
 src/intel/compiler/brw_fs_builder.h     |  38 +++++++---
 src/intel/compiler/brw_fs_nir.cpp       |  11 ++-
 src/intel/compiler/test_eu_validate.cpp | 121 ++++++++++++++++++++++++++++++++
 4 files changed, 192 insertions(+), 20 deletions(-)

diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index 943f724e60f..203280570aa 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -289,6 +289,18 @@ sources_not_null(const struct gen_device_info *devinfo,
    return error_msg;
 }
 
+static struct string
+alignment_supported(const struct gen_device_info *devinfo,
+                    const brw_inst *inst)
+{
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
+            "Align16 not supported");
+
+   return error_msg;
+}
+
 static bool
 inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
 {
@@ -600,17 +612,31 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
    unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
    struct string error_msg = { .str = NULL, .len = 0 };
 
+   if (devinfo->gen >= 11) {
+      if (num_sources == 3) {
+         ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
+                  brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
+                  "Byte data type is not supported for src1/2 register regioning. This includes "
+                  "byte broadcast as well.");
+      }
+      if (num_sources == 2) {
+         ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,
+                  "Byte data type is not supported for src1 register regioning. This includes "
+                  "byte broadcast as well.");
+      }
+   }
+
    if (num_sources == 3)
-      return (struct string){};
+      return error_msg;
 
    if (inst_is_send(devinfo, inst))
-      return (struct string){};
+      return error_msg;
 
    if (exec_size == 1)
-      return (struct string){};
+      return error_msg;
 
    if (desc->ndst == 0)
-      return (struct string){};
+      return error_msg;
 
    /* The PRMs say:
     *
@@ -635,12 +661,9 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
 
    if (dst_type_is_byte) {
       if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
-         if (!inst_is_raw_move(devinfo, inst)) {
+         if (!inst_is_raw_move(devinfo, inst))
             ERROR("Only raw MOV supports a packed-byte destination");
-            return error_msg;
-         } else {
-            return (struct string){};
-         }
+         return error_msg;
       }
    }
 
@@ -1823,6 +1846,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo,
       } else {
          CHECK(sources_not_null);
          CHECK(send_restrictions);
+         CHECK(alignment_supported);
          CHECK(general_restrictions_based_on_operand_types);
          CHECK(general_restrictions_on_region_parameters);
          CHECK(special_restrictions_for_mixed_float_mode);
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 9655c2ef554..b781f1257ba 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -322,10 +322,11 @@ namespace brw {
          case SHADER_OPCODE_INT_REMAINDER:
             return emit(instruction(opcode, dispatch_width(), dst,
                                     fix_math_operand(src0),
-                                    fix_math_operand(src1)));
+                                    fix_math_operand(fix_byte_src(src1))));
 
          default:
-            return emit(instruction(opcode, dispatch_width(), dst, src0, src1));
+            return emit(instruction(opcode, dispatch_width(), dst,
+                                    src0, fix_byte_src(src1)));
 
          }
       }
@@ -344,12 +345,12 @@ namespace brw {
          case BRW_OPCODE_LRP:
             return emit(instruction(opcode, dispatch_width(), dst,
                                     fix_3src_operand(src0),
-                                    fix_3src_operand(src1),
-                                    fix_3src_operand(src2)));
+                                    fix_3src_operand(fix_byte_src(src1)),
+                                    fix_3src_operand(fix_byte_src(src2))));
 
          default:
             return emit(instruction(opcode, dispatch_width(), dst,
-                                    src0, src1, src2));
+                                    src0, fix_byte_src(src1), fix_byte_src(src2)));
          }
       }
 
@@ -399,8 +400,11 @@ namespace brw {
       {
          assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
 
-         return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
-                                     fix_unsigned_negate(src1)));
+         /* In some cases we can't have bytes as operand for src1, so use the
+          * same type for both operand.
+          */
+         return set_condmod(mod, SEL(dst, fix_unsigned_negate(fix_byte_src(src0)),
+                                     fix_unsigned_negate(fix_byte_src(src1))));
       }
 
       /**
@@ -657,8 +661,8 @@ namespace brw {
                             emit(BRW_OPCODE_CSEL,
                                  retype(dst, BRW_REGISTER_TYPE_F),
                                  retype(src0, BRW_REGISTER_TYPE_F),
-                                 retype(src1, BRW_REGISTER_TYPE_F),
-                                 src2));
+                                 retype(fix_byte_src(src1), BRW_REGISTER_TYPE_F),
+                                 fix_byte_src(src2)));
       }
 
       /**
@@ -719,6 +723,22 @@ namespace brw {
 
       backend_shader *shader;
 
+      /**
+       * Byte sized operands are not supported for src1 on Gen11+.
+       */
+      src_reg
+      fix_byte_src(const src_reg &src) const
+      {
+         if ((shader->devinfo->gen < 11 && !shader->devinfo->is_geminilake) ||
+             type_sz(src.type) != 1)
+            return src;
+
+         dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ?
+                             BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_D);
+         MOV(temp, src);
+         return src_reg(temp);
+      }
+
    private:
       /**
        * Workaround for negation of UD registers.  See comment in
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index e0818caae13..b9d42b6e26e 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1340,9 +1340,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
    case nir_op_ine32: {
       fs_reg dest = result;
 
+      /* On Gen11 we have an additional issue being that src1 cannot be a byte
+       * type. So we convert both operands for the comparison.
+       */
+      fs_reg temp_op[2];
+      temp_op[0] = bld.fix_byte_src(op[0]);
+      temp_op[1] = bld.fix_byte_src(op[1]);
+
       const uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
       if (bit_size != 32)
-         dest = bld.vgrf(op[0].type, 1);
+         dest = bld.vgrf(temp_op[0].type, 1);
 
       brw_conditional_mod cond;
       switch (instr->op) {
@@ -1363,7 +1370,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
       default:
          unreachable("bad opcode");
       }
-      bld.CMP(dest, op[0], op[1], cond);
+      bld.CMP(dest, temp_op[0], temp_op[1], cond);
 
       if (bit_size > 32) {
          bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0));
diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
index 65326416064..efdae4fd79b 100644
--- a/src/intel/compiler/test_eu_validate.cpp
+++ b/src/intel/compiler/test_eu_validate.cpp
@@ -2372,3 +2372,124 @@ TEST_P(validation_test, qword_low_power_no_depctrl)
       clear_instructions(p);
    }
 }
+
+TEST_P(validation_test, gen11_no_byte_src_1_2)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned access_mode;
+
+      enum brw_reg_type dst_type;
+      struct {
+         enum brw_reg_type type;
+         unsigned vstride;
+         unsigned width;
+         unsigned hstride;
+      } srcs[3];
+
+      int  gen;
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, access_mode, dst_type,                             \
+             src0_type, src0_vstride, src0_width, src0_hstride,         \
+             src1_type, src1_vstride, src1_width, src1_hstride,         \
+             src2_type,                                                 \
+             gen, expected_result)                                      \
+      {                                                                 \
+         BRW_OPCODE_##opcode,                                           \
+         BRW_ALIGN_##access_mode,                                       \
+         BRW_REGISTER_TYPE_##dst_type,                                  \
+         {                                                              \
+            {                                                           \
+               BRW_REGISTER_TYPE_##src0_type,                           \
+               BRW_VERTICAL_STRIDE_##src0_vstride,                      \
+               BRW_WIDTH_##src0_width,                                  \
+               BRW_HORIZONTAL_STRIDE_##src0_hstride,                    \
+            },                                                          \
+            {                                                           \
+               BRW_REGISTER_TYPE_##src1_type,                           \
+               BRW_VERTICAL_STRIDE_##src1_vstride,                      \
+               BRW_WIDTH_##src1_width,                                  \
+               BRW_HORIZONTAL_STRIDE_##src1_hstride,                    \
+            },                                                          \
+            {                                                           \
+               BRW_REGISTER_TYPE_##src2_type,                           \
+            },                                                          \
+         },                                                             \
+         gen,                                                           \
+         expected_result,                                               \
+      }
+
+      /* Passes on < 11 */
+      INST(MOV, 16,  F, B, 2, 4, 0, UD, 0, 4, 0,  D,  8, true ),
+      INST(ADD, 16, UD, F, 0, 4, 0, UB, 0, 1, 0,  D,  7, true ),
+      INST(MAD, 16,  D, B, 0, 4, 0, UB, 0, 1, 0,  B, 10, true ),
+
+      /* Fails on 11+ */
+      INST(MAD,  1, UB, W, 1, 1, 0,  D, 0, 4, 0,  B, 11, false ),
+      INST(MAD,  1, UB, W, 1, 1, 1, UB, 1, 1, 0,  W, 11, false ),
+      INST(ADD,  1,  W, W, 1, 4, 1,  B, 1, 1, 0,  D, 11, false ),
+
+      /* Passes on 11+ */
+      INST(MOV,  1,  W, B, 8, 8, 1,  D, 8, 8, 1,  D, 11, true ),
+      INST(ADD,  1, UD, B, 8, 8, 1,  W, 8, 8, 1,  D, 11, true ),
+      INST(MAD,  1,  B, B, 0, 1, 0,  D, 0, 4, 0,  W, 11, true ),
+
+#undef INST
+   };
+
+
+   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+      /* Skip instruction not meant for this gen. */
+      if (devinfo.gen != inst[i].gen)
+         continue;
+
+      brw_push_insn_state(p);
+
+      brw_set_default_exec_size(p, BRW_EXECUTE_8);
+      brw_set_default_access_mode(p, inst[i].access_mode);
+
+      switch (inst[i].opcode) {
+      case BRW_OPCODE_MOV:
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].srcs[0].type));
+         brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         break;
+      case BRW_OPCODE_ADD:
+         brw_ADD(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].srcs[0].type),
+                    retype(g0, inst[i].srcs[1].type));
+         brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
+         brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
+         brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
+         brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
+         break;
+      case BRW_OPCODE_MAD:
+         brw_MAD(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].srcs[0].type),
+                    retype(g0, inst[i].srcs[1].type),
+                    retype(g0, inst[i].srcs[2].type));
+         brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         break;
+      default:
+         unreachable("invalid opcode");
+      }
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
+      brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
+
+      brw_pop_insn_state(p);
+
+      EXPECT_EQ(inst[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}




More information about the mesa-commit mailing list