[Mesa-dev] [PATCH v5 35/38] intel/compiler: validate region restrictions for mixed float mode

Thu Mar 28 02:37:08 UTC 2019

"Juan A. Suarez Romero" <jasuarez at igalia.com> writes:

> From: Iago Toral Quiroga <itoral at igalia.com>
>
> v2:
>  - Adapted unit tests to make them consistent with the changes done
>    to the validation of half-float conversions.
> ---
>  src/intel/compiler/brw_eu_validate.c    | 256 ++++++++++
>  src/intel/compiler/test_eu_validate.cpp | 620 ++++++++++++++++++++++++
>  2 files changed, 876 insertions(+)
>
> diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
> index 18c95efb05b..5eea02f5c94 100644
> --- a/src/intel/compiler/brw_eu_validate.c
> +++ b/src/intel/compiler/brw_eu_validate.c
> @@ -170,6 +170,13 @@ src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
>            brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
>  }
>  
> +static bool
> +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> +{
> +   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
> +          brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_ACCUMULATOR;
> +}
> +

There are multiple accumulator registers.  The above only checks for
acc0.

>  static bool
>  src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
>  {
> @@ -937,6 +944,254 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
>     return error_msg;
>  }
>  
> +static struct string
> +special_restrictions_for_mixed_float_mode(const struct gen_device_info *devinfo,
> +                                          const brw_inst *inst)
> +{
> +   struct string error_msg = { .str = NULL, .len = 0 };
> +
> +   unsigned opcode = brw_inst_opcode(devinfo, inst);

Constify this and the declarations below.

> +   unsigned num_sources = num_sources_from_inst(devinfo, inst);
> +   if (num_sources >= 3)
> +      return error_msg;
> +
> +   if (!is_mixed_float(devinfo, inst))
> +      return error_msg;
> +
> +   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
> +   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
> +
> +   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> +   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);

Same comment as in the previous patch, this can possibly blow up for
instructions with less than two sources.

> +   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
> +
> +   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
> +   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
> +
> +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +    * Float Operations:
> +    *
> +    *    "Indirect addressing on source is not supported when source and
> +    *     destination data types are mixed float."
> +    *
> +    * Indirect addressing is only supported on the first source, so we only
> +    * check that.

I don't think that's true.  The hardware spec has the following example
of a valid but kind of funky instruction with indirect regioning:

| add (16) r[a0.0]:f r[a0.2]:f r[a0.4]:f 

> +    */
> +   ERROR_IF(types_are_mixed_float(dst_type, src0_type) &&

I doubt that it makes a difference whether there is a mismatch between
the type of src0 and the type of the destination for indirect addressing
to be disallowed.  Things are likely to blow up with indirect addressing
in src0 even if the instruction is mixed-mode due to the effect of the
type of src1 alone.  But it's hard to tell for sure, spec wording seems
fairly ambiguous...

> +            brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
> +            "Indirect addressing on source is not supported when source and "
> +            "destination data types are mixed float");
> +
> +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +    * Float Operations:
> +    *
> +    *    "No SIMD16 in mixed mode when destination is f32. Instruction
> +    *     execution size must be no more than 8."
> +    */
> +   ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
> +            "Mixed float mode with 32-bit float destination is limited "
> +            "to SIMD8");
> +
> +   if (is_align16) {
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *   "In Align16 mode, when half float and float data types are mixed
> +       *    between source operands OR between source and destination operands,
> +       *    the register content are assumed to be packed."
> +       *
> +       * Since Align16 doesn't have a concept of horizontal stride (or width),
> +       * it means that vertical stride must always be 4, since 0 and 2 would
> +       * lead to replicated data, and any other value is disallowed in Align16.
> +       * However, the PRM also says:
> +       *
> +       *   "In Align16, vertical stride can never be zero for f16"
> +       *
> +       * Which is oddly redundant and specific considering the more general
> +       * assumption that all operands are assumed to be packed, so we
> +       * understand that this might be hinting that there may be an exception
> +       * for f32 operands with a vstride of 0, so we don't validate this for
> +       * them while we don't have empirical evidence that it is forbidden.
> +       */
> +      ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4 &&
> +               (src0_type != BRW_REGISTER_TYPE_F ||
> +                brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0),
> +               "Align16 mixed float mode assumes packed data (vstride must "
> +               "be 4 -or 0 for f32 operands-)");
> +
> +      ERROR_IF(num_sources >= 2 &&
> +               brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4 &&
> +               (src1_type != BRW_REGISTER_TYPE_F ||
> +                brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0),
> +               "Align16 mixed float mode assumes packed data (vstride must "
> +               "be 4 -or 0 for f32 operands-)");
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *   "For Align16 mixed mode, both input and output packed f16 data
> +       *    must be oword aligned, no oword crossing in packed f16."
> +       *
> +       * The previous rule requires that Align16 operands are always packed,
> +       * and since there is only one bit for Align16 subnr, which represents
> +       * offsets 0B and 16B, this rule is always enforced and we don't need to
> +       * validate it.
> +       */
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "No SIMD16 in mixed mode when destination is packed f16 for both
> +       *     Align1 and Align16."
> +       *
> +       * And:
> +       *
> +       *   "In Align16 mode, when half float and float data types are mixed
> +       *    between source operands OR between source and destination operands,
> +       *    the register content are assumed to be packed."
> +       *
> +       * Which implies that SIMD16 is not available in Align16. This is further
> +       * confirmed by:
> +       *
> +       *    "For Align16 mixed mode, both input and output packed f16 data
> +       *     must be oword aligned, no oword crossing in packed f16"
> +       *
> +       * Since oword-aligned packed f16 data would cross oword boundaries when
> +       * the execution size is larger than 8.
> +       */
> +      ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "Math operations for mixed mode:
> +       *     - In Align16, only packed format is supported"
> +       *
> +       * It is not clear what this is restricting since as stated in previous
> +       * spec quotes, Align16 always assumes packed data. However, since
> +       * we are allowing vstride of 0 on f32, we check again here without that
> +       * exception.
> +       */
> +      if (opcode == BRW_OPCODE_MATH) {
> +         ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
> +                  "Align16 mixed float mode math only supports packed format");
> +
> +         ERROR_IF(num_sources >= 2 &&
> +                  brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
> +                  "Align16 mixed float mode math only supports packed format");
> +      }
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "No accumulator read access for Align16 mixed float."
> +       *
> +       * Explicit accumulator reads are only supported on the first source,
> +       * so we only check that.

The restriction on explicit accumulator access being allowed for the
first source only is not really universal.  Wouldn't hurt to check src1
in addition.

> +       */
> +      ERROR_IF(src0_is_acc(devinfo, inst),
> +               "No accumulator read access for Align16 mixed float");
> +   } else {
> +      assert(!is_align16);
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "No SIMD16 in mixed mode when destination is packed f16 for both
> +       *     Align1 and Align16."
> +       */
> +      ERROR_IF(exec_size > 8 && dst_is_packed &&
> +               dst_type == BRW_REGISTER_TYPE_HF,
> +               "Align1 mixed float mode is limited to SIMD8 when destination "
> +               "is packed half-float");
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "Math operations for mixed mode:
> +       *     - In Align1, f16 inputs need to be strided"
> +       */
> +      if (opcode == BRW_OPCODE_MATH) {
> +         if (src0_type == BRW_REGISTER_TYPE_HF) {
> +            ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
> +                     "Align1 mixed mode math needs strided half-float inputs");
> +         }
> +
> +         if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {
> +            ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
> +                     "Align1 mixed mode math needs strided half-float inputs");
> +         }
> +      }
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "In Align1, destination stride can be smaller than execution
> +       *     type. When destination is stride of 1, 16 bit packed data is
> +       *     updated on the destination. However, output packed f16 data
> +       *     must be oword aligned, no oword crossing in packed f16."
> +       *
> +       * The requirement of not crossing oword boundaries for 16-bit oword
> +       * aligned data means that execution size is limited to 8.
> +       */
> +      if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {
> +         unsigned subreg;
> +         if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
> +            subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
> +         else
> +            subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
> +         ERROR_IF(subreg % 16 != 0,
> +                  "Align1 mixed mode packed half-float output must be "
> +                  "oword aligned");
> +         ERROR_IF(exec_size > 8,
> +                  "Align1 mixed mode packed half-float output must not "
> +                  "cross oword boundaries (max exec size is 8)");
> +      }
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "When source is float or half float from accumulator register and
> +       *     destination is half float with a stride of 1, the source must
> +       *     register aligned. i.e., source must have offset zero."
> +       *
> +       * Align16 mixed float mode doesn't allow accumulator access on sources,
> +       * so we only need to check this for Align1. Also, explicit accumulator
> +       * reads are only supported on the first source, so we only check that.

Same comment as above regarding checking accumulator access for src1.

> +       */
> +      if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {

You could eliminate one level of nesting here, or combine the outer
conditional block with the previous conditional.

> +         if (src0_is_acc(devinfo, inst) &&
> +             (src0_type == BRW_REGISTER_TYPE_F ||
> +              src0_type == BRW_REGISTER_TYPE_HF)) {
> +            ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
> +                     "Mixed float mode requires register-aligned accumulator "
> +                     "source reads when destination is packed half-float");
> +         }
> +      }
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "No swizzle is allowed when an accumulator is used as an implicit
> +       *     source or an explicit source in an instruction. i.e. when
> +       *     destination is half float with an implicit accumulator source,
> +       *     destination stride needs to be 2."
> +       *
> +       * FIXME: it is not quite clear what the first sentence actually means
> +       *        or its link to the implication described after it, so we only
> +       *        validate the explicit implication, which is clearly described.

I suspect that with "No swizzle is allowed" they mean that the sources
and destination need to be aligned to the size of the execution size.
But I'm okay with only checking the destination since the spec wording
is pretty vague...

> +       */
> +      if (dst_type == BRW_REGISTER_TYPE_HF && opcode == BRW_OPCODE_MAC) {

There are other instructions that read the accumulator (implicitly or
explicitly) beyond the MAC instruction.

> +         ERROR_IF(dst_stride != 2,
> +                  "Mixed float mode with implicit accumulator source and "
> +                  "half-float destination requires a stride of 2 on the "
> +                  "destination");
> +      }
> +   }
> +
> +   return error_msg;
> +}
> +
>  /**
>   * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
>   *
> @@ -1575,6 +1830,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo,
>           CHECK(send_restrictions);
>           CHECK(general_restrictions_based_on_operand_types);
>           CHECK(general_restrictions_on_region_parameters);
> +         CHECK(special_restrictions_for_mixed_float_mode);
>           CHECK(region_alignment_rules);
>           CHECK(vector_immediate_restrictions);
>           CHECK(special_requirements_for_handling_double_precision_data_types);
> diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
> index bf8a2cfcf12..f7cbb9bb903 100644
> --- a/src/intel/compiler/test_eu_validate.cpp
> +++ b/src/intel/compiler/test_eu_validate.cpp
> @@ -1020,6 +1020,626 @@ TEST_P(validation_test, half_float_conversion)
>     }
>  }
>  
> +TEST_P(validation_test, mixed_float_source_indirect_addressing)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      bool dst_indirect;
> +      bool src0_indirect;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type,                              \
> +             dst_stride, dst_indirect, src0_indirect, expected_result)    \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
> +         dst_indirect,                                                    \
> +         src0_indirect,                                                   \
> +         expected_result,                                                 \
> +      }
> +
> +      /* Source and dest are mixed float: indirect src addressing not allowed */
> +      INST(HF,  F,  F, 2, false, false, true),
> +      INST(HF,  F,  F, 2, true,  false, true),
> +      INST(HF,  F,  F, 2, false, true,  false),
> +      INST(HF,  F,  F, 2, true,  true,  false),
> +      INST( F, HF,  F, 1, false, false, true),
> +      INST( F, HF,  F, 1, true,  false, true),
> +      INST( F, HF,  F, 1, false, true,  false),
> +      INST( F, HF,  F, 1, true,  true,  false),
> +
> +      /* Source and dest are not mixed float: indirect src addressing allowed */
> +      INST(HF, HF,  F, 2, false, false, true),
> +      INST(HF, HF,  F, 2, true,  false, true),
> +      INST(HF, HF,  F, 2, false, true,  true),
> +      INST(HF, HF,  F, 2, true,  true,  true),
> +      INST( F,  F, HF, 1, false, false, true),
> +      INST( F,  F, HF, 1, true,  false, true),
> +      INST( F,  F, HF, 1, false, true,  true),
> +      INST( F,  F, HF, 1, true,  true,  true),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +      brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align1_simd16)
> +{
> +   static const struct {
> +      unsigned exec_size;
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(exec_size, dst_type, src0_type, src1_type,                   \
> +             dst_stride, expected_result)                                 \
> +      {                                                                   \
> +         BRW_EXECUTE_##exec_size,                                         \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
> +         expected_result,                                                 \
> +      }
> +
> +      /* No SIMD16 in mixed mode when destination is packed f16 */
> +      INST( 8, HF,  F, HF, 2, true),
> +      INST(16, HF, HF,  F, 2, true),

Maybe add a known invalid test-case here?

> +
> +      /* No SIMD16 in mixed mode when destination is f32 */
> +      INST( 8,  F, HF,  F, 1, true),
> +      INST( 8,  F,  F, HF, 1, true),
> +      INST(16,  F, HF,  F, 1, false),
> +      INST(16,  F,  F, HF, 1, false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      bool read_acc;
> +      unsigned subnr;
> +      bool expected_result_bdw;
> +      bool expected_result_chv_skl;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr,   \
> +             expected_result_bdw, expected_result_chv_skl)                  \
> +      {                                                                     \
> +         BRW_REGISTER_TYPE_##dst_type,                                      \
> +         BRW_REGISTER_TYPE_##src0_type,                                     \
> +         BRW_REGISTER_TYPE_##src1_type,                                     \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
> +         read_acc,                                                          \
> +         subnr,                                                             \
> +         expected_result_bdw,                                               \
> +         expected_result_chv_skl,                                           \
> +      }
> +
> +      /* Destination is not packed */
> +      INST(HF, HF,  F, 2, true,  0, true, true),
> +      INST(HF, HF,  F, 2, true,  2, true, true),
> +      INST(HF, HF,  F, 2, true,  4, true, true),
> +      INST(HF, HF,  F, 2, true,  8, true, true),
> +      INST(HF, HF,  F, 2, true, 16, true, true),
> +
> +      /* Destination is packed, we don't read acc */
> +      INST(HF, HF,  F, 1, false,  0, false, true),
> +      INST(HF, HF,  F, 1, false,  2, false, true),
> +      INST(HF, HF,  F, 1, false,  4, false, true),
> +      INST(HF, HF,  F, 1, false,  8, false, true),
> +      INST(HF, HF,  F, 1, false, 16, false, true),
> +
> +      /* Destination is packed, we read acc */
> +      INST(HF, HF,  F, 1, true,  0, false, true),
> +      INST(HF, HF,  F, 1, true,  2, false, false),
> +      INST(HF, HF,  F, 1, true,  4, false, false),
> +      INST(HF, HF,  F, 1, true,  8, false, false),
> +      INST(HF, HF,  F, 1, true, 16, false, false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +
> +      brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);
> +
> +      if (devinfo.is_cherryview || devinfo.gen >= 9)
> +         EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
> +      else
> +         EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_fp16_dest_with_implicit_acc)
> +{
> +   static const struct {
> +      unsigned exec_size;
> +      unsigned opcode;
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      bool expected_result_bdw;
> +      bool expected_result_chv_skl;
> +   } inst[] = {
> +#define INST(exec_size, opcode, dst_type, src0_type, src1_type,           \
> +             dst_stride, expected_result_bdw, expected_result_chv_skl)    \
> +      {                                                                   \
> +         BRW_EXECUTE_##exec_size,                                         \
> +         BRW_OPCODE_##opcode,                                             \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
> +         expected_result_bdw,                                             \
> +         expected_result_chv_skl,                                         \
> +      }
> +
> +      /* Packed fp16 dest with implicit acc needs hstride=2 */
> +      INST(8, MAC, HF, HF,  F, 1, false, false),
> +      INST(8, MAC, HF, HF,  F, 2, true,  true),
> +      INST(8, MAC, HF,  F, HF, 1, false, false),
> +      INST(8, MAC, HF,  F, HF, 2, true,  true),
> +
> +      /* If destination is not fp16, restriction doesn't apply */
> +      INST(8, MAC,  F, HF,  F, 1, true, true),
> +      INST(8, MAC,  F, HF,  F, 2, true, true),
> +
> +      /* If there is no implicit acc, restriction doesn't apply */
> +      INST(8, ADD, HF, HF,  F, 1, false, true),
> +      INST(8, ADD, HF, HF,  F, 2, true,  true),
> +      INST(8, ADD, HF,  F, HF, 1, false, true),
> +      INST(8, ADD, HF,  F, HF, 2, true,  true),
> +      INST(8, ADD,  F, HF,  F, 1, true,  true),
> +      INST(8, ADD,  F, HF,  F, 2, true,  true),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      if (inst[i].opcode == BRW_OPCODE_MAC) {
> +         brw_MAC(p, retype(g0, inst[i].dst_type),
> +                    retype(g0, inst[i].src0_type),
> +                    retype(g0, inst[i].src1_type));
> +      } else {
> +         assert(inst[i].opcode == BRW_OPCODE_ADD);
> +         brw_ADD(p, retype(g0, inst[i].dst_type),
> +                    retype(g0, inst[i].src0_type),
> +                    retype(g0, inst[i].src1_type));
> +      }
> +
> +      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +
> +      if (devinfo.is_cherryview || devinfo.gen >= 9)
> +         EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
> +      else
> +         EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      unsigned src0_stride;
> +      unsigned src1_stride;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type,                              \
> +             dst_stride, src0_stride, src1_stride, expected_result)       \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
> +         BRW_HORIZONTAL_STRIDE_##src0_stride,                             \
> +         BRW_HORIZONTAL_STRIDE_##src1_stride,                             \
> +         expected_result,                                                 \
> +      }
> +
> +      INST(HF, HF,  F, 2, 2, 1, true),
> +      INST(HF,  F, HF, 2, 1, 2, true),
> +      INST(HF,  F, HF, 1, 1, 2, true),
> +      INST(HF,  F, HF, 2, 1, 1, false),
> +      INST(HF, HF,  F, 2, 1, 1, false),
> +      INST(HF, HF,  F, 1, 1, 1, false),
> +      INST(HF, HF,  F, 2, 1, 1, false),
> +      INST( F, HF,  F, 1, 1, 1, false),
> +      INST( F,  F, HF, 1, 1, 2, true),
> +      INST( F, HF, HF, 1, 2, 1, false),
> +      INST( F, HF, HF, 1, 2, 2, true),
> +
> +#undef INST
> +   };
> +
> +   /* No half-float math in gen8 */
> +   if (devinfo.gen < 9)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      gen6_math(p, retype(g0, inst[i].dst_type),
> +                   BRW_MATH_FUNCTION_POW,
> +                   retype(g0, inst[i].src0_type),
> +                   retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
> +      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);
> +
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
> +      brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
> +{
> +   static const struct {
> +      unsigned exec_size;
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      unsigned dst_subnr;
> +      bool expected_result_bdw;
> +      bool expected_result_chv_skl;
> +   } inst[] = {
> +#define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
> +             expected_result_bdw, expected_result_chv_skl)                     \
> +      {                                                                        \
> +         BRW_EXECUTE_##exec_size,                                              \
> +         BRW_REGISTER_TYPE_##dst_type,                                         \
> +         BRW_REGISTER_TYPE_##src0_type,                                        \
> +         BRW_REGISTER_TYPE_##src1_type,                                        \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
> +         dst_subnr,                                                            \
> +         expected_result_bdw,                                                  \
> +         expected_result_chv_skl                                               \
> +      }
> +
> +      /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
> +       * oword-aligned
> +       */
> +      INST( 8, HF, HF,  F, 1,  0, false, true),
> +      INST( 8, HF, HF,  F, 1,  2, false, false),
> +      INST( 8, HF, HF,  F, 1,  4, false, false),
> +      INST( 8, HF, HF,  F, 1,  8, false, false),
> +      INST( 8, HF, HF,  F, 1, 16, false, true),
> +
> +      /* SIMD16 packed fp16 always crosses oword boundaries */
> +      INST(16, HF, HF,  F, 1,  0, false, false),
> +      INST(16, HF, HF,  F, 1,  2, false, false),
> +      INST(16, HF, HF,  F, 1,  4, false, false),
> +      INST(16, HF, HF,  F, 1,  8, false, false),
> +      INST(16, HF, HF,  F, 1, 16, false, false),
> +
> +      /* If destination is not packed (or not fp16) we can cross oword
> +       * boundaries
> +       */
> +      INST( 8, HF, HF,  F, 2,  0, true, true),
> +      INST( 8,  F, HF,  F, 1,  0, true, true),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
> +      brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
> +
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
> +      brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
> +
> +      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
> +
> +      if (devinfo.is_cherryview || devinfo.gen >= 9)
> +         EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
> +      else
> +         EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align16_packed_data)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned src0_vstride;
> +      unsigned src1_vstride;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type,                              \
> +             src0_vstride, src1_vstride, expected_result)                 \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_VERTICAL_STRIDE_##src0_vstride,                              \
> +         BRW_VERTICAL_STRIDE_##src1_vstride,                              \
> +         expected_result,                                                 \
> +      }
> +
> +      /* We only test with F destination because there is a restriction
> +       * by which F->HF conversions need to be DWord aligned but Align16 also
> +       * requires that destination horizontal stride is 1.
> +       */
> +      INST(F,  F, HF, 4, 4, true),
> +      INST(F,  F, HF, 2, 4, false),
> +      INST(F,  F, HF, 4, 2, false),
> +      INST(F,  F, HF, 0, 4, true),
> +      INST(F,  F, HF, 4, 0, false),
> +      INST(F, HF,  F, 4, 4, true),
> +      INST(F, HF,  F, 4, 2, false),
> +      INST(F, HF,  F, 2, 4, false),
> +      INST(F, HF,  F, 0, 4, false),
> +      INST(F, HF,  F, 4, 0, true),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8 || devinfo.gen >= 11)
> +      return;
> +
> +   brw_set_default_access_mode(p, BRW_ALIGN_16);
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align16_no_simd16)
> +{
> +   static const struct {
> +      unsigned exec_size;
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(exec_size, dst_type, src0_type, src1_type, expected_result)  \
> +      {                                                                   \
> +         BRW_EXECUTE_##exec_size,                                         \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         expected_result,                                                 \
> +      }
> +
> +      /* We only test with F destination because there is a restriction
> +       * by which F->HF conversions need to be DWord aligned but Align16 also
> +       * requires that destination horizontal stride is 1.
> +       */
> +      INST( 8,  F,  F, HF, true),
> +      INST( 8,  F, HF,  F, true),
> +      INST( 8,  F,  F, HF, true),
> +      INST(16,  F,  F, HF, false),
> +      INST(16,  F, HF,  F, false),
> +      INST(16,  F,  F, HF, false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8 || devinfo.gen >= 11)
> +      return;
> +
> +   brw_set_default_access_mode(p, BRW_ALIGN_16);
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align16_no_acc_read)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      bool read_acc;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type, read_acc, expected_result)   \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         read_acc,                                                        \
> +         expected_result,                                                 \
> +      }
> +
> +      /* We only test with F destination because there is a restriction
> +       * by which F->HF conversions need to be DWord aligned but Align16 also
> +       * requires that destination horizontal stride is 1.
> +       */
> +      INST( F,  F, HF, false, true),
> +      INST( F,  F, HF, true,  false),
> +      INST( F, HF,  F, false, true),
> +      INST( F, HF,  F, true,  false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8 || devinfo.gen >= 11)
> +      return;
> +
> +   brw_set_default_access_mode(p, BRW_ALIGN_16);
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align16_math_packed_format)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned src0_vstride;
> +      unsigned src1_vstride;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type,                              \
> +             src0_vstride, src1_vstride, expected_result)                 \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_VERTICAL_STRIDE_##src0_vstride,                              \
> +         BRW_VERTICAL_STRIDE_##src1_vstride,                              \
> +         expected_result,                                                 \
> +      }
> +
> +      /* We only test with F destination because there is a restriction
> +       * by which F->HF conversions need to be DWord aligned but Align16 also
> +       * requires that destination horizontal stride is 1.
> +       */
> +      INST( F, HF,  F, 4, 0, false),
> +      INST( F, HF, HF, 4, 4, true),
> +      INST( F,  F, HF, 4, 0, false),
> +      INST( F,  F, HF, 2, 4, false),
> +      INST( F,  F, HF, 4, 2, false),
> +      INST( F, HF, HF, 0, 4, false),
> +
> +#undef INST
> +   };
> +
> +   /* Align16 Math for mixed float mode is not supported in gen8 */
> +   if (devinfo.gen < 9 || devinfo.gen >= 11)
> +      return;
> +
> +   brw_set_default_access_mode(p, BRW_ALIGN_16);
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      gen6_math(p, retype(g0, inst[i].dst_type),
> +                   BRW_MATH_FUNCTION_POW,
> +                   retype(g0, inst[i].src0_type),
> +                   retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
>  TEST_P(validation_test, vector_immediate_destination_alignment)
>  {
>     static const struct {
> -- 
> 2.20.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190327/63a7d3e9/attachment-0001.sig>