[Mesa-dev] [PATCH v7] intel/compiler: validate region restrictions for mixed float mode

Wed Apr 17 20:17:25 UTC 2019

"Juan A. Suarez Romero" <jasuarez at igalia.com> writes:

> From: Iago Toral Quiroga <itoral at igalia.com>
>
> v2:
>  - Adapted unit tests to make them consistent with the changes done
>    to the validation of half-float conversions.
>
> v3 (Curro):
> - Check all the accumulators
> - Constify declarations
> - Do not check src1 type in single-source instructions.
> - Check for all instructions that read accumulator (either implicitly or
>   explicitly)
> - Check restrictions in src1 too.
> - Merge conditional block
> - Add invalid test case.
>
> v4 (Curro):
> - Assert on 3-src instructions, as they are not validated.
> - Get rid of types_are_mixed_float(), as we know instruction is mixed
>   float at that point.
> - Remove conditions from not verified case.
> - Fix brackets on conditional.
> ---
>  src/intel/compiler/brw_eu_validate.c    | 268 ++++++++++
>  src/intel/compiler/test_eu_validate.cpp | 630 ++++++++++++++++++++++++
>  2 files changed, 898 insertions(+)
>
> diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
> index cfaf126e2f5..9530d4da209 100644
> --- a/src/intel/compiler/brw_eu_validate.c
> +++ b/src/intel/compiler/brw_eu_validate.c
> @@ -170,6 +170,20 @@ src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
>            brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
>  }
>  
> +static bool
> +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> +{
> +   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
> +          (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
> +}
> +
> +static bool
> +src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> +{
> +   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
> +          (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
> +}
> +
>  static bool
>  src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
>  {
> @@ -275,6 +289,24 @@ sources_not_null(const struct gen_device_info *devinfo,
>     return error_msg;
>  }
>  
> +static bool
> +inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> +{
> +   /* Check instructions that use implicit accumulator sources */
> +   switch (brw_inst_opcode(devinfo, inst)) {
> +   case BRW_OPCODE_MAC:
> +   case BRW_OPCODE_MACH:
> +   case BRW_OPCODE_SADA2:
> +      return true;
> +   }
> +
> +   /* FIXME: support 3-src instructions */
> +   unsigned num_sources = num_sources_from_inst(devinfo, inst);
> +   assert(num_sources < 3);
> +
> +   return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
> +}
> +
>  static struct string
>  send_restrictions(const struct gen_device_info *devinfo,
>                    const brw_inst *inst)
> @@ -938,6 +970,241 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
>     return error_msg;
>  }
>  
> +static struct string
> +special_restrictions_for_mixed_float_mode(const struct gen_device_info *devinfo,
> +                                          const brw_inst *inst)
> +{
> +   struct string error_msg = { .str = NULL, .len = 0 };
> +
> +   const unsigned opcode = brw_inst_opcode(devinfo, inst);
> +   const unsigned num_sources = num_sources_from_inst(devinfo, inst);
> +   if (num_sources >= 3)
> +      return error_msg;
> +
> +   if (!is_mixed_float(devinfo, inst))
> +      return error_msg;
> +
> +   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
> +   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
> +
> +   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> +   enum brw_reg_type src1_type = num_sources > 1 ?
> +                                 brw_inst_src1_type(devinfo, inst) : 0;
> +   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
> +
> +   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
> +   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
> +
> +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +    * Float Operations:
> +    *
> +    *    "Indirect addressing on source is not supported when source and
> +    *     destination data types are mixed float."
> +    */
> +   ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||
> +            (num_sources > 1 &&
> +             brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),
> +            "Indirect addressing on source is not supported when source and "
> +            "destination data types are mixed float");
> +
> +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +    * Float Operations:
> +    *
> +    *    "No SIMD16 in mixed mode when destination is f32. Instruction
> +    *     execution size must be no more than 8."
> +    */
> +   ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
> +            "Mixed float mode with 32-bit float destination is limited "
> +            "to SIMD8");
> +
> +   if (is_align16) {
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *   "In Align16 mode, when half float and float data types are mixed
> +       *    between source operands OR between source and destination operands,
> +       *    the register content are assumed to be packed."
> +       *
> +       * Since Align16 doesn't have a concept of horizontal stride (or width),
> +       * it means that vertical stride must always be 4, since 0 and 2 would
> +       * lead to replicated data, and any other value is disallowed in Align16.
> +       * However, the PRM also says:
> +       *
> +       *   "In Align16, vertical stride can never be zero for f16"
> +       *
> +       * Which is oddly redundant and specific considering the more general
> +       * assumption that all operands are assumed to be packed, so we
> +       * understand that this might be hinting that there may be an exception
> +       * for f32 operands with a vstride of 0, so we don't validate this for
> +       * them while we don't have empirical evidence that it is forbidden.
> +       *
> +       *    "Math operations for mixed mode:
> +       *     - In Align16, only packed format is supported"
> +       *
> +       * It is not clear what this is restricting since as stated in previous
> +       * spec quotes, Align16 always assumes packed data. However, since
> +       * we are allowing vstride of 0 on f32, we check again here without that
> +       * exception.
> +

The comment text from "However, the PRM also says" up to this point seems
to have been made obsolete by your last changes.  Please remove it.

With that fixed:

Reviewed-by: Francisco Jerez <currojerez at riseup.net>

I'm guessing that's all the reviews you needed on this series?

> +       */
> +      ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
> +               "Align16 mixed float mode assumes packed data (vstride must be 4");
> +
> +      ERROR_IF(num_sources >= 2 &&
> +               brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
> +               "Align16 mixed float mode assumes packed data (vstride must be 4");
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *   "For Align16 mixed mode, both input and output packed f16 data
> +       *    must be oword aligned, no oword crossing in packed f16."
> +       *
> +       * The previous rule requires that Align16 operands are always packed,
> +       * and since there is only one bit for Align16 subnr, which represents
> +       * offsets 0B and 16B, this rule is always enforced and we don't need to
> +       * validate it.
> +       */
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "No SIMD16 in mixed mode when destination is packed f16 for both
> +       *     Align1 and Align16."
> +       *
> +       * And:
> +       *
> +       *   "In Align16 mode, when half float and float data types are mixed
> +       *    between source operands OR between source and destination operands,
> +       *    the register content are assumed to be packed."
> +       *
> +       * Which implies that SIMD16 is not available in Align16. This is further
> +       * confirmed by:
> +       *
> +       *    "For Align16 mixed mode, both input and output packed f16 data
> +       *     must be oword aligned, no oword crossing in packed f16"
> +       *
> +       * Since oword-aligned packed f16 data would cross oword boundaries when
> +       * the execution size is larger than 8.
> +       */
> +      ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "No accumulator read access for Align16 mixed float."
> +       */
> +      ERROR_IF(inst_uses_src_acc(devinfo, inst),
> +               "No accumulator read access for Align16 mixed float");
> +   } else {
> +      assert(!is_align16);
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "No SIMD16 in mixed mode when destination is packed f16 for both
> +       *     Align1 and Align16."
> +       */
> +      ERROR_IF(exec_size > 8 && dst_is_packed &&
> +               dst_type == BRW_REGISTER_TYPE_HF,
> +               "Align1 mixed float mode is limited to SIMD8 when destination "
> +               "is packed half-float");
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "Math operations for mixed mode:
> +       *     - In Align1, f16 inputs need to be strided"
> +       */
> +      if (opcode == BRW_OPCODE_MATH) {
> +         if (src0_type == BRW_REGISTER_TYPE_HF) {
> +            ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
> +                     "Align1 mixed mode math needs strided half-float inputs");
> +         }
> +
> +         if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {
> +            ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
> +                     "Align1 mixed mode math needs strided half-float inputs");
> +         }
> +      }
> +
> +      if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {
> +         /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +          * Float Operations:
> +          *
> +          *    "In Align1, destination stride can be smaller than execution
> +          *     type. When destination is stride of 1, 16 bit packed data is
> +          *     updated on the destination. However, output packed f16 data
> +          *     must be oword aligned, no oword crossing in packed f16."
> +          *
> +          * The requirement of not crossing oword boundaries for 16-bit oword
> +          * aligned data means that execution size is limited to 8.
> +          */
> +         unsigned subreg;
> +         if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
> +            subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
> +         else
> +            subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
> +         ERROR_IF(subreg % 16 != 0,
> +                  "Align1 mixed mode packed half-float output must be "
> +                  "oword aligned");
> +         ERROR_IF(exec_size > 8,
> +                  "Align1 mixed mode packed half-float output must not "
> +                  "cross oword boundaries (max exec size is 8)");
> +
> +         /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +          * Float Operations:
> +          *
> +          *    "When source is float or half float from accumulator register and
> +          *     destination is half float with a stride of 1, the source must
> +          *     register aligned. i.e., source must have offset zero."
> +          *
> +          * Align16 mixed float mode doesn't allow accumulator access on sources,
> +          * so we only need to check this for Align1.
> +          */
> +         if (src0_is_acc(devinfo, inst) &&
> +             (src0_type == BRW_REGISTER_TYPE_F ||
> +              src0_type == BRW_REGISTER_TYPE_HF)) {
> +            ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
> +                     "Mixed float mode requires register-aligned accumulator "
> +                     "source reads when destination is packed half-float");
> +
> +         }
> +
> +         if (num_sources > 1 &&
> +             src1_is_acc(devinfo, inst) &&
> +             (src1_type == BRW_REGISTER_TYPE_F ||
> +              src1_type == BRW_REGISTER_TYPE_HF)) {
> +            ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
> +                     "Mixed float mode requires register-aligned accumulator "
> +                     "source reads when destination is packed half-float");
> +         }
> +      }
> +
> +      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> +       * Float Operations:
> +       *
> +       *    "No swizzle is allowed when an accumulator is used as an implicit
> +       *     source or an explicit source in an instruction. i.e. when
> +       *     destination is half float with an implicit accumulator source,
> +       *     destination stride needs to be 2."
> +       *
> +       * FIXME: it is not quite clear what the first sentence actually means
> +       *        or its link to the implication described after it, so we only
> +       *        validate the explicit implication, which is clearly described.
> +       */
> +      if (dst_type == BRW_REGISTER_TYPE_HF &&
> +          inst_uses_src_acc(devinfo, inst)) {
> +         ERROR_IF(dst_stride != 2,
> +                  "Mixed float mode with implicit/explicit accumulator "
> +                  "source and half-float destination requires a stride "
> +                  "of 2 on the destination");
> +      }
> +   }
> +
> +   return error_msg;
> +}
> +
>  /**
>   * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
>   *
> @@ -1576,6 +1843,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo,
>           CHECK(send_restrictions);
>           CHECK(general_restrictions_based_on_operand_types);
>           CHECK(general_restrictions_on_region_parameters);
> +         CHECK(special_restrictions_for_mixed_float_mode);
>           CHECK(region_alignment_rules);
>           CHECK(vector_immediate_restrictions);
>           CHECK(special_requirements_for_handling_double_precision_data_types);
> diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
> index 2e06da2f5b4..65326416064 100644
> --- a/src/intel/compiler/test_eu_validate.cpp
> +++ b/src/intel/compiler/test_eu_validate.cpp
> @@ -1019,6 +1019,636 @@ TEST_P(validation_test, half_float_conversion)
>     }
>  }
>  
> +TEST_P(validation_test, mixed_float_source_indirect_addressing)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      bool dst_indirect;
> +      bool src0_indirect;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type,                              \
> +             dst_stride, dst_indirect, src0_indirect, expected_result)    \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
> +         dst_indirect,                                                    \
> +         src0_indirect,                                                   \
> +         expected_result,                                                 \
> +      }
> +
> +      /* Source and dest are mixed float: indirect src addressing not allowed */
> +      INST(HF,  F,  F, 2, false, false, true),
> +      INST(HF,  F,  F, 2, true,  false, true),
> +      INST(HF,  F,  F, 2, false, true,  false),
> +      INST(HF,  F,  F, 2, true,  true,  false),
> +      INST( F, HF,  F, 1, false, false, true),
> +      INST( F, HF,  F, 1, true,  false, true),
> +      INST( F, HF,  F, 1, false, true,  false),
> +      INST( F, HF,  F, 1, true,  true,  false),
> +
> +      INST(HF, HF,  F, 2, false, false, true),
> +      INST(HF, HF,  F, 2, true,  false, true),
> +      INST(HF, HF,  F, 2, false, true,  false),
> +      INST(HF, HF,  F, 2, true,  true,  false),
> +      INST( F,  F, HF, 1, false, false, true),
> +      INST( F,  F, HF, 1, true,  false, true),
> +      INST( F,  F, HF, 1, false, true,  false),
> +      INST( F,  F, HF, 1, true,  true,  false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +      brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align1_simd16)
> +{
> +   static const struct {
> +      unsigned exec_size;
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(exec_size, dst_type, src0_type, src1_type,                   \
> +             dst_stride, expected_result)                                 \
> +      {                                                                   \
> +         BRW_EXECUTE_##exec_size,                                         \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
> +         expected_result,                                                 \
> +      }
> +
> +      /* No SIMD16 in mixed mode when destination is packed f16 */
> +      INST( 8, HF,  F, HF, 2, true),
> +      INST(16, HF, HF,  F, 2, true),
> +      INST(16, HF, HF,  F, 1, false),
> +      INST(16, HF,  F, HF, 1, false),
> +
> +      /* No SIMD16 in mixed mode when destination is f32 */
> +      INST( 8,  F, HF,  F, 1, true),
> +      INST( 8,  F,  F, HF, 1, true),
> +      INST(16,  F, HF,  F, 1, false),
> +      INST(16,  F,  F, HF, 1, false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      bool read_acc;
> +      unsigned subnr;
> +      bool expected_result_bdw;
> +      bool expected_result_chv_skl;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr,   \
> +             expected_result_bdw, expected_result_chv_skl)                  \
> +      {                                                                     \
> +         BRW_REGISTER_TYPE_##dst_type,                                      \
> +         BRW_REGISTER_TYPE_##src0_type,                                     \
> +         BRW_REGISTER_TYPE_##src1_type,                                     \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
> +         read_acc,                                                          \
> +         subnr,                                                             \
> +         expected_result_bdw,                                               \
> +         expected_result_chv_skl,                                           \
> +      }
> +
> +      /* Destination is not packed */
> +      INST(HF, HF,  F, 2, true,  0, true, true),
> +      INST(HF, HF,  F, 2, true,  2, true, true),
> +      INST(HF, HF,  F, 2, true,  4, true, true),
> +      INST(HF, HF,  F, 2, true,  8, true, true),
> +      INST(HF, HF,  F, 2, true, 16, true, true),
> +
> +      /* Destination is packed, we don't read acc */
> +      INST(HF, HF,  F, 1, false,  0, false, true),
> +      INST(HF, HF,  F, 1, false,  2, false, true),
> +      INST(HF, HF,  F, 1, false,  4, false, true),
> +      INST(HF, HF,  F, 1, false,  8, false, true),
> +      INST(HF, HF,  F, 1, false, 16, false, true),
> +
> +      /* Destination is packed, we read acc */
> +      INST(HF, HF,  F, 1, true,  0, false, false),
> +      INST(HF, HF,  F, 1, true,  2, false, false),
> +      INST(HF, HF,  F, 1, true,  4, false, false),
> +      INST(HF, HF,  F, 1, true,  8, false, false),
> +      INST(HF, HF,  F, 1, true, 16, false, false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +
> +      brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);
> +
> +      if (devinfo.is_cherryview || devinfo.gen >= 9)
> +         EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
> +      else
> +         EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
> +{
> +   static const struct {
> +      unsigned exec_size;
> +      unsigned opcode;
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      bool read_acc;
> +      bool expected_result_bdw;
> +      bool expected_result_chv_skl;
> +   } inst[] = {
> +#define INST(exec_size, opcode, dst_type, src0_type, src1_type,           \
> +             dst_stride, read_acc,expected_result_bdw,                    \
> +             expected_result_chv_skl)                                     \
> +      {                                                                   \
> +         BRW_EXECUTE_##exec_size,                                         \
> +         BRW_OPCODE_##opcode,                                             \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
> +         read_acc,                                                        \
> +         expected_result_bdw,                                             \
> +         expected_result_chv_skl,                                         \
> +      }
> +
> +      /* Packed fp16 dest with implicit acc needs hstride=2 */
> +      INST(8, MAC, HF, HF,  F, 1, false, false, false),
> +      INST(8, MAC, HF, HF,  F, 2, false, true,  true),
> +      INST(8, MAC, HF,  F, HF, 1, false, false, false),
> +      INST(8, MAC, HF,  F, HF, 2, false, true,  true),
> +
> +      /* Packed fp16 dest with explicit acc needs hstride=2 */
> +      INST(8, ADD, HF, HF,  F, 1, true,  false, false),
> +      INST(8, ADD, HF, HF,  F, 2, true,  true,  true),
> +      INST(8, ADD, HF,  F, HF, 1, true,  false, false),
> +      INST(8, ADD, HF,  F, HF, 2, true,  true,  true),
> +
> +      /* If destination is not fp16, restriction doesn't apply */
> +      INST(8, MAC,  F, HF,  F, 1, false, true, true),
> +      INST(8, MAC,  F, HF,  F, 2, false, true, true),
> +
> +      /* If there is no implicit/explicit acc, restriction doesn't apply */
> +      INST(8, ADD, HF, HF,  F, 1, false, false, true),
> +      INST(8, ADD, HF, HF,  F, 2, false, true,  true),
> +      INST(8, ADD, HF,  F, HF, 1, false, false, true),
> +      INST(8, ADD, HF,  F, HF, 2, false, true,  true),
> +      INST(8, ADD,  F, HF,  F, 1, false, true,  true),
> +      INST(8, ADD,  F, HF,  F, 2, false, true,  true),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      if (inst[i].opcode == BRW_OPCODE_MAC) {
> +         brw_MAC(p, retype(g0, inst[i].dst_type),
> +                    retype(g0, inst[i].src0_type),
> +                    retype(g0, inst[i].src1_type));
> +      } else {
> +         assert(inst[i].opcode == BRW_OPCODE_ADD);
> +         brw_ADD(p, retype(g0, inst[i].dst_type),
> +                    retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type),
> +                    retype(g0, inst[i].src1_type));
> +      }
> +
> +      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +
> +      if (devinfo.is_cherryview || devinfo.gen >= 9)
> +         EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
> +      else
> +         EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      unsigned src0_stride;
> +      unsigned src1_stride;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type,                              \
> +             dst_stride, src0_stride, src1_stride, expected_result)       \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
> +         BRW_HORIZONTAL_STRIDE_##src0_stride,                             \
> +         BRW_HORIZONTAL_STRIDE_##src1_stride,                             \
> +         expected_result,                                                 \
> +      }
> +
> +      INST(HF, HF,  F, 2, 2, 1, true),
> +      INST(HF,  F, HF, 2, 1, 2, true),
> +      INST(HF,  F, HF, 1, 1, 2, true),
> +      INST(HF,  F, HF, 2, 1, 1, false),
> +      INST(HF, HF,  F, 2, 1, 1, false),
> +      INST(HF, HF,  F, 1, 1, 1, false),
> +      INST(HF, HF,  F, 2, 1, 1, false),
> +      INST( F, HF,  F, 1, 1, 1, false),
> +      INST( F,  F, HF, 1, 1, 2, true),
> +      INST( F, HF, HF, 1, 2, 1, false),
> +      INST( F, HF, HF, 1, 2, 2, true),
> +
> +#undef INST
> +   };
> +
> +   /* No half-float math in gen8 */
> +   if (devinfo.gen < 9)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      gen6_math(p, retype(g0, inst[i].dst_type),
> +                   BRW_MATH_FUNCTION_POW,
> +                   retype(g0, inst[i].src0_type),
> +                   retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
> +      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);
> +
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
> +      brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
> +{
> +   static const struct {
> +      unsigned exec_size;
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned dst_stride;
> +      unsigned dst_subnr;
> +      bool expected_result_bdw;
> +      bool expected_result_chv_skl;
> +   } inst[] = {
> +#define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
> +             expected_result_bdw, expected_result_chv_skl)                     \
> +      {                                                                        \
> +         BRW_EXECUTE_##exec_size,                                              \
> +         BRW_REGISTER_TYPE_##dst_type,                                         \
> +         BRW_REGISTER_TYPE_##src0_type,                                        \
> +         BRW_REGISTER_TYPE_##src1_type,                                        \
> +         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
> +         dst_subnr,                                                            \
> +         expected_result_bdw,                                                  \
> +         expected_result_chv_skl                                               \
> +      }
> +
> +      /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
> +       * oword-aligned
> +       */
> +      INST( 8, HF, HF,  F, 1,  0, false, true),
> +      INST( 8, HF, HF,  F, 1,  2, false, false),
> +      INST( 8, HF, HF,  F, 1,  4, false, false),
> +      INST( 8, HF, HF,  F, 1,  8, false, false),
> +      INST( 8, HF, HF,  F, 1, 16, false, true),
> +
> +      /* SIMD16 packed fp16 always crosses oword boundaries */
> +      INST(16, HF, HF,  F, 1,  0, false, false),
> +      INST(16, HF, HF,  F, 1,  2, false, false),
> +      INST(16, HF, HF,  F, 1,  4, false, false),
> +      INST(16, HF, HF,  F, 1,  8, false, false),
> +      INST(16, HF, HF,  F, 1, 16, false, false),
> +
> +      /* If destination is not packed (or not fp16) we can cross oword
> +       * boundaries
> +       */
> +      INST( 8, HF, HF,  F, 2,  0, true, true),
> +      INST( 8,  F, HF,  F, 1,  0, true, true),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8)
> +      return;
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
> +      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
> +      brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
> +
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
> +      brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
> +
> +      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
> +
> +      if (devinfo.is_cherryview || devinfo.gen >= 9)
> +         EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
> +      else
> +         EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align16_packed_data)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned src0_vstride;
> +      unsigned src1_vstride;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type,                              \
> +             src0_vstride, src1_vstride, expected_result)                 \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_VERTICAL_STRIDE_##src0_vstride,                              \
> +         BRW_VERTICAL_STRIDE_##src1_vstride,                              \
> +         expected_result,                                                 \
> +      }
> +
> +      /* Only vertical stride 4 on both sources is expected to validate.
> +       * Destination is always F: F->HF conversions need to be DWord aligned,
> +       * but Align16 also requires destination horizontal stride to be 1.
> +       */
> +      INST(F,  F, HF, 4, 4, true),
> +      INST(F,  F, HF, 2, 4, false),
> +      INST(F,  F, HF, 4, 2, false),
> +      INST(F,  F, HF, 0, 4, false),
> +      INST(F,  F, HF, 4, 0, false),
> +      INST(F, HF,  F, 4, 4, true),
> +      INST(F, HF,  F, 4, 2, false),
> +      INST(F, HF,  F, 2, 4, false),
> +      INST(F, HF,  F, 0, 4, false),
> +      INST(F, HF,  F, 4, 0, false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8 || devinfo.gen >= 11)
> +      return;
> +
> +   brw_set_default_access_mode(p, BRW_ALIGN_16);
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align16_no_simd16)
> +{
> +   static const struct {
> +      unsigned exec_size;
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(exec_size, dst_type, src0_type, src1_type, expected_result)  \
> +      {                                                                   \
> +         BRW_EXECUTE_##exec_size,                                         \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         expected_result,                                                 \
> +      }
> +
> +      /* SIMD8 must validate and SIMD16 must not. F destination only: F->HF
> +       * conversions need DWord alignment but Align16 needs dst hstride 1.
> +       * NOTE(review): the (F, F, HF) rows repeat per exec size -- intended?
> +       */
> +      INST( 8,  F,  F, HF, true),
> +      INST( 8,  F, HF,  F, true),
> +      INST( 8,  F,  F, HF, true),
> +      INST(16,  F,  F, HF, false),
> +      INST(16,  F, HF,  F, false),
> +      INST(16,  F,  F, HF, false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8 || devinfo.gen >= 11)
> +      return;
> +
> +   brw_set_default_access_mode(p, BRW_ALIGN_16);
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align16_no_acc_read)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      bool read_acc;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type, read_acc, expected_result)   \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         read_acc,                                                        \
> +         expected_result,                                                 \
> +      }
> +
> +      /* Cases that read the accumulator (acc0 as src0) are expected to fail.
> +       * Destination is always F: F->HF conversions need to be DWord aligned,
> +       * but Align16 also requires destination horizontal stride to be 1.
> +       */
> +      INST( F,  F, HF, false, true),
> +      INST( F,  F, HF, true,  false),
> +      INST( F, HF,  F, false, true),
> +      INST( F, HF,  F, true,  false),
> +
> +#undef INST
> +   };
> +
> +   if (devinfo.gen < 8 || devinfo.gen >= 11)
> +      return;
> +
> +   brw_set_default_access_mode(p, BRW_ALIGN_16);
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      brw_ADD(p, retype(g0, inst[i].dst_type),
> +                 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
> +                 retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
> +TEST_P(validation_test, mixed_float_align16_math_packed_format)
> +{
> +   static const struct {
> +      enum brw_reg_type dst_type;
> +      enum brw_reg_type src0_type;
> +      enum brw_reg_type src1_type;
> +      unsigned src0_vstride;
> +      unsigned src1_vstride;
> +      bool expected_result;
> +   } inst[] = {
> +#define INST(dst_type, src0_type, src1_type,                              \
> +             src0_vstride, src1_vstride, expected_result)                 \
> +      {                                                                   \
> +         BRW_REGISTER_TYPE_##dst_type,                                    \
> +         BRW_REGISTER_TYPE_##src0_type,                                   \
> +         BRW_REGISTER_TYPE_##src1_type,                                   \
> +         BRW_VERTICAL_STRIDE_##src0_vstride,                              \
> +         BRW_VERTICAL_STRIDE_##src1_vstride,                              \
> +         expected_result,                                                 \
> +      }
> +
> +      /* Only vertical stride 4 on both sources is expected to validate.
> +       * Destination is always F: F->HF conversions need to be DWord aligned,
> +       * but Align16 also requires destination horizontal stride to be 1.
> +       */
> +      INST( F, HF,  F, 4, 0, false),
> +      INST( F, HF, HF, 4, 4, true),
> +      INST( F,  F, HF, 4, 0, false),
> +      INST( F,  F, HF, 2, 4, false),
> +      INST( F,  F, HF, 4, 2, false),
> +      INST( F, HF, HF, 0, 4, false),
> +
> +#undef INST
> +   };
> +
> +   /* Align16 Math for mixed float mode is not supported in gen8 */
> +   if (devinfo.gen < 9 || devinfo.gen >= 11)
> +      return;
> +
> +   brw_set_default_access_mode(p, BRW_ALIGN_16);
> +
> +   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
> +      gen6_math(p, retype(g0, inst[i].dst_type),
> +                   BRW_MATH_FUNCTION_POW,
> +                   retype(g0, inst[i].src0_type),
> +                   retype(g0, inst[i].src1_type));
> +
> +      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
> +      brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
> +
> +      EXPECT_EQ(inst[i].expected_result, validate(p));
> +
> +      clear_instructions(p);
> +   }
> +}
> +
>  TEST_P(validation_test, vector_immediate_destination_alignment)
>  {
>     static const struct {
> -- 
> 2.20.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190417/344c7bdd/attachment-0001.sig>