[Mesa-dev] [PATCH 9/9] i965: Check accumulator restrictions.

Kenneth Graunke kenneth at whitecape.org
Tue Nov 3 23:53:47 PST 2015


On Wednesday, October 21, 2015 03:58:17 PM Matt Turner wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_eu_validate.c | 244 ++++++++++++++++++++++++++++
>  1 file changed, 244 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_validate.c b/src/mesa/drivers/dri/i965/brw_eu_validate.c
> index eb57962..3d16f90 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_validate.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_validate.c
> @@ -54,6 +54,16 @@ cat(struct string *dest, const struct string src)
>        }                              \
>     } while(0)
>  
> +#define CHECK(func) \
> +   do { \
> +      struct string __msg = func; \
> +      if (__msg.str) { \
> +         cat(&error_msg, __msg); \
> +         free(__msg.str); \
> +         valid = false; \
> +      } \
> +   } while (0)
> +
>  static bool
>  src0_is_null(const struct brw_device_info *devinfo, const brw_inst *inst)
>  {
> @@ -68,6 +78,42 @@ src1_is_null(const struct brw_device_info *devinfo, const brw_inst *inst)
>            brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
>  }
>  
> +static bool
> +dst_is_accumulator(const struct brw_device_info *devinfo, const brw_inst *inst)
> +{
> +   return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
> +          brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_ACCUMULATOR;
> +}
> +
> +static bool
> +src0_is_accumulator(const struct brw_device_info *devinfo, const brw_inst *inst)
> +{
> +   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
> +          brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_ACCUMULATOR;
> +}
> +
> +static bool
> +src1_is_accumulator(const struct brw_device_info *devinfo, const brw_inst *inst)
> +{
> +   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
> +          brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_ACCUMULATOR;
> +}
> +
> +static bool
> +is_integer(enum brw_reg_type type)
> +{
> +   return type == BRW_REGISTER_TYPE_UD ||
> +          type == BRW_REGISTER_TYPE_D ||
> +          type == BRW_REGISTER_TYPE_UW ||
> +          type == BRW_REGISTER_TYPE_W ||
> +          type == BRW_REGISTER_TYPE_UB ||
> +          type == BRW_REGISTER_TYPE_B ||
> +          type == BRW_REGISTER_TYPE_V ||
> +          type == BRW_REGISTER_TYPE_UV ||
> +          type == BRW_REGISTER_TYPE_UQ ||
> +          type == BRW_REGISTER_TYPE_Q;
> +}
> +
>  enum gen {
>     GEN4  = (1 << 0),
>     GEN45 = (1 << 1),
> @@ -83,40 +129,66 @@ enum gen {
>  #define GEN_GE(gen) (~((gen) - 1) | gen)
>  #define GEN_LE(gen) (((gen) - 1) | gen)
>  
> +enum acc {
> +   ACC_NO_RESTRICTIONS = 0,
> +   ACC_GEN_DEPENDENT = (1 << 0),
> +   ACC_NO_EXPLICIT_SOURCE = (1 << 1),
> +   ACC_NO_EXPLICIT_DESTINATION = (1 << 2),
> +   ACC_NO_IMPLICIT_DESTINATION = (1 << 3),
> +   ACC_NO_DESTINATION = ACC_NO_EXPLICIT_DESTINATION |
> +                        ACC_NO_IMPLICIT_DESTINATION,
> +   ACC_NO_ACCESS = ACC_NO_EXPLICIT_SOURCE |
> +                   ACC_NO_DESTINATION,
> +   ACC_NO_SOURCE_MODIFIER = (1 << 4),
> +   ACC_NO_INTEGER_SOURCE = (1 << 5),
> +   ACC_IMPLICIT_WRITE_REQUIRED = (1 << 6),
> +   ACC_NOT_BOTH_SOURCE_AND_DESTINATION = (1 << 7),
> +};
> +
>  struct inst_info {
>     enum gen gen;
> +   enum acc acc;
>  };
>  
>  static const struct inst_info inst_info[128] = {
>     [BRW_OPCODE_ILLEGAL] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_MOV] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NOT_BOTH_SOURCE_AND_DESTINATION,
>     },
>     [BRW_OPCODE_SEL] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_GEN_DEPENDENT,
>     },
>     [BRW_OPCODE_MOVI] = {
>        .gen = GEN_GE(GEN45),
> +      .acc = ACC_NO_EXPLICIT_SOURCE,
>     },
>     [BRW_OPCODE_NOT] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_SOURCE_MODIFIER,
>     },
>     [BRW_OPCODE_AND] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_SOURCE_MODIFIER,
>     },
>     [BRW_OPCODE_OR] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_SOURCE_MODIFIER,
>     },
>     [BRW_OPCODE_XOR] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_SOURCE_MODIFIER,
>     },
>     [BRW_OPCODE_SHR] = {
>        .gen = GEN_ALL,
>     },
>     [BRW_OPCODE_SHL] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_DESTINATION,
>     },
>     /* BRW_OPCODE_DIM / BRW_OPCODE_SMOV */
>     /* Reserved - 11 */
> @@ -126,63 +198,81 @@ static const struct inst_info inst_info[128] = {
>     /* Reserved - 13-15 */
>     [BRW_OPCODE_CMP] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_GEN_DEPENDENT,
>     },
>     [BRW_OPCODE_CMPN] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_GEN_DEPENDENT,
>     },
>     [BRW_OPCODE_CSEL] = {
>        .gen = GEN_GE(GEN8),
>     },
>     [BRW_OPCODE_F32TO16] = {
>        .gen = GEN7 | GEN75,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_F16TO32] = {
>        .gen = GEN7 | GEN75,
> +      .acc = ACC_NO_ACCESS,
>     },
>     /* Reserved - 21-22 */
>     [BRW_OPCODE_BFREV] = {
>        .gen = GEN_GE(GEN7),
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_BFE] = {
>        .gen = GEN_GE(GEN7),
> +      .acc = ACC_NO_IMPLICIT_DESTINATION,

Internal BFE docs contradict this, they say
"No accumulator access, implicit or explicit."
just like the other ones...so...ACC_NO_ACCESS.

>     },
>     [BRW_OPCODE_BFI1] = {
>        .gen = GEN_GE(GEN7),
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_BFI2] = {
>        .gen = GEN_GE(GEN7),
> +      .acc = ACC_NO_IMPLICIT_DESTINATION,
>     },
>     /* Reserved - 27-31 */
>     [BRW_OPCODE_JMPI] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     /* BRW_OPCODE_BRD */
>     [BRW_OPCODE_IF] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_IFF] = { /* also BRW_OPCODE_BRC */
>        .gen = GEN_LE(GEN5),
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_ELSE] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_ENDIF] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_DO] = { /* also BRW_OPCODE_CASE */
>        .gen = GEN_LE(GEN5),
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_WHILE] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_BREAK] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_CONTINUE] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_HALT] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     /* BRW_OPCODE_CALLA */
>     /* BRW_OPCODE_MSAVE / BRW_OPCODE_CALL */
> @@ -191,22 +281,28 @@ static const struct inst_info inst_info[128] = {
>     /* BRW_OPCODE_POP */
>     [BRW_OPCODE_WAIT] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_SEND] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_SENDC] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_SENDS] = {
>        .gen = GEN_GE(GEN9),
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_SENDSC] = {
>        .gen = GEN_GE(GEN9),
> +      .acc = ACC_NO_ACCESS,
>     },
>     /* Reserved 53-55 */
>     [BRW_OPCODE_MATH] = {
>        .gen = GEN_GE(GEN6),
> +      .acc = ACC_NO_ACCESS,
>     },
>     /* Reserved 57-63 */
>     [BRW_OPCODE_ADD] = {
> @@ -214,6 +310,7 @@ static const struct inst_info inst_info[128] = {
>     },
>     [BRW_OPCODE_MUL] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_GEN_DEPENDENT,
>     },
>     [BRW_OPCODE_AVG] = {
>        .gen = GEN_ALL,
> @@ -223,65 +320,89 @@ static const struct inst_info inst_info[128] = {
>     },
>     [BRW_OPCODE_RNDU] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_GEN_DEPENDENT,
>     },
>     [BRW_OPCODE_RNDD] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_GEN_DEPENDENT,
>     },
>     [BRW_OPCODE_RNDE] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_GEN_DEPENDENT,
>     },
>     [BRW_OPCODE_RNDZ] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_GEN_DEPENDENT,
>     },
>     [BRW_OPCODE_MAC] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_EXPLICIT_SOURCE,
>     },
>     [BRW_OPCODE_MACH] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_EXPLICIT_SOURCE |
> +             ACC_NO_EXPLICIT_DESTINATION |
> +             ACC_IMPLICIT_WRITE_REQUIRED,
>     },
>     [BRW_OPCODE_LZD] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_GEN_DEPENDENT,
>     },
>     [BRW_OPCODE_FBH] = {
>        .gen = GEN_GE(GEN7),
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_FBL] = {
>        .gen = GEN_GE(GEN7),
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_CBIT] = {
>        .gen = GEN_GE(GEN7),
> +      .acc = ACC_NO_ACCESS,
>     },
>     [BRW_OPCODE_ADDC] = {
>        .gen = GEN_GE(GEN7),
> +      .acc = ACC_NO_EXPLICIT_DESTINATION |
> +             ACC_IMPLICIT_WRITE_REQUIRED,
>     },
>     [BRW_OPCODE_SUBB] = {
>        .gen = GEN_GE(GEN7),
> +      .acc = ACC_NO_EXPLICIT_DESTINATION |
> +             ACC_IMPLICIT_WRITE_REQUIRED,
>     },
>     [BRW_OPCODE_SAD2] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_EXPLICIT_SOURCE,
>     },
>     [BRW_OPCODE_SADA2] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_EXPLICIT_SOURCE,
>     },
>     /* Reserved 82-83 */
>     [BRW_OPCODE_DP4] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_EXPLICIT_SOURCE,
>     },
>     [BRW_OPCODE_DPH] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_EXPLICIT_SOURCE,
>     },
>     [BRW_OPCODE_DP3] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_EXPLICIT_SOURCE,
>     },
>     [BRW_OPCODE_DP2] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_EXPLICIT_SOURCE,
>     },
>     /* Reserved 88 */
>     [BRW_OPCODE_LINE] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_EXPLICIT_SOURCE,
>     },
>     [BRW_OPCODE_PLN] = {
>        .gen = GEN_GE(GEN45),
> +      .acc = ACC_NO_EXPLICIT_SOURCE,

It sounds like you might be able to do PLN on accumulators on Skylake,
but if exec_size == 16 then src1 can't be an accumulator.

Seems kind of crazy, I'm not sure why you'd want to do that, or if it's
/really/ allowed...

>     },
>     [BRW_OPCODE_MAD] = {
>        .gen = GEN_GE(GEN6),
> @@ -293,9 +414,130 @@ static const struct inst_info inst_info[128] = {
>     /* BRW_OPCODE_NENOP */
>     [BRW_OPCODE_NOP] = {
>        .gen = GEN_ALL,
> +      .acc = ACC_NO_ACCESS,
>     },
>  };
>  
> +static struct string
> +accumulator_restrictions(const struct brw_device_info *devinfo,
> +                         const brw_inst *inst)
> +{
> +   enum opcode opcode = brw_inst_opcode(devinfo, inst);
> +   enum acc acc = inst_info[opcode].acc;
> +
> +   /* Handle Gen-specific accumulator restrictions */
> +   if ((acc & ACC_GEN_DEPENDENT) != 0) {
> +      assert(acc == ACC_GEN_DEPENDENT);
> +
> +      switch (opcode) {
> +      case BRW_OPCODE_SEL:

Would it make sense to put in documentation citations for these?

The G45 PRM says that it doesn't implicitly update accumulators, but
doesn't seem to prohibit an explicit destination.  There's nothing
on the SEL page prohibiting sources.  (Is that a more general
prohibition?)

Internal docs clarify that SNB prohibits both implicit and explicit
destination, so I think I agree with this.

> +         if (devinfo->gen < 7)
> +            acc = ACC_NO_ACCESS;
> +         else
> +            acc = ACC_NO_RESTRICTIONS;
> +         break;
> +      case BRW_OPCODE_CMP:
> +      case BRW_OPCODE_CMPN:
> +         acc = ACC_NO_RESTRICTIONS;
> +         if (devinfo->gen < 8)
> +            acc |= ACC_NO_DESTINATION;

Huh, I just see "Accumulator cannot be destination, implicit or
explicit. The destination must be a general register or the null
register."  Nothing to indicate this was lifted on Gen8+.

I think ACC_NO_DESTINATION is universal.

> +         if (devinfo->gen < 7)
> +            acc |= ACC_NO_SOURCE_MODIFIER |
> +                   ACC_NO_INTEGER_SOURCE;

Braces, please!  Also how about:

/* SNB PRM > CMP instruction:
 * "[DevSNB Errata] The cmp instruction can not use accumulator as
 *  source if source modifier is used, all other cases are ok."
 *
 *  [DevSNB Errata] Integer compare can not use accumulator source."
 */

> +         break;
> +      case BRW_OPCODE_MUL:
> +         if (devinfo->gen < 8)
> +            acc = ACC_NO_SOURCE_MODIFIER;
> +         else
> +            acc = ACC_NO_RESTRICTIONS;

This doesn't look right to me.  I think it should be:

      acc = ACC_NO_RESTRICTIONS;

      /* SNB PRM > MUL instruction:
       * "Source operands cannot be an accumulator register."
       *
       * IVB PRM > MUL instruction:
       * "Source operands cannot be accumulators."
       */
      if (devinfo->gen <= 7) {
         acc |= ACC_NO_EXPLICIT_SOURCE;
      }

      /* BDW PRM > MUL instruction:
       * "Integer source operands cannot be accumulators."
       * 
       * Although this doesn't appear in the Haswell PRM, we assume it
       * applies anyway; internal documentation doesn't restrict the
       * above comment to any particular platform.
       */
      acc |= ACC_NO_INTEGER_SOURCE;

      /* BDW PRM > MUL instruction:
       * "When multiplying DW x DW, the dst cannot be accumulator."
       *
       * Internal documentation indicates this is BDW only, not Gen8+.
       */
      if (devinfo->gen == 8 &&
          (is_integer_dword(brw_inst_src0_reg_type(devinfo, inst)) ||
           is_integer_dword(brw_inst_src1_reg_type(devinfo, inst)))) {
         acc |= ACC_NO_DESTINATION;
      }

I'm not sure where you got "no source modifiers" from.

> +         break;
> +      case BRW_OPCODE_RNDU:
> +      case BRW_OPCODE_RNDD:
> +      case BRW_OPCODE_RNDE:
> +      case BRW_OPCODE_RNDZ:
> +         if (devinfo->gen < 8)
> +            acc = ACC_NO_ACCESS;
> +         else
> +            acc = ACC_NO_RESTRICTIONS;
> +         break;

Looks right, at least for Gen6+.

> +      case BRW_OPCODE_LZD:
> +         if (devinfo->gen < 8)
> +            acc = ACC_NO_EXPLICIT_DESTINATION;
> +         else
> +            acc = ACC_NO_RESTRICTIONS;
> +         break;

Looks right, at least for Gen6+.

> +      default:
> +         unreachable("not reached");
> +      }
> +   }
> +   assert((acc & ACC_GEN_DEPENDENT) == 0);
> +
> +   /* Gens 4 and 5 don't have an AccWrCtrl. Instead of using ACC_GEN_DEPENDENT
> +    * in a ton of places, simply ignore the related restrictions.
> +    */
> +   if (devinfo->gen < 6)
> +      acc &= ~(ACC_NO_IMPLICIT_DESTINATION | ACC_IMPLICIT_WRITE_REQUIRED);
> +
> +   struct string msg = {
> +      .str = NULL,
> +      .len = 0,
> +   };
> +
> +   if ((acc & ACC_NO_IMPLICIT_DESTINATION) != 0 &&
> +       brw_inst_acc_wr_control(devinfo, inst) == 1) {
> +      CAT(msg, error("AccWrEN (\"implicit accumulator update\") not allowed"));
> +   }
> +
> +   /* It's not possible to violate any of the other restrictions with a 3-src
> +    * instruction.
> +    */
> +   if (is_3src(opcode))
> +      return msg;
> +
> +   if ((acc & ACC_NO_EXPLICIT_SOURCE) != 0 &&
> +       src0_is_accumulator(devinfo, inst) &&
> +       src1_is_accumulator(devinfo, inst)) {
> +      CAT(msg, error("Explicit accumulator source operand not allowed"));
> +   }
> +
> +   if ((acc & ACC_NO_EXPLICIT_DESTINATION) != 0 &&
> +       dst_is_accumulator(devinfo, inst)) {
> +      CAT(msg, error("Explicit accumulator destination not allowed"));
> +   }
> +
> +   if ((acc & ACC_NO_SOURCE_MODIFIER) != 0 &&
> +       ((src0_is_accumulator(devinfo, inst) &&
> +         (brw_inst_src0_abs(devinfo, inst) == 1 ||
> +          brw_inst_src0_negate(devinfo, inst) == 1)) ||
> +        (src1_is_accumulator(devinfo, inst) &&
> +         (brw_inst_src1_abs(devinfo, inst) == 1 ||
> +          brw_inst_src1_negate(devinfo, inst) == 1)))) {
> +      CAT(msg, error("Source modifiers on accumulator source not allowed"));
> +   }
> +
> +   if ((acc & ACC_NO_INTEGER_SOURCE) != 0 &&
> +       ((src0_is_accumulator(devinfo, inst) &&
> +         is_integer(brw_inst_src0_reg_type(devinfo, inst))) ||
> +        (src1_is_accumulator(devinfo, inst) &&
> +         is_integer(brw_inst_src1_reg_type(devinfo, inst))))) {
> +      CAT(msg, "Integer accumulator source not allowed");
> +   }
> +
> +   if ((acc & ACC_IMPLICIT_WRITE_REQUIRED) != 0 &&
> +       brw_inst_acc_wr_control(devinfo, inst) == 0) {
> +      CAT(msg, error("AccWrEn (\"implicit accumulator update\") required"));
> +   }
> +
> +   if ((acc & ACC_NOT_BOTH_SOURCE_AND_DESTINATION) != 0 &&
> +       (dst_is_accumulator(devinfo, inst) +
> +       (src0_is_accumulator(devinfo, inst) ||
> +        src1_is_accumulator(devinfo, inst)) == 2)) {
> +      CAT(msg, error("Both source and destination cannot be accumulator"));
> +   }
> +
> +   return msg;
> +}

One idea: you could use ERROR_IF here instead of if...CAT.  That would
require removing the "valid = false" from ERROR_IF, as we don't have
that here.  Instead, you could have the main function simply set

   valid = (error_msg.str == NULL);
              ...or...
   valid = (error_msg.len == 0);

In other words, if we haven't accumulated any error messages, the
program is valid.

CHECK would then become

#define CHECK(func) \
   do { \
      struct string __msg = func; \
      if (__msg.str) { \
         ralloc_asprintf_rewrite_tail(&error_msg.str, &error_msg.len, "%s", __msg.str); \
         ralloc_free(__msg.str); \
      } \
   } while (0)

> +
>  static unsigned
>  num_sources_from_inst(const struct brw_device_info *devinfo,
>                        const brw_inst *inst)
> @@ -397,6 +639,8 @@ brw_validate_instructions(const struct brw_codegen *p, int start_offset,
>        ERROR_IF(is_unsupported_inst(devinfo, inst),
>                 "Instruction not supported on this Gen");
>  
> +      CHECK(accumulator_restrictions(devinfo, inst));
> +
>        if (error_msg.str && annotation) {
>           annotation_insert_error(annotation, src_offset, error_msg.str);
>        }
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20151103/2b532dcf/attachment-0001.sig>


More information about the mesa-dev mailing list