[Mesa-dev] [PATCH 6/7] i965: Add support for compacting 3-src instructions on Gen8.

Kenneth Graunke kenneth at whitecape.org
Mon Aug 18 15:18:44 PDT 2014


On Monday, August 18, 2014 11:19:52 AM Matt Turner wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_eu_compact.c | 189 +++++++++++++++++++++++++++++
>  1 file changed, 189 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> index 07faff4..727fef5 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> @@ -611,6 +611,97 @@ set_src1_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
>     return true;
>  }
>  
> +static bool
> +set_3src_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
> +{
> +   assert(brw->gen >= 8);
> +
> +   uint32_t uncompacted =                  /* 24b/BDW; 26b/CHV */
> +      (brw_inst_bits(src, 34, 32) << 21) | /*  3b */
> +      (brw_inst_bits(src, 28,  8));        /* 21b */
> +
> +   if (brw->is_cherryview)
> +      uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
> +
> +   for (int i = 0; i < 4; i++) {

How about this instead, so the loop bound follows the table size rather than a hard-coded 4?

   for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {

Either way is fine.

> +      if (gen8_3src_control_index_table[i] == uncompacted) {
> +         brw_compact_inst_set_3src_control_index(dst, i);
> +	 return true;
> +      }
> +   }
> +
> +   return false;
> +}
> +
> +static bool
> +set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
> +{
> +   assert(brw->gen >= 8);
> +
> +   uint64_t uncompacted =                    /* 46b/BDW; 49b/CHV */
> +      (brw_inst_bits(src,  83,  83) << 43) | /*  1b */
> +      (brw_inst_bits(src, 114, 107) << 35) | /*  8b */
> +      (brw_inst_bits(src,  93,  86) << 27) | /*  8b */
> +      (brw_inst_bits(src,  72,  65) << 19) | /*  8b */
> +      (brw_inst_bits(src,  55,  37));        /* 19b */
> +
> +   if (brw->is_cherryview) {
> +      uncompacted |=
> +         (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
> +         (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
> +         (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
> +   } else {
> +      uncompacted |=
> +         (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
> +         (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
> +   }
> +
> +   for (int i = 0; i < 4; i++) {

Perhaps the same ARRAY_SIZE change here:

   for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {

> +      if (gen8_3src_source_index_table[i] == uncompacted) {
> +         brw_compact_inst_set_3src_source_index(dst, i);
> +	 return true;
> +      }
> +   }
> +
> +   return false;
> +}
> +
> +static bool
> +brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst *dst,
> +                                 brw_inst *src)
> +{
> +   assert(brw->gen >= 8);
> +
> +#define compact(field) \
> +   brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(brw, src))
> +
> +   compact(opcode);
> +
> +   if (!set_3src_control_index(brw, dst, src))
> +      return false;
> +
> +   if (!set_3src_source_index(brw, dst, src))
> +      return false;
> +
> +   compact(dst_reg_nr);
> +   compact(src0_rep_ctrl);
> +   brw_compact_inst_set_3src_cmpt_control(dst, true);
> +   compact(debug_control);
> +   compact(saturate);
> +   compact(src1_rep_ctrl);
> +   compact(src2_rep_ctrl);
> +   compact(src0_reg_nr);
> +   compact(src1_reg_nr);
> +   compact(src2_reg_nr);
> +   compact(src0_subreg_nr);
> +   compact(src1_subreg_nr);
> +   compact(src2_subreg_nr);
> +
> +#undef compact
> +
> +   return true;
> +}
> +
>  /* Compacted instructions have 12-bits for immediate sources, and a 13th bit
>   * that's replicated through the high 20 bits.
>   *
> @@ -627,6 +718,17 @@ is_compactable_immediate(unsigned imm)
>     return imm == 0 || imm == 0xfffff000;
>  }
>  
> +/* Returns whether an opcode takes three sources. */
> +static bool
> +is_3src(uint32_t op)
> +{
> +   return op == BRW_OPCODE_CSEL ||
> +          op == BRW_OPCODE_BFE ||
> +          op == BRW_OPCODE_BFI2 ||
> +          op == BRW_OPCODE_MAD ||
> +          op == BRW_OPCODE_LRP;

How about this instead:

   return opcode_descs[op].nsrc == 3;

The only trouble is you'll need to add CSEL to opcode_descs in brw_disasm.c, which honestly we should do anyway.

I would really like this one changed; please treat it as a request rather than a take-it-or-leave-it suggestion.

> +}
> +
>  /**
>   * Tries to compact instruction src into dst.
>   *
> @@ -651,6 +753,16 @@ brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst,
>        return false;
>     }
>  
> +   if (brw->gen >= 8 && is_3src(brw_inst_opcode(brw, src))) {
> +      memset(&temp, 0, sizeof(temp));
> +      if (brw_try_compact_3src_instruction(brw, &temp, src)) {
> +         *dst = temp;
> +         return true;
> +      } else {
> +         return false;
> +      }
> +   }
> +
>     bool is_immediate =
>        brw_inst_src0_reg_file(brw, src) == BRW_IMMEDIATE_VALUE ||
>        brw_inst_src1_reg_file(brw, src) == BRW_IMMEDIATE_VALUE;
> @@ -767,12 +879,89 @@ set_uncompacted_src1(struct brw_context *brw, brw_inst *dst,
>     }
>  }
>  
> +static void
> +set_uncompacted_3src_control_index(struct brw_context *brw, brw_inst *dst,
> +                                   brw_compact_inst *src)
> +{
> +   assert(brw->gen >= 8);
> +
> +   uint32_t compacted = brw_compact_inst_3src_control_index(src);
> +   uint32_t uncompacted = gen8_3src_control_index_table[compacted];
> +
> +   brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
> +   brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
> +
> +   if (brw->is_cherryview)
> +      brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24));

How about masking the value here, as the other brw_inst_set_bits calls do:

      brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);

> +}
> +
> +static void
> +set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst,
> +                                  brw_compact_inst *src)
> +{
> +   assert(brw->gen >= 8);
> +
> +   uint32_t compacted = brw_compact_inst_3src_source_index(src);
> +   uint64_t uncompacted = gen8_3src_source_index_table[compacted];
> +
> +   brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
> +   brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
> +   brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
> +   brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
> +   brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
> +
> +   if (brw->is_cherryview) {
> +      brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47));

And likewise, with the mask added:

      brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);

With those changes,
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

> +      brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
> +      brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
> +   } else {
> +      brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
> +      brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
> +   }
> +}
> +
> +static void
> +brw_uncompact_3src_instruction(struct brw_context *brw, brw_inst *dst,
> +                               brw_compact_inst *src)
> +{
> +   assert(brw->gen >= 8);
> +
> +#define uncompact(field) \
> +   brw_inst_set_3src_##field(brw, dst, brw_compact_inst_3src_##field(src))
> +
> +   uncompact(opcode);
> +
> +   set_uncompacted_3src_control_index(brw, dst, src);
> +   set_uncompacted_3src_source_index(brw, dst, src);
> +
> +   uncompact(dst_reg_nr);
> +   uncompact(src0_rep_ctrl);
> +   brw_inst_set_3src_cmpt_control(brw, dst, false);
> +   uncompact(debug_control);
> +   uncompact(saturate);
> +   uncompact(src1_rep_ctrl);
> +   uncompact(src2_rep_ctrl);
> +   uncompact(src0_reg_nr);
> +   uncompact(src1_reg_nr);
> +   uncompact(src2_reg_nr);
> +   uncompact(src0_subreg_nr);
> +   uncompact(src1_subreg_nr);
> +   uncompact(src2_subreg_nr);
> +
> +#undef uncompact
> +}
> +
>  void
>  brw_uncompact_instruction(struct brw_context *brw, brw_inst *dst,
>                            brw_compact_inst *src)
>  {
>     memset(dst, 0, sizeof(*dst));
>  
> +   if (brw->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) {
> +      brw_uncompact_3src_instruction(brw, dst, src);
> +      return;
> +   }
> +
>     brw_inst_set_opcode(brw, dst, brw_compact_inst_opcode(src));
>     brw_inst_set_debug_control(brw, dst, brw_compact_inst_debug_control(src));
>  
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140818/cfe385af/attachment-0001.sig>


More information about the mesa-dev mailing list