[Mesa-dev] [PATCH 8/8] i965: Add support for instruction compression on Gen7.

Paul Berry stereotype441 at gmail.com
Tue Sep 4 16:31:45 PDT 2012


On 31 August 2012 11:32, Eric Anholt <eric at anholt.net> wrote:

> From: Kenneth Graunke <kenneth at whitecape.org>
>
> Reduces l4d2 program size from 1195kb to 919kb.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>
---
>  src/mesa/drivers/dri/i965/brw_eu.c         |    2 +
>  src/mesa/drivers/dri/i965/brw_eu.h         |    1 +
>  src/mesa/drivers/dri/i965/brw_eu_compact.c |  208
> +++++++++++++++++++++++++---
>  3 files changed, 192 insertions(+), 19 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.c
> b/src/mesa/drivers/dri/i965/brw_eu.c
> index c2515eb..a59b83f 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu.c
> @@ -206,6 +206,8 @@ brw_init_compile(struct brw_context *brw, struct
> brw_compile *p, void *mem_ctx)
>     p->loop_stack_array_size = 16;
>     p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
>     p->if_depth_in_loop = rzalloc_array(mem_ctx, int,
> p->loop_stack_array_size);
> +
> +   brw_init_compaction_tables(&brw->intel);
>  }
>
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h
> b/src/mesa/drivers/dri/i965/brw_eu.h
> index 01b8d08..b64611e 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -1108,6 +1108,7 @@ void brw_set_uip_jip(struct brw_compile *p);
>  uint32_t brw_swap_cmod(uint32_t cmod);
>
>  /* brw_eu_compact.c */
> +void brw_init_compaction_tables(struct intel_context *intel);
>  void brw_compact_instructions(struct brw_compile *c);
>  void brw_uncompact_instruction(struct intel_context *intel,
>                                struct brw_instruction *dst,
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> index dd661f5..009c961 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> @@ -180,6 +180,151 @@ static const uint32_t gen6_src_index_table[32] = {
>     0b001101010000,
>  };
>
> +static const uint32_t gen7_control_index_table[32] = {
> +   0b0000000000000000010,
> +   0b0000100000000000000,
> +   0b0000100000000000001,
> +   0b0000100000000000010,
> +   0b0000100000000000011,
> +   0b0000100000000000100,
> +   0b0000100000000000101,
> +   0b0000100000000000111,
> +   0b0000100000000001000,
> +   0b0000100000000001001,
> +   0b0000100000000001101,
> +   0b0000110000000000000,
> +   0b0000110000000000001,
> +   0b0000110000000000010,
> +   0b0000110000000000011,
> +   0b0000110000000000100,
> +   0b0000110000000000101,
> +   0b0000110000000000111,
> +   0b0000110000000001001,
> +   0b0000110000000001101,
> +   0b0000110000000010000,
> +   0b0000110000100000000,
> +   0b0001000000000000000,
> +   0b0001000000000000010,
> +   0b0001000000000000100,
> +   0b0001000000100000000,
> +   0b0010110000000000000,
> +   0b0010110000000010000,
> +   0b0011000000000000000,
> +   0b0011000000100000000,
> +   0b0101000000000000000,
> +   0b0101000000100000000
> +};
> +
> +static const uint32_t gen7_datatype_table[32] = {
> +   0b001000000000000001,
> +   0b001000000000100000,
> +   0b001000000000100001,
> +   0b001000000001100001,
> +   0b001000000010111101,
> +   0b001000001011111101,
> +   0b001000001110100001,
> +   0b001000001110100101,
> +   0b001000001110111101,
> +   0b001000010000100001,
> +   0b001000110000100000,
> +   0b001000110000100001,
> +   0b001001010010100101,
> +   0b001001110010100100,
> +   0b001001110010100101,
> +   0b001111001110111101,
> +   0b001111011110011101,
> +   0b001111011110111100,
> +   0b001111011110111101,
> +   0b001111111110111100,
> +   0b000000001000001100,
> +   0b001000000000111101,
> +   0b001000000010100101,
> +   0b001000010000100000,
> +   0b001001010010100100,
> +   0b001001110010000100,
> +   0b001010010100001001,
> +   0b001101111110111101,
> +   0b001111111110111101,
> +   0b001011110110101100,
> +   0b001010010100101000,
> +   0b001010110100101000
> +};
> +
> +static const uint32_t gen7_subreg_table[32] = {
> +   0b000000000000000,
> +   0b000000000000001,
> +   0b000000000001000,
> +   0b000000000001111,
> +   0b000000000010000,
> +   0b000000010000000,
> +   0b000000100000000,
> +   0b000000110000000,
> +   0b000001000000000,
> +   0b000001000010000,
> +   0b000010100000000,
> +   0b001000000000000,
> +   0b001000000000001,
> +   0b001000010000001,
> +   0b001000010000010,
> +   0b001000010000011,
> +   0b001000010000100,
> +   0b001000010000111,
> +   0b001000010001000,
> +   0b001000010001110,
> +   0b001000010001111,
> +   0b001000110000000,
> +   0b001000111101000,
> +   0b010000000000000,
> +   0b010000110000000,
> +   0b011000000000000,
> +   0b011110010000111,
> +   0b100000000000000,
> +   0b101000000000000,
> +   0b110000000000000,
> +   0b111000000000000,
> +   0b111000000011100
> +};
> +
> +static const uint32_t gen7_src_index_table[32] = {
> +   0b000000000000,
> +   0b000000000010,
> +   0b000000010000,
> +   0b000000010010,
> +   0b000000011000,
> +   0b000000100000,
> +   0b000000101000,
> +   0b000001001000,
> +   0b000001010000,
> +   0b000001110000,
> +   0b000001111000,
> +   0b001100000000,
> +   0b001100000010,
> +   0b001100001000,
> +   0b001100010000,
> +   0b001100010010,
> +   0b001100100000,
> +   0b001100101000,
> +   0b001100111000,
> +   0b001101000000,
> +   0b001101000010,
> +   0b001101001000,
> +   0b001101010000,
> +   0b001101100000,
> +   0b001101101000,
> +   0b001101110000,
> +   0b001101110001,
> +   0b001101111000,
> +   0b010001101000,
> +   0b010001101001,
> +   0b010001101010,
> +   0b010110001000
> +};
> +
> +static const uint32_t *control_index_table;
> +static const uint32_t *datatype_table;
> +static const uint32_t *subreg_table;
> +static const uint32_t *src_index_table;
> +
>  static bool
>  set_control_index(struct brw_compact_instruction *dst,
>                   struct brw_instruction *src)
> @@ -197,8 +342,8 @@ set_control_index(struct brw_compact_instruction *dst,
>     uncompressed |= src->header.execution_size << 13;
>     uncompressed |= src->header.saturate << 16;
>

For gen7, we need to include FlagSubRegNum and FlagRegNum in "uncompressed".


>
> -   for (int i = 0; i < ARRAY_SIZE(gen6_control_index_table); i++) {
> -      if (gen6_control_index_table[i] == uncompressed) {
> +   for (int i = 0; i < 32; i++) {
> +      if (control_index_table[i] == uncompressed) {
>          dst->dw0.control_index = i;
>          return true;
>        }
> @@ -216,8 +361,8 @@ set_datatype_index(struct brw_compact_instruction *dst,
>     uncompressed |= src->bits1.ud & 0x7fff;
>     uncompressed |= (src->bits1.ud >> 29) << 15;
>
> -   for (int i = 0; i < ARRAY_SIZE(gen6_datatype_table); i++) {
> -      if (gen6_datatype_table[i] == uncompressed) {
> +   for (int i = 0; i < 32; i++) {
> +      if (datatype_table[i] == uncompressed) {
>          dst->dw0.data_type_index = i;
>          return true;
>        }
> @@ -236,8 +381,8 @@ set_subreg_index(struct brw_compact_instruction *dst,
>     uncompressed |= src->bits2.da1.src0_subreg_nr << 5;
>     uncompressed |= src->bits3.da1.src1_subreg_nr << 10;
>
> -   for (int i = 0; i < ARRAY_SIZE(gen6_subreg_table); i++) {
> -      if (gen6_subreg_table[i] == uncompressed) {
> +   for (int i = 0; i < 32; i++) {
> +      if (subreg_table[i] == uncompressed) {
>          dst->dw0.sub_reg_index = i;
>          return true;
>        }
> @@ -250,8 +395,8 @@ static bool
>  get_src_index(uint32_t uncompressed,
>               uint32_t *compressed)
>  {
> -   for (int i = 0; i < ARRAY_SIZE(gen6_src_index_table); i++) {
> -      if (gen6_src_index_table[i] == uncompressed) {
> +   for (int i = 0; i < 32; i++) {
> +      if (src_index_table[i] == uncompressed) {
>          *compressed = i;
>          return true;
>        }
> @@ -373,7 +518,7 @@ static void
>  set_uncompressed_control(struct brw_instruction *dst,
>                          struct brw_compact_instruction *src)
>  {
> -   uint32_t compressed = gen6_control_index_table[src->dw0.control_index];
> +   uint32_t compressed = control_index_table[src->dw0.control_index];
>

As with set_control_index, this function needs to be modified for Gen7 to
reflect the fact that the data in the table includes values for
FlagSubRegNum and FlagRegNum.


>
>     dst->header.access_mode = compressed >> 0;
>     dst->header.mask_control = compressed >> 1;
> @@ -391,7 +536,7 @@ static void
>  set_uncompressed_datatype(struct brw_instruction *dst,
>                           struct brw_compact_instruction *src)
>  {
> -   uint32_t uncompressed = gen6_datatype_table[src->dw0.data_type_index];
> +   uint32_t uncompressed = datatype_table[src->dw0.data_type_index];
>
>     dst->bits1.ud &= ~(0x7 << 29);
>     dst->bits1.ud |= ((uncompressed >> 15) & 0x7) << 29;
> @@ -403,7 +548,7 @@ static void
>  set_uncompressed_subreg(struct brw_instruction *dst,
>                         struct brw_compact_instruction *src)
>  {
> -   uint32_t uncompressed = gen6_subreg_table[src->dw0.sub_reg_index];
> +   uint32_t uncompressed = subreg_table[src->dw0.sub_reg_index];
>
>     dst->bits1.da1.dest_subreg_nr = (uncompressed >> 0)  & 0x1f;
>     dst->bits2.da1.src0_subreg_nr = (uncompressed >> 5)  & 0x1f;
> @@ -415,7 +560,7 @@ set_uncompressed_src0(struct brw_instruction *dst,
>                       struct brw_compact_instruction *src)
>  {
>     uint32_t compressed = src->dw0.src0_index | src->dw1.src0_index << 2;
> -   uint32_t uncompressed = gen6_src_index_table[compressed];
> +   uint32_t uncompressed = src_index_table[compressed];
>
>     dst->bits2.da1.src0_abs = uncompressed >> 0;
>     dst->bits2.da1.src0_negate = uncompressed >> 1;
> @@ -429,7 +574,7 @@ static void
>  set_uncompressed_src1(struct brw_instruction *dst,
>                       struct brw_compact_instruction *src)
>  {
> -   uint32_t uncompressed = gen6_src_index_table[src->dw1.src1_index];
> +   uint32_t uncompressed = src_index_table[src->dw1.src1_index];
>
>     dst->bits3.da1.src1_abs = uncompressed >> 0;
>     dst->bits3.da1.src1_negate = uncompressed >> 1;
> @@ -515,6 +660,36 @@ update_uip_jip(struct brw_instruction *insn, int
> this_old_ip,
>  }
>
>  void
> +brw_init_compaction_tables(struct intel_context *intel)
> +{
> +   assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) -
> 1] != 0);
> +   assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
> +   assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
> +   assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] !=
> 0);
> +   assert(gen7_control_index_table[ARRAY_SIZE(gen6_control_index_table) -
> 1] != 0);
> +   assert(gen7_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
> +   assert(gen7_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
> +   assert(gen7_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] !=
> 0);
> +
> +   switch (intel->gen) {
> +   case 7:
> +      control_index_table = gen7_control_index_table;
> +      datatype_table = gen7_datatype_table;
> +      subreg_table = gen7_subreg_table;
> +      src_index_table = gen7_src_index_table;
> +      break;
> +   case 6:
> +      control_index_table = gen6_control_index_table;
> +      datatype_table = gen6_datatype_table;
> +      subreg_table = gen6_subreg_table;
> +      src_index_table = gen6_src_index_table;
> +      break;
> +   default:
> +      return;
> +   }
> +}
> +
> +void
>  brw_compact_instructions(struct brw_compile *p)
>  {
>     struct brw_context *brw = p->brw;
> @@ -524,12 +699,7 @@ brw_compact_instructions(struct brw_compile *p)
>     int compressed_counts[p->next_insn_offset / 8];
>     int old_ip[p->next_insn_offset / 8];
>
> -   assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) -
> 1] != 0);
> -   assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
> -   assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
> -   assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] !=
> 0);
> -
> -   if (intel->gen != 6)
> +   if (intel->gen < 6)
>        return;
>
>     int src_offset;
> --
> 1.7.10.4
>


Other changes need to be made for Gen7:

1. In brw_try_compact_instruction(), for Gen7, temp.dw0.flag_reg_nr must be
set to zero.

2. In brw_uncompact_instruction(), for Gen7, we need to skip the statement
"dst->bits2.da1.flag_reg_nr = src->dw0.flag_reg_nr;" to avoid overwriting
the value set by set_uncompressed_control().


>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

My comments on patches 5-7 are largely cosmetic (and a few minor
performance concerns), and patches 1 and 4 look good, so regardless of
whether you decide to go ahead with my suggested changes, consider them:

Reviewed-by: Paul Berry <stereotype441 at gmail.com>

The comments on this patch (8) I believe are genuine bugs in Gen7.

I don't really consider myself qualified to comment on patches 2-3 because
I'm not too familiar with the build system, so consider them

Acked-by: Paul Berry <stereotype441 at gmail.com>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20120904/512be3ce/attachment-0001.html>


More information about the mesa-dev mailing list