On 31 August 2012 11:32, Eric Anholt <span dir="ltr"><<a href="mailto:eric@anholt.net" target="_blank">eric@anholt.net</a>></span> wrote:<br><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">






From: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>><br>
<br>
Reduces l4d2 program size from 1195kb to 919kb.<br>
<br>
Signed-off-by: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>> <br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">





---<br>
 src/mesa/drivers/dri/i965/brw_eu.c         |    2 +<br>
 src/mesa/drivers/dri/i965/brw_eu.h         |    1 +<br>
 src/mesa/drivers/dri/i965/brw_eu_compact.c |  208 +++++++++++++++++++++++++---<br>
 3 files changed, 192 insertions(+), 19 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c<br>
index c2515eb..a59b83f 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu.c<br>
@@ -206,6 +206,8 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx)<br>
    p->loop_stack_array_size = 16;<br>
    p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);<br>
    p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);<br>
+<br>
+   brw_init_compaction_tables(&brw->intel);<br>
 }<br>
<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h<br>
index 01b8d08..b64611e 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu.h<br>
@@ -1108,6 +1108,7 @@ void brw_set_uip_jip(struct brw_compile *p);<br>
 uint32_t brw_swap_cmod(uint32_t cmod);<br>
<br>
 /* brw_eu_compact.c */<br>
+void brw_init_compaction_tables(struct intel_context *intel);<br>
 void brw_compact_instructions(struct brw_compile *c);<br>
 void brw_uncompact_instruction(struct intel_context *intel,<br>
                               struct brw_instruction *dst,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c<br>
index dd661f5..009c961 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c<br>
@@ -180,6 +180,151 @@ static const uint32_t gen6_src_index_table[32] = {<br>
    0b001101010000,<br>
 };<br>
<br>
+static const uint32_t gen7_control_index_table[32] = {<br>
+   0b0000000000000000010,<br>
+   0b0000100000000000000,<br>
+   0b0000100000000000001,<br>
+   0b0000100000000000010,<br>
+   0b0000100000000000011,<br>
+   0b0000100000000000100,<br>
+   0b0000100000000000101,<br>
+   0b0000100000000000111,<br>
+   0b0000100000000001000,<br>
+   0b0000100000000001001,<br>
+   0b0000100000000001101,<br>
+   0b0000110000000000000,<br>
+   0b0000110000000000001,<br>
+   0b0000110000000000010,<br>
+   0b0000110000000000011,<br>
+   0b0000110000000000100,<br>
+   0b0000110000000000101,<br>
+   0b0000110000000000111,<br>
+   0b0000110000000001001,<br>
+   0b0000110000000001101,<br>
+   0b0000110000000010000,<br>
+   0b0000110000100000000,<br>
+   0b0001000000000000000,<br>
+   0b0001000000000000010,<br>
+   0b0001000000000000100,<br>
+   0b0001000000100000000,<br>
+   0b0010110000000000000,<br>
+   0b0010110000000010000,<br>
+   0b0011000000000000000,<br>
+   0b0011000000100000000,<br>
+   0b0101000000000000000,<br>
+   0b0101000000100000000<br>
+};<br>
+<br>
+static const uint32_t gen7_datatype_table[32] = {<br>
+   0b001000000000000001,<br>
+   0b001000000000100000,<br>
+   0b001000000000100001,<br>
+   0b001000000001100001,<br>
+   0b001000000010111101,<br>
+   0b001000001011111101,<br>
+   0b001000001110100001,<br>
+   0b001000001110100101,<br>
+   0b001000001110111101,<br>
+   0b001000010000100001,<br>
+   0b001000110000100000,<br>
+   0b001000110000100001,<br>
+   0b001001010010100101,<br>
+   0b001001110010100100,<br>
+   0b001001110010100101,<br>
+   0b001111001110111101,<br>
+   0b001111011110011101,<br>
+   0b001111011110111100,<br>
+   0b001111011110111101,<br>
+   0b001111111110111100,<br>
+   0b000000001000001100,<br>
+   0b001000000000111101,<br>
+   0b001000000010100101,<br>
+   0b001000010000100000,<br>
+   0b001001010010100100,<br>
+   0b001001110010000100,<br>
+   0b001010010100001001,<br>
+   0b001101111110111101,<br>
+   0b001111111110111101,<br>
+   0b001011110110101100,<br>
+   0b001010010100101000,<br>
+   0b001010110100101000<br>
+};<br>
+<br>
+static const uint32_t gen7_subreg_table[32] = {<br>
+   0b000000000000000,<br>
+   0b000000000000001,<br>
+   0b000000000001000,<br>
+   0b000000000001111,<br>
+   0b000000000010000,<br>
+   0b000000010000000,<br>
+   0b000000100000000,<br>
+   0b000000110000000,<br>
+   0b000001000000000,<br>
+   0b000001000010000,<br>
+   0b000010100000000,<br>
+   0b001000000000000,<br>
+   0b001000000000001,<br>
+   0b001000010000001,<br>
+   0b001000010000010,<br>
+   0b001000010000011,<br>
+   0b001000010000100,<br>
+   0b001000010000111,<br>
+   0b001000010001000,<br>
+   0b001000010001110,<br>
+   0b001000010001111,<br>
+   0b001000110000000,<br>
+   0b001000111101000,<br>
+   0b010000000000000,<br>
+   0b010000110000000,<br>
+   0b011000000000000,<br>
+   0b011110010000111,<br>
+   0b100000000000000,<br>
+   0b101000000000000,<br>
+   0b110000000000000,<br>
+   0b111000000000000,<br>
+   0b111000000011100<br>
+};<br>
+<br>
+static const uint32_t gen7_src_index_table[32] = {<br>
+   0b000000000000,<br>
+   0b000000000010,<br>
+   0b000000010000,<br>
+   0b000000010010,<br>
+   0b000000011000,<br>
+   0b000000100000,<br>
+   0b000000101000,<br>
+   0b000001001000,<br>
+   0b000001010000,<br>
+   0b000001110000,<br>
+   0b000001111000,<br>
+   0b001100000000,<br>
+   0b001100000010,<br>
+   0b001100001000,<br>
+   0b001100010000,<br>
+   0b001100010010,<br>
+   0b001100100000,<br>
+   0b001100101000,<br>
+   0b001100111000,<br>
+   0b001101000000,<br>
+   0b001101000010,<br>
+   0b001101001000,<br>
+   0b001101010000,<br>
+   0b001101100000,<br>
+   0b001101101000,<br>
+   0b001101110000,<br>
+   0b001101110001,<br>
+   0b001101111000,<br>
+   0b010001101000,<br>
+   0b010001101001,<br>
+   0b010001101010,<br>
+   0b010110001000<br>
+};<br>
+<br>
+static const uint32_t *control_index_table;<br>
+static const uint32_t *datatype_table;<br>
+static const uint32_t *subreg_table;<br>
+static const uint32_t *src_index_table;<br>
+<br>
 static bool<br>
 set_control_index(struct brw_compact_instruction *dst,<br>
                  struct brw_instruction *src)<br>
@@ -197,8 +342,8 @@ set_control_index(struct brw_compact_instruction *dst,<br>
    uncompressed |= src->header.execution_size << 13;<br>
    uncompressed |= src->header.saturate << 16;<br></blockquote><div><br>For gen7, we need to include FlagSubRegNum and FlagRegNum in "uncompressed".<br> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">







<br>
-   for (int i = 0; i < ARRAY_SIZE(gen6_control_index_table); i++) {<br>
-      if (gen6_control_index_table[i] == uncompressed) {<br>
+   for (int i = 0; i < 32; i++) {<br>
+      if (control_index_table[i] == uncompressed) {<br>
         dst->dw0.control_index = i;<br>
         return true;<br>
       }<br>
@@ -216,8 +361,8 @@ set_datatype_index(struct brw_compact_instruction *dst,<br>
    uncompressed |= src->bits1.ud & 0x7fff;<br>
    uncompressed |= (src->bits1.ud >> 29) << 15;<br>
<br>
-   for (int i = 0; i < ARRAY_SIZE(gen6_datatype_table); i++) {<br>
-      if (gen6_datatype_table[i] == uncompressed) {<br>
+   for (int i = 0; i < 32; i++) {<br>
+      if (datatype_table[i] == uncompressed) {<br>
         dst->dw0.data_type_index = i;<br>
         return true;<br>
       }<br>
@@ -236,8 +381,8 @@ set_subreg_index(struct brw_compact_instruction *dst,<br>
    uncompressed |= src->bits2.da1.src0_subreg_nr << 5;<br>
    uncompressed |= src->bits3.da1.src1_subreg_nr << 10;<br>
<br>
-   for (int i = 0; i < ARRAY_SIZE(gen6_subreg_table); i++) {<br>
-      if (gen6_subreg_table[i] == uncompressed) {<br>
+   for (int i = 0; i < 32; i++) {<br>
+      if (subreg_table[i] == uncompressed) {<br>
         dst->dw0.sub_reg_index = i;<br>
         return true;<br>
       }<br>
@@ -250,8 +395,8 @@ static bool<br>
 get_src_index(uint32_t uncompressed,<br>
              uint32_t *compressed)<br>
 {<br>
-   for (int i = 0; i < ARRAY_SIZE(gen6_src_index_table); i++) {<br>
-      if (gen6_src_index_table[i] == uncompressed) {<br>
+   for (int i = 0; i < 32; i++) {<br>
+      if (src_index_table[i] == uncompressed) {<br>
         *compressed = i;<br>
         return true;<br>
       }<br>
@@ -373,7 +518,7 @@ static void<br>
 set_uncompressed_control(struct brw_instruction *dst,<br>
                         struct brw_compact_instruction *src)<br>
 {<br>
-   uint32_t compressed = gen6_control_index_table[src->dw0.control_index];<br>
+   uint32_t compressed = control_index_table[src->dw0.control_index];<br></blockquote><div><br>As with set_control_index, this function needs to be modified for Gen7 to reflect the fact that the data in the table includes values for FlagSubRegNum and FlagRegNum.<br>



 </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
    dst->header.access_mode = compressed >> 0;<br>
    dst->header.mask_control = compressed >> 1;<br>
@@ -391,7 +536,7 @@ static void<br>
 set_uncompressed_datatype(struct brw_instruction *dst,<br>
                          struct brw_compact_instruction *src)<br>
 {<br>
-   uint32_t uncompressed = gen6_datatype_table[src->dw0.data_type_index];<br>
+   uint32_t uncompressed = datatype_table[src->dw0.data_type_index];<br>
<br>
    dst->bits1.ud &= ~(0x7 << 29);<br>
    dst->bits1.ud |= ((uncompressed >> 15) & 0x7) << 29;<br>
@@ -403,7 +548,7 @@ static void<br>
 set_uncompressed_subreg(struct brw_instruction *dst,<br>
                        struct brw_compact_instruction *src)<br>
 {<br>
-   uint32_t uncompressed = gen6_subreg_table[src->dw0.sub_reg_index];<br>
+   uint32_t uncompressed = subreg_table[src->dw0.sub_reg_index];<br>
<br>
    dst->bits1.da1.dest_subreg_nr = (uncompressed >> 0)  & 0x1f;<br>
    dst->bits2.da1.src0_subreg_nr = (uncompressed >> 5)  & 0x1f;<br>
@@ -415,7 +560,7 @@ set_uncompressed_src0(struct brw_instruction *dst,<br>
                      struct brw_compact_instruction *src)<br>
 {<br>
    uint32_t compressed = src->dw0.src0_index | src->dw1.src0_index << 2;<br>
-   uint32_t uncompressed = gen6_src_index_table[compressed];<br>
+   uint32_t uncompressed = src_index_table[compressed];<br>
<br>
    dst->bits2.da1.src0_abs = uncompressed >> 0;<br>
    dst->bits2.da1.src0_negate = uncompressed >> 1;<br>
@@ -429,7 +574,7 @@ static void<br>
 set_uncompressed_src1(struct brw_instruction *dst,<br>
                      struct brw_compact_instruction *src)<br>
 {<br>
-   uint32_t uncompressed = gen6_src_index_table[src->dw1.src1_index];<br>
+   uint32_t uncompressed = src_index_table[src->dw1.src1_index];<br>
<br>
    dst->bits3.da1.src1_abs = uncompressed >> 0;<br>
    dst->bits3.da1.src1_negate = uncompressed >> 1;<br>
@@ -515,6 +660,36 @@ update_uip_jip(struct brw_instruction *insn, int this_old_ip,<br>
 }<br>
<br>
 void<br>
+brw_init_compaction_tables(struct intel_context *intel)<br>
+{<br>
+   assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);<br>
+   assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);<br>
+   assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);<br>
+   assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);<br>
+   assert(gen7_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);<br>
+   assert(gen7_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);<br>
+   assert(gen7_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);<br>
+   assert(gen7_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);<br>
+<br>
+   switch (intel->gen) {<br>
+   case 7:<br>
+      control_index_table = gen7_control_index_table;<br>
+      datatype_table = gen7_datatype_table;<br>
+      subreg_table = gen7_subreg_table;<br>
+      src_index_table = gen7_src_index_table;<br>
+      break;<br>
+   case 6:<br>
+      control_index_table = gen6_control_index_table;<br>
+      datatype_table = gen6_datatype_table;<br>
+      subreg_table = gen6_subreg_table;<br>
+      src_index_table = gen6_src_index_table;<br>
+      break;<br>
+   default:<br>
+      return;<br>
+   }<br>
+}<br>
+<br>
+void<br>
 brw_compact_instructions(struct brw_compile *p)<br>
 {<br>
    struct brw_context *brw = p->brw;<br>
@@ -524,12 +699,7 @@ brw_compact_instructions(struct brw_compile *p)<br>
    int compressed_counts[p->next_insn_offset / 8];<br>
    int old_ip[p->next_insn_offset / 8];<br>
<br>
-   assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);<br>
-   assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);<br>
-   assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);<br>
-   assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);<br>
-<br>
-   if (intel->gen != 6)<br>
+   if (intel->gen < 6)<br>
       return;<br>
<br>
    int src_offset;<br>
<span><font color="#888888">--<br>
1.7.10.4<br></font></span></blockquote><div><br><br>Other changes need to be made for Gen7:<br><br>1. In brw_try_compact_instruction(), for Gen7, temp.dw0.flag_reg_nr must be set to zero.<br><br>2. In brw_uncompact_instruction(), for Gen7, we need to skip the statement "dst->bits2.da1.flag_reg_nr = src->dw0.flag_reg_nr;" to avoid overwriting the value set by set_uncompressed_control().<br>


 <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<span><font color="#888888">
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br>My comments on patches 5-7 are largely cosmetic (plus a few minor performance concerns), and patches 1 and 4 look good, so regardless of whether you decide to go ahead with my suggested changes, consider patches 1 and 4-7:<br>
<br>Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>><br><br>I believe my comments on this patch (8) point out genuine bugs in the Gen7 path.<br><br>I don't really consider myself qualified to comment on patches 2-3 because I'm not too familiar with the build system, so consider them:<br>
<br>Acked-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>><br>