[Mesa-dev] [PATCH 7/8] i965: Support instruction compaction between control flow.

Paul Berry stereotype441 at gmail.com
Tue Sep 4 16:27:14 PDT 2012


On 31 August 2012 11:32, Eric Anholt <eric at anholt.net> wrote:

> ---
>  src/mesa/drivers/dri/i965/brw_eu_compact.c |  111
> ++++++++++++++++++++++------
>  1 file changed, 87 insertions(+), 24 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> index 2c15c85..dd661f5 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> @@ -324,6 +324,19 @@ brw_try_compact_instruction(struct brw_compile *c,
>        return false; /* FINISHME: What else needs handling? */
>     }
>
> +
> +   if (src->header.opcode == BRW_OPCODE_IF ||
> +       src->header.opcode == BRW_OPCODE_ELSE ||
> +       src->header.opcode == BRW_OPCODE_ENDIF ||
> +       src->header.opcode == BRW_OPCODE_DO ||
> +       src->header.opcode == BRW_OPCODE_HALT ||
> +       src->header.opcode == BRW_OPCODE_WHILE) {
> +      /* FINISHME: The fixup code below, and brw_set_uip_jip and friends,
> needs
> +       * to be able to handle flow control.
> +       */
> +      return false;
> +   }
> +
>     /* FINISHME: immediates */
>     if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
>         src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
> @@ -476,12 +489,40 @@ void brw_debug_compact_uncompact(struct
> intel_context *intel,
>     }
>  }
>
> +static int
> +compressed_between(int old_ip, int old_target_ip, int *compressed_counts)
> +{
> +   int this_compressed_count = compressed_counts[old_ip];
> +   int target_compressed_count = compressed_counts[old_target_ip];
> +   return target_compressed_count - this_compressed_count;
> +}
> +
> +static void
> +update_uip_jip(struct brw_instruction *insn, int this_old_ip,
> +              int *compressed_counts)
> +{
> +   int target_old_ip;
> +
> +   target_old_ip = this_old_ip + insn->bits3.break_cont.jip;
> +   insn->bits3.break_cont.jip -= compressed_between(this_old_ip,
> +                                                   target_old_ip,
> +                                                   compressed_counts);
> +
> +   target_old_ip = this_old_ip + insn->bits3.break_cont.uip;
> +   insn->bits3.break_cont.uip -= compressed_between(this_old_ip,
> +                                                   target_old_ip,
> +                                                   compressed_counts);
> +}
> +
>  void
>  brw_compact_instructions(struct brw_compile *p)
>  {
>     struct brw_context *brw = p->brw;
>     struct intel_context *intel = &brw->intel;
>     void *store = p->store;
> +   /* Control flow fixup information */
> +   int compressed_counts[p->next_insn_offset / 8];
>

A comment would have saved me some time in understanding what information
is stored in compressed_counts.  I think this would be a correct
description but I'm not certain: "If an instruction was located at offset
8*i before compaction, compressed_counts[i] is the number of instructions
that preceded it which were not previously compressed, but are now."

An alternative way of representing the same information, possibly easier to
follow, would be:

/* If an instruction was located at offset 8*i before compaction,
8*new_ip[i] is the location of the instruction after compaction */
int new_ip[p->next_insn_offset / 8];

(Of course, update_uip_jip() would need to be reworked)


> +   int old_ip[p->next_insn_offset / 8];
>

And, assuming I'm understanding the code correctly, I believe old_ip could
be commented:

/* If an instruction is located at offset 8*i after compaction, 8*old_ip[i]
was the location of the instruction before compaction.  In the case of NOPs
inserted automatically for alignment, 8*old_ip[i] was the old location of
the instruction that needed to be aligned. */


>     assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) -
> 1] != 0);
>     assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
> @@ -491,36 +532,21 @@ brw_compact_instructions(struct brw_compile *p)
>     if (intel->gen != 6)
>        return;
>
> -   /* FINISHME: If we are going to compress instructions between flow
> control,
> -    * we have to do fixups to flow control offsets to represent the new
> -    * distances, since flow control uses (virtual address distance)/2,
> not a
> -    * logical instruction count.
> -    */
> -   bool continue_compressing = true;
> -   for (int i = 0; i < p->nr_insn; i++) {
> -      if (p->store[i].header.opcode == BRW_OPCODE_WHILE)
> -        return;
> -   }
> -
>     int src_offset;
>     int offset = 0;
> +   int compressed_count = 0;
>     for (src_offset = 0; src_offset < p->nr_insn * 16;) {
>        struct brw_instruction *src = store + src_offset;
>        void *dst = store + offset;
>
> -      switch (src->header.opcode) {
> -      case BRW_OPCODE_IF:
> -      case BRW_OPCODE_HALT:
> -      case BRW_OPCODE_JMPI:
> -        continue_compressing = false;
> -        break;
> -      }
> +      old_ip[offset / 8] = src_offset / 8;
> +      compressed_counts[src_offset / 8] = compressed_count;
>
>        struct brw_instruction saved = *src;
>
> -      if (continue_compressing &&
> -         !src->header.cmpt_control &&
> +      if (!src->header.cmpt_control &&
>           brw_try_compact_instruction(p, dst, src)) {
> +        compressed_count++;
>
>          /* debug */
>          struct brw_instruction uncompacted;
> @@ -544,6 +570,7 @@ brw_compact_instructions(struct brw_compile *p)
>             align->dw0.opcode = BRW_OPCODE_NOP;
>             align->dw0.cmpt_ctrl = 1;
>             offset += 8;
> +           old_ip[offset / 8] = src_offset / 8;
>             dst = store + offset;
>          }
>
> @@ -558,20 +585,56 @@ brw_compact_instructions(struct brw_compile *p)
>        }
>     }
>

I'm running out of time to look at this today, so I had to stop reviewing
here.  I think the stuff that follows is likely correct (because if it
weren't, it would probably fail spectacularly :)).  I'll try to return to
it tomorrow just to make sure.


>
> +   /* Fix up control flow offsets. */
> +   p->next_insn_offset = offset;
> +   for (offset = 0; offset < p->next_insn_offset;) {
> +      struct brw_instruction *insn = store + offset;
> +      int this_old_ip = old_ip[offset / 8];
> +      int this_compressed_count = compressed_counts[this_old_ip];
> +      int target_old_ip, target_compressed_count;
> +
> +      switch (insn->header.opcode) {
> +      case BRW_OPCODE_BREAK:
> +      case BRW_OPCODE_CONTINUE:
> +      case BRW_OPCODE_HALT:
> +        update_uip_jip(insn, this_old_ip, compressed_counts);
> +        break;
> +
> +      case BRW_OPCODE_IF:
> +      case BRW_OPCODE_ELSE:
> +      case BRW_OPCODE_ENDIF:
> +      case BRW_OPCODE_WHILE:
> +        if (intel->gen == 6) {
> +           target_old_ip = this_old_ip +
> insn->bits1.branch_gen6.jump_count;
> +           target_compressed_count = compressed_counts[target_old_ip];
> +           insn->bits1.branch_gen6.jump_count -= (target_compressed_count
> -
> +                                                  this_compressed_count);
> +        } else {
> +           update_uip_jip(insn, this_old_ip, compressed_counts);
> +        }
> +        break;
> +      }
> +
> +      if (insn->header.cmpt_control) {
> +        offset += 8;
> +      } else {
> +        offset += 16;
> +      }
> +   }
> +
>     /* p->nr_insn is counting the number of uncompacted instructions
> still, so
>      * divide.  We do want to be sure there's a valid instruction in any
>      * alignment padding, so that the next compression pass (for the FS
> 8/16
>      * compile passes) parses correctly.
>      */
> -   if (offset & 8) {
> +   if (p->next_insn_offset & 8) {
>        struct brw_compact_instruction *align = store + offset;
>        memset(align, 0, sizeof(*align));
>        align->dw0.opcode = BRW_OPCODE_NOP;
>        align->dw0.cmpt_ctrl = 1;
> -      offset += 8;
> +      p->next_insn_offset += 8;
>     }
> -   p->next_insn_offset = offset;
> -   p->nr_insn = offset / 16;
> +   p->nr_insn = p->next_insn_offset / 16;
>
>     if (0) {
>        fprintf(stdout, "dumping compacted program\n");
> --
> 1.7.10.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20120904/b69d7fc6/attachment.html>


More information about the mesa-dev mailing list