Mesa (main): ir3/delay: Ignore earlier definitions to the same register

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Nov 17 14:16:15 UTC 2021


Module: Mesa
Branch: main
Commit: 140e117f2b0c45439a913efe0a4fd1df0842fdfb
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=140e117f2b0c45439a913efe0a4fd1df0842fdfb

Author: Connor Abbott <cwabbott0 at gmail.com>
Date:   Mon Nov  8 17:20:39 2021 +0100

ir3/delay: Ignore earlier definitions to the same register

We have a situation in some skia shaders like:

add.f r0.x, ...
(rpt2)nop
mul.f ..., r0.x
sam (xyzw) r0.x, ...
rcp ..., r0.x

Notice that rcp uses the result of the sam instruction, not the add.f,
but we didn't keep track of which instructions kill the sources in
ir3_delay, so we'd add an extra nop, resulting in a disagreement between
ir3_delay and the scheduling graph. Since postsched is correct, fix
ir3_delay. This only results in some very slight shader-db changes but
keeps the next commit from changing things.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13722>

---

 src/freedreno/ir3/ir3.h         | 14 ++++++++++++++
 src/freedreno/ir3/ir3_delay.c   | 28 ++++++++++++++++++++++++----
 src/freedreno/ir3/tests/delay.c |  5 +++++
 3 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 0c3b54d7ba9..6e4e81270a2 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -2341,6 +2341,20 @@ regmask_set(regmask_t *regmask, struct ir3_register *reg)
    }
 }
 
+static inline void
+regmask_clear(regmask_t *regmask, struct ir3_register *reg)
+{
+   bool half = reg->flags & IR3_REG_HALF;
+   if (reg->flags & IR3_REG_RELATIV) {
+      for (unsigned i = 0; i < reg->size; i++)
+         __regmask_clear(regmask, half, reg->array.base + i);
+   } else {
+      for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
+         if (mask & 1)
+            __regmask_clear(regmask, half, n);
+   }
+}
+
 static inline bool
 regmask_get(regmask_t *regmask, struct ir3_register *reg)
 {
diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c
index 8a5a57dc756..f4a748cc3eb 100644
--- a/src/freedreno/ir3/ir3_delay.c
+++ b/src/freedreno/ir3/ir3_delay.c
@@ -237,8 +237,11 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner,
 static unsigned
 delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
                   struct ir3_instruction *consumer, unsigned distance,
-                  bool soft, bool pred, bool mergedregs)
+                  bool soft, bool pred, regmask_t *in_mask, bool mergedregs)
 {
+   regmask_t mask;
+   memcpy(&mask, in_mask, sizeof(mask));
+
    unsigned delay = 0;
    /* Search backwards starting at the instruction before start, unless it's
     * NULL then search backwards from the block end.
@@ -261,6 +264,8 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
       foreach_dst_n (dst, dst_n, assigner) {
          if (dst->wrmask == 0)
             continue;
+         if (!regmask_get(&mask, dst))
+            continue;
          foreach_src_n (src, src_n, consumer) {
             if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
                continue;
@@ -269,6 +274,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
                assigner, consumer, dst_n, src_n, soft, mergedregs);
             new_delay = MAX2(new_delay, src_delay);
          }
+         regmask_clear(&mask, dst);
       }
 
       new_delay = new_delay > distance ? new_delay - distance : 0;
@@ -298,7 +304,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
       for (unsigned i = 0; i < block->predecessors_count; i++) {
          struct ir3_block *pred = block->predecessors[i];
          unsigned pred_delay = delay_calc_postra(pred, NULL, consumer, distance,
-                                                 soft, pred, mergedregs);
+                                                 soft, pred, &mask, mergedregs);
          delay = MAX2(delay, pred_delay);
       }
 
@@ -323,7 +329,14 @@ unsigned
 ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr,
                       bool soft, bool mergedregs)
 {
-   return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs);
+   regmask_t mask;
+   regmask_init(&mask, mergedregs);
+   foreach_src (src, instr) {
+      if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
+         regmask_set(&mask, src);
+   }
+
+   return delay_calc_postra(block, NULL, instr, 0, soft, false, &mask, mergedregs);
 }
 
 /**
@@ -334,7 +347,14 @@ unsigned
 ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr,
                      bool mergedregs)
 {
-   return delay_calc_postra(block, NULL, instr, 0, false, true, mergedregs);
+   regmask_t mask;
+   regmask_init(&mask, mergedregs);
+   foreach_src (src, instr) {
+      if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
+         regmask_set(&mask, src);
+   }
+
+   return delay_calc_postra(block, NULL, instr, 0, false, true, &mask, mergedregs);
 }
 
 /**
diff --git a/src/freedreno/ir3/tests/delay.c b/src/freedreno/ir3/tests/delay.c
index 018ade53a96..4f8e072ef6b 100644
--- a/src/freedreno/ir3/tests/delay.c
+++ b/src/freedreno/ir3/tests/delay.c
@@ -61,6 +61,11 @@ static const struct test {
       mov.f32f32 r0.z, c0.z
       mad.f32 r0.x, r0.x, r0.y, r0.z
    ),
+   TEST(0,
+      mov.f32f32 r0.x, c0.x
+      rcp r0.x, r0.y
+      add.f r0.x, r0.x, c0.x
+   ),
    TEST(2,
       mov.f32f32 r0.x, c0.x
       mov.f32f32 r0.y, c0.y



More information about the mesa-commit mailing list