Mesa (main): broadcom/compiler: rework simultaneous peripheral access checks

Wed Oct 27 06:20:58 UTC 2021

Module: Mesa
Branch: main
Commit: 3fbd6662b777a6f2a5b216101daca9b0a86f9958
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3fbd6662b777a6f2a5b216101daca9b0a86f9958

Author: Iago Toral Quiroga <itoral at igalia.com>
Date:   Tue Oct 26 11:30:15 2021 +0200

broadcom/compiler: rework simultaneous peripheral access checks

This was not quite correct: our checks for the allowed cases did not
verify that there were no peripheral accesses other than the ones
allowed.

For example, we allowed a wrtmuc signal together with a TMU write other
than TMUC, and we also allowed a TMU read together with a VPM
read/write. But we cannot allow, for example, wrtmuc with a TMU write
other than TMUC and a VPM write at the same time. So it is not enough to
check that we have a combination of allowed peripherals; we also need to
check that those are the only ones in use by the combined instructions.

Another example is that even if we allow a TMU write (other than TMUC)
with a wrtmuc signal, the resulting instruction must still have just
one TMU write other than TMUC. However, we were allowing the merge if
one instruction signaled wrtmuc and the other wrote to a TMU register
other than TMUC, without testing whether the combined result would have
two TMU writes.

Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13527>

---

 src/broadcom/compiler/qpu_schedule.c | 100 +++++++++++++++++++++--------------
 src/broadcom/qpu/qpu_instr.c         |   2 +-
 src/broadcom/qpu/qpu_instr.h         |   1 +
 3 files changed, 62 insertions(+), 41 deletions(-)

diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 63436735f57..de03edd8780 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -648,48 +648,56 @@ get_instruction_priority(const struct v3d_device_info *devinfo,
         return baseline_score;
 }
 
-static bool
-qpu_magic_waddr_is_periph(const struct v3d_device_info *devinfo,
-                          enum v3d_qpu_waddr waddr)
-{
-        return (v3d_qpu_magic_waddr_is_tmu(devinfo, waddr) ||
-                v3d_qpu_magic_waddr_is_sfu(waddr) ||
-                v3d_qpu_magic_waddr_is_tlb(waddr) ||
-                v3d_qpu_magic_waddr_is_vpm(waddr) ||
-                v3d_qpu_magic_waddr_is_tsy(waddr));
-}
+enum {
+        V3D_PERIPHERAL_VPM_READ           = (1 << 0),
+        V3D_PERIPHERAL_VPM_WRITE          = (1 << 1),
+        V3D_PERIPHERAL_VPM_WAIT           = (1 << 2),
+        V3D_PERIPHERAL_SFU                = (1 << 3),
+        V3D_PERIPHERAL_TMU_WRITE          = (1 << 4),
+        V3D_PERIPHERAL_TMU_READ           = (1 << 5),
+        V3D_PERIPHERAL_TMU_WAIT           = (1 << 6),
+        V3D_PERIPHERAL_TMU_WRTMUC_SIG     = (1 << 7),
+        V3D_PERIPHERAL_TSY                = (1 << 8),
+        V3D_PERIPHERAL_TLB                = (1 << 9),
+};
 
-static bool
-qpu_accesses_peripheral(const struct v3d_device_info *devinfo,
-                        const struct v3d_qpu_instr *inst)
+static uint32_t
+qpu_peripherals(const struct v3d_device_info *devinfo,
+                const struct v3d_qpu_instr *inst)
 {
-        if (v3d_qpu_uses_vpm(inst))
-                return true;
+        uint32_t result = 0;
+        if (v3d_qpu_reads_vpm(inst))
+                result |= V3D_PERIPHERAL_VPM_READ;
+        if (v3d_qpu_writes_vpm(inst))
+                result |= V3D_PERIPHERAL_VPM_WRITE;
+        if (v3d_qpu_waits_vpm(inst))
+                result |= V3D_PERIPHERAL_VPM_WAIT;
+
+        if (v3d_qpu_writes_tmu(devinfo, inst))
+                result |= V3D_PERIPHERAL_TMU_WRITE;
+        if (inst->sig.ldtmu)
+                result |= V3D_PERIPHERAL_TMU_READ;
+        if (inst->sig.wrtmuc)
+                result |= V3D_PERIPHERAL_TMU_WRTMUC_SIG;
+
         if (v3d_qpu_uses_sfu(inst))
-                return true;
+                result |= V3D_PERIPHERAL_SFU;
+
+        if (v3d_qpu_uses_tlb(inst))
+                result |= V3D_PERIPHERAL_TLB;
 
         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
                     inst->alu.add.magic_write &&
-                    qpu_magic_waddr_is_periph(devinfo, inst->alu.add.waddr)) {
-                        return true;
+                    v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr)) {
+                        result |= V3D_PERIPHERAL_TSY;
                 }
 
                 if (inst->alu.add.op == V3D_QPU_A_TMUWT)
-                        return true;
-
-                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
-                    inst->alu.mul.magic_write &&
-                    qpu_magic_waddr_is_periph(devinfo, inst->alu.mul.waddr)) {
-                        return true;
-                }
+                        result |= V3D_PERIPHERAL_TMU_WAIT;
         }
 
-        return (inst->sig.ldvpm ||
-                inst->sig.ldtmu ||
-                inst->sig.ldtlb ||
-                inst->sig.ldtlbu ||
-                inst->sig.wrtmuc);
+        return result;
 }
 
 static bool
@@ -697,26 +705,38 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
                                  const struct v3d_qpu_instr *a,
                                  const struct v3d_qpu_instr *b)
 {
-        const bool a_uses_peripheral = qpu_accesses_peripheral(devinfo, a);
-        const bool b_uses_peripheral = qpu_accesses_peripheral(devinfo, b);
+        const uint32_t a_peripherals = qpu_peripherals(devinfo, a);
+        const uint32_t b_peripherals = qpu_peripherals(devinfo, b);
 
         /* We can always do one peripheral access per instruction. */
-        if (!a_uses_peripheral || !b_uses_peripheral)
+        if (util_bitcount(a_peripherals) + util_bitcount(b_peripherals) <= 1)
                 return true;
 
         if (devinfo->ver < 41)
                 return false;
 
-        /* V3D 4.1 and later allow TMU read along with a VPM read or write, and
-         * WRTMUC with a TMU magic register write (other than tmuc).
+        /* V3D 4.1+ allow WRTMUC signal with TMU register write (other than
+         * tmuc).
          */
-        if ((a->sig.ldtmu && v3d_qpu_reads_or_writes_vpm(b)) ||
-            (b->sig.ldtmu && v3d_qpu_reads_or_writes_vpm(a))) {
-                return true;
+        if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
+            b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
+                return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
+        }
+
+        if (a_peripherals == V3D_PERIPHERAL_TMU_WRITE &&
+            b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG) {
+                return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
         }
 
-        if ((a->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(devinfo, b)) ||
-            (b->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(devinfo, a))) {
+        /* V3D 4.1+ allows TMU read with VPM read/write. */
+        if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
+            (b_peripherals == V3D_PERIPHERAL_VPM_READ ||
+             b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
+                return true;
+        }
+        if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
+            (a_peripherals == V3D_PERIPHERAL_VPM_READ ||
+             a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
                 return true;
         }
 
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
index 569c5fc4074..c661b98b7eb 100644
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -778,7 +778,7 @@ v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
         return false;
 }
 
-static bool
+bool
 v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
 {
         return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index 4f165e93914..6c81d602084 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -468,6 +468,7 @@ bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
 bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
 bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;