Mesa (main): freedreno/a6xx: Do sparse setup of the TFB program.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Nov 12 21:18:34 UTC 2021


Module: Mesa
Branch: main
Commit: 32b51d5e6077275c8202e36199a90a4695a77a2d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=32b51d5e6077275c8202e36199a90a4695a77a2d

Author: Emma Anholt <emma at anholt.net>
Date:   Wed Nov 10 15:49:31 2021 -0800

freedreno/a6xx: Do sparse setup of the TFB program.

We don't need to init the whole program RAM, just the locations we are
actually writing from.  Syncs this code up with tu a bit more.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13747>

---

 src/gallium/drivers/freedreno/a6xx/fd6_program.c | 53 ++++++++++++++++--------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 39ad4d928ec..e393317a72c 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -188,17 +188,17 @@ setup_stream_out(struct fd_context *ctx, struct fd6_program_state *state,
 {
    const struct ir3_stream_output_info *strmout = &v->shader->stream_output;
 
+   /* Note: 64 here comes from the HW layout of the program RAM. The program
+    * for stream N is at DWORD 64 * N.
+    */
+#define A6XX_SO_PROG_DWORDS 64
+   uint32_t prog[A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS] = {};
+   BITSET_DECLARE(valid_dwords, A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) = {0};
    uint32_t ncomp[PIPE_MAX_SO_BUFFERS];
-   uint32_t prog[256 / 2];
-   uint32_t prog_count;
 
    memset(ncomp, 0, sizeof(ncomp));
    memset(prog, 0, sizeof(prog));
 
-   prog_count = align(l->max_loc, 2) / 2;
-
-   debug_assert(prog_count < ARRAY_SIZE(prog));
-
    for (unsigned i = 0; i < strmout->num_outputs; i++) {
       const struct ir3_stream_output *out = &strmout->output[i];
       unsigned k = out->register_index;
@@ -220,19 +220,28 @@ setup_stream_out(struct fd_context *ctx, struct fd6_program_state *state,
          unsigned loc = l->var[idx].loc + c;
          unsigned off = j + out->dst_offset; /* in dwords */
 
+         unsigned dword = out->stream * A6XX_SO_PROG_DWORDS + loc/2;
          if (loc & 1) {
-            prog[loc / 2] |= A6XX_VPC_SO_PROG_B_EN |
-                             A6XX_VPC_SO_PROG_B_BUF(out->output_buffer) |
-                             A6XX_VPC_SO_PROG_B_OFF(off * 4);
+            prog[dword] |= A6XX_VPC_SO_PROG_B_EN |
+                           A6XX_VPC_SO_PROG_B_BUF(out->output_buffer) |
+                           A6XX_VPC_SO_PROG_B_OFF(off * 4);
          } else {
-            prog[loc / 2] |= A6XX_VPC_SO_PROG_A_EN |
-                             A6XX_VPC_SO_PROG_A_BUF(out->output_buffer) |
-                             A6XX_VPC_SO_PROG_A_OFF(off * 4);
+            prog[dword] |= A6XX_VPC_SO_PROG_A_EN |
+                           A6XX_VPC_SO_PROG_A_BUF(out->output_buffer) |
+                           A6XX_VPC_SO_PROG_A_OFF(off * 4);
          }
+         BITSET_SET(valid_dwords, dword);
       }
    }
 
-   unsigned sizedw = 12 + (2 * prog_count);
+   unsigned prog_count = 0;
+   unsigned start, end;
+   BITSET_FOREACH_RANGE (start, end, valid_dwords,
+                         A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) {
+      prog_count += end - start + 1;
+   }
+
+   unsigned sizedw = 10 + (2 * prog_count);
    if (ctx->screen->info->a6xx.tess_use_shared)
       sizedw += 2;
 
@@ -255,12 +264,20 @@ setup_stream_out(struct fd_context *ctx, struct fd6_program_state *state,
    OUT_RING(ring, ncomp[2]);
    OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(3));
    OUT_RING(ring, ncomp[3]);
-   OUT_RING(ring, REG_A6XX_VPC_SO_CNTL);
-   OUT_RING(ring, A6XX_VPC_SO_CNTL_RESET);
-   for (unsigned i = 0; i < prog_count; i++) {
-      OUT_RING(ring, REG_A6XX_VPC_SO_PROG);
-      OUT_RING(ring, prog[i]);
+
+   bool first = true;
+   BITSET_FOREACH_RANGE (start, end, valid_dwords,
+                         A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) {
+      OUT_RING(ring, REG_A6XX_VPC_SO_CNTL);
+      OUT_RING(ring, COND(first, A6XX_VPC_SO_CNTL_RESET) |
+                     A6XX_VPC_SO_CNTL_ADDR(start));
+      for (unsigned i = start; i < end; i++) {
+         OUT_RING(ring, REG_A6XX_VPC_SO_PROG);
+         OUT_RING(ring, prog[i]);
+      }
+      first = false;
    }
+
    if (ctx->screen->info->a6xx.tess_use_shared) {
       /* Possibly not tess_use_shared related, but the combination of
        * tess + xfb fails some tests if we don't emit this.



More information about the mesa-commit mailing list