Mesa (main): freedreno/a6xx: Allocate a fixed-size tess factor BO.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Dec 2 02:28:46 UTC 2021


Module: Mesa
Branch: main
Commit: d7226e9a9e4ffc34f2a2080799ea7952de53d656
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d7226e9a9e4ffc34f2a2080799ea7952de53d656

Author: Emma Anholt <emma at anholt.net>
Date:   Wed Nov 17 14:10:41 2021 -0800

freedreno/a6xx: Allocate a fixed-size tess factor BO.

Saves per-batch allocations, avoids reallocation for various vertex
counts, and avoids needing the indirect tess addrs constobj, which existed
only so that we could emit the relocs to the tess BO after we'd emitted
all the draws.

Also apparently it fixes one of our CTS fails.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13851>

---

 src/freedreno/ci/freedreno-a630-fails.txt        |  4 ---
 src/gallium/drivers/freedreno/a6xx/fd6_const.c   | 18 +++++++-----
 src/gallium/drivers/freedreno/a6xx/fd6_draw.c    | 37 ++++--------------------
 src/gallium/drivers/freedreno/a6xx/fd6_emit.c    |  9 ++++++
 src/gallium/drivers/freedreno/a6xx/fd6_gmem.c    | 22 --------------
 src/gallium/drivers/freedreno/a6xx/fd6_program.c | 14 +++++++++
 src/gallium/drivers/freedreno/freedreno_batch.c  |  6 ----
 src/gallium/drivers/freedreno/freedreno_batch.h  | 12 --------
 src/gallium/drivers/freedreno/freedreno_screen.c |  3 ++
 src/gallium/drivers/freedreno/freedreno_screen.h |  6 ++++
 10 files changed, 48 insertions(+), 83 deletions(-)

diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt
index 07962b440b5..d1ac6750e2a 100644
--- a/src/freedreno/ci/freedreno-a630-fails.txt
+++ b/src/freedreno/ci/freedreno-a630-fails.txt
@@ -17,10 +17,6 @@ KHR-GLES31.core.geometry_shader.layered_framebuffer.depth_support,Fail
 
 KHR-GLES31.core.geometry_shader.layered_framebuffer.stencil_support,Fail
 
-# " Pixel data comparison failed; expected: (0.1, 0.2, 0.3, 0.4) rendered: (0, 0, 0, 0) epsilon: 0.00392157
-#   Pixel data comparison failed at esextcTessellationShaderPoints.cpp:597"
-KHR-GLES31.core.tessellation_shader.tessellation_shader_point_mode.point_rendering,Fail
-
 # "Invalid value returned: expected:[1, 1, 1, 1] retrieved: [0, 0, 0, 0
 #  Invalid rendering result at esextcTessellationShaderBarrier.cpp:504"
 KHR-GLES31.core.tessellation_shader.tessellation_shader_tc_barriers.barrier_guarded_read_calls,Fail
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c
index dc8343f8814..f774d52361a 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c
@@ -108,21 +108,23 @@ emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
 }
 
 static void
-emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit,
+emit_tess_bos(struct fd_screen *screen, struct fd_ringbuffer *ring,
               struct ir3_shader_variant *s) assert_dt
 {
-   struct fd_context *ctx = emit->ctx;
    const struct ir3_const_state *const_state = ir3_const_state(s);
    const unsigned regid = const_state->offsets.primitive_param * 4 + 4;
-   uint32_t dwords = 16;
+   uint32_t dwords = 8;
 
-   OUT_PKT7(ring, fd6_stage2opcode(s->type), 3);
+   OUT_PKT7(ring, fd6_stage2opcode(s->type), 7);
    OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) |
                      CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
-                     CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                      CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) |
                      CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4));
-   OUT_RB(ring, ctx->batch->tess_addrs_constobj);
+   OUT_RING(ring, 0);
+   OUT_RING(ring, 0);
+   OUT_RELOC(ring, screen->tess_bo, FD6_TESS_FACTOR_SIZE, 0, 0);
+   OUT_RELOC(ring, screen->tess_bo, 0, 0, 0);
 }
 
 static void
@@ -166,7 +168,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
 
       emit_stage_tess_consts(constobj, emit->hs, hs_params,
                              ARRAY_SIZE(hs_params));
-      emit_tess_bos(constobj, emit, emit->hs);
+      emit_tess_bos(ctx->screen, constobj, emit->hs);
 
       if (emit->gs)
          num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
@@ -179,7 +181,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
 
       emit_stage_tess_consts(constobj, emit->ds, ds_params,
                              ARRAY_SIZE(ds_params));
-      emit_tess_bos(constobj, emit, emit->ds);
+      emit_tess_bos(ctx->screen, constobj, emit->ds);
    }
 
    if (emit->gs) {
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
index a79f6a5800e..5d7e2fb9997 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
@@ -261,41 +261,16 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
       draw0.prim_type = DI_PT_PATCHES0 + ctx->patch_vertices;
       draw0.tess_enable = true;
 
-      const unsigned max_count = 2048;
-      unsigned count;
-
-      /**
-       * We can cap tessparam/tessfactor buffer sizes at the sub-draw
-       * limit.  But in the indirect-draw case we must assume the worst.
-       */
-      if (indirect && indirect->buffer) {
-         count = ALIGN_NPOT(max_count, ctx->patch_vertices);
-      } else {
-         count = MIN2(max_count, draw->count);
-         count = ALIGN_NPOT(count, ctx->patch_vertices);
-      }
+      /* maximum number of patches that can fit in tess factor/param buffers */
+      uint32_t subdraw_size = MIN2(FD6_TESS_FACTOR_SIZE / factor_stride,
+                                   FD6_TESS_PARAM_SIZE / (emit.hs->output_size * 4));
+      /* convert from # of patches to draw count */
+      subdraw_size *= ctx->patch_vertices;
 
       OUT_PKT7(ring, CP_SET_SUBDRAW_SIZE, 1);
-      OUT_RING(ring, count);
+      OUT_RING(ring, subdraw_size);
 
       ctx->batch->tessellation = true;
-      ctx->batch->tessparam_size =
-         MAX2(ctx->batch->tessparam_size, emit.hs->output_size * 4 * count);
-      ctx->batch->tessfactor_size =
-         MAX2(ctx->batch->tessfactor_size, factor_stride * count);
-
-      if (!ctx->batch->tess_addrs_constobj) {
-         /* Reserve space for the bo address - we'll write them later in
-          * setup_tess_buffers().  We need 2 bo address, but indirect
-          * constant upload needs at least 4 vec4s.
-          */
-         unsigned size = 4 * 16;
-
-         ctx->batch->tess_addrs_constobj = fd_submit_new_ringbuffer(
-            ctx->batch->submit, size, FD_RINGBUFFER_STREAMING);
-
-         ctx->batch->tess_addrs_constobj->cur += size;
-      }
    }
 
 	uint32_t index_start = info->index_size ? draw->index_bias : draw->start;
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index f7590222564..21ef07daca9 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -1339,6 +1339,15 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
    OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
    OUT_RING(ring, 0x00000000);
 
+   /* This happens after all drawing has been emitted to the draw CS, so we know
+    * whether we need the tess BO pointers.
+    */
+   if (batch->tessellation) {
+      assert(screen->tess_bo);
+      OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
+      OUT_RELOC(ring, screen->tess_bo, 0, 0, 0);
+   }
+
    if (!batch->nondraw) {
       trace_end_state_restore(&batch->trace, ring);
    }
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
index 2f626114e7e..ced8d123c88 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
@@ -1552,25 +1552,6 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
    trace_end_clear_restore(&batch->trace, ring);
 }
 
-static void
-setup_tess_buffers(struct fd_batch *batch, struct fd_ringbuffer *ring)
-{
-   struct fd_context *ctx = batch->ctx;
-
-   batch->tessfactor_bo = fd_bo_new(ctx->screen->dev, batch->tessfactor_size,
-                                    0, "tessfactor");
-
-   batch->tessparam_bo = fd_bo_new(ctx->screen->dev, batch->tessparam_size,
-                                   0, "tessparam");
-
-   OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
-   OUT_RELOC(ring, batch->tessfactor_bo, 0, 0, 0);
-
-   batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start;
-   OUT_RELOC(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0);
-   OUT_RELOC(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0);
-}
-
 static void
 fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
 {
@@ -1612,9 +1593,6 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
    OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
    emit_marker6(ring, 7);
 
-   if (batch->tessellation)
-      setup_tess_buffers(batch, ring);
-
    OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
    OUT_RING(ring, 0x0);
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index e393317a72c..e568f6a5f14 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -1206,6 +1206,7 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
                    const struct ir3_shader_key *key) in_dt
 {
    struct fd_context *ctx = fd_context(data);
+   struct fd_screen *screen = ctx->screen;
    struct fd6_program_state *state = CALLOC_STRUCT(fd6_program_state);
 
    tc_assert_driver_thread(ctx->tc);
@@ -1233,6 +1234,19 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
    }
 #endif
 
+   if (hs) {
+      /* Allocate the fixed-size tess BO (factor + param regions) globally on
+       * the screen.  This lets the program (which ideally we would have
+       * shared across contexts, though the current ir3_cache impl doesn't do
+       * that) bake in the addresses.
+       */
+      fd_screen_lock(screen);
+      if (!screen->tess_bo)
+         screen->tess_bo =
+            fd_bo_new(screen->dev, FD6_TESS_BO_SIZE, 0, "tessfactor");
+      fd_screen_unlock(screen);
+   }
+
    setup_config_stateobj(ctx, state);
    setup_stateobj(state->binning_stateobj, ctx, state, key, true);
    setup_stateobj(state->stateobj, ctx, state, key, false);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 6bc1e06969b..9dc4f07b4c0 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -178,12 +178,6 @@ cleanup_submit(struct fd_batch *batch)
       batch->tile_fini = NULL;
    }
 
-   if (batch->tessellation) {
-      fd_bo_del(batch->tessfactor_bo);
-      fd_bo_del(batch->tessparam_bo);
-      fd_ringbuffer_del(batch->tess_addrs_constobj);
-   }
-
    fd_submit_del(batch->submit);
    batch->submit = NULL;
 }
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index f85ff823852..7c8b9e719a1 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -255,18 +255,6 @@ struct fd_batch {
 
    /** set of dependent batches.. holds refs to dependent batches: */
    uint32_t dependents_mask;
-
-   /* Buffer for tessellation engine input
-    */
-   struct fd_bo *tessfactor_bo;
-   uint32_t tessfactor_size;
-
-   /* Buffer for passing parameters between TCS and TES
-    */
-   struct fd_bo *tessparam_bo;
-   uint32_t tessparam_size;
-
-   struct fd_ringbuffer *tess_addrs_constobj;
 };
 
 struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 7141db9138a..ea0ef8ea8df 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -144,6 +144,9 @@ fd_screen_destroy(struct pipe_screen *pscreen)
 {
    struct fd_screen *screen = fd_screen(pscreen);
 
+   if (screen->tess_bo)
+      fd_bo_del(screen->tess_bo);
+
    if (screen->pipe)
       fd_pipe_del(screen->pipe);
 
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index ca15d3e4a5f..f3c8bb9b535 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -149,6 +149,12 @@ struct fd_screen {
 
    struct renderonly *ro;
 
+   /* the blob seems to always use 8K factor and 128K param sizes, copy them */
+#define FD6_TESS_FACTOR_SIZE (8 * 1024)
+#define FD6_TESS_PARAM_SIZE (128 * 1024)
+#define FD6_TESS_BO_SIZE (FD6_TESS_FACTOR_SIZE + FD6_TESS_PARAM_SIZE)
+   struct fd_bo *tess_bo;
+
    /* table with PIPE_PRIM_MAX+1 entries mapping PIPE_PRIM_x to
     * DI_PT_x value to use for draw initiator.  There are some
     * slight differences between generation.



More information about the mesa-commit mailing list