Mesa (main): turnip: Split tu6_emit_xs()

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jul 13 14:57:39 UTC 2021


Module: Mesa
Branch: main
Commit: 4e802538e7e610cf988ba6d8a91458ff78f214bd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4e802538e7e610cf988ba6d8a91458ff78f214bd

Author: Rob Clark <robdclark at chromium.org>
Date:   Mon Jul 12 12:00:38 2021 -0700

turnip: Split tu6_emit_xs()

Emit all the state layout config (such as push-const CONSTLEN) first,
before emitting anything that depends on that state.  This fixes an
issue that was showing up when FLUT is enabled in ir3 (which results
in higher probability of not having any immediates lowered to push-
consts).

Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8705>

---

 src/freedreno/vulkan/tu_clear_blit.c |  13 ++-
 src/freedreno/vulkan/tu_cmd_buffer.c |   4 +-
 src/freedreno/vulkan/tu_pipeline.c   | 206 ++++++++++++++++++++++-------------
 src/freedreno/vulkan/tu_private.h    |  13 ++-
 4 files changed, 149 insertions(+), 87 deletions(-)

diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index 757daf92fd4..bb3fb8298f0 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -469,12 +469,15 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
          .gfx_bindless = 0x1f,
          .cs_bindless = 0x1f));
 
+   tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs);
+   tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL);
+   tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL);
+   tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL);
+   tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs);
+
    struct tu_pvtmem_config pvtmem = {};
-   tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, &pvtmem, global_iova(cmd, shaders[GLOBAL_SH_VS]));
-   tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, &pvtmem, 0);
-   tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL, &pvtmem, 0);
-   tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL, &pvtmem, 0);
-   tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs, &pvtmem, global_iova(cmd, shaders[fs_id]));
+   tu6_emit_xs(cs, MESA_SHADER_VERTEX, &vs, &pvtmem, global_iova(cmd, shaders[GLOBAL_SH_VS]));
+   tu6_emit_xs(cs, MESA_SHADER_FRAGMENT, &fs, &pvtmem, global_iova(cmd, shaders[fs_id]));
 
    tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0());
    tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0());
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 7a3e7207091..c42dd37730c 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -2121,7 +2121,8 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
       struct tu_cs *cs = &cmd->draw_cs;
       uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT);
 
-      tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (6 + util_bitcount(mask)));
+      tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (7 + util_bitcount(mask)));
+      tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_CONFIG, pipeline->program.config_state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state);
@@ -3719,6 +3720,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
    if (cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE) {
       tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2));
 
+      tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_CONFIG, pipeline->program.config_state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_TESS, tess_consts);
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index f1135506b2c..9b4f03436fb 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -330,63 +330,69 @@ tu_blend_state_is_dual_src(const VkPipelineColorBlendStateCreateInfo *info)
    return false;
 }
 
+static const struct xs_config {
+   uint16_t reg_sp_xs_ctrl;
+   uint16_t reg_sp_xs_config;
+   uint16_t reg_sp_xs_instrlen;
+   uint16_t reg_hlsq_xs_ctrl;
+   uint16_t reg_sp_xs_first_exec_offset;
+   uint16_t reg_sp_xs_pvt_mem_hw_stack_offset;
+} xs_config[] = {
+   [MESA_SHADER_VERTEX] = {
+      REG_A6XX_SP_VS_CTRL_REG0,
+      REG_A6XX_SP_VS_CONFIG,
+      REG_A6XX_SP_VS_INSTRLEN,
+      REG_A6XX_HLSQ_VS_CNTL,
+      REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET,
+      REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET,
+   },
+   [MESA_SHADER_TESS_CTRL] = {
+      REG_A6XX_SP_HS_CTRL_REG0,
+      REG_A6XX_SP_HS_CONFIG,
+      REG_A6XX_SP_HS_INSTRLEN,
+      REG_A6XX_HLSQ_HS_CNTL,
+      REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET,
+      REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET,
+   },
+   [MESA_SHADER_TESS_EVAL] = {
+      REG_A6XX_SP_DS_CTRL_REG0,
+      REG_A6XX_SP_DS_CONFIG,
+      REG_A6XX_SP_DS_INSTRLEN,
+      REG_A6XX_HLSQ_DS_CNTL,
+      REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET,
+      REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET,
+   },
+   [MESA_SHADER_GEOMETRY] = {
+      REG_A6XX_SP_GS_CTRL_REG0,
+      REG_A6XX_SP_GS_CONFIG,
+      REG_A6XX_SP_GS_INSTRLEN,
+      REG_A6XX_HLSQ_GS_CNTL,
+      REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET,
+      REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET,
+   },
+   [MESA_SHADER_FRAGMENT] = {
+      REG_A6XX_SP_FS_CTRL_REG0,
+      REG_A6XX_SP_FS_CONFIG,
+      REG_A6XX_SP_FS_INSTRLEN,
+      REG_A6XX_HLSQ_FS_CNTL,
+      REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET,
+      REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET,
+   },
+   [MESA_SHADER_COMPUTE] = {
+      REG_A6XX_SP_CS_CTRL_REG0,
+      REG_A6XX_SP_CS_CONFIG,
+      REG_A6XX_SP_CS_INSTRLEN,
+      REG_A6XX_HLSQ_CS_CNTL,
+      REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET,
+      REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET,
+   },
+};
+
 void
 tu6_emit_xs_config(struct tu_cs *cs,
                    gl_shader_stage stage, /* xs->type, but xs may be NULL */
-                   const struct ir3_shader_variant *xs,
-                   const struct tu_pvtmem_config *pvtmem,
-                   uint64_t binary_iova)
+                   const struct ir3_shader_variant *xs)
 {
-   static const struct xs_config {
-      uint16_t reg_sp_xs_ctrl;
-      uint16_t reg_sp_xs_config;
-      uint16_t reg_hlsq_xs_ctrl;
-      uint16_t reg_sp_xs_first_exec_offset;
-      uint16_t reg_sp_xs_pvt_mem_hw_stack_offset;
-   } xs_config[] = {
-      [MESA_SHADER_VERTEX] = {
-         REG_A6XX_SP_VS_CTRL_REG0,
-         REG_A6XX_SP_VS_CONFIG,
-         REG_A6XX_HLSQ_VS_CNTL,
-         REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET,
-         REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET,
-      },
-      [MESA_SHADER_TESS_CTRL] = {
-         REG_A6XX_SP_HS_CTRL_REG0,
-         REG_A6XX_SP_HS_CONFIG,
-         REG_A6XX_HLSQ_HS_CNTL,
-         REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET,
-         REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET,
-      },
-      [MESA_SHADER_TESS_EVAL] = {
-         REG_A6XX_SP_DS_CTRL_REG0,
-         REG_A6XX_SP_DS_CONFIG,
-         REG_A6XX_HLSQ_DS_CNTL,
-         REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET,
-         REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET,
-      },
-      [MESA_SHADER_GEOMETRY] = {
-         REG_A6XX_SP_GS_CTRL_REG0,
-         REG_A6XX_SP_GS_CONFIG,
-         REG_A6XX_HLSQ_GS_CNTL,
-         REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET,
-         REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET,
-      },
-      [MESA_SHADER_FRAGMENT] = {
-         REG_A6XX_SP_FS_CTRL_REG0,
-         REG_A6XX_SP_FS_CONFIG,
-         REG_A6XX_HLSQ_FS_CNTL,
-         REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET,
-         REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET,
-      },
-      [MESA_SHADER_COMPUTE] = {
-         REG_A6XX_SP_CS_CTRL_REG0,
-         REG_A6XX_SP_CS_CONFIG,
-         REG_A6XX_HLSQ_CS_CNTL,
-         REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET,
-         REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET,
-      },
-   };
    const struct xs_config *cfg = &xs_config[stage];
 
    if (!xs) {
@@ -399,6 +405,34 @@ tu6_emit_xs_config(struct tu_cs *cs,
       return;
    }
 
+   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 1);
+   tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED |
+                  COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
+                  COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
+                  COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) |
+                  COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
+                  A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) |
+                  A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp));
+
+   tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
+   tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
+                  A6XX_HLSQ_VS_CNTL_ENABLED);
+}
+
+void
+tu6_emit_xs(struct tu_cs *cs,
+            gl_shader_stage stage, /* xs->type, but xs may be NULL */
+            const struct ir3_shader_variant *xs,
+            const struct tu_pvtmem_config *pvtmem,
+            uint64_t binary_iova)
+{
+   const struct xs_config *cfg = &xs_config[stage];
+
+   if (!xs) {
+      /* shader stage disabled */
+      return;
+   }
+
    enum a6xx_threadsize thrsz =
       xs->info.double_threadsize ? THREAD128 : THREAD64;
    switch (stage) {
@@ -459,20 +493,9 @@ tu6_emit_xs_config(struct tu_cs *cs,
       unreachable("bad shader stage");
    }
 
-   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 2);
-   tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED |
-                  COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
-                  COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
-                  COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) |
-                  COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
-                  A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) |
-                  A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp));
+   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_instrlen, 1);
    tu_cs_emit(cs, xs->instrlen);
 
-   tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
-   tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
-                  A6XX_HLSQ_VS_CNTL_ENABLED);
-
    /* emit program binary & private memory layout
     * binary_iova should be aligned to 1 instrlen unit (128 bytes)
     */
@@ -578,7 +601,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
          .cs_state = true,
          .cs_ibo = true));
 
-   tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
+   tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v);
+   tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
 
    uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
@@ -1523,6 +1547,26 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
    }
 }
 
+static void
+tu6_emit_program_config(struct tu_cs *cs,
+                        struct tu_pipeline_builder *builder)
+{
+   gl_shader_stage stage = MESA_SHADER_VERTEX;
+
+   STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
+
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .gfx_ibo = true));
+   for (; stage < ARRAY_SIZE(builder->shaders); stage++) {
+      tu6_emit_xs_config(cs, stage, builder->variants[stage]);
+   }
+}
+
 static void
 tu6_emit_program(struct tu_cs *cs,
                  struct tu_pipeline_builder *builder,
@@ -1540,22 +1584,12 @@ tu6_emit_program(struct tu_cs *cs,
       builder->create_info->pTessellationState->patchControlPoints : 0;
    bool multi_pos_output = builder->shaders[MESA_SHADER_VERTEX]->multi_pos_output;
 
-   STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
-
-   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
-         .vs_state = true,
-         .hs_state = true,
-         .ds_state = true,
-         .gs_state = true,
-         .fs_state = true,
-         .gfx_ibo = true));
-
   /* Don't use the binning pass variant when GS is present because we don't
    * support compiling correct binning pass variants with GS.
    */
    if (binning_pass && !gs) {
       vs = bs;
-      tu6_emit_xs_config(cs, stage, bs, &builder->pvtmem, builder->binning_vs_iova);
+      tu6_emit_xs(cs, stage, bs, &builder->pvtmem, builder->binning_vs_iova);
       stage++;
    }
 
@@ -1565,7 +1599,7 @@ tu6_emit_program(struct tu_cs *cs,
       if (stage == MESA_SHADER_FRAGMENT && binning_pass)
          fs = xs = NULL;
 
-      tu6_emit_xs_config(cs, stage, xs, &builder->pvtmem, builder->shader_iova[stage]);
+      tu6_emit_xs(cs, stage, xs, &builder->pvtmem, builder->shader_iova[stage]);
    }
 
    uint32_t multiview_views = util_logbase2(builder->multiview_mask) + 1;
@@ -2456,6 +2490,22 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
                                         struct tu_pipeline *pipeline)
 {
    struct tu_cs prog_cs;
+
+   /* Emit HLSQ_xS_CNTL/HLSQ_SP_xS_CONFIG *first*, before emitting anything
+    * else that could depend on that state (like push constants)
+    *
+    * Note also that this always uses the full VS even in binning pass.  The
+    * binning pass variant has the same const layout as the full VS, and
+    * the constlen for the VS will be the same or greater than the constlen
+    * for the binning pass variant.  It is required that the constlen state
+    * matches between binning and draw passes, as some parts of the push
+    * consts are emitted in state groups that are shared between the binning
+    * and draw passes.
+    */
+   tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
+   tu6_emit_program_config(&prog_cs, builder);
+   pipeline->program.config_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
+
    tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
    tu6_emit_program(&prog_cs, builder, false, pipeline);
    pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 6517166a935..aa04a651c67 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -498,6 +498,7 @@ enum tu_dynamic_state
 
 enum tu_draw_state_group_id
 {
+   TU_DRAW_STATE_PROGRAM_CONFIG,
    TU_DRAW_STATE_PROGRAM,
    TU_DRAW_STATE_PROGRAM_BINNING,
    TU_DRAW_STATE_TESS,
@@ -1159,6 +1160,7 @@ struct tu_pipeline
 
    struct
    {
+      struct tu_draw_state config_state;
       struct tu_draw_state state;
       struct tu_draw_state binning_state;
 
@@ -1235,9 +1237,14 @@ struct tu_pvtmem_config {
 void
 tu6_emit_xs_config(struct tu_cs *cs,
                    gl_shader_stage stage,
-                   const struct ir3_shader_variant *xs,
-                   const struct tu_pvtmem_config *pvtmem,
-                   uint64_t binary_iova);
+                   const struct ir3_shader_variant *xs);
+
+void
+tu6_emit_xs(struct tu_cs *cs,
+            gl_shader_stage stage,
+            const struct ir3_shader_variant *xs,
+            const struct tu_pvtmem_config *pvtmem,
+            uint64_t binary_iova);
 
 void
 tu6_emit_vpc(struct tu_cs *cs,



More information about the mesa-commit mailing list