Mesa (master): turnip: refactor draw states and dynamic states

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jun 17 15:47:37 UTC 2020


Module: Mesa
Branch: master
Commit: 233610f8cf8d8810173a7eac0c046574aeb485f9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=233610f8cf8d8810173a7eac0c046574aeb485f9

Author: Jonathan Marek <jonathan at marek.ca>
Date:   Sun Jun 14 10:52:37 2020 -0400

turnip: refactor draw states and dynamic states

This reworks dynamic states to use draw states, and reworks draw states.

This moves towards doing as little as possible in bind_draw_states.

Signed-off-by: Jonathan Marek <jonathan at marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5446>

---

 src/freedreno/vulkan/tu_clear_blit.c |  18 +-
 src/freedreno/vulkan/tu_cmd_buffer.c | 457 +++++++++++++++++------------------
 src/freedreno/vulkan/tu_pipeline.c   | 249 +++++++------------
 src/freedreno/vulkan/tu_private.h    | 157 +++++-------
 4 files changed, 381 insertions(+), 500 deletions(-)

diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index 54d12c5bfb0..ab35bc60f14 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -2081,6 +2081,17 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
    /* This clear path behaves like a draw, needs the same flush as tu_draw */
    tu_emit_cache_flush_renderpass(cmd, cs);
 
+   /* disable all draw states so they don't interfere
+    * TODO: use and re-use draw states for this path
+    */
+   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
+                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+   cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE;
+
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
    tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
                   A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
@@ -2167,13 +2178,6 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
          }
       }
    }
-
-   cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE |
-      TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
-      TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
-      TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |
-      TU_CMD_DIRTY_DYNAMIC_VIEWPORT |
-      TU_CMD_DIRTY_DYNAMIC_SCISSOR;
 }
 
 /**
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index b050550cddb..affd1b3e9db 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -686,6 +686,58 @@ tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1)
                    A6XX_SP_TP_WINDOW_OFFSET(.x = x1, .y = y1));
 }
 
+static void
+tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
+{
+   uint32_t enable_mask;
+   switch (id) {
+   case TU_DRAW_STATE_PROGRAM:
+   case TU_DRAW_STATE_VI:
+   case TU_DRAW_STATE_FS_CONST:
+   /* The blob seems to not enable this (DESC_SETS_LOAD) for binning, even
+    * when resources would actually be used in the binning shader.
+    * Presumably the overhead of prefetching the resources isn't
+    * worth it.
+    */
+   case TU_DRAW_STATE_DESC_SETS_LOAD:
+      enable_mask = CP_SET_DRAW_STATE__0_GMEM |
+                    CP_SET_DRAW_STATE__0_SYSMEM;
+      break;
+   case TU_DRAW_STATE_PROGRAM_BINNING:
+   case TU_DRAW_STATE_VI_BINNING:
+      enable_mask = CP_SET_DRAW_STATE__0_BINNING;
+      break;
+   case TU_DRAW_STATE_DESC_SETS_GMEM:
+      enable_mask = CP_SET_DRAW_STATE__0_GMEM;
+      break;
+   case TU_DRAW_STATE_DESC_SETS_SYSMEM:
+      enable_mask = CP_SET_DRAW_STATE__0_BINNING |
+                    CP_SET_DRAW_STATE__0_SYSMEM;
+      break;
+   default:
+      enable_mask = CP_SET_DRAW_STATE__0_GMEM |
+                    CP_SET_DRAW_STATE__0_SYSMEM |
+                    CP_SET_DRAW_STATE__0_BINNING;
+      break;
+   }
+
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(state.size) |
+                  enable_mask |
+                  CP_SET_DRAW_STATE__0_GROUP_ID(id) |
+                  COND(!state.size, CP_SET_DRAW_STATE__0_DISABLE));
+   tu_cs_emit_qw(cs, state.iova);
+}
+
+/* note: get rid of this eventually */
+static void
+tu_cs_emit_sds_ib(struct tu_cs *cs, uint32_t id, struct tu_cs_entry entry)
+{
+   tu_cs_emit_draw_state(cs, id, (struct tu_draw_state) {
+      .iova = entry.size ? entry.bo->iova + entry.offset : 0,
+      .size = entry.size / 4,
+   });
+}
+
 static bool
 use_hw_binning(struct tu_cmd_buffer *cmd)
 {
@@ -1987,6 +2039,28 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
    return cmd_buffer->record_result;
 }
 
+static struct tu_cs
+tu_cmd_dynamic_state(struct tu_cmd_buffer *cmd, uint32_t id, uint32_t size)
+{
+   struct ts_cs_memory memory;
+   struct tu_cs cs;
+
+   /* TODO: share this logic with tu_pipeline_static_state */
+   tu_cs_alloc(&cmd->sub_cs, size, 1, &memory);
+   tu_cs_init_external(&cs, memory.map, memory.map + size);
+   tu_cs_begin(&cs);
+   tu_cs_reserve_space(&cs, size);
+
+   assert(id < ARRAY_SIZE(cmd->state.dynamic_state));
+   cmd->state.dynamic_state[id].iova = memory.iova;
+   cmd->state.dynamic_state[id].size = size;
+
+   tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
+   tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DYNAMIC + id, cmd->state.dynamic_state[id]);
+
+   return cs;
+}
+
 void
 tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
                    VkPipelineBindPoint pipelineBindPoint,
@@ -2011,7 +2085,23 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS);
 
    cmd->state.pipeline = pipeline;
-   cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_SHADER_CONSTS;
+   cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
+
+   struct tu_cs *cs = &cmd->draw_cs;
+   uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT);
+   uint32_t i;
+
+   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (7 + util_bitcount(mask)));
+   tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state_ib);
+   tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state_ib);
+   tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI, pipeline->vi.state_ib);
+   tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state_ib);
+   tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib);
+   tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DS, pipeline->ds.state_ib);
+   tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_BLEND, pipeline->blend.state_ib);
+
+   for_each_bit(i, mask)
+      tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]);
 
    /* If the new pipeline requires more VBs than we had previously set up, we
     * need to re-emit them in SDS.  If it requires the same set or fewer, we
@@ -2023,6 +2113,18 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    /* If the pipeline needs a dynamic descriptor, re-emit descriptor sets */
    if (pipeline->layout->dynamic_offset_count + pipeline->layout->input_attachment_count)
       cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS;
+
+   /* dynamic linewidth state depends on the pipeline state's gras_su_cntl
+    * so the dynamic state ib must be updated when pipeline changes
+    */
+   if (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_LINE_WIDTH)) {
+      struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2);
+
+      cmd->state.dynamic_gras_su_cntl &= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
+      cmd->state.dynamic_gras_su_cntl |= pipeline->gras_su_cntl;
+
+      tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl));
+   }
 }
 
 void
@@ -2032,10 +2134,11 @@ tu_CmdSetViewport(VkCommandBuffer commandBuffer,
                   const VkViewport *pViewports)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 18);
 
    assert(firstViewport == 0 && viewportCount == 1);
-   cmd->state.dynamic.viewport.viewports[0] = pViewports[0];
-   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_VIEWPORT;
+
+   tu6_emit_viewport(&cs, pViewports);
 }
 
 void
@@ -2045,21 +2148,23 @@ tu_CmdSetScissor(VkCommandBuffer commandBuffer,
                  const VkRect2D *pScissors)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_SCISSOR, 3);
 
    assert(firstScissor == 0 && scissorCount == 1);
-   cmd->state.dynamic.scissor.scissors[0] = pScissors[0];
-   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_SCISSOR;
+
+   tu6_emit_scissor(&cs, pScissors);
 }
 
 void
 tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2);
 
-   cmd->state.dynamic.line_width = lineWidth;
+   cmd->state.dynamic_gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
+   cmd->state.dynamic_gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f);
 
-   /* line width depends on VkPipelineRasterizationStateCreateInfo */
-   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
+   tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl));
 }
 
 void
@@ -2069,12 +2174,9 @@ tu_CmdSetDepthBias(VkCommandBuffer commandBuffer,
                    float depthBiasSlopeFactor)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs *draw_cs = &cmd->draw_cs;
+   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_DEPTH_BIAS, 4);
 
-   tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp,
-                       depthBiasSlopeFactor);
-
-   tu_cs_sanity_check(draw_cs);
+   tu6_emit_depth_bias(&cs, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
 }
 
 void
@@ -2082,11 +2184,10 @@ tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
                         const float blendConstants[4])
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs *draw_cs = &cmd->draw_cs;
-
-   tu6_emit_blend_constants(draw_cs, blendConstants);
+   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5);
 
-   tu_cs_sanity_check(draw_cs);
+   tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4);
+   tu_cs_emit_array(&cs, (const uint32_t *) blendConstants, 4);
 }
 
 void
@@ -2096,20 +2197,26 @@ tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
 {
 }
 
+static void
+update_stencil_mask(uint32_t *value, VkStencilFaceFlags face, uint32_t mask)
+{
+   if (face & VK_STENCIL_FACE_FRONT_BIT) /* clear the old front field first: OR alone keeps stale bits */
+      *value = (*value & ~A6XX_RB_STENCILMASK_MASK__MASK) | A6XX_RB_STENCILMASK_MASK(mask);
+   if (face & VK_STENCIL_FACE_BACK_BIT) /* same for the back-face field */
+      *value = (*value & ~A6XX_RB_STENCILMASK_BFMASK__MASK) | A6XX_RB_STENCILMASK_BFMASK(mask);
+}
+
 void
 tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
                             VkStencilFaceFlags faceMask,
                             uint32_t compareMask)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2);
 
-   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
-      cmd->state.dynamic.stencil_compare_mask.front = compareMask;
-   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
-      cmd->state.dynamic.stencil_compare_mask.back = compareMask;
+   update_stencil_mask(&cmd->state.dynamic_stencil_mask, faceMask, compareMask);
 
-   /* the front/back compare masks must be updated together */
-   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
+   tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.dword = cmd->state.dynamic_stencil_mask));
 }
 
 void
@@ -2118,14 +2225,11 @@ tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
                           uint32_t writeMask)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2);
 
-   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
-      cmd->state.dynamic.stencil_write_mask.front = writeMask;
-   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
-      cmd->state.dynamic.stencil_write_mask.back = writeMask;
+   update_stencil_mask(&cmd->state.dynamic_stencil_wrmask, faceMask, writeMask);
 
-   /* the front/back write masks must be updated together */
-   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
+   tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.dword = cmd->state.dynamic_stencil_wrmask));
 }
 
 void
@@ -2134,14 +2238,11 @@ tu_CmdSetStencilReference(VkCommandBuffer commandBuffer,
                           uint32_t reference)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2);
 
-   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
-      cmd->state.dynamic.stencil_reference.front = reference;
-   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
-      cmd->state.dynamic.stencil_reference.back = reference;
+   update_stencil_mask(&cmd->state.dynamic_stencil_ref, faceMask, reference);
 
-   /* the front/back references must be updated together */
-   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
+   tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.dword = cmd->state.dynamic_stencil_ref));
 }
 
 void
@@ -2149,8 +2250,11 @@ tu_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
                             const VkSampleLocationsInfoEXT* pSampleLocationsInfo)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, 9);
 
-   tu6_emit_sample_locations(&cmd->draw_cs, pSampleLocationsInfo);
+   assert(pSampleLocationsInfo);
+
+   tu6_emit_sample_locations(&cs, pSampleLocationsInfo);
 }
 
 static void
@@ -2578,6 +2682,8 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
       tu_bo_list_add(&cmd->bo_list, iview->image->bo,
                      MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
    }
+
+   cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE;
 }
 
 void
@@ -2701,39 +2807,6 @@ struct tu_draw_info
    uint64_t streamout_buffer_offset;
 };
 
-#define ENABLE_ALL (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)
-#define ENABLE_DRAW (CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)
-#define ENABLE_NON_GMEM (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_SYSMEM)
-
-enum tu_draw_state_group_id
-{
-   TU_DRAW_STATE_PROGRAM,
-   TU_DRAW_STATE_PROGRAM_BINNING,
-   TU_DRAW_STATE_VB,
-   TU_DRAW_STATE_VI,
-   TU_DRAW_STATE_VI_BINNING,
-   TU_DRAW_STATE_VP,
-   TU_DRAW_STATE_RAST,
-   TU_DRAW_STATE_DS,
-   TU_DRAW_STATE_BLEND,
-   TU_DRAW_STATE_VS_CONST,
-   TU_DRAW_STATE_GS_CONST,
-   TU_DRAW_STATE_FS_CONST,
-   TU_DRAW_STATE_DESC_SETS,
-   TU_DRAW_STATE_DESC_SETS_GMEM,
-   TU_DRAW_STATE_DESC_SETS_LOAD,
-   TU_DRAW_STATE_VS_PARAMS,
-
-   TU_DRAW_STATE_COUNT,
-};
-
-struct tu_draw_state_group
-{
-   enum tu_draw_state_group_id id;
-   uint32_t enable_mask;
-   struct tu_cs_entry ib;
-};
-
 static void
 tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
                      struct tu_descriptor_state *descriptors_state,
@@ -3088,9 +3161,6 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
                      const struct tu_draw_info *draw)
 {
    const struct tu_pipeline *pipeline = cmd->state.pipeline;
-   const struct tu_dynamic_state *dynamic = &cmd->state.dynamic;
-   struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT];
-   uint32_t draw_state_group_count = 0;
    VkResult result;
 
    struct tu_descriptor_state *descriptors_state =
@@ -3102,120 +3172,13 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
                    A6XX_PC_PRIMITIVE_CNTL_0(.primitive_restart =
                                             pipeline->ia.primitive_restart && draw->indexed));
 
-   if (cmd->state.dirty &
-          (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) &&
-       (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) {
-      tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl,
-                            dynamic->line_width);
-   }
-
-   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) &&
-       (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
-      tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front,
-                                    dynamic->stencil_compare_mask.back);
-   }
-
-   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) &&
-       (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
-      tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front,
-                                  dynamic->stencil_write_mask.back);
-   }
-
-   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) &&
-       (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
-      tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front,
-                                 dynamic->stencil_reference.back);
-   }
-
-   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_VIEWPORT) &&
-       (pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) {
-      tu6_emit_viewport(cs, &cmd->state.dynamic.viewport.viewports[0]);
-   }
-
-   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_SCISSOR) &&
-       (pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) {
-      tu6_emit_scissor(cs, &cmd->state.dynamic.scissor.scissors[0]);
-   }
-
-   if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) {
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_PROGRAM,
-            .enable_mask = ENABLE_DRAW,
-            .ib = pipeline->program.state_ib,
-         };
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_PROGRAM_BINNING,
-            .enable_mask = CP_SET_DRAW_STATE__0_BINNING,
-            .ib = pipeline->program.binning_state_ib,
-         };
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_VI,
-            .enable_mask = ENABLE_DRAW,
-            .ib = pipeline->vi.state_ib,
-         };
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_VI_BINNING,
-            .enable_mask = CP_SET_DRAW_STATE__0_BINNING,
-            .ib = pipeline->vi.binning_state_ib,
-         };
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_VP,
-            .enable_mask = ENABLE_ALL,
-            .ib = pipeline->vp.state_ib,
-         };
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_RAST,
-            .enable_mask = ENABLE_ALL,
-            .ib = pipeline->rast.state_ib,
-         };
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_DS,
-            .enable_mask = ENABLE_ALL,
-            .ib = pipeline->ds.state_ib,
-         };
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_BLEND,
-            .enable_mask = ENABLE_ALL,
-            .ib = pipeline->blend.state_ib,
-         };
-   }
-
    if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_VS_CONST,
-            .enable_mask = ENABLE_ALL,
-            .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX)
-         };
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_GS_CONST,
-            .enable_mask = ENABLE_ALL,
-            .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_GEOMETRY)
-         };
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_FS_CONST,
-            .enable_mask = ENABLE_DRAW,
-            .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT)
-         };
-   }
-
-   if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) {
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_VB,
-            .enable_mask = ENABLE_ALL,
-            .ib = tu6_emit_vertex_buffers(cmd, pipeline)
-         };
+      cmd->state.shader_const_ib[MESA_SHADER_VERTEX] =
+         tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX);
+      cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY] =
+         tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_GEOMETRY);
+      cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT] =
+         tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT);
    }
 
    if (cmd->state.dirty & TU_CMD_DIRTY_STREAMOUT_BUFFERS)
@@ -3234,35 +3197,26 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
     * could also only re-emit dynamic state.
     */
    if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
-      struct tu_cs_entry desc_sets, desc_sets_gmem;
       bool need_gmem_desc_set = pipeline->layout->input_attachment_count > 0;
 
       result = tu6_emit_descriptor_sets(cmd, pipeline,
                                         VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                        &desc_sets, false);
+                                        &cmd->state.desc_sets_ib, false);
       if (result != VK_SUCCESS)
          return result;
 
-      draw_state_groups[draw_state_group_count++] =
-         (struct tu_draw_state_group) {
-            .id = TU_DRAW_STATE_DESC_SETS,
-            .enable_mask = need_gmem_desc_set ? ENABLE_NON_GMEM : ENABLE_ALL,
-            .ib = desc_sets,
-         };
-
       if (need_gmem_desc_set) {
+         cmd->state.desc_sets_sysmem_ib = cmd->state.desc_sets_ib;
+         cmd->state.desc_sets_ib.size = 0;
+
          result = tu6_emit_descriptor_sets(cmd, pipeline,
                                            VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                           &desc_sets_gmem, true);
+                                            &cmd->state.desc_sets_gmem_ib, true);
          if (result != VK_SUCCESS)
             return result;
-
-         draw_state_groups[draw_state_group_count++] =
-            (struct tu_draw_state_group) {
-               .id = TU_DRAW_STATE_DESC_SETS_GMEM,
-               .enable_mask = CP_SET_DRAW_STATE__0_GMEM,
-               .ib = desc_sets_gmem,
-            };
+      } else {
+         cmd->state.desc_sets_gmem_ib.size = 0;
+         cmd->state.desc_sets_sysmem_ib.size = 0;
       }
 
       /* We need to reload the descriptors every time the descriptor sets
@@ -3286,52 +3240,79 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
          tu_cs_emit_array(&load_cs,
                           (uint32_t *)((char  *)load_entry->bo->map + load_entry->offset),
                           load_entry->size / 4);
-         struct tu_cs_entry load_copy = tu_cs_end_sub_stream(&cmd->sub_cs, &load_cs);
-
-         draw_state_groups[draw_state_group_count++] =
-            (struct tu_draw_state_group) {
-               .id = TU_DRAW_STATE_DESC_SETS_LOAD,
-               /* The blob seems to not enable this for binning, even when
-                * resources would actually be used in the binning shader.
-                * Presumably the overhead of prefetching the resources isn't
-                * worth it.
-                */
-               .enable_mask = ENABLE_DRAW,
-               .ib = load_copy,
-            };
+         cmd->state.desc_sets_load_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &load_cs);
+      } else {
+         cmd->state.desc_sets_load_ib.size = 0;
       }
    }
 
+   if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
+      cmd->state.vertex_buffers_ib = tu6_emit_vertex_buffers(cmd, pipeline);
+
    struct tu_cs_entry vs_params;
    result = tu6_emit_vs_params(cmd, draw, &vs_params);
    if (result != VK_SUCCESS)
       return result;
 
-   draw_state_groups[draw_state_group_count++] =
-      (struct tu_draw_state_group) {
-         .id = TU_DRAW_STATE_VS_PARAMS,
-         .enable_mask = ENABLE_ALL,
-         .ib = vs_params,
-      };
-
-   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count);
-   for (uint32_t i = 0; i < draw_state_group_count; i++) {
-      const struct tu_draw_state_group *group = &draw_state_groups[i];
-      debug_assert((group->enable_mask & ~ENABLE_ALL) == 0);
-      uint32_t cp_set_draw_state =
-         CP_SET_DRAW_STATE__0_COUNT(group->ib.size / 4) |
-         group->enable_mask |
-         CP_SET_DRAW_STATE__0_GROUP_ID(group->id);
-      uint64_t iova;
-      if (group->ib.size) {
-         iova = group->ib.bo->iova + group->ib.offset;
-      } else {
-         cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE;
-         iova = 0;
+   /* for the first draw in a renderpass, re-emit all the draw states
+    *
+    * and if a draw-state disabling path (CmdClearAttachments 3D fallback) was
+    * used, then draw states must be re-emitted. note however this only happens
+    * in the sysmem path, so this can be skipped for the gmem path (TODO)
+    */
+   if (cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE) {
+      tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * TU_DRAW_STATE_COUNT);
+
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI, pipeline->vi.state_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DS, pipeline->ds.state_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_BLEND, pipeline->blend.state_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const_ib[MESA_SHADER_VERTEX]);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_GMEM, cmd->state.desc_sets_gmem_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_SYSMEM, cmd->state.desc_sets_sysmem_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
+
+      for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.dynamic_state); i++) {
+         tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i,
+                               ((pipeline->dynamic_state_mask & BIT(i)) ?
+                                cmd->state.dynamic_state[i] :
+                                pipeline->dynamic_state[i]));
       }
+   } else {
 
-      tu_cs_emit(cs, cp_set_draw_state);
-      tu_cs_emit_qw(cs, iova);
+      /* emit draw states that were just updated
+       * note we eventually don't want to have to emit anything here
+       */
+      uint32_t draw_state_count =
+         ((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 3 : 0) +
+         ((cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) ? 4 : 0) +
+         ((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
+         1; /* vs_params */
+
+         tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count);
+
+         if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
+            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const_ib[MESA_SHADER_VERTEX]);
+            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
+            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
+         }
+         if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
+            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
+            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_GMEM, cmd->state.desc_sets_gmem_ib);
+            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_SYSMEM, cmd->state.desc_sets_sysmem_ib);
+            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
+         }
+         if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
+            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
+         tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
    }
 
    tu_cs_sanity_check(cs);
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index d818f9f807c..e22b301099b 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -281,36 +281,6 @@ struct tu_pipeline_builder
    uint32_t render_components;
 };
 
-static enum tu_dynamic_state_bits
-tu_dynamic_state_bit(VkDynamicState state)
-{
-   switch (state) {
-   case VK_DYNAMIC_STATE_VIEWPORT:
-      return TU_DYNAMIC_VIEWPORT;
-   case VK_DYNAMIC_STATE_SCISSOR:
-      return TU_DYNAMIC_SCISSOR;
-   case VK_DYNAMIC_STATE_LINE_WIDTH:
-      return TU_DYNAMIC_LINE_WIDTH;
-   case VK_DYNAMIC_STATE_DEPTH_BIAS:
-      return TU_DYNAMIC_DEPTH_BIAS;
-   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
-      return TU_DYNAMIC_BLEND_CONSTANTS;
-   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
-      return TU_DYNAMIC_DEPTH_BOUNDS;
-   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
-      return TU_DYNAMIC_STENCIL_COMPARE_MASK;
-   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
-      return TU_DYNAMIC_STENCIL_WRITE_MASK;
-   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
-      return TU_DYNAMIC_STENCIL_REFERENCE;
-   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
-      return TU_DYNAMIC_SAMPLE_LOCATIONS;
-   default:
-      unreachable("invalid dynamic state");
-      return 0;
-   }
-}
-
 static bool
 tu_logic_op_reads_dst(VkLogicOp op)
 {
@@ -1645,22 +1615,6 @@ tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp
    tu_cs_emit(cs, sample_locations);
 }
 
-static void
-tu6_emit_gras_unknowns(struct tu_cs *cs)
-{
-   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1);
-   tu_cs_emit(cs, 0x0);
-}
-
-static void
-tu6_emit_point_size(struct tu_cs *cs)
-{
-   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2);
-   tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) |
-                     A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f));
-   tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f).value);
-}
-
 static uint32_t
 tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
                  VkSampleCountFlagBits samples)
@@ -1686,18 +1640,6 @@ tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
    return gras_su_cntl;
 }
 
-void
-tu6_emit_gras_su_cntl(struct tu_cs *cs,
-                      uint32_t gras_su_cntl,
-                      float line_width)
-{
-   assert((gras_su_cntl & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK) == 0);
-   gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width / 2.0f);
-
-   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_CNTL, 1);
-   tu_cs_emit(cs, gras_su_cntl);
-}
-
 void
 tu6_emit_depth_bias(struct tu_cs *cs,
                     float constant_factor,
@@ -1710,13 +1652,6 @@ tu6_emit_depth_bias(struct tu_cs *cs,
    tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value);
 }
 
-static void
-tu6_emit_alpha_control_disable(struct tu_cs *cs)
-{
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_ALPHA_CONTROL, 1);
-   tu_cs_emit(cs, 0);
-}
-
 static void
 tu6_emit_depth_control(struct tu_cs *cs,
                        const VkPipelineDepthStencilStateCreateInfo *ds_info,
@@ -1768,30 +1703,6 @@ tu6_emit_stencil_control(struct tu_cs *cs,
    tu_cs_emit(cs, rb_stencil_control);
 }
 
-void
-tu6_emit_stencil_compare_mask(struct tu_cs *cs, uint32_t front, uint32_t back)
-{
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILMASK, 1);
-   tu_cs_emit(
-      cs, A6XX_RB_STENCILMASK_MASK(front) | A6XX_RB_STENCILMASK_BFMASK(back));
-}
-
-void
-tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back)
-{
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILWRMASK, 1);
-   tu_cs_emit(cs, A6XX_RB_STENCILWRMASK_WRMASK(front) |
-                     A6XX_RB_STENCILWRMASK_BFWRMASK(back));
-}
-
-void
-tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back)
-{
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILREF, 1);
-   tu_cs_emit(cs,
-              A6XX_RB_STENCILREF_REF(front) | A6XX_RB_STENCILREF_BFREF(back));
-}
-
 static uint32_t
 tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att,
                          bool has_alpha)
@@ -1912,13 +1823,6 @@ tu6_emit_blend_control(struct tu_cs *cs,
                                       .alpha_to_one = msaa_info->alphaToOneEnable));
 }
 
-void
-tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4])
-{
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_RED_F32, 4);
-   tu_cs_emit_array(cs, (const uint32_t *) constants, 4);
-}
-
 static VkResult
 tu_pipeline_create(struct tu_device *dev,
                    struct tu_pipeline_layout *layout,
@@ -2095,8 +1999,18 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
       return;
 
    for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
-      pipeline->dynamic_state.mask |=
-         tu_dynamic_state_bit(dynamic_info->pDynamicStates[i]);
+      VkDynamicState state = dynamic_info->pDynamicStates[i];
+      switch (state) {
+      case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+         pipeline->dynamic_state_mask |= BIT(state);
+         break;
+      case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS);
+         break;
+      default:
+         assert(!"unsupported dynamic state");
+         break;
+      }
    }
 }
 
@@ -2186,6 +2100,27 @@ tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder,
    pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable;
 }
 
+static bool
+tu_pipeline_static_state(struct tu_pipeline *pipeline, struct tu_cs *cs,
+                         uint32_t id, uint32_t size)
+{
+   struct ts_cs_memory memory;
+
+   if (pipeline->dynamic_state_mask & BIT(id))
+      return false;
+
+   /* TODO: share this logic with tu_cmd_dynamic_state */
+   tu_cs_alloc(&pipeline->cs, size, 1, &memory);
+   tu_cs_init_external(cs, memory.map, memory.map + size);
+   tu_cs_begin(cs);
+   tu_cs_reserve_space(cs, size);
+
+   assert(id < ARRAY_SIZE(pipeline->dynamic_state));
+   pipeline->dynamic_state[id].iova = memory.iova;
+   pipeline->dynamic_state[id].size = size;
+   return true;
+}
+
 static void
 tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder,
                                    struct tu_pipeline *pipeline)
@@ -2204,20 +2139,13 @@ tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder,
    const VkPipelineViewportStateCreateInfo *vp_info =
       builder->create_info->pViewportState;
 
-   struct tu_cs vp_cs;
-   tu_cs_begin_sub_stream(&pipeline->cs, 21, &vp_cs);
-
-   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) {
-      assert(vp_info->viewportCount == 1);
-      tu6_emit_viewport(&vp_cs, vp_info->pViewports);
-   }
+   struct tu_cs cs;
 
-   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) {
-      assert(vp_info->scissorCount == 1);
-      tu6_emit_scissor(&vp_cs, vp_info->pScissors);
-   }
+   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_VIEWPORT, 18))
+      tu6_emit_viewport(&cs, vp_info->pViewports);
 
-   pipeline->vp.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vp_cs);
+   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_SCISSOR, 3))
+      tu6_emit_scissor(&cs, vp_info->pScissors);
 }
 
 static void
@@ -2229,11 +2157,10 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
 
    assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL);
 
-   struct tu_cs rast_cs;
-   tu_cs_begin_sub_stream(&pipeline->cs, 20, &rast_cs);
-
+   struct tu_cs cs;
+   tu_cs_begin_sub_stream(&pipeline->cs, 7, &cs);
 
-   tu_cs_emit_regs(&rast_cs,
+   tu_cs_emit_regs(&cs,
                    A6XX_GRAS_CL_CNTL(
                      .znear_clip_disable = rast_info->depthClampEnable,
                      .zfar_clip_disable = rast_info->depthClampEnable,
@@ -2241,24 +2168,28 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
                      .zero_gb_scale_z = 1,
                      .vp_clip_code_ignore = 1));
    /* move to hw ctx init? */
-   tu6_emit_gras_unknowns(&rast_cs);
-   tu6_emit_point_size(&rast_cs);
+   tu_cs_emit_regs(&cs, A6XX_GRAS_UNKNOWN_8001());
+   tu_cs_emit_regs(&cs,
+                   A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f),
+                   A6XX_GRAS_SU_POINT_SIZE(1.0f));
+
+   pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
 
-   const uint32_t gras_su_cntl =
+   pipeline->gras_su_cntl =
       tu6_gras_su_cntl(rast_info, builder->samples);
 
-   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH))
-      tu6_emit_gras_su_cntl(&rast_cs, gras_su_cntl, rast_info->lineWidth);
+   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_LINE_WIDTH, 2)) {
+      pipeline->gras_su_cntl |=
+         A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f);
+      tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = pipeline->gras_su_cntl));
+   }
 
-   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_DEPTH_BIAS)) {
-      tu6_emit_depth_bias(&rast_cs, rast_info->depthBiasConstantFactor,
+   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) {
+      tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor,
                           rast_info->depthBiasClamp,
                           rast_info->depthBiasSlopeFactor);
    }
 
-   pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &rast_cs);
-
-   pipeline->rast.gras_su_cntl = gras_su_cntl;
 }
 
 static void
@@ -2286,30 +2217,31 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
       builder->depth_attachment_format != VK_FORMAT_S8_UINT
          ? ds_info : &dummy_ds_info;
 
-   struct tu_cs ds_cs;
-   tu_cs_begin_sub_stream(&pipeline->cs, 12, &ds_cs);
+   struct tu_cs cs;
+   tu_cs_begin_sub_stream(&pipeline->cs, 6, &cs);
 
    /* move to hw ctx init? */
-   tu6_emit_alpha_control_disable(&ds_cs);
-
-   tu6_emit_depth_control(&ds_cs, ds_info_depth,
+   tu_cs_emit_regs(&cs, A6XX_RB_ALPHA_CONTROL());
+   tu6_emit_depth_control(&cs, ds_info_depth,
                           builder->create_info->pRasterizationState);
-   tu6_emit_stencil_control(&ds_cs, ds_info);
+   tu6_emit_stencil_control(&cs, ds_info);
 
-   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
-      tu6_emit_stencil_compare_mask(&ds_cs, ds_info->front.compareMask,
-                                    ds_info->back.compareMask);
-   }
-   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
-      tu6_emit_stencil_write_mask(&ds_cs, ds_info->front.writeMask,
-                                  ds_info->back.writeMask);
+   pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
+
+   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2)) {
+      tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.mask = ds_info->front.compareMask & 0xff,
+                                               .bfmask = ds_info->back.compareMask & 0xff));
    }
-   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
-      tu6_emit_stencil_reference(&ds_cs, ds_info->front.reference,
-                                 ds_info->back.reference);
+
+   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2)) {
+      tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.wrmask = ds_info->front.writeMask & 0xff,
+                                                 .bfwrmask = ds_info->back.writeMask & 0xff));
    }
 
-   pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &ds_cs);
+   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2)) {
+      tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.ref = ds_info->front.reference & 0xff,
+                                              .bfref = ds_info->back.reference & 0xff));
+   }
 }
 
 static void
@@ -2342,32 +2274,35 @@ tu_pipeline_builder_parse_multisample_and_color_blend(
       builder->use_color_attachments ? builder->create_info->pColorBlendState
                                      : &dummy_blend_info;
 
-   struct tu_cs blend_cs;
-   tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 18, &blend_cs);
+   struct tu_cs cs;
+   tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 4, &cs);
 
    uint32_t blend_enable_mask;
-   tu6_emit_rb_mrt_controls(&blend_cs, blend_info,
+   tu6_emit_rb_mrt_controls(&cs, blend_info,
                             builder->color_attachment_formats,
                             &blend_enable_mask);
 
-   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS))
-      tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants);
-
-   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SAMPLE_LOCATIONS)) {
-      const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
-         vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
-      const VkSampleLocationsInfoEXT *samp_loc = NULL;
+   tu6_emit_blend_control(&cs, blend_enable_mask,
+                          builder->use_dual_src_blend, msaa_info);
 
-      if (sample_locations && sample_locations->sampleLocationsEnable)
-         samp_loc = &sample_locations->sampleLocationsInfo;
+   pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
 
-      tu6_emit_sample_locations(&blend_cs, samp_loc);
+   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5)) {
+      tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4);
+      tu_cs_emit_array(&cs, (const uint32_t *) blend_info->blendConstants, 4);
    }
 
-   tu6_emit_blend_control(&blend_cs, blend_enable_mask,
-                          builder->use_dual_src_blend, msaa_info);
+   const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
+      vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
+   const VkSampleLocationsInfoEXT *samp_loc = NULL;
+
+   if (sample_locations && sample_locations->sampleLocationsEnable)
+      samp_loc = &sample_locations->sampleLocationsInfo;
 
-   pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs);
+    if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS,
+                                 samp_loc ? 9 : 6)) {
+      tu6_emit_sample_locations(&cs, samp_loc);
+    }
 }
 
 static void
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 527bf817bbc..ddb25677b48 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -129,6 +129,7 @@ tu_minify(uint32_t n, uint32_t levels)
    })
 
 #define COND(bool, val) ((bool) ? (val) : 0)
+#define BIT(bit) (1u << (bit))
 
 /* Whenever we generate an error, pass it through this function. Useful for
  * debugging, where we can break on it. Only call at error site, not when
@@ -409,6 +410,42 @@ struct ts_cs_memory {
    uint64_t iova;
 };
 
+struct tu_draw_state {
+   uint64_t iova : 48;
+   uint32_t size : 16;
+};
+
+enum tu_dynamic_state
+{
+   /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */
+   TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1,
+   TU_DYNAMIC_STATE_COUNT,
+};
+
+enum tu_draw_state_group_id
+{
+   TU_DRAW_STATE_PROGRAM,
+   TU_DRAW_STATE_PROGRAM_BINNING,
+   TU_DRAW_STATE_VB,
+   TU_DRAW_STATE_VI,
+   TU_DRAW_STATE_VI_BINNING,
+   TU_DRAW_STATE_RAST,
+   TU_DRAW_STATE_DS,
+   TU_DRAW_STATE_BLEND,
+   TU_DRAW_STATE_VS_CONST,
+   TU_DRAW_STATE_GS_CONST,
+   TU_DRAW_STATE_FS_CONST,
+   TU_DRAW_STATE_DESC_SETS,
+   TU_DRAW_STATE_DESC_SETS_GMEM,
+   TU_DRAW_STATE_DESC_SETS_SYSMEM,
+   TU_DRAW_STATE_DESC_SETS_LOAD,
+   TU_DRAW_STATE_VS_PARAMS,
+
+   /* dynamic state related draw states */
+   TU_DRAW_STATE_DYNAMIC,
+   TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT,
+};
+
 enum tu_cs_mode
 {
 
@@ -578,73 +615,12 @@ tu_buffer_iova(struct tu_buffer *buffer)
    return buffer->bo->iova + buffer->bo_offset;
 }
 
-enum tu_dynamic_state_bits
-{
-   TU_DYNAMIC_VIEWPORT = 1 << 0,
-   TU_DYNAMIC_SCISSOR = 1 << 1,
-   TU_DYNAMIC_LINE_WIDTH = 1 << 2,
-   TU_DYNAMIC_DEPTH_BIAS = 1 << 3,
-   TU_DYNAMIC_BLEND_CONSTANTS = 1 << 4,
-   TU_DYNAMIC_DEPTH_BOUNDS = 1 << 5,
-   TU_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6,
-   TU_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
-   TU_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
-   TU_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
-   TU_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10,
-   TU_DYNAMIC_ALL = (1 << 11) - 1,
-};
-
 struct tu_vertex_binding
 {
    struct tu_buffer *buffer;
    VkDeviceSize offset;
 };
 
-struct tu_viewport_state
-{
-   uint32_t count;
-   VkViewport viewports[MAX_VIEWPORTS];
-};
-
-struct tu_scissor_state
-{
-   uint32_t count;
-   VkRect2D scissors[MAX_SCISSORS];
-};
-
-struct tu_dynamic_state
-{
-   /**
-    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
-    * Defines the set of saved dynamic state.
-    */
-   uint32_t mask;
-
-   struct tu_viewport_state viewport;
-
-   struct tu_scissor_state scissor;
-
-   float line_width;
-
-   struct
-   {
-      uint32_t front;
-      uint32_t back;
-   } stencil_compare_mask;
-
-   struct
-   {
-      uint32_t front;
-      uint32_t back;
-   } stencil_write_mask;
-
-   struct
-   {
-      uint32_t front;
-      uint32_t back;
-   } stencil_reference;
-};
-
 const char *
 tu_get_debug_option_name(int id);
 
@@ -693,21 +669,14 @@ struct tu_tiling_config
 
 enum tu_cmd_dirty_bits
 {
-   TU_CMD_DIRTY_PIPELINE = 1 << 0,
    TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
    TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2,
-
    TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3,
    TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 4,
    TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5,
    TU_CMD_DIRTY_STREAMOUT_BUFFERS = 1 << 6,
-
-   TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 16,
-   TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 17,
-   TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 18,
-   TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 19,
-   TU_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 20,
-   TU_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 21,
+   /* all draw states were disabled and need to be re-enabled: */
+   TU_CMD_DIRTY_DRAW_STATE = 1 << 7,
 };
 
 struct tu_streamout_state {
@@ -842,7 +811,17 @@ struct tu_cmd_state
       VkDeviceSize offsets[MAX_VBS];
    } vb;
 
-   struct tu_dynamic_state dynamic;
+   /* for dynamic states that can't be emitted directly */
+   uint32_t dynamic_stencil_mask;
+   uint32_t dynamic_stencil_wrmask;
+   uint32_t dynamic_stencil_ref;
+   uint32_t dynamic_gras_su_cntl;
+
+   /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
+   struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
+   struct tu_cs_entry vertex_buffers_ib;
+   struct tu_cs_entry shader_const_ib[MESA_SHADER_STAGES];
+   struct tu_cs_entry desc_sets_ib, desc_sets_gmem_ib, desc_sets_sysmem_ib, desc_sets_load_ib;
 
    /* Stream output buffers */
    struct
@@ -1106,8 +1085,6 @@ struct tu_pipeline
 {
    struct tu_cs cs;
 
-   struct tu_dynamic_state dynamic_state;
-
    struct tu_pipeline_layout *layout;
 
    bool need_indirect_descriptor_sets;
@@ -1116,6 +1093,15 @@ struct tu_pipeline
 
    struct tu_streamout_state streamout;
 
+   /* mask of enabled dynamic states
+    * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
+    */
+   uint32_t dynamic_state_mask;
+   struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
+
+   /* gras_su_cntl without line width, used for dynamic line width state */
+   uint32_t gras_su_cntl;
+
    struct
    {
       struct tu_bo binary_bo;
@@ -1147,12 +1133,6 @@ struct tu_pipeline
    struct
    {
       struct tu_cs_entry state_ib;
-   } vp;
-
-   struct
-   {
-      uint32_t gras_su_cntl;
-      struct tu_cs_entry state_ib;
    } rast;
 
    struct
@@ -1180,31 +1160,12 @@ tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor);
 void
 tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);
 
-void
-tu6_emit_gras_su_cntl(struct tu_cs *cs,
-                      uint32_t gras_su_cntl,
-                      float line_width);
-
 void
 tu6_emit_depth_bias(struct tu_cs *cs,
                     float constant_factor,
                     float clamp,
                     float slope_factor);
 
-void
-tu6_emit_stencil_compare_mask(struct tu_cs *cs,
-                              uint32_t front,
-                              uint32_t back);
-
-void
-tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back);
-
-void
-tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back);
-
-void
-tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]);
-
 void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples);
 
 void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);



More information about the mesa-commit mailing list