Mesa (master): turnip: implement VK_EXT_extended_dynamic_state

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Sep 25 13:12:41 UTC 2020


Module: Mesa
Branch: master
Commit: dcba32bac08abfe71768a8b42e003b9350f3fef9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=dcba32bac08abfe71768a8b42e003b9350f3fef9

Author: Jonathan Marek <jonathan at marek.ca>
Date:   Thu Sep 17 10:16:42 2020 -0400

turnip: implement VK_EXT_extended_dynamic_state

Passes dEQP-VK.pipeline.extended_dynamic_state.*

Signed-off-by: Jonathan Marek <jonathan at marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5641>

---

 src/freedreno/vulkan/tu_cmd_buffer.c  | 299 +++++++++++++++++++++++++++++++---
 src/freedreno/vulkan/tu_device.c      |   5 +
 src/freedreno/vulkan/tu_extensions.py |   1 +
 src/freedreno/vulkan/tu_pipeline.c    | 217 +++++++++++++++---------
 src/freedreno/vulkan/tu_private.h     |  39 +++--
 5 files changed, 452 insertions(+), 109 deletions(-)

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 1ded67762c6..380ef74eb18 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -464,6 +464,8 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
       break;
    }
 
+   STATIC_ASSERT(TU_DRAW_STATE_COUNT <= 32);
+
    /* We need to reload the descriptors every time the descriptor sets
     * change. However, the commands we send only depend on the pipeline
     * because the whole point is to cache descriptors which are used by the
@@ -1567,6 +1569,19 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
                         uint32_t bindingCount,
                         const VkBuffer *pBuffers,
                         const VkDeviceSize *pOffsets)
+{
+   tu_CmdBindVertexBuffers2EXT(commandBuffer, firstBinding, bindingCount,
+                               pBuffers, pOffsets, NULL, NULL);
+}
+
+void
+tu_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer,
+                            uint32_t firstBinding,
+                            uint32_t bindingCount,
+                            const VkBuffer* pBuffers,
+                            const VkDeviceSize* pOffsets,
+                            const VkDeviceSize* pSizes,
+                            const VkDeviceSize* pStrides)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
    struct tu_cs cs;
@@ -1577,7 +1592,9 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
       struct tu_buffer *buf = tu_buffer_from_handle(pBuffers[i]);
 
       cmd->state.vb[firstBinding + i].base = tu_buffer_iova(buf) + pOffsets[i];
-      cmd->state.vb[firstBinding + i].size = buf->size - pOffsets[i];
+      cmd->state.vb[firstBinding + i].size = pSizes ? pSizes[i] : (buf->size - pOffsets[i]);
+      if (pStrides)
+         cmd->state.vb[firstBinding + i].stride = pStrides[i];
    }
 
    for (uint32_t i = 0; i < MAX_VBS; i++) {
@@ -1588,6 +1605,16 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
    }
 
    cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
+
+   if (pStrides) {
+      cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].iova =
+         tu_cs_draw_state(&cmd->sub_cs, &cs, 2 * MAX_VBS).iova;
+
+      for (uint32_t i = 0; i < MAX_VBS; i++)
+         tu_cs_emit_regs(&cs, A6XX_VFD_FETCH_STRIDE(i, cmd->state.vb[i].stride));
+
+      cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE;
+   }
 }
 
 void
@@ -1985,29 +2012,17 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT);
    uint32_t i;
 
-   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (7 + util_bitcount(mask)));
+   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (6 + util_bitcount(mask)));
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state);
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state);
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state);
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state);
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state);
-   tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DS, pipeline->ds_state);
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_BLEND, pipeline->blend_state);
+
    for_each_bit(i, mask)
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]);
 
-   /* dynamic linewidth state depends pipeline state's gras_su_cntl
-    * so the dynamic state ib must be updated when pipeline changes
-    */
-   if (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_LINE_WIDTH)) {
-      struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2);
-
-      cmd->state.dynamic_gras_su_cntl &= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
-      cmd->state.dynamic_gras_su_cntl |= pipeline->gras_su_cntl;
-
-      tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl));
-   }
-
    /* the vertex_buffers draw state always contains all the currently
     * bound vertex buffers. update its size to only emit the vbs which
     * are actually used by the pipeline
@@ -2018,6 +2033,38 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
       cmd->state.vertex_buffers.size = pipeline->num_vbs * 4;
       cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
    }
+
+   if ((pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE)) &&
+       cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size != pipeline->num_vbs * 2) {
+      cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size = pipeline->num_vbs * 2;
+      cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE;
+   }
+
+#define UPDATE_REG(X, Y) {                                           \
+   /* note: would be better to have pipeline bits already masked */  \
+   uint32_t pipeline_bits = pipeline->X & pipeline->X##_mask;        \
+   if ((cmd->state.X & pipeline->X##_mask) != pipeline_bits) {       \
+      cmd->state.X &= ~pipeline->X##_mask;                           \
+      cmd->state.X |= pipeline_bits;                                 \
+      cmd->state.dirty |= TU_CMD_DIRTY_##Y;                          \
+   }                                                                 \
+   if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_##Y)))  \
+      cmd->state.dirty &= ~TU_CMD_DIRTY_##Y;                         \
+}
+
+   /* these registers can have bits set from both pipeline and dynamic state;
+    * this updates the bits set by the pipeline.
+    * if the pipeline doesn't use a dynamic state for the register, then
+    * the relevant dirty bit is cleared to avoid overriding the non-dynamic
+    * state with a dynamic state on the next draw.
+    */
+   UPDATE_REG(gras_su_cntl, GRAS_SU_CNTL);
+   UPDATE_REG(rb_depth_cntl, RB_DEPTH_CNTL);
+   UPDATE_REG(rb_stencil_cntl, RB_STENCIL_CNTL);
+#undef UPDATE_REG
+
+   if (pipeline->rb_depth_cntl_disable)
+      cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
 }
 
 void
@@ -2056,12 +2103,11 @@ void
 tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
 {
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
-   struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2);
 
-   cmd->state.dynamic_gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
-   cmd->state.dynamic_gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f);
+   cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
+   cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f);
 
-   tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl));
+   cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
 }
 
 void
@@ -2160,6 +2206,178 @@ tu_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
    tu6_emit_sample_locations(&cs, pSampleLocationsInfo);
 }
 
+void
+tu_CmdSetCullModeEXT(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   cmd->state.gras_su_cntl &=
+      ~(A6XX_GRAS_SU_CNTL_CULL_FRONT | A6XX_GRAS_SU_CNTL_CULL_BACK);
+
+   if (cullMode & VK_CULL_MODE_FRONT_BIT)
+      cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
+   if (cullMode & VK_CULL_MODE_BACK_BIT)
+      cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;
+
+   cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
+}
+
+void
+tu_CmdSetFrontFaceEXT(VkCommandBuffer commandBuffer, VkFrontFace frontFace)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_FRONT_CW;
+
+   if (frontFace == VK_FRONT_FACE_CLOCKWISE)
+      cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
+
+   cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
+}
+
+void
+tu_CmdSetPrimitiveTopologyEXT(VkCommandBuffer commandBuffer,
+                              VkPrimitiveTopology primitiveTopology)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   cmd->state.primtype = tu6_primtype(primitiveTopology);
+}
+
+void
+tu_CmdSetViewportWithCountEXT(VkCommandBuffer commandBuffer,
+                              uint32_t viewportCount,
+                              const VkViewport* pViewports)
+{
+   tu_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports);
+}
+
+void
+tu_CmdSetScissorWithCountEXT(VkCommandBuffer commandBuffer,
+                             uint32_t scissorCount,
+                             const VkRect2D* pScissors)
+{
+   tu_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors);
+}
+
+void
+tu_CmdSetDepthTestEnableEXT(VkCommandBuffer commandBuffer,
+                            VkBool32 depthTestEnable)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_ENABLE;
+
+   if (depthTestEnable)
+      cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_ENABLE;
+
+   cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
+}
+
+void
+tu_CmdSetDepthWriteEnableEXT(VkCommandBuffer commandBuffer,
+                             VkBool32 depthWriteEnable)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
+
+   if (depthWriteEnable)
+      cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
+
+   cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
+}
+
+void
+tu_CmdSetDepthCompareOpEXT(VkCommandBuffer commandBuffer,
+                           VkCompareOp depthCompareOp)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK;
+
+   cmd->state.rb_depth_cntl |=
+      A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(depthCompareOp));
+
+   cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
+}
+
+void
+tu_CmdSetDepthBoundsTestEnableEXT(VkCommandBuffer commandBuffer,
+                                  VkBool32 depthBoundsTestEnable)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE;
+
+   if (depthBoundsTestEnable)
+      cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE;
+
+   cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
+}
+
+void
+tu_CmdSetStencilTestEnableEXT(VkCommandBuffer commandBuffer,
+                              VkBool32 stencilTestEnable)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   cmd->state.rb_stencil_cntl &= ~(
+      A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+      A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+      A6XX_RB_STENCIL_CONTROL_STENCIL_READ);
+
+   if (stencilTestEnable) {
+      cmd->state.rb_stencil_cntl |=
+         A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+         A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+         A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
+   }
+
+   cmd->state.dirty |= TU_CMD_DIRTY_RB_STENCIL_CNTL;
+}
+
+void
+tu_CmdSetStencilOpEXT(VkCommandBuffer commandBuffer,
+                      VkStencilFaceFlags faceMask,
+                      VkStencilOp failOp,
+                      VkStencilOp passOp,
+                      VkStencilOp depthFailOp,
+                      VkCompareOp compareOp)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
+      cmd->state.rb_stencil_cntl &= ~(
+         A6XX_RB_STENCIL_CONTROL_FUNC__MASK |
+         A6XX_RB_STENCIL_CONTROL_FAIL__MASK |
+         A6XX_RB_STENCIL_CONTROL_ZPASS__MASK |
+         A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK);
+
+      cmd->state.rb_stencil_cntl |=
+         A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(compareOp)) |
+         A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(failOp)) |
+         A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(passOp)) |
+         A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(depthFailOp));
+   }
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
+      cmd->state.rb_stencil_cntl &= ~(
+         A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK |
+         A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK |
+         A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK |
+         A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK);
+
+      cmd->state.rb_stencil_cntl |=
+         A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(compareOp)) |
+         A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(failOp)) |
+         A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(passOp)) |
+         A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(depthFailOp));
+   }
+
+   cmd->state.dirty |= TU_CMD_DIRTY_RB_STENCIL_CNTL;
+}
+
 static void
 tu_flush_for_access(struct tu_cache_state *cache,
                     enum tu_cmd_access_mask src_mask,
@@ -2902,6 +3120,30 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
          .tess_upper_left_domain_origin =
                pipeline->tess.upper_left_domain_origin));
 
+   if (cmd->state.dirty & TU_CMD_DIRTY_GRAS_SU_CNTL) {
+      struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2);
+      tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.gras_su_cntl));
+   }
+
+   if (cmd->state.dirty & TU_CMD_DIRTY_RB_DEPTH_CNTL) {
+      struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2);
+      uint32_t rb_depth_cntl = cmd->state.rb_depth_cntl;
+
+      if ((rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_ENABLE) ||
+          (rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE))
+         rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
+
+      if (pipeline->rb_depth_cntl_disable)
+         rb_depth_cntl = 0;
+
+      tu_cs_emit_regs(&cs, A6XX_RB_DEPTH_CNTL(.dword = rb_depth_cntl));
+   }
+
+   if (cmd->state.dirty & TU_CMD_DIRTY_RB_STENCIL_CNTL) {
+      struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2);
+      tu_cs_emit_regs(&cs, A6XX_RB_STENCIL_CONTROL(.dword = cmd->state.rb_stencil_cntl));
+   }
+
    if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
       cmd->state.shader_const[MESA_SHADER_VERTEX] =
          tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX);
@@ -2958,7 +3200,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state);
-      tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DS, pipeline->ds_state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_BLEND, pipeline->blend_state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const[MESA_SHADER_VERTEX]);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_HS_CONST, cmd->state.shader_const[MESA_SHADER_TESS_CTRL]);
@@ -2980,6 +3221,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
       /* emit draw states that were just updated
        * note we eventually don't want to have to emit anything here
        */
+      bool emit_binding_stride = false;
       uint32_t draw_state_count =
          has_tess +
          ((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 5 : 0) +
@@ -2987,6 +3229,12 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
          ((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
          1; /* vs_params */
 
+      if ((cmd->state.dirty & TU_CMD_DIRTY_VB_STRIDE) &&
+          !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) {
+         emit_binding_stride = true;
+         draw_state_count += 1;
+      }
+
       tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count);
 
       /* We may need to re-emit tess consts if the current draw call is
@@ -3004,6 +3252,10 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
          tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
       if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
          tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
+      if (emit_binding_stride) {
+         tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_VB_STRIDE,
+                               cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE]);
+      }
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
    }
 
@@ -3021,8 +3273,13 @@ static uint32_t
 tu_draw_initiator(struct tu_cmd_buffer *cmd, enum pc_di_src_sel src_sel)
 {
    const struct tu_pipeline *pipeline = cmd->state.pipeline;
+   enum pc_di_primtype primtype = pipeline->ia.primtype;
+
+   if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY))
+      primtype = cmd->state.primtype;
+
    uint32_t initiator =
-      CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(pipeline->ia.primtype) |
+      CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
       CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(src_sel) |
       CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(cmd->state.index_size) |
       CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY);
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index 9578153ec55..51b32a812d9 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -619,6 +619,11 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
          features->hostQueryReset = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
+         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features = (void *)ext;
+         features->extendedDynamicState = true;
+         break;
+      }
       default:
          break;
       }
diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py
index c95167e4cbb..ef874430e66 100644
--- a/src/freedreno/vulkan/tu_extensions.py
+++ b/src/freedreno/vulkan/tu_extensions.py
@@ -95,6 +95,7 @@ EXTENSIONS = [
     Extension('VK_KHR_multiview',                         1, True),
     Extension('VK_EXT_host_query_reset',                  1, True),
     Extension('VK_EXT_shader_viewport_index_layer',       1, True),
+    Extension('VK_EXT_extended_dynamic_state',            1, True),
 ]
 
 MAX_API_VERSION = VkVersion(MAX_API_VERSION)
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index fe87c279b9c..b1913777ab9 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -1482,7 +1482,8 @@ tu6_emit_program(struct tu_cs *cs,
 }
 
 static void
-tu6_emit_vertex_input(struct tu_cs *cs,
+tu6_emit_vertex_input(struct tu_pipeline *pipeline,
+                      struct tu_cs *cs,
                       const struct ir3_shader_variant *vs,
                       const VkPipelineVertexInputStateCreateInfo *info)
 {
@@ -1494,8 +1495,10 @@ tu6_emit_vertex_input(struct tu_cs *cs,
       const VkVertexInputBindingDescription *binding =
          &info->pVertexBindingDescriptions[i];
 
-      tu_cs_emit_regs(cs,
-                      A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride));
+      if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) {
+         tu_cs_emit_regs(cs,
+                        A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride));
+      }
 
       if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
          binding_instanced |= 1 << binding->binding;
@@ -1717,7 +1720,8 @@ tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
    if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE)
       gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
 
-   /* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */
+   gras_su_cntl |=
+      A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f);
 
    if (rast_info->depthBiasEnable)
       gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;
@@ -1746,58 +1750,6 @@ tu6_emit_depth_bias(struct tu_cs *cs,
    tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value);
 }
 
-static void
-tu6_emit_depth_control(struct tu_cs *cs,
-                       const VkPipelineDepthStencilStateCreateInfo *ds_info,
-                       const VkPipelineRasterizationStateCreateInfo *rast_info)
-{
-   uint32_t rb_depth_cntl = 0;
-   if (ds_info->depthTestEnable) {
-      rb_depth_cntl |=
-         A6XX_RB_DEPTH_CNTL_Z_ENABLE |
-         A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) |
-         A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; /* TODO: don't set for ALWAYS/NEVER */
-
-      if (rast_info->depthClampEnable)
-         rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE;
-
-      if (ds_info->depthWriteEnable)
-         rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
-   }
-
-   if (ds_info->depthBoundsTestEnable)
-         rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
-
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_CNTL, 1);
-   tu_cs_emit(cs, rb_depth_cntl);
-}
-
-static void
-tu6_emit_stencil_control(struct tu_cs *cs,
-                         const VkPipelineDepthStencilStateCreateInfo *ds_info)
-{
-   uint32_t rb_stencil_control = 0;
-   if (ds_info->stencilTestEnable) {
-      const VkStencilOpState *front = &ds_info->front;
-      const VkStencilOpState *back = &ds_info->back;
-      rb_stencil_control |=
-         A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
-         A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
-         A6XX_RB_STENCIL_CONTROL_STENCIL_READ |
-         A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) |
-         A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) |
-         A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) |
-         A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) |
-         A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp));
-   }
-
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_CONTROL, 1);
-   tu_cs_emit(cs, rb_stencil_control);
-}
-
 static uint32_t
 tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att,
                          bool has_alpha)
@@ -2131,15 +2083,72 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
    if (!dynamic_info)
       return;
 
+   pipeline->gras_su_cntl_mask = ~0u;
+   pipeline->rb_depth_cntl_mask = ~0u;
+   pipeline->rb_stencil_cntl_mask = ~0u;
+
    for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
       VkDynamicState state = dynamic_info->pDynamicStates[i];
       switch (state) {
       case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+         if (state == VK_DYNAMIC_STATE_LINE_WIDTH)
+            pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
          pipeline->dynamic_state_mask |= BIT(state);
          break;
       case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
          pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS);
          break;
+      case VK_DYNAMIC_STATE_CULL_MODE_EXT:
+         pipeline->gras_su_cntl_mask &=
+            ~(A6XX_GRAS_SU_CNTL_CULL_BACK | A6XX_GRAS_SU_CNTL_CULL_FRONT);
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
+         break;
+      case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
+         pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_FRONT_CW;
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
+         break;
+      case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY);
+         break;
+      case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_VB_STRIDE);
+         break;
+      case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
+         pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_VIEWPORT);
+         break;
+      case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
+         pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_SCISSOR);
+         break;
+      case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
+         pipeline->rb_depth_cntl_mask &=
+            ~(A6XX_RB_DEPTH_CNTL_Z_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE);
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
+         break;
+      case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
+         pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
+         break;
+      case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
+         pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK;
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
+         break;
+      case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
+         pipeline->rb_depth_cntl_mask &=
+            ~(A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE);
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
+         break;
+      case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
+         pipeline->rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+                                             A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+                                             A6XX_RB_STENCIL_CONTROL_STENCIL_READ);
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL);
+         break;
+      case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
+         pipeline->rb_stencil_cntl_mask &= A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+                                           A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+                                           A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
+         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL);
+         break;
       default:
          assert(!"unsupported dynamic state");
          break;
@@ -2203,13 +2212,13 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
    struct tu_cs vi_cs;
    tu_cs_begin_sub_stream(&pipeline->cs,
                           MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
-   tu6_emit_vertex_input(&vi_cs, vs, vi_info);
+   tu6_emit_vertex_input(pipeline, &vi_cs, vs, vi_info);
    pipeline->vi.state = tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
 
    if (bs) {
       tu_cs_begin_sub_stream(&pipeline->cs,
                              MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
-      tu6_emit_vertex_input(&vi_cs, bs, vi_info);
+      tu6_emit_vertex_input(pipeline, &vi_cs, bs, vi_info);
       pipeline->vi.binning_state =
          tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
    }
@@ -2249,6 +2258,8 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder,
    if (!tess_info)
       return;
 
+   assert(!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY)));
+
    assert(pipeline->ia.primtype == DI_PT_PATCHES0);
    assert(tess_info->patchControlPoints <= 32);
    pipeline->ia.primtype += tess_info->patchControlPoints;
@@ -2332,11 +2343,8 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
    pipeline->gras_su_cntl =
       tu6_gras_su_cntl(rast_info, builder->samples, builder->multiview_mask != 0);
 
-   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_LINE_WIDTH, 2)) {
-      pipeline->gras_su_cntl |=
-         A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f);
+   if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2))
       tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = pipeline->gras_su_cntl));
-   }
 
    if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) {
       tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor,
@@ -2357,26 +2365,79 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
     *    the pipeline has rasterization disabled or if the subpass of the
     *    render pass the pipeline is created against does not use a
     *    depth/stencil attachment.
-    *
-    * Disable both depth and stencil tests if there is no ds attachment,
-    * Disable depth test if ds attachment is S8_UINT, since S8_UINT defines
-    * only the separate stencil attachment
     */
-   static const VkPipelineDepthStencilStateCreateInfo dummy_ds_info;
    const VkPipelineDepthStencilStateCreateInfo *ds_info =
-      builder->depth_attachment_format != VK_FORMAT_UNDEFINED
-         ? builder->create_info->pDepthStencilState
-         : &dummy_ds_info;
-   const VkPipelineDepthStencilStateCreateInfo *ds_info_depth =
-      builder->depth_attachment_format != VK_FORMAT_S8_UINT
-         ? ds_info : &dummy_ds_info;
-
+      builder->create_info->pDepthStencilState;
+   const VkPipelineRasterizationStateCreateInfo *rast_info =
+      builder->create_info->pRasterizationState;
+   uint32_t rb_depth_cntl = 0, rb_stencil_cntl = 0;
    struct tu_cs cs;
-   pipeline->ds_state = tu_cs_draw_state(&pipeline->cs, &cs, 4);
 
-   tu6_emit_depth_control(&cs, ds_info_depth,
-                          builder->create_info->pRasterizationState);
-   tu6_emit_stencil_control(&cs, ds_info);
+   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
+       builder->depth_attachment_format != VK_FORMAT_S8_UINT) {
+      if (ds_info->depthTestEnable) {
+         rb_depth_cntl |=
+            A6XX_RB_DEPTH_CNTL_Z_ENABLE |
+            A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) |
+            A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; /* TODO: don't set for ALWAYS/NEVER */
+
+         if (rast_info->depthClampEnable)
+            rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE;
+
+         if (ds_info->depthWriteEnable)
+            rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
+      }
+
+      if (ds_info->depthBoundsTestEnable)
+            rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
+   } else {
+      /* if RB_DEPTH_CNTL is set dynamically, we need to make sure it is set
+       * to 0 when this pipeline is used, as enabling depth test when there
+       * is no depth attachment is a problem (at least for the S8_UINT case)
+       */
+      if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL))
+         pipeline->rb_depth_cntl_disable = true;
+   }
+
+   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
+      const VkStencilOpState *front = &ds_info->front;
+      const VkStencilOpState *back = &ds_info->back;
+
+      rb_stencil_cntl |=
+         A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) |
+         A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) |
+         A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) |
+         A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) |
+         A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) |
+         A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) |
+         A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) |
+         A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp));
+
+      if (ds_info->stencilTestEnable) {
+         rb_stencil_cntl |=
+            A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+            A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+            A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
+      }
+   }
+
+   if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2)) {
+      tu_cs_emit_pkt4(&cs, REG_A6XX_RB_DEPTH_CNTL, 1);
+      tu_cs_emit(&cs, rb_depth_cntl);
+   } else {
+      pipeline->rb_depth_cntl = rb_depth_cntl;
+   }
+
+   if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2)) {
+      tu_cs_emit_pkt4(&cs, REG_A6XX_RB_STENCIL_CONTROL, 1);
+      tu_cs_emit(&cs, rb_stencil_cntl);
+   } else {
+      pipeline->rb_stencil_cntl = rb_stencil_cntl;
+   }
+
+   /* the remaining draw states aren't used if there is no d/s, leave them empty */
+   if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED)
+      return;
 
    if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) {
       tu_cs_emit_regs(&cs,
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 61da0ee33ae..1a78c9a7fda 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -460,7 +460,14 @@ enum tu_dynamic_state
 {
    /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */
    TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1,
+   TU_DYNAMIC_STATE_RB_DEPTH_CNTL,
+   TU_DYNAMIC_STATE_RB_STENCIL_CNTL,
+   TU_DYNAMIC_STATE_VB_STRIDE,
    TU_DYNAMIC_STATE_COUNT,
+   /* no associated draw state: */
+   TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT,
+   /* re-use the line width enum as it uses GRAS_SU_CNTL: */
+   TU_DYNAMIC_STATE_GRAS_SU_CNTL = VK_DYNAMIC_STATE_LINE_WIDTH,
 };
 
 enum tu_draw_state_group_id
@@ -472,7 +479,6 @@ enum tu_draw_state_group_id
    TU_DRAW_STATE_VI,
    TU_DRAW_STATE_VI_BINNING,
    TU_DRAW_STATE_RAST,
-   TU_DRAW_STATE_DS,
    TU_DRAW_STATE_BLEND,
    TU_DRAW_STATE_VS_CONST,
    TU_DRAW_STATE_HS_CONST,
@@ -681,12 +687,18 @@ struct tu_descriptor_state
 
 enum tu_cmd_dirty_bits
 {
-   TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2,
-   TU_CMD_DIRTY_DESC_SETS_LOAD = 1 << 3,
-   TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = 1 << 4,
-   TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5,
+   TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0),
+   TU_CMD_DIRTY_VB_STRIDE = BIT(1),
+   TU_CMD_DIRTY_GRAS_SU_CNTL = BIT(2),
+   TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3),
+   TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4),
+   TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5),
+   TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6),
+   TU_CMD_DIRTY_SHADER_CONSTS = BIT(7),
    /* all draw states were disabled and need to be re-enabled: */
-   TU_CMD_DIRTY_DRAW_STATE = 1 << 7,
+   TU_CMD_DIRTY_DRAW_STATE = BIT(8)
+
+
 };
 
 /* There are only three cache domains we have to care about: the CCU, or
@@ -852,6 +864,7 @@ struct tu_cmd_state
    struct {
       uint64_t base;
       uint32_t size;
+      uint32_t stride;
    } vb[MAX_VBS];
    VkViewport viewport[MAX_VIEWPORTS];
    VkRect2D scissor[MAX_SCISSORS];
@@ -861,7 +874,9 @@ struct tu_cmd_state
    uint32_t dynamic_stencil_mask;
    uint32_t dynamic_stencil_wrmask;
    uint32_t dynamic_stencil_ref;
-   uint32_t dynamic_gras_su_cntl;
+
+   uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl;
+   enum pc_di_primtype primtype;
 
    /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
@@ -1062,11 +1077,15 @@ struct tu_pipeline
    uint32_t dynamic_state_mask;
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
 
-   /* gras_su_cntl without line width, used for dynamic line width state */
-   uint32_t gras_su_cntl;
+   /* for dynamic states which use the same register: */
+   uint32_t gras_su_cntl, gras_su_cntl_mask;
+   uint32_t rb_depth_cntl, rb_depth_cntl_mask;
+   uint32_t rb_stencil_cntl, rb_stencil_cntl_mask;
+
+   bool rb_depth_cntl_disable;
 
    /* draw states for the pipeline */
-   struct tu_draw_state load_state, rast_state, ds_state, blend_state;
+   struct tu_draw_state load_state, rast_state, blend_state;
 
    /* for vertex buffers state */
    uint32_t num_vbs;



More information about the mesa-commit mailing list