Mesa (main): radv: don't update misaligned_mask in CmdBindVertexBuffers2

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jul 19 21:03:58 UTC 2022


Module: Mesa
Branch: main
Commit: 0fb3aa005f8a1c52a6e33001a94455181b673f33
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0fb3aa005f8a1c52a6e33001a94455181b673f33

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Jul 13 16:43:44 2022 +0100

radv: don't update misaligned_mask in CmdBindVertexBuffers2

CmdBindVertexBuffers2 can't update misaligned_mask inside its per-binding
loop because it can't easily tell which attributes use a given binding.

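We could do it in a separate loop, but there's no point, especially since
zink calls CmdSetVertexInputEXT() after CmdBindVertexBuffers2(), which
resets the mask anyway. Instead, mark the affected attributes as invalid
and recompute their bits lazily in lookup_vs_prolog().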

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz at gmail.com>
Fixes: c335a4d70ed ("radv: dynamically calculate misaligned_mask for dynamic vertex input")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17521>

---

 src/amd/vulkan/radv_cmd_buffer.c | 86 ++++++++++++++++++++++------------------
 src/amd/vulkan/radv_private.h    |  1 +
 src/amd/vulkan/radv_shader.h     |  4 +-
 3 files changed, 51 insertions(+), 40 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 91e4932212d..e17c7b230e1 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2875,8 +2875,24 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
    uint32_t instance_rate_inputs = state->instance_rate_inputs & attribute_mask;
    uint32_t zero_divisors = state->zero_divisors & attribute_mask;
    *nontrivial_divisors = state->nontrivial_divisors & attribute_mask;
-   enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level;
-   const uint32_t misaligned_mask = chip == GFX6 || chip >= GFX10 ? cmd_buffer->state.vbo_misaligned_mask : 0;
+   uint32_t misaligned_mask = cmd_buffer->state.vbo_misaligned_mask;
+   if (cmd_buffer->state.vbo_misaligned_mask_invalid) {
+      assert(device->physical_device->rad_info.gfx_level == GFX6 ||
+             device->physical_device->rad_info.gfx_level >= GFX10);
+
+      u_foreach_bit (index, cmd_buffer->state.vbo_misaligned_mask_invalid & attribute_mask) {
+         uint8_t binding = state->bindings[index];
+         if (!(cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(binding)))
+            continue;
+         uint8_t req = state->format_align_req_minus_1[index];
+         struct radv_vertex_binding *vb = &cmd_buffer->vertex_bindings[binding];
+         VkDeviceSize offset = vb->offset + state->offsets[index];
+         if ((offset & req) || (vb->stride & req))
+            misaligned_mask |= BITFIELD_BIT(index);
+      }
+      cmd_buffer->state.vbo_misaligned_mask = misaligned_mask;
+      cmd_buffer->state.vbo_misaligned_mask_invalid &= ~attribute_mask;
+   }
 
    /* try to use a pre-compiled prolog first */
    struct radv_shader_part *prolog = NULL;
@@ -4849,50 +4865,45 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
     * stride from the pipeline. */
 
    assert(firstBinding + bindingCount <= MAX_VBS);
-   cmd_buffer->state.vbo_misaligned_mask = state->misaligned_mask;
    enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
 
    if (firstBinding + bindingCount > cmd_buffer->used_vertex_bindings)
       cmd_buffer->used_vertex_bindings = firstBinding + bindingCount;
 
+   uint32_t misaligned_mask_invalid = 0;
+
    for (uint32_t i = 0; i < bindingCount; i++) {
       RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
       uint32_t idx = firstBinding + i;
       VkDeviceSize size = pSizes ? pSizes[i] : 0;
-      VkDeviceSize stride = pStrides ? pStrides[i] : 0;
+      /* if pStrides=NULL, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */
+      VkDeviceSize stride = pStrides ? pStrides[i] : vb[idx].stride;
+
+      if (!!cmd_buffer->vertex_binding_buffers[idx] != !!buffer ||
+          (buffer && ((vb[idx].offset & 0x3) != (pOffsets[i] & 0x3) ||
+                      (vb[idx].stride & 0x3) != (stride & 0x3)))) {
+         misaligned_mask_invalid |= state->bindings_match_attrib ? BITFIELD_BIT(idx) : 0xffffffff;
+      }
 
       cmd_buffer->vertex_binding_buffers[idx] = buffer;
       vb[idx].offset = pOffsets[i];
       vb[idx].size = size;
+      vb[idx].stride = stride;
 
-      if (chip == GFX6 || chip >= GFX10) {
-         const uint32_t bit = 1u << idx;
-         if (!buffer) {
-            cmd_buffer->state.vbo_misaligned_mask &= ~bit;
-            cmd_buffer->state.vbo_bound_mask &= ~bit;
-         } else {
-            cmd_buffer->state.vbo_bound_mask |= bit;
-            if (pStrides && vb[idx].stride != stride) {
-               if (stride & state->format_align_req_minus_1[idx])
-                  cmd_buffer->state.vbo_misaligned_mask |= bit;
-               else
-                  cmd_buffer->state.vbo_misaligned_mask &= ~bit;
-            }
-            if (state->possibly_misaligned_mask & bit &&
-                (vb[idx].offset + state->offsets[idx]) & state->format_align_req_minus_1[idx])
-               cmd_buffer->state.vbo_misaligned_mask |= bit;
-         }
-      }
-
-      /* if pStrides=NULL, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */
-      if (pStrides)
-         vb[idx].stride = stride;
-
+      uint32_t bit = BITFIELD_BIT(idx);
       if (buffer) {
          radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->vertex_binding_buffers[idx]->bo);
+         cmd_buffer->state.vbo_bound_mask |= bit;
+      } else {
+         cmd_buffer->state.vbo_bound_mask &= ~bit;
       }
    }
 
+   if ((chip == GFX6 || chip >= GFX10) && misaligned_mask_invalid) {
+      cmd_buffer->state.vbo_misaligned_mask_invalid = misaligned_mask_invalid;
+      cmd_buffer->state.vbo_misaligned_mask &= ~misaligned_mask_invalid;
+   }
+
    cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER |
                               RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
 }
@@ -5797,8 +5808,10 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
       bindings[pVertexBindingDescriptions[i].binding] = &pVertexBindingDescriptions[i];
 
    cmd_buffer->state.vbo_misaligned_mask = 0;
+   cmd_buffer->state.vbo_misaligned_mask_invalid = 0;
 
    memset(state, 0, sizeof(*state));
+   state->bindings_match_attrib = true;
 
    enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
    for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) {
@@ -5808,6 +5821,8 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
 
       state->attribute_mask |= 1u << loc;
       state->bindings[loc] = attrib->binding;
+      if (attrib->binding != loc)
+         state->bindings_match_attrib = false;
       if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
          state->instance_rate_inputs |= 1u << loc;
          state->divisors[loc] = binding->divisor;
@@ -5858,18 +5873,13 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
       if (found->post_shuffle)
          state->post_shuffle |= 1u << loc;
 
-      if (chip == GFX6 || chip >= GFX10) {
-         const struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings;
-         unsigned bit = 1u << loc;
+      if ((chip == GFX6 || chip >= GFX10) &&
+          cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(attrib->binding)) {
          if (binding->stride & found->fmt_align_req_minus_1) {
-            state->misaligned_mask |= bit;
-            if (cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(attrib->binding))
-               cmd_buffer->state.vbo_misaligned_mask |= bit;
-         } else {
-            state->possibly_misaligned_mask |= bit;
-            if (cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(attrib->binding) &&
-                ((vb[attrib->binding].offset + state->offsets[loc]) & found->fmt_align_req_minus_1))
-               cmd_buffer->state.vbo_misaligned_mask |= bit;
+            cmd_buffer->state.vbo_misaligned_mask |= BITFIELD_BIT(loc);
+         } else if ((cmd_buffer->vertex_bindings[attrib->binding].offset + state->offsets[loc]) &
+                    found->fmt_align_req_minus_1) {
+            cmd_buffer->state.vbo_misaligned_mask |= BITFIELD_BIT(loc);
          }
       }
    }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a6a5c645728..ca1df44922c 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1539,6 +1539,7 @@ struct radv_cmd_state {
    uint32_t *emitted_vs_prolog_key;
    uint32_t emitted_vs_prolog_key_hash;
    uint32_t vbo_misaligned_mask;
+   uint32_t vbo_misaligned_mask_invalid;
    uint32_t vbo_bound_mask;
 
    /* Whether the cmdbuffer owns the current render pass rather than the app. */
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index ad49a51bf02..693f72b1d06 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -380,8 +380,6 @@ struct radv_shader_info {
 
 struct radv_vs_input_state {
    uint32_t attribute_mask;
-   uint32_t misaligned_mask;
-   uint32_t possibly_misaligned_mask;
 
    uint32_t instance_rate_inputs;
    uint32_t nontrivial_divisors;
@@ -399,6 +397,8 @@ struct radv_vs_input_state {
    uint8_t formats[MAX_VERTEX_ATTRIBS];
    uint8_t format_align_req_minus_1[MAX_VERTEX_ATTRIBS];
    uint8_t format_sizes[MAX_VERTEX_ATTRIBS];
+
+   bool bindings_match_attrib;
 };
 
 struct radv_vs_prolog_key {



More information about the mesa-commit mailing list