Mesa (staging/22.0): radv: fix handling divisor == 0 with dynamic vertex input state

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 27 19:44:51 UTC 2022


Module: Mesa
Branch: staging/22.0
Commit: 7231f624e8a8b5eb04c64609dea9292cef3c0980
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7231f624e8a8b5eb04c64609dea9292cef3c0980

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Mon Apr 18 14:46:09 2022 +0200

radv: fix handling divisor == 0 with dynamic vertex input state

When the divisor is 0, the compiler should generate a different VS
prolog instead of re-using a previous prolog that uses nontrivial
divisors. This is because divisor == 0 and divisor > 1 must take
different paths to guarantee that the index is correctly computed.

Cc: mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16009>
(cherry picked from commit f525706e7725345e187f076118feb3937dfb8c43)

---

 .pick_status.json                |  2 +-
 src/amd/vulkan/radv_cmd_buffer.c | 17 +++++++++++++----
 src/amd/vulkan/radv_shader.h     |  1 +
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index f94377577c2..c6fdeb73d37 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -228,7 +228,7 @@
         "description": "radv: fix handling divisor == 0 with dynamic vertex input state",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "because_sha": null
     },
     {
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index a86414a5971..26b96f3ada9 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2757,8 +2757,9 @@ union vs_prolog_key_header {
       uint32_t misaligned_mask : 1;
       uint32_t post_shuffle : 1;
       uint32_t nontrivial_divisors : 1;
+      uint32_t zero_divisors : 1;
       /* We need this to ensure the padding is zero. It's useful even if it's unused. */
-      uint32_t padding0 : 6;
+      uint32_t padding0 : 5;
    };
    uint32_t v;
 };
@@ -2800,6 +2801,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
    uint32_t attribute_mask = BITFIELD_MASK(num_attributes);
 
    uint32_t instance_rate_inputs = state->instance_rate_inputs & attribute_mask;
+   uint32_t zero_divisors = state->zero_divisors & attribute_mask;
    *nontrivial_divisors = state->nontrivial_divisors & attribute_mask;
    enum chip_class chip = device->physical_device->rad_info.chip_class;
    const uint32_t misaligned_mask = chip == GFX6 || chip >= GFX10 ? cmd_buffer->state.vbo_misaligned_mask : 0;
@@ -2811,7 +2813,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
        !misaligned_mask && !state->alpha_adjust_lo && !state->alpha_adjust_hi) {
       if (!instance_rate_inputs) {
          prolog = device->simple_vs_prologs[num_attributes - 1];
-      } else if (num_attributes <= 16 && !*nontrivial_divisors &&
+      } else if (num_attributes <= 16 && !*nontrivial_divisors && !zero_divisors &&
                  util_bitcount(instance_rate_inputs) ==
                     (util_last_bit(instance_rate_inputs) - ffs(instance_rate_inputs) + 1)) {
          unsigned index = radv_instance_rate_prolog_index(num_attributes, instance_rate_inputs);
@@ -2822,7 +2824,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
       return prolog;
 
    /* if we couldn't use a pre-compiled prolog, find one in the cache or create one */
-   uint32_t key_words[16];
+   uint32_t key_words[17];
    unsigned key_size = 1;
 
    struct radv_vs_prolog_key key;
@@ -2851,6 +2853,10 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
       header.nontrivial_divisors = true;
       key_words[key_size++] = *nontrivial_divisors;
    }
+   if (zero_divisors) {
+      header.zero_divisors = true;
+      key_words[key_size++] = zero_divisors;
+   }
    if (misaligned_mask) {
       header.misaligned_mask = true;
       key_words[key_size++] = misaligned_mask;
@@ -5545,8 +5551,11 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
       if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
          state->instance_rate_inputs |= 1u << loc;
          state->divisors[loc] = binding->divisor;
-         if (binding->divisor != 1)
+         if (binding->divisor == 0) {
+            state->zero_divisors |= 1u << loc;
+         } else if (binding->divisor > 1) {
             state->nontrivial_divisors |= 1u << loc;
+         }
       }
       cmd_buffer->vertex_bindings[attrib->binding].stride = binding->stride;
       state->offsets[loc] = attrib->offset;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 362220e4e50..4c097ecd1a7 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -376,6 +376,7 @@ struct radv_vs_input_state {
 
    uint32_t instance_rate_inputs;
    uint32_t nontrivial_divisors;
+   uint32_t zero_divisors;
    uint32_t post_shuffle;
    /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
     * using bitwise arithmetic.



More information about the mesa-commit mailing list