Mesa (main): aco: remove radv vs prolog key from aco internals.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed May 11 19:36:12 UTC 2022


Module: Mesa
Branch: main
Commit: c44d5d61ce7251ea935723ef888f916f69cab9a1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c44d5d61ce7251ea935723ef888f916f69cab9a1

Author: Dave Airlie <airlied at redhat.com>
Date:   Thu May  5 14:27:01 2022 +1000

aco: remove radv vs prolog key from aco internals.

This creates an aco-specific key and converts the radv key to it.

Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16342>

---

 src/amd/compiler/aco_instruction_selection.cpp | 34 +++++++++++++-------------
 src/amd/compiler/aco_interface.cpp             |  2 +-
 src/amd/compiler/aco_interface.h               |  3 ++-
 src/amd/compiler/aco_ir.h                      |  3 +--
 src/amd/compiler/aco_shader_info.h             | 23 +++++++++++++++++
 src/amd/vulkan/radv_aco_shader_info.h          | 22 +++++++++++++++++
 src/amd/vulkan/radv_shader.c                   |  4 ++-
 7 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index d948ba1b0b7..bbbe0922c60 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11683,7 +11683,7 @@ calc_nontrivial_instance_id(Builder& bld, const struct radv_shader_args* args, u
 }
 
 void
-select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shader_config* config,
+select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shader_config* config,
                  const struct radv_nir_compiler_options* options,
                  const struct aco_shader_info* info,
                  const struct radv_shader_args* args, unsigned* num_preserved_sgprs)
@@ -11710,7 +11710,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
    bld.sopp(aco_opcode::s_setprio, -1u, 0x3u);
 
    uint32_t attrib_mask = BITFIELD_MASK(key->num_attributes);
-   bool has_nontrivial_divisors = key->state->nontrivial_divisors & attrib_mask;
+   bool has_nontrivial_divisors = key->state.nontrivial_divisors & attrib_mask;
 
    wait_imm lgkm_imm;
    lgkm_imm.lgkm = 0;
@@ -11769,12 +11769,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
 
          bool needs_instance_index = false;
          bool needs_start_instance = false;
-         u_foreach_bit(i, key->state->instance_rate_inputs & attrib_mask)
+         u_foreach_bit(i, key->state.instance_rate_inputs & attrib_mask)
          {
-            needs_instance_index |= key->state->divisors[i] == 1;
-            needs_start_instance |= key->state->divisors[i] == 0;
+            needs_instance_index |= key->state.divisors[i] == 1;
+            needs_start_instance |= key->state.divisors[i] == 0;
          }
-         bool needs_vertex_index = ~key->state->instance_rate_inputs & attrib_mask;
+         bool needs_vertex_index = ~key->state.instance_rate_inputs & attrib_mask;
          if (needs_vertex_index)
             bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->ac.base_vertex),
                        get_arg_fixed(args, args->ac.vertex_id), false, Operand(s2), true);
@@ -11792,13 +11792,13 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
 
          /* calculate index */
          Operand fetch_index = Operand(vertex_index, v1);
-         if (key->state->instance_rate_inputs & (1u << loc)) {
-            uint32_t divisor = key->state->divisors[loc];
+         if (key->state.instance_rate_inputs & (1u << loc)) {
+            uint32_t divisor = key->state.divisors[loc];
             if (divisor) {
                fetch_index = instance_id;
-               if (key->state->nontrivial_divisors & (1u << loc)) {
+               if (key->state.nontrivial_divisors & (1u << loc)) {
                   unsigned index =
-                     util_bitcount(key->state->nontrivial_divisors & BITFIELD_MASK(loc));
+                     util_bitcount(key->state.nontrivial_divisors & BITFIELD_MASK(loc));
                   fetch_index = calc_nontrivial_instance_id(
                      bld, args, index, instance_id, start_instance, prolog_input,
                      nontrivial_tmp_vgpr0, nontrivial_tmp_vgpr1);
@@ -11813,11 +11813,11 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
          /* perform load */
          PhysReg cur_desc = desc.advance(i * 16);
          if ((key->misaligned_mask & (1u << loc))) {
-            unsigned dfmt = key->state->formats[loc] & 0xf;
-            unsigned nfmt = key->state->formats[loc] >> 4;
+            unsigned dfmt = key->state.formats[loc] & 0xf;
+            unsigned nfmt = key->state.formats[loc] >> 4;
             const struct ac_data_format_info* vtx_info = ac_get_data_format_info(dfmt);
             for (unsigned j = 0; j < vtx_info->num_channels; j++) {
-               bool post_shuffle = key->state->post_shuffle & (1u << loc);
+               bool post_shuffle = key->state.post_shuffle & (1u << loc);
                unsigned offset = vtx_info->chan_byte_size * (post_shuffle && j < 3 ? 2 - j : j);
 
                /* Use MUBUF to workaround hangs for byte-aligned dword loads. The Vulkan spec
@@ -11849,7 +11849,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
       }
    }
 
-   if (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi) {
+   if (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi) {
       wait_imm vm_imm;
       vm_imm.vm = 0;
       bld.sopp(aco_opcode::s_waitcnt, -1, vm_imm.pack(program->chip_class));
@@ -11857,12 +11857,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
 
    /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
     * so we may need to fix it up. */
-   u_foreach_bit(loc, (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi))
+   u_foreach_bit(loc, (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi))
    {
       PhysReg alpha(attributes_start.reg() + loc * 4u + 3);
 
-      unsigned alpha_adjust = (key->state->alpha_adjust_lo >> loc) & 0x1;
-      alpha_adjust |= ((key->state->alpha_adjust_hi >> loc) & 0x1) << 1;
+      unsigned alpha_adjust = (key->state.alpha_adjust_lo >> loc) & 0x1;
+      alpha_adjust |= ((key->state.alpha_adjust_hi >> loc) & 0x1) << 1;
 
       if (alpha_adjust == ALPHA_ADJUST_SSCALED)
          bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1));
diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp
index 8077d590855..a2f85f64d73 100644
--- a/src/amd/compiler/aco_interface.cpp
+++ b/src/amd/compiler/aco_interface.cpp
@@ -281,7 +281,7 @@ aco_compile_shader(const struct radv_nir_compiler_options* options,
 void
 aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
                       const struct aco_shader_info* info,
-                      const struct radv_vs_prolog_key* key,
+                      const struct aco_vs_prolog_key* key,
                       const struct radv_shader_args* args,
                       struct radv_prolog_binary** binary)
 {
diff --git a/src/amd/compiler/aco_interface.h b/src/amd/compiler/aco_interface.h
index 02a19c03dc9..5f293beae56 100644
--- a/src/amd/compiler/aco_interface.h
+++ b/src/amd/compiler/aco_interface.h
@@ -33,6 +33,7 @@ extern "C" {
 
 struct ac_shader_config;
 struct aco_shader_info;
+struct aco_vs_prolog_key;
 
 struct aco_compiler_statistic_info {
    char name[32];
@@ -50,7 +51,7 @@ void aco_compile_shader(const struct radv_nir_compiler_options* options,
 
 void aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
                            const struct aco_shader_info* info,
-                           const struct radv_vs_prolog_key* key,
+                           const struct aco_vs_prolog_key* key,
                            const struct radv_shader_args* args,
                            struct radv_prolog_binary** binary);
 
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index e676068db61..a6dbbe0d902 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -38,7 +38,6 @@
 #include <vector>
 
 struct radv_shader_args;
-struct radv_vs_prolog_key;
 
 namespace aco {
 
@@ -2168,7 +2167,7 @@ void select_trap_handler_shader(Program* program, struct nir_shader* shader,
                                 const struct radv_nir_compiler_options* options,
                                 const struct aco_shader_info* info,
                                 const struct radv_shader_args* args);
-void select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key,
+void select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key,
                       ac_shader_config* config,
                       const struct radv_nir_compiler_options* options,
                       const struct aco_shader_info* info,
diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h
index 83a1c304499..61969ac4dad 100644
--- a/src/amd/compiler/aco_shader_info.h
+++ b/src/amd/compiler/aco_shader_info.h
@@ -35,6 +35,29 @@ extern "C" {
 
 #define ACO_MAX_SO_OUTPUTS 64
 #define ACO_MAX_SO_BUFFERS 4
+#define ACO_MAX_VERTEX_ATTRIBS 32
+
+struct aco_vs_input_state {
+   uint32_t instance_rate_inputs;
+   uint32_t nontrivial_divisors;
+   uint32_t post_shuffle;
+   /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
+    * using bitwise arithmetic.
+    */
+   uint32_t alpha_adjust_lo;
+   uint32_t alpha_adjust_hi;
+
+   uint32_t divisors[ACO_MAX_VERTEX_ATTRIBS];
+   uint8_t formats[ACO_MAX_VERTEX_ATTRIBS];
+};
+
+struct aco_vs_prolog_key {
+   struct aco_vs_input_state state;
+   unsigned num_attributes;
+   uint32_t misaligned_mask;
+   bool is_ngg;
+   gl_shader_stage next_stage;
+};
 
 struct aco_vp_output_info {
    uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h
index c69c69a86de..87f84f79a3e 100644
--- a/src/amd/vulkan/radv_aco_shader_info.h
+++ b/src/amd/vulkan/radv_aco_shader_info.h
@@ -103,8 +103,30 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info,
    radv_aco_convert_shader_so_info(aco_info, radv);
    aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size;
 }
+
+#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x
+#define ASSIGN_VS_STATE_FIELD_CP(x) memcpy(&aco_info->state.x, &radv->state->x, sizeof(radv->state->x))
+static inline void
+radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_key *aco_info,
+			       const struct radv_vs_prolog_key *radv)
+{
+   ASSIGN_VS_STATE_FIELD(instance_rate_inputs);
+   ASSIGN_VS_STATE_FIELD(nontrivial_divisors);
+   ASSIGN_VS_STATE_FIELD(post_shuffle);
+   ASSIGN_VS_STATE_FIELD(alpha_adjust_lo);
+   ASSIGN_VS_STATE_FIELD(alpha_adjust_hi);
+   ASSIGN_VS_STATE_FIELD_CP(divisors);
+   ASSIGN_VS_STATE_FIELD_CP(formats);
+   ASSIGN_FIELD(num_attributes);
+   ASSIGN_FIELD(misaligned_mask);
+   ASSIGN_FIELD(is_ngg);
+   ASSIGN_FIELD(next_stage);
+}
+#undef ASSIGN_VS_STATE_FIELD
+#undef ASSIGN_VS_STATE_FIELD_CP
 #undef ASSIGN_FIELD
 #undef ASSIGN_FIELD_CP
 #undef ASSIGN_OUTINFO
 
+
 #endif
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index e17d6ff28e9..460fa0e0b7f 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -2186,8 +2186,10 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
 
    struct radv_prolog_binary *binary = NULL;
    struct aco_shader_info ac_info;
+   struct aco_vs_prolog_key ac_key;
    radv_aco_convert_shader_info(&ac_info, &info);
-   aco_compile_vs_prolog(&options, &ac_info, key, &args, &binary);
+   radv_aco_convert_vs_prolog_key(&ac_key, key);
+   aco_compile_vs_prolog(&options, &ac_info, &ac_key, &args, &binary);
    struct radv_shader_prolog *prolog = upload_vs_prolog(device, binary, info.wave_size);
    if (prolog) {
       prolog->nontrivial_divisors = key->state->nontrivial_divisors;



More information about the mesa-commit mailing list