Mesa (main): aco: remove radv vs prolog key from aco internals.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed May 11 19:36:12 UTC 2022
Module: Mesa
Branch: main
Commit: c44d5d61ce7251ea935723ef888f916f69cab9a1
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c44d5d61ce7251ea935723ef888f916f69cab9a1
Author: Dave Airlie <airlied at redhat.com>
Date: Thu May 5 14:27:01 2022 +1000
aco: remove radv vs prolog key from aco internals.
This creates an aco-specific key and converts radv to use it.
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16342>
---
src/amd/compiler/aco_instruction_selection.cpp | 34 +++++++++++++-------------
src/amd/compiler/aco_interface.cpp | 2 +-
src/amd/compiler/aco_interface.h | 3 ++-
src/amd/compiler/aco_ir.h | 3 +--
src/amd/compiler/aco_shader_info.h | 23 +++++++++++++++++
src/amd/vulkan/radv_aco_shader_info.h | 22 +++++++++++++++++
src/amd/vulkan/radv_shader.c | 4 ++-
7 files changed, 69 insertions(+), 22 deletions(-)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index d948ba1b0b7..bbbe0922c60 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11683,7 +11683,7 @@ calc_nontrivial_instance_id(Builder& bld, const struct radv_shader_args* args, u
}
void
-select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shader_config* config,
+select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,
const struct radv_shader_args* args, unsigned* num_preserved_sgprs)
@@ -11710,7 +11710,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
bld.sopp(aco_opcode::s_setprio, -1u, 0x3u);
uint32_t attrib_mask = BITFIELD_MASK(key->num_attributes);
- bool has_nontrivial_divisors = key->state->nontrivial_divisors & attrib_mask;
+ bool has_nontrivial_divisors = key->state.nontrivial_divisors & attrib_mask;
wait_imm lgkm_imm;
lgkm_imm.lgkm = 0;
@@ -11769,12 +11769,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
bool needs_instance_index = false;
bool needs_start_instance = false;
- u_foreach_bit(i, key->state->instance_rate_inputs & attrib_mask)
+ u_foreach_bit(i, key->state.instance_rate_inputs & attrib_mask)
{
- needs_instance_index |= key->state->divisors[i] == 1;
- needs_start_instance |= key->state->divisors[i] == 0;
+ needs_instance_index |= key->state.divisors[i] == 1;
+ needs_start_instance |= key->state.divisors[i] == 0;
}
- bool needs_vertex_index = ~key->state->instance_rate_inputs & attrib_mask;
+ bool needs_vertex_index = ~key->state.instance_rate_inputs & attrib_mask;
if (needs_vertex_index)
bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->ac.base_vertex),
get_arg_fixed(args, args->ac.vertex_id), false, Operand(s2), true);
@@ -11792,13 +11792,13 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
/* calculate index */
Operand fetch_index = Operand(vertex_index, v1);
- if (key->state->instance_rate_inputs & (1u << loc)) {
- uint32_t divisor = key->state->divisors[loc];
+ if (key->state.instance_rate_inputs & (1u << loc)) {
+ uint32_t divisor = key->state.divisors[loc];
if (divisor) {
fetch_index = instance_id;
- if (key->state->nontrivial_divisors & (1u << loc)) {
+ if (key->state.nontrivial_divisors & (1u << loc)) {
unsigned index =
- util_bitcount(key->state->nontrivial_divisors & BITFIELD_MASK(loc));
+ util_bitcount(key->state.nontrivial_divisors & BITFIELD_MASK(loc));
fetch_index = calc_nontrivial_instance_id(
bld, args, index, instance_id, start_instance, prolog_input,
nontrivial_tmp_vgpr0, nontrivial_tmp_vgpr1);
@@ -11813,11 +11813,11 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
/* perform load */
PhysReg cur_desc = desc.advance(i * 16);
if ((key->misaligned_mask & (1u << loc))) {
- unsigned dfmt = key->state->formats[loc] & 0xf;
- unsigned nfmt = key->state->formats[loc] >> 4;
+ unsigned dfmt = key->state.formats[loc] & 0xf;
+ unsigned nfmt = key->state.formats[loc] >> 4;
const struct ac_data_format_info* vtx_info = ac_get_data_format_info(dfmt);
for (unsigned j = 0; j < vtx_info->num_channels; j++) {
- bool post_shuffle = key->state->post_shuffle & (1u << loc);
+ bool post_shuffle = key->state.post_shuffle & (1u << loc);
unsigned offset = vtx_info->chan_byte_size * (post_shuffle && j < 3 ? 2 - j : j);
/* Use MUBUF to workaround hangs for byte-aligned dword loads. The Vulkan spec
@@ -11849,7 +11849,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
}
}
- if (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi) {
+ if (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi) {
wait_imm vm_imm;
vm_imm.vm = 0;
bld.sopp(aco_opcode::s_waitcnt, -1, vm_imm.pack(program->chip_class));
@@ -11857,12 +11857,12 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
* so we may need to fix it up. */
- u_foreach_bit(loc, (key->state->alpha_adjust_lo | key->state->alpha_adjust_hi))
+ u_foreach_bit(loc, (key->state.alpha_adjust_lo | key->state.alpha_adjust_hi))
{
PhysReg alpha(attributes_start.reg() + loc * 4u + 3);
- unsigned alpha_adjust = (key->state->alpha_adjust_lo >> loc) & 0x1;
- alpha_adjust |= ((key->state->alpha_adjust_hi >> loc) & 0x1) << 1;
+ unsigned alpha_adjust = (key->state.alpha_adjust_lo >> loc) & 0x1;
+ alpha_adjust |= ((key->state.alpha_adjust_hi >> loc) & 0x1) << 1;
if (alpha_adjust == ALPHA_ADJUST_SSCALED)
bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1));
diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp
index 8077d590855..a2f85f64d73 100644
--- a/src/amd/compiler/aco_interface.cpp
+++ b/src/amd/compiler/aco_interface.cpp
@@ -281,7 +281,7 @@ aco_compile_shader(const struct radv_nir_compiler_options* options,
void
aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,
- const struct radv_vs_prolog_key* key,
+ const struct aco_vs_prolog_key* key,
const struct radv_shader_args* args,
struct radv_prolog_binary** binary)
{
diff --git a/src/amd/compiler/aco_interface.h b/src/amd/compiler/aco_interface.h
index 02a19c03dc9..5f293beae56 100644
--- a/src/amd/compiler/aco_interface.h
+++ b/src/amd/compiler/aco_interface.h
@@ -33,6 +33,7 @@ extern "C" {
struct ac_shader_config;
struct aco_shader_info;
+struct aco_vs_prolog_key;
struct aco_compiler_statistic_info {
char name[32];
@@ -50,7 +51,7 @@ void aco_compile_shader(const struct radv_nir_compiler_options* options,
void aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,
- const struct radv_vs_prolog_key* key,
+ const struct aco_vs_prolog_key* key,
const struct radv_shader_args* args,
struct radv_prolog_binary** binary);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index e676068db61..a6dbbe0d902 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -38,7 +38,6 @@
#include <vector>
struct radv_shader_args;
-struct radv_vs_prolog_key;
namespace aco {
@@ -2168,7 +2167,7 @@ void select_trap_handler_shader(Program* program, struct nir_shader* shader,
const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,
const struct radv_shader_args* args);
-void select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key,
+void select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key,
ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct aco_shader_info* info,
diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h
index 83a1c304499..61969ac4dad 100644
--- a/src/amd/compiler/aco_shader_info.h
+++ b/src/amd/compiler/aco_shader_info.h
@@ -35,6 +35,29 @@ extern "C" {
#define ACO_MAX_SO_OUTPUTS 64
#define ACO_MAX_SO_BUFFERS 4
+#define ACO_MAX_VERTEX_ATTRIBS 32
+
+struct aco_vs_input_state {
+ uint32_t instance_rate_inputs;
+ uint32_t nontrivial_divisors;
+ uint32_t post_shuffle;
+ /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
+ * using bitwise arithmetic.
+ */
+ uint32_t alpha_adjust_lo;
+ uint32_t alpha_adjust_hi;
+
+ uint32_t divisors[ACO_MAX_VERTEX_ATTRIBS];
+ uint8_t formats[ACO_MAX_VERTEX_ATTRIBS];
+};
+
+struct aco_vs_prolog_key {
+ struct aco_vs_input_state state;
+ unsigned num_attributes;
+ uint32_t misaligned_mask;
+ bool is_ngg;
+ gl_shader_stage next_stage;
+};
struct aco_vp_output_info {
uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h
index c69c69a86de..87f84f79a3e 100644
--- a/src/amd/vulkan/radv_aco_shader_info.h
+++ b/src/amd/vulkan/radv_aco_shader_info.h
@@ -103,8 +103,30 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info,
radv_aco_convert_shader_so_info(aco_info, radv);
aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size;
}
+
+#define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x
+#define ASSIGN_VS_STATE_FIELD_CP(x) memcpy(&aco_info->state.x, &radv->state->x, sizeof(radv->state->x))
+static inline void
+radv_aco_convert_vs_prolog_key(struct aco_vs_prolog_key *aco_info,
+ const struct radv_vs_prolog_key *radv)
+{
+ ASSIGN_VS_STATE_FIELD(instance_rate_inputs);
+ ASSIGN_VS_STATE_FIELD(nontrivial_divisors);
+ ASSIGN_VS_STATE_FIELD(post_shuffle);
+ ASSIGN_VS_STATE_FIELD(alpha_adjust_lo);
+ ASSIGN_VS_STATE_FIELD(alpha_adjust_hi);
+ ASSIGN_VS_STATE_FIELD_CP(divisors);
+ ASSIGN_VS_STATE_FIELD_CP(formats);
+ ASSIGN_FIELD(num_attributes);
+ ASSIGN_FIELD(misaligned_mask);
+ ASSIGN_FIELD(is_ngg);
+ ASSIGN_FIELD(next_stage);
+}
+#undef ASSIGN_VS_STATE_FIELD
+#undef ASSIGN_VS_STATE_FIELD_CP
#undef ASSIGN_FIELD
#undef ASSIGN_FIELD_CP
#undef ASSIGN_OUTINFO
+
#endif
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index e17d6ff28e9..460fa0e0b7f 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -2186,8 +2186,10 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
struct radv_prolog_binary *binary = NULL;
struct aco_shader_info ac_info;
+ struct aco_vs_prolog_key ac_key;
radv_aco_convert_shader_info(&ac_info, &info);
- aco_compile_vs_prolog(&options, &ac_info, key, &args, &binary);
+ radv_aco_convert_vs_prolog_key(&ac_key, key);
+ aco_compile_vs_prolog(&options, &ac_info, &ac_key, &args, &binary);
struct radv_shader_prolog *prolog = upload_vs_prolog(device, binary, info.wave_size);
if (prolog) {
prolog->nontrivial_divisors = key->state->nontrivial_divisors;
More information about the mesa-commit mailing list