Mesa (main): aco: implement VS input loads with prologs
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Oct 13 05:33:39 UTC 2021
Module: Mesa
Branch: main
Commit: d45958f82e4526f809dcb03ff6b3b0b438803ecb
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d45958f82e4526f809dcb03ff6b3b0b438803ecb
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Mon May 17 17:56:28 2021 +0100
aco: implement VS input loads with prologs
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11717>
---
src/amd/compiler/aco_insert_waitcnt.cpp | 7 ++++
src/amd/compiler/aco_instruction_selection.cpp | 46 ++++++++++++++++++++++++--
src/amd/compiler/aco_ir.h | 2 ++
src/amd/compiler/aco_statistics.cpp | 9 +++++
4 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index cb6c2a60804..2934c71c087 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -770,6 +770,13 @@ insert_wait_states(Program* program)
std::stack<unsigned, std::vector<unsigned>> loop_header_indices;
unsigned loop_progress = 0;
+ if (program->stage.has(SWStage::VS) && program->info->vs.dynamic_inputs) {
+ for (Definition def : program->vs_inputs) {
+ update_counters(in_ctx[0], event_vmem);
+ insert_wait_entry(in_ctx[0], def, event_vmem);
+ }
+ }
+
for (unsigned i = 0; i < program->blocks.size();) {
Block& current = program->blocks[i++];
wait_ctx ctx = in_ctx[current.index];
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 7d65e7855ff..b4ab24dd80e 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5004,7 +5004,36 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
nir_src offset = *nir_get_io_offset_src(instr);
- if (ctx->shader->info.stage == MESA_SHADER_VERTEX) {
+ if (ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->args->shader_info->vs.dynamic_inputs) {
+ if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
+ isel_err(offset.ssa->parent_instr,
+ "Unimplemented non-zero nir_intrinsic_load_input offset");
+
+ unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0;
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned bitsize = instr->dest.ssa.bit_size;
+ unsigned num_components = instr->dest.ssa.num_components;
+
+ Temp input = get_arg(ctx, ctx->args->vs_inputs[location]);
+
+ aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
+ aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
+ std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
+ for (unsigned i = 0; i < num_components; i++) {
+ elems[i] = emit_extract_vector(ctx, input, component + i, bitsize == 64 ? v2 : v1);
+ if (bitsize == 16) {
+ if (nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float)
+ elems[i] = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), elems[i]);
+ else
+ elems[i] = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), elems[i],
+ Operand::c32(0u));
+ }
+ vec->operands[i] = Operand(elems[i]);
+ }
+ vec->definitions[0] = Definition(dst);
+ ctx->block->instructions.emplace_back(std::move(vec));
+ ctx->allocated_vec.emplace(dst.id(), elems);
+ } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX) {
if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
isel_err(offset.ssa->parent_instr,
@@ -11273,6 +11302,18 @@ add_startpgm(struct isel_context* ctx)
ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets);
ctx->program->scratch_offset = get_arg(ctx, ctx->args->ac.scratch_offset);
+ if (ctx->stage.has(SWStage::VS) && ctx->program->info->vs.dynamic_inputs) {
+ unsigned num_attributes = util_last_bit(ctx->program->info->vs.vb_desc_usage_mask);
+ for (unsigned i = 0; i < num_attributes; i++) {
+ Definition def(get_arg(ctx, ctx->args->vs_inputs[i]));
+
+ unsigned idx = ctx->args->vs_inputs[i].arg_index;
+ def.setFixed(PhysReg(256 + ctx->args->ac.args[idx].offset));
+
+ ctx->program->vs_inputs.push_back(def);
+ }
+ }
+
return instr;
}
@@ -11571,7 +11612,8 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
split_arguments(&ctx, startpgm);
- if (program->stage.has(SWStage::VS) || program->stage.has(SWStage::TES)) {
+ if (!args->shader_info->vs.has_prolog &&
+ (program->stage.has(SWStage::VS) || program->stage.has(SWStage::TES))) {
Builder(ctx.program, ctx.block).sopp(aco_opcode::s_setprio, -1u, 0x3u);
}
}
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 66081d9db45..8de4f455aec 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -2073,6 +2073,8 @@ public:
unsigned next_divergent_if_logical_depth = 0;
unsigned next_uniform_if_depth = 0;
+ std::vector<Definition> vs_inputs;
+
struct {
FILE* output = stderr;
bool shorten_messages = false;
diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp
index ce114e3f879..8ccb5198b01 100644
--- a/src/amd/compiler/aco_statistics.cpp
+++ b/src/amd/compiler/aco_statistics.cpp
@@ -473,6 +473,15 @@ collect_preasm_stats(Program* program)
double usage[(int)BlockCycleEstimator::resource_count] = {0};
std::vector<BlockCycleEstimator> blocks(program->blocks.size(), program);
+ if (program->stage.has(SWStage::VS) && program->info->vs.has_prolog) {
+ unsigned vs_input_latency = 320;
+ for (Definition def : program->vs_inputs) {
+ blocks[0].vm.push_back(vs_input_latency);
+ for (unsigned i = 0; i < def.size(); i++)
+ blocks[0].reg_available[def.physReg().reg() + i] = vs_input_latency;
+ }
+ }
+
for (Block& block : program->blocks) {
BlockCycleEstimator& block_est = blocks[block.index];
for (unsigned pred : block.linear_preds)
More information about the mesa-commit mailing list