Mesa (master): aco: implement GS copy shaders
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jan 24 14:01:19 UTC 2020
Module: Mesa
Branch: master
Commit: f8f7712666b738fc9ebd4a6390563e44db46b68f
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f8f7712666b738fc9ebd4a6390563e44db46b68f
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Fri Nov 15 11:31:03 2019 +0000
aco: implement GS copy shaders
v5: rebase on float_controls changes
v7: rebase after shader args MR and load/store vectorizer MR
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/2421>
---
src/amd/compiler/aco_instruction_selection.cpp | 180 +++++++++++++-
.../compiler/aco_instruction_selection_setup.cpp | 265 +++++++++++----------
src/amd/compiler/aco_interface.cpp | 7 +-
src/amd/compiler/aco_ir.h | 23 +-
4 files changed, 327 insertions(+), 148 deletions(-)
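
[Context for readers: the copy shader added below runs on the hardware VS stage and replays the geometry shader's GSVS ring output as ordinary VS exports. A minimal sketch of the ring addressing used in the new select_gs_copy_shader(), assuming the layout the commit encodes (one dword component slot every vertices_out * 16 * 4 bytes, plus vertex_id * 4 per thread); the helper names here are ours, not part of the commit, and the 4096 split exists because the MUBUF immediate offset field is only 12 bits wide:]

#include <cstdint>

struct RingAddr {
   uint32_t voffset;      /* per-thread VGPR offset */
   uint32_t const_offset; /* MUBUF immediate offset, must stay < 4096 */
};

RingAddr gsvs_ring_addr(uint32_t vertex_id, uint32_t component_slot,
                        uint32_t vertices_out)
{
   uint32_t voffset = vertex_id << 2;                  /* the v_lshlrev_b32 below */
   uint32_t const_offset = component_slot * vertices_out * 16 * 4;
   if (const_offset >= 4096u) {                        /* would overflow the 12-bit field */
      voffset += const_offset / 4096u * 4096u;         /* fold the excess into the VGPR add */
      const_offset %= 4096u;
   }
   return {voffset, const_offset};
}
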
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 1792e831222..bbdc2dbf3da 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -25,6 +25,7 @@
#include <algorithm>
#include <array>
+#include <stack>
#include <map>
#include "ac_shader_util.h"
@@ -8534,7 +8535,7 @@ static void create_vs_exports(isel_context *ctx)
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos);
- if (ctx->options->key.vs_common_out.export_clip_dists) {
+ if (ctx->export_clip_dists) {
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, false, &next_pos);
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
@@ -8568,7 +8569,7 @@ static void emit_stream_output(isel_context *ctx,
Temp out[4];
bool all_undef = true;
- assert(ctx->stage == vertex_vs);
+ assert(ctx->stage == vertex_vs || ctx->stage == gs_copy_vs);
for (unsigned i = 0; i < num_comps; i++) {
out[i] = ctx->vsgs_output.outputs[loc][start + i];
all_undef = all_undef && !out[i].id();
@@ -8804,13 +8805,24 @@ void setup_fp_mode(isel_context *ctx, nir_shader *shader)
ctx->block->fp_mode = program->next_fp_mode;
}
+void cleanup_cfg(Program *program)
+{
+ /* create linear_succs/logical_succs */
+ for (Block& BB : program->blocks) {
+ for (unsigned idx : BB.linear_preds)
+ program->blocks[idx].linear_succs.emplace_back(BB.index);
+ for (unsigned idx : BB.logical_preds)
+ program->blocks[idx].logical_succs.emplace_back(BB.index);
+ }
+}
+
void select_program(Program *program,
unsigned shader_count,
struct nir_shader *const *shaders,
ac_shader_config* config,
struct radv_shader_args *args)
{
- isel_context ctx = setup_isel_context(program, shader_count, shaders, config, args);
+ isel_context ctx = setup_isel_context(program, shader_count, shaders, config, args, false);
for (unsigned i = 0; i < shader_count; i++) {
nir_shader *nir = shaders[i];
@@ -8879,12 +8891,162 @@ void select_program(Program *program,
bld.smem(aco_opcode::s_dcache_wb, false);
bld.sopp(aco_opcode::s_endpgm);
- /* cleanup CFG */
- for (Block& BB : program->blocks) {
- for (unsigned idx : BB.linear_preds)
- program->blocks[idx].linear_succs.emplace_back(BB.index);
- for (unsigned idx : BB.logical_preds)
- program->blocks[idx].logical_succs.emplace_back(BB.index);
+ cleanup_cfg(program);
+}
+
+void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
+ ac_shader_config* config,
+ struct radv_shader_args *args)
+{
+ isel_context ctx = setup_isel_context(program, 1, &gs_shader, config, args, true);
+
+ program->next_fp_mode.preserve_signed_zero_inf_nan32 = false;
+ program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false;
+ program->next_fp_mode.must_flush_denorms32 = false;
+ program->next_fp_mode.must_flush_denorms16_64 = false;
+ program->next_fp_mode.care_about_round32 = false;
+ program->next_fp_mode.care_about_round16_64 = false;
+ program->next_fp_mode.denorm16_64 = fp_denorm_keep;
+ program->next_fp_mode.denorm32 = 0;
+ program->next_fp_mode.round32 = fp_round_ne;
+ program->next_fp_mode.round16_64 = fp_round_ne;
+ ctx.block->fp_mode = program->next_fp_mode;
+
+ add_startpgm(&ctx);
+ append_logical_start(ctx.block);
+
+ Builder bld(ctx.program, ctx.block);
+
+ Temp gsvs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), program->private_segment_buffer, Operand(RING_GSVS_VS * 16u));
+
+ Operand stream_id(0u);
+ if (args->shader_info->so.num_outputs)
+ stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
+ get_arg(&ctx, ctx.args->streamout_config), Operand(0x20018u));
+
+ Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), get_arg(&ctx, ctx.args->ac.vertex_id));
+
+ std::stack<Block> endif_blocks;
+
+ for (unsigned stream = 0; stream < 4; stream++) {
+ if (stream_id.isConstant() && stream != stream_id.constantValue())
+ continue;
+
+ unsigned num_components = args->shader_info->gs.num_stream_output_components[stream];
+ if (stream > 0 && (!num_components || !args->shader_info->so.num_outputs))
+ continue;
+
+ memset(ctx.vsgs_output.mask, 0, sizeof(ctx.vsgs_output.mask));
+
+ unsigned BB_if_idx = ctx.block->index;
+ Block BB_endif = Block();
+ if (!stream_id.isConstant()) {
+ /* begin IF */
+ Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand(stream));
+ append_logical_end(ctx.block);
+ ctx.block->kind |= block_kind_uniform;
+ bld.branch(aco_opcode::p_cbranch_z, cond);
+
+ BB_endif.kind |= ctx.block->kind & block_kind_top_level;
+
+ ctx.block = ctx.program->create_and_insert_block();
+ add_edge(BB_if_idx, ctx.block);
+ bld.reset(ctx.block);
+ append_logical_start(ctx.block);
+ }
+
+ unsigned offset = 0;
+ for (unsigned i = 0; i <= VARYING_SLOT_VAR31; ++i) {
+ if (args->shader_info->gs.output_streams[i] != stream)
+ continue;
+
+ unsigned output_usage_mask = args->shader_info->gs.output_usage_mask[i];
+ unsigned length = util_last_bit(output_usage_mask);
+ for (unsigned j = 0; j < length; ++j) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ unsigned const_offset = offset * args->shader_info->gs.vertices_out * 16 * 4;
+ Temp voffset = vtx_offset;
+ if (const_offset >= 4096u) {
+ voffset = bld.vadd32(bld.def(v1), Operand(const_offset / 4096u * 4096u), voffset);
+ const_offset %= 4096u;
+ }
+
+ aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)};
+ mubuf->definitions[0] = bld.def(v1);
+ mubuf->operands[0] = Operand(voffset);
+ mubuf->operands[1] = Operand(gsvs_ring);
+ mubuf->operands[2] = Operand(0u);
+ mubuf->offen = true;
+ mubuf->offset = const_offset;
+ mubuf->glc = true;
+ mubuf->slc = true;
+ mubuf->dlc = args->options->chip_class >= GFX10;
+ mubuf->barrier = barrier_none;
+ mubuf->can_reorder = true;
+
+ ctx.vsgs_output.mask[i] |= 1 << j;
+ ctx.vsgs_output.outputs[i][j] = mubuf->definitions[0].getTemp();
+
+ bld.insert(std::move(mubuf));
+
+ offset++;
+ }
+ }
+
+ if (args->shader_info->so.num_outputs) {
+ emit_streamout(&ctx, stream);
+ bld.reset(ctx.block);
+ }
+
+ if (stream == 0) {
+ create_vs_exports(&ctx);
+ ctx.block->kind |= block_kind_export_end;
+ }
+
+ if (!stream_id.isConstant()) {
+ append_logical_end(ctx.block);
+
+ /* branch from then block to endif block */
+ bld.branch(aco_opcode::p_branch);
+ add_edge(ctx.block->index, &BB_endif);
+ ctx.block->kind |= block_kind_uniform;
+
+ /* emit else block */
+ ctx.block = ctx.program->create_and_insert_block();
+ add_edge(BB_if_idx, ctx.block);
+ bld.reset(ctx.block);
+ append_logical_start(ctx.block);
+
+ endif_blocks.push(std::move(BB_endif));
+ }
}
+
+ while (!endif_blocks.empty()) {
+ Block BB_endif = std::move(endif_blocks.top());
+ endif_blocks.pop();
+
+ Block *BB_else = ctx.block;
+
+ append_logical_end(BB_else);
+ /* branch from else block to endif block */
+ bld.branch(aco_opcode::p_branch);
+ add_edge(BB_else->index, &BB_endif);
+ BB_else->kind |= block_kind_uniform;
+
+ /** emit endif merge block */
+ ctx.block = program->insert_block(std::move(BB_endif));
+ bld.reset(ctx.block);
+ append_logical_start(ctx.block);
+ }
+
+ program->config->float_mode = program->blocks[0].fp_mode.val;
+
+ append_logical_end(ctx.block);
+ ctx.block->kind |= block_kind_uniform;
+ bld.sopp(aco_opcode::s_endpgm);
+
+ cleanup_cfg(program);
}
}
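
[For reference, the Operand(0x20018u) passed to s_bfe_u32 above follows the scalar bitfield-extract encoding, where src1 packs the bit offset in bits [4:0] and the field width in bits [22:16]; 0x20018 = (2 << 16) | 24, i.e. two bits starting at bit 24 of the streamout config. A hedged illustration of the decode, with a function name of our choosing:]

#include <cstdint>

uint32_t decode_stream_id(uint32_t streamout_config)
{
   const uint32_t src1   = 0x20018u;
   const uint32_t offset = src1 & 0x1fu;         /* bits [4:0]   -> 24 */
   const uint32_t width  = (src1 >> 16) & 0x7fu; /* bits [22:16] -> 2  */
   return (streamout_config >> offset) & ((1u << width) - 1u);
}
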
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 7c53a0ecd3e..2ad39180e2c 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -85,6 +85,7 @@ struct isel_context {
uint64_t output_masks[MESA_SHADER_COMPUTE];
/* VS output information */
+ bool export_clip_dists;
unsigned num_clip_distances;
unsigned num_cull_distances;
@@ -661,6 +662,54 @@ mem_vectorize_callback(unsigned align, unsigned bit_size,
return false;
}
+void
+setup_vs_output_info(isel_context *ctx, nir_shader *nir,
+ bool export_prim_id, bool export_clip_dists,
+ radv_vs_output_info *outinfo)
+{
+ memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
+ sizeof(outinfo->vs_output_param_offset));
+
+ outinfo->param_exports = 0;
+ int pos_written = 0x1;
+ if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer)
+ pos_written |= 1 << 1;
+
+ uint64_t mask = ctx->output_masks[nir->info.stage];
+ while (mask) {
+ int idx = u_bit_scan64(&mask);
+ if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER || idx == VARYING_SLOT_PRIMITIVE_ID ||
+ ((idx == VARYING_SLOT_CLIP_DIST0 || idx == VARYING_SLOT_CLIP_DIST1) && export_clip_dists)) {
+ if (outinfo->vs_output_param_offset[idx] == AC_EXP_PARAM_UNDEFINED)
+ outinfo->vs_output_param_offset[idx] = outinfo->param_exports++;
+ }
+ }
+ if (outinfo->writes_layer &&
+ outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] == AC_EXP_PARAM_UNDEFINED) {
+ /* when ctx->options->key.has_multiview_view_index = true, the layer
+ * variable isn't declared in NIR and it's isel's job to get the layer */
+ outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = outinfo->param_exports++;
+ }
+
+ if (export_prim_id) {
+ assert(outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED);
+ outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = outinfo->param_exports++;
+ }
+
+ ctx->export_clip_dists = export_clip_dists;
+ ctx->num_clip_distances = util_bitcount(outinfo->clip_dist_mask);
+ ctx->num_cull_distances = util_bitcount(outinfo->cull_dist_mask);
+
+ assert(ctx->num_clip_distances + ctx->num_cull_distances <= 8);
+
+ if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
+ pos_written |= 1 << 2;
+ if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
+ pos_written |= 1 << 3;
+
+ outinfo->pos_exports = util_bitcount(pos_written);
+}
+
void
setup_vs_variables(isel_context *ctx, nir_shader *nir)
{
@@ -681,49 +730,8 @@ setup_vs_variables(isel_context *ctx, nir_shader *nir)
if (ctx->stage == vertex_vs) {
radv_vs_output_info *outinfo = &ctx->program->info->vs.outinfo;
-
- memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
- sizeof(outinfo->vs_output_param_offset));
-
- bool export_clip_dists = ctx->options->key.vs_common_out.export_clip_dists;
-
- outinfo->param_exports = 0;
- int pos_written = 0x1;
- if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer)
- pos_written |= 1 << 1;
-
- uint64_t mask = ctx->output_masks[nir->info.stage];
- while (mask) {
- int idx = u_bit_scan64(&mask);
- if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER || idx == VARYING_SLOT_PRIMITIVE_ID ||
- ((idx == VARYING_SLOT_CLIP_DIST0 || idx == VARYING_SLOT_CLIP_DIST1) && export_clip_dists)) {
- if (outinfo->vs_output_param_offset[idx] == AC_EXP_PARAM_UNDEFINED)
- outinfo->vs_output_param_offset[idx] = outinfo->param_exports++;
- }
- }
- if (outinfo->writes_layer &&
- outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] == AC_EXP_PARAM_UNDEFINED) {
- /* when ctx->options->key.has_multiview_view_index = true, the layer
- * variable isn't declared in NIR and it's isel's job to get the layer */
- outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = outinfo->param_exports++;
- }
-
- if (outinfo->export_prim_id) {
- assert(outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED);
- outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = outinfo->param_exports++;
- }
-
- ctx->num_clip_distances = util_bitcount(outinfo->clip_dist_mask);
- ctx->num_cull_distances = util_bitcount(outinfo->cull_dist_mask);
-
- assert(ctx->num_clip_distances + ctx->num_cull_distances <= 8);
-
- if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
- pos_written |= 1 << 2;
- if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
- pos_written |= 1 << 3;
-
- outinfo->pos_exports = util_bitcount(pos_written);
+ setup_vs_output_info(ctx, nir, outinfo->export_prim_id,
+ ctx->options->key.vs_common_out.export_clip_dists, outinfo);
} else if (ctx->stage == vertex_geometry_gs || ctx->stage == vertex_es) {
/* TODO: radv_nir_shader_info_pass() already sets this but it's larger
* than it needs to be in order to set it better, we have to improve
@@ -824,12 +832,80 @@ get_io_masks(isel_context *ctx, unsigned shader_count, struct nir_shader *const
}
}
+void
+setup_nir(isel_context *ctx, nir_shader *nir)
+{
+ Program *program = ctx->program;
+
+ /* align and copy constant data */
+ while (program->constant_data.size() % 4u)
+ program->constant_data.push_back(0);
+ ctx->constant_data_offset = program->constant_data.size();
+ program->constant_data.insert(program->constant_data.end(),
+ (uint8_t*)nir->constant_data,
+ (uint8_t*)nir->constant_data + nir->constant_data_size);
+
+ /* the variable setup has to be done before lower_io / CSE */
+ setup_variables(ctx, nir);
+
+ /* optimize and lower memory operations */
+ bool lower_to_scalar = false;
+ bool lower_pack = false;
+ if (nir_opt_load_store_vectorize(nir,
+ (nir_variable_mode)(nir_var_mem_ssbo | nir_var_mem_ubo |
+ nir_var_mem_push_const | nir_var_mem_shared),
+ mem_vectorize_callback)) {
+ lower_to_scalar = true;
+ lower_pack = true;
+ }
+ if (nir->info.stage != MESA_SHADER_COMPUTE)
+ nir_lower_io(nir, (nir_variable_mode)(nir_var_shader_in | nir_var_shader_out), type_size, (nir_lower_io_options)0);
+ nir_lower_explicit_io(nir, nir_var_mem_global, nir_address_format_64bit_global);
+
+ if (lower_to_scalar)
+ nir_lower_alu_to_scalar(nir, NULL, NULL);
+ if (lower_pack)
+ nir_lower_pack(nir);
+
+ /* lower ALU operations */
+ // TODO: implement logic64 in aco, it's more effective for sgprs
+ nir_lower_int64(nir, nir->options->lower_int64_options);
+
+ nir_opt_idiv_const(nir, 32);
+ nir_lower_idiv(nir, nir_lower_idiv_precise);
+
+ /* optimize the lowered ALU operations */
+ bool more_algebraic = true;
+ while (more_algebraic) {
+ more_algebraic = false;
+ NIR_PASS_V(nir, nir_copy_prop);
+ NIR_PASS_V(nir, nir_opt_dce);
+ NIR_PASS_V(nir, nir_opt_constant_folding);
+ NIR_PASS(more_algebraic, nir, nir_opt_algebraic);
+ }
+
+ /* cleanup passes */
+ nir_lower_load_const_to_scalar(nir);
+ nir_opt_shrink_load(nir);
+ nir_move_options move_opts = (nir_move_options)(
+ nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | nir_move_comparisons);
+ nir_opt_sink(nir, move_opts);
+ nir_opt_move(nir, move_opts);
+ nir_convert_to_lcssa(nir, true, false);
+ nir_lower_phis_to_scalar(nir);
+
+ nir_function_impl *func = nir_shader_get_entrypoint(nir);
+ nir_index_ssa_defs(func);
+ nir_metadata_require(func, nir_metadata_block_index);
+}
+
isel_context
setup_isel_context(Program* program,
unsigned shader_count,
struct nir_shader *const *shaders,
ac_shader_config* config,
- struct radv_shader_args *args)
+ struct radv_shader_args *args,
+ bool is_gs_copy_shader)
{
program->stage = 0;
for (unsigned i = 0; i < shader_count; i++) {
@@ -844,7 +920,7 @@ setup_isel_context(Program* program,
program->stage |= sw_tes;
break;
case MESA_SHADER_GEOMETRY:
- program->stage |= sw_gs;
+ program->stage |= is_gs_copy_shader ? sw_gs_copy : sw_gs;
break;
case MESA_SHADER_FRAGMENT:
program->stage |= sw_fs;
@@ -868,6 +944,8 @@ setup_isel_context(Program* program,
program->stage |= hw_fs;
else if (program->stage == sw_cs)
program->stage |= hw_cs;
+ else if (program->stage == sw_gs_copy)
+ program->stage |= hw_vs;
else if (program->stage == (sw_vs | sw_gs) && gfx9_plus && !ngg)
program->stage |= hw_gs;
else
@@ -918,94 +996,25 @@ setup_isel_context(Program* program,
get_io_masks(&ctx, shader_count, shaders);
- for (unsigned i = 0; i < shader_count; i++) {
- nir_shader *nir = shaders[i];
-
- /* align and copy constant data */
- while (program->constant_data.size() % 4u)
- program->constant_data.push_back(0);
- ctx.constant_data_offset = program->constant_data.size();
- program->constant_data.insert(program->constant_data.end(),
- (uint8_t*)nir->constant_data,
- (uint8_t*)nir->constant_data + nir->constant_data_size);
-
- /* the variable setup has to be done before lower_io / CSE */
- setup_variables(&ctx, nir);
-
- /* optimize and lower memory operations */
- bool lower_to_scalar = false;
- bool lower_pack = false;
- if (nir_opt_load_store_vectorize(nir,
- (nir_variable_mode)(nir_var_mem_ssbo | nir_var_mem_ubo |
- nir_var_mem_push_const | nir_var_mem_shared),
- mem_vectorize_callback)) {
- lower_to_scalar = true;
- lower_pack = true;
- }
- if (nir->info.stage != MESA_SHADER_COMPUTE)
- nir_lower_io(nir, (nir_variable_mode)(nir_var_shader_in | nir_var_shader_out), type_size, (nir_lower_io_options)0);
- nir_lower_explicit_io(nir, nir_var_mem_global, nir_address_format_64bit_global);
-
- if (lower_to_scalar)
- nir_lower_alu_to_scalar(nir, NULL, NULL);
- if (lower_pack)
- nir_lower_pack(nir);
-
- /* lower ALU operations */
- // TODO: implement logic64 in aco, it's more effective for sgprs
- nir_lower_int64(nir, nir->options->lower_int64_options);
-
- nir_opt_idiv_const(nir, 32);
- nir_lower_idiv(nir, nir_lower_idiv_precise);
-
- /* optimize the lowered ALU operations */
- bool more_algebraic = true;
- while (more_algebraic) {
- more_algebraic = false;
- NIR_PASS_V(nir, nir_copy_prop);
- NIR_PASS_V(nir, nir_opt_dce);
- NIR_PASS_V(nir, nir_opt_constant_folding);
- NIR_PASS(more_algebraic, nir, nir_opt_algebraic);
- }
+ unsigned scratch_size = 0;
+ if (program->stage == gs_copy_vs) {
+ assert(shader_count == 1);
+ setup_vs_output_info(&ctx, shaders[0], false, true, &args->shader_info->vs.outinfo);
+ } else {
+ for (unsigned i = 0; i < shader_count; i++) {
+ nir_shader *nir = shaders[i];
+ setup_nir(&ctx, nir);
- /* Do late algebraic optimization to turn add(a, neg(b)) back into
- * subs, then the mandatory cleanup after algebraic. Note that it may
- * produce fnegs, and if so then we need to keep running to squash
- * fneg(fneg(a)).
- */
- bool more_late_algebraic = true;
- while (more_late_algebraic) {
- more_late_algebraic = false;
- NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late);
- NIR_PASS_V(nir, nir_opt_constant_folding);
- NIR_PASS_V(nir, nir_copy_prop);
- NIR_PASS_V(nir, nir_opt_dce);
- NIR_PASS_V(nir, nir_opt_cse);
+ if (args->options->dump_preoptir) {
+ fprintf(stderr, "NIR shader before instruction selection:\n");
+ nir_print_shader(nir, stderr);
+ }
}
- /* cleanup passes */
- nir_lower_load_const_to_scalar(nir);
- nir_opt_shrink_load(nir);
- nir_move_options move_opts = (nir_move_options)(
- nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | nir_move_comparisons);
- nir_opt_sink(nir, move_opts);
- nir_opt_move(nir, move_opts);
- nir_convert_to_lcssa(nir, true, false);
- nir_lower_phis_to_scalar(nir);
-
- nir_function_impl *func = nir_shader_get_entrypoint(nir);
- nir_index_ssa_defs(func);
- nir_metadata_require(func, nir_metadata_block_index);
-
- if (args->options->dump_preoptir) {
- fprintf(stderr, "NIR shader before instruction selection:\n");
- nir_print_shader(nir, stderr);
- }
+ for (unsigned i = 0; i < shader_count; i++)
+ scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
}
- unsigned scratch_size = 0;
- for (unsigned i = 0; i < shader_count; i++)
- scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024);
ctx.block = ctx.program->create_and_insert_block();
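
[A short recap of the pos_written logic factored into setup_vs_output_info() above: each bit selects one hardware position export, and pos_exports is simply the population count. The helper below is an illustrative sketch (its name and the use of __builtin_popcount in place of util_bitcount are ours), not code from the commit:]

#include <cstdint>

uint32_t count_pos_exports(bool writes_psize_layer_or_vpidx,
                           unsigned num_clip_plus_cull)
{
   uint32_t pos_written = 0x1;        /* position itself is always exported */
   if (writes_psize_layer_or_vpidx)
      pos_written |= 1u << 1;         /* pointsize/layer/viewport-index export */
   if (num_clip_plus_cull > 0)
      pos_written |= 1u << 2;         /* clip/cull distances 0-3 */
   if (num_clip_plus_cull > 4)
      pos_written |= 1u << 3;         /* clip/cull distances 4-7 */
   return __builtin_popcount(pos_written); /* becomes outinfo->pos_exports */
}
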
diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp
index f951c4fdc5f..686fdca14e9 100644
--- a/src/amd/compiler/aco_interface.cpp
+++ b/src/amd/compiler/aco_interface.cpp
@@ -65,7 +65,10 @@ void aco_compile_shader(unsigned shader_count,
std::unique_ptr<aco::Program> program{new aco::Program};
/* Instruction Selection */
- aco::select_program(program.get(), shader_count, shaders, &config, args);
+ if (args->is_gs_copy_shader)
+ aco::select_gs_copy_shader(program.get(), shaders[0], &config, args);
+ else
+ aco::select_program(program.get(), shader_count, shaders, &config, args);
if (args->options->dump_preoptir) {
std::cerr << "After Instruction Selection:\n";
aco_print_program(program.get(), stderr);
@@ -162,7 +165,7 @@ void aco_compile_shader(unsigned shader_count,
legacy_binary->base.type = RADV_BINARY_TYPE_LEGACY;
legacy_binary->base.stage = shaders[shader_count-1]->info.stage;
- legacy_binary->base.is_gs_copy_shader = false;
+ legacy_binary->base.is_gs_copy_shader = args->is_gs_copy_shader;
legacy_binary->base.total_size = size;
memcpy(legacy_binary->data, code.data(), code.size() * sizeof(uint32_t));
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index d3ebecc081e..3f38e6aadae 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1106,23 +1106,25 @@ static constexpr Stage sw_tcs = 1 << 2;
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
-static constexpr Stage sw_mask = 0x3f;
+static constexpr Stage sw_gs_copy = 1 << 6;
+static constexpr Stage sw_mask = 0x7f;
/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
-static constexpr Stage hw_vs = 1 << 6;
-static constexpr Stage hw_es = 1 << 7; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_gs = 1 << 8;
-static constexpr Stage hw_ls = 1 << 9; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_hs = 1 << 10;
-static constexpr Stage hw_fs = 1 << 11;
-static constexpr Stage hw_cs = 1 << 12;
-static constexpr Stage hw_mask = 0x7f << 6;
+static constexpr Stage hw_vs = 1 << 7;
+static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
+static constexpr Stage hw_gs = 1 << 9;
+static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
+static constexpr Stage hw_hs = 1 << 11;
+static constexpr Stage hw_fs = 1 << 12;
+static constexpr Stage hw_cs = 1 << 13;
+static constexpr Stage hw_mask = 0x7f << 7;
/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
+static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
@@ -1219,6 +1221,9 @@ void select_program(Program *program,
struct nir_shader *const *shaders,
ac_shader_config* config,
struct radv_shader_args *args);
+void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
+ ac_shader_config* config,
+ struct radv_shader_args *args);
void lower_wqm(Program* program, live& live_vars,
const struct radv_nir_compiler_options *options);
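
[A sketch of how the Stage bits compose after this change, using the values from the aco_ir.h hunks above; the two predicate helpers and the 16-bit typedef are illustrative assumptions, not part of the commit. The software half records what the shader is, the hardware half records which HW stage executes it, and a pass can test either independently:]

#include <cstdint>

typedef uint16_t Stage;

constexpr Stage sw_gs_copy = 1 << 6;
constexpr Stage sw_mask    = 0x7f;
constexpr Stage hw_vs      = 1 << 7;
constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;

constexpr bool is_gs_copy(Stage s)    { return (s & sw_mask) == sw_gs_copy; }
constexpr bool runs_on_hw_vs(Stage s) { return (s & hw_vs) != 0; }

static_assert(is_gs_copy(gs_copy_vs), "software half is the GS copy stage");
static_assert(runs_on_hw_vs(gs_copy_vs), "hardware half is the VS stage");
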