Mesa (main): radv,aco: remap PS inputs when declaring shader arguments

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Oct 8 12:48:43 UTC 2021


Module: Mesa
Branch: main
Commit: 64030e79c536c4c77e64c93c382b1c4166cb0f8d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=64030e79c536c4c77e64c93c382b1c4166cb0f8d

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Tue Oct  5 09:12:54 2021 +0200

radv,aco: remap PS inputs when declaring shader arguments

LLVM seems to require all PS inputs to be declared, so the remapping is only
enabled when LLVM is not used for the stage.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13192>

---

 src/amd/compiler/aco_instruction_selection.cpp | 31 +------------
 src/amd/vulkan/radv_shader.c                   |  2 +
 src/amd/vulkan/radv_shader.h                   |  1 +
 src/amd/vulkan/radv_shader_args.c              | 64 +++++++++++++++++++-------
 4 files changed, 52 insertions(+), 46 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 06bc31ff004..2864cb0e4fd 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11257,37 +11257,8 @@ emit_streamout(isel_context* ctx, unsigned stream)
 Pseudo_instruction*
 add_startpgm(struct isel_context* ctx)
 {
-   unsigned arg_count = ctx->args->ac.arg_count;
-   if (ctx->stage == fragment_fs) {
-      /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr
-       * itself and then communicates the results back via the ELF binary.
-       * Mirror what LLVM does by re-mapping the VGPR arguments here.
-       *
-       * TODO: If we made the FS input scanning code into a separate pass that
-       * could run before argument setup, then this wouldn't be necessary
-       * anymore.
-       */
-      struct ac_shader_args* args = &ctx->args->ac;
-      arg_count = 0;
-      for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->arg_count; i++) {
-         if (args->args[i].file != AC_ARG_VGPR) {
-            arg_count++;
-            continue;
-         }
-
-         if (!(ctx->program->config->spi_ps_input_addr & (1 << vgpr_arg))) {
-            args->args[i].skip = true;
-         } else {
-            args->args[i].offset = vgpr_reg;
-            vgpr_reg += args->args[i].size;
-            arg_count++;
-         }
-         vgpr_arg++;
-      }
-   }
-
    aco_ptr<Pseudo_instruction> startpgm{
-      create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, arg_count)};
+      create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, ctx->args->ac.arg_count)};
    for (unsigned i = 0, arg = 0; i < ctx->args->ac.arg_count; i++) {
       if (ctx->args->ac.args[i].skip)
          continue;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 645bd240c54..6c37a72c44b 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1676,6 +1676,7 @@ radv_shader_variant_compile(struct radv_device *device, struct vk_shader_module
       options.key = *key;
 
    options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
+   options.remap_spi_ps_input = !radv_use_llvm_for_stage(device, stage);
    options.robust_buffer_access = device->robust_buffer_access;
    options.wgp_mode = radv_should_use_wgp_mode(device, stage, info);
 
@@ -1693,6 +1694,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *shader
    gl_shader_stage stage = MESA_SHADER_VERTEX;
 
    options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
+   options.remap_spi_ps_input = !radv_use_llvm_for_stage(device, stage);
    options.key.has_multiview_view_index = multiview;
    options.key.optimisations_disabled = disable_optimizations;
 
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index c1303f77669..1918460e6b4 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -112,6 +112,7 @@ struct radv_nir_compiler_options {
    bool has_image_load_dcc_bug;
    bool enable_mrt_output_nan_fixup;
    bool wgp_mode;
+   bool remap_spi_ps_input;
    enum radeon_family family;
    enum chip_class chip_class;
    const struct radeon_info *info;
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c
index 8eec8e90219..28659b751b5 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -363,6 +363,52 @@ declare_tes_input_vgprs(struct radv_shader_args *args)
    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
 }
 
+static void /* Declares the PS input VGPR arguments, then optionally skips/repacks them. */
+declare_ps_input_vgprs(struct radv_shader_args *args)
+{
+   unsigned spi_ps_input = args->shader_info->ps.spi_ps_input; /* bitmask tested per VGPR arg */
+
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
+
+   if (args->options->remap_spi_ps_input) {
+      /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
+       * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
+       * VGPR arguments here: a VGPR arg whose spi_ps_input bit is clear is skipped, others repacked.
+       */
+      unsigned arg_count = 0; /* NOTE(review): incremented below but never read in this function */
+      for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
+         if (args->ac.args[i].file != AC_ARG_VGPR) { /* non-VGPR args are left untouched */
+            arg_count++;
+            continue;
+         }
+
+         if (!(spi_ps_input & (1 << vgpr_arg))) { /* bit index = ordinal of this VGPR arg */
+            args->ac.args[i].skip = true;
+         } else {
+            args->ac.args[i].offset = vgpr_reg; /* running sum of the sizes of kept VGPR args */
+            vgpr_reg += args->ac.args[i].size;
+            arg_count++;
+         }
+         vgpr_arg++; /* advances for skipped and kept VGPR args alike */
+      }
+   }
+}
+
 static void
 declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
 {
@@ -654,22 +700,8 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
       if (args->options->explicit_scratch_args) {
          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
       }
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
-      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
+
+      declare_ps_input_vgprs(args);
       break;
    default:
       unreachable("Shader stage not implemented");



More information about the mesa-commit mailing list