Mesa (main): radv: rewrite RADV_FORCE_VRS directly in NIR

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Feb 9 19:41:19 UTC 2022


Module: Mesa
Branch: main
Commit: 2451290bc479b419874eb3ba2ab561a660157bba
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2451290bc479b419874eb3ba2ab561a660157bba

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Tue Jul 13 13:29:57 2021 +0200

radv: rewrite RADV_FORCE_VRS directly in NIR

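This introduces a small NIR pass that exports
VARYING_SLOT_PRIMITIVE_SHADING_RATE from the last VGT API stage
(VS or GS) when RADV_FORCE_VRS is used and the shader doesn't already
write that output, instead of duplicating this logic in both backend
compilers (ACO and LLVM).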

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14907>

---

 src/amd/compiler/aco_instruction_selection.cpp | 29 +-------
 src/amd/vulkan/radv_nir_to_llvm.c              | 28 +-------
 src/amd/vulkan/radv_pipeline.c                 | 43 ++++++++---
 src/amd/vulkan/radv_shader.c                   | 99 ++++++++++++++++++++++----
 src/amd/vulkan/radv_shader.h                   |  3 +-
 src/amd/vulkan/radv_shader_info.c              |  7 +-
 6 files changed, 123 insertions(+), 86 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 851defefcec..b8b64fc1a00 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10746,30 +10746,6 @@ export_vs_psiz_layer_viewport_vrs(isel_context* ctx, int* next_pos,
    if (ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_SHADING_RATE]) {
       exp->operands[1] = Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]);
       exp->enabled_mask |= 0x2;
-   } else if (ctx->options->force_vrs_rates) {
-      /* Bits [2:3] = VRS rate X
-       * Bits [4:5] = VRS rate Y
-       *
-       * The range is [-2, 1]. Values:
-       *   1: 2x coarser shading rate in that direction.
-       *   0: normal shading rate
-       *  -1: 2x finer shading rate (sample shading, not directional)
-       *  -2: 4x finer shading rate (sample shading, not directional)
-       *
-       * Sample shading can't go above 8 samples, so both numbers can't be -2
-       * at the same time.
-       */
-      Builder bld(ctx->program, ctx->block);
-      Temp rates = bld.copy(bld.def(v1), Operand::c32((unsigned)ctx->options->force_vrs_rates));
-
-      /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
-      Temp cond = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), Operand::c32(0x3f800000u),
-                           Operand(ctx->outputs.temps[VARYING_SLOT_POS + 3]));
-      rates = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
-                       bld.copy(bld.def(v1), Operand::zero()), rates, cond);
-
-      exp->operands[1] = Operand(rates);
-      exp->enabled_mask |= 0x2;
    }
 
    exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0;
@@ -10818,11 +10794,8 @@ create_vs_exports(isel_context* ctx)
    int next_pos = 0;
    export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
 
-   bool force_vrs_per_vertex = ctx->options->force_vrs_rates && ctx->stage != mesh_ngg;
-   bool writes_primitive_shading_rate =
-      outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
    if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
-       writes_primitive_shading_rate) {
+       outinfo->writes_primitive_shading_rate) {
       export_vs_psiz_layer_viewport_vrs(ctx, &next_pos, outinfo);
    }
    if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 6aaa6233846..c3779f701e3 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -1257,13 +1257,10 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
       pos_args[0].out[3] = ctx->ac.f32_1; /* W */
    }
 
-   bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
-                                        ctx->options->force_vrs_rates;
-
    if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer ||
-       outinfo->writes_viewport_index || writes_primitive_shading_rate) {
+       outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate) {
       pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
-                                      (writes_primitive_shading_rate == true ? 2 : 0) |
+                                      (outinfo->writes_primitive_shading_rate == true ? 2 : 0) |
                                       (outinfo->writes_layer == true ? 4 : 0));
       pos_args[1].valid_mask = 0;
       pos_args[1].done = 0;
@@ -1298,27 +1295,6 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
 
       if (outinfo->writes_primitive_shading_rate) {
          pos_args[1].out[1] = primitive_shading_rate;
-      } else if (ctx->options->force_vrs_rates) {
-         /* Bits [2:3] = VRS rate X
-          * Bits [4:5] = VRS rate Y
-          *
-          * The range is [-2, 1]. Values:
-          *   1: 2x coarser shading rate in that direction.
-          *   0: normal shading rate
-          *  -1: 2x finer shading rate (sample shading, not directional)
-          *  -2: 4x finer shading rate (sample shading, not directional)
-          *
-          * Sample shading can't go above 8 samples, so both numbers can't be -2 at the same time.
-          */
-         LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, ctx->options->force_vrs_rates, false);
-         LLVMValueRef cond;
-         LLVMValueRef v;
-
-         /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
-         cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE, pos_args[0].out[3], ctx->ac.f32_1, "");
-         v = LLVMBuildSelect(ctx->ac.builder, cond, rates, ctx->ac.i32_0, "");
-
-         pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
       }
    }
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index c9bb6712791..dbb75dcd6f9 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3629,6 +3629,26 @@ radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
    return VK_SUCCESS;
 }
 
+static bool
+radv_consider_force_vrs(const struct radv_pipeline *pipeline, nir_shader **nir)
+{
+   struct radv_device *device = pipeline->device;
+
+   if (device->force_vrs == RADV_FORCE_VRS_NONE)
+      return false;
+
+   /* Only VS and GS are supported for now. */
+   if (pipeline->graphics.last_vgt_api_stage != MESA_SHADER_VERTEX &&
+       pipeline->graphics.last_vgt_api_stage != MESA_SHADER_GEOMETRY)
+      return false;
+
+   nir_shader *last_vgt_shader = nir[pipeline->graphics.last_vgt_api_stage];
+   if (last_vgt_shader->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE))
+      return false;
+
+   return true;
+}
+
 VkResult
 radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
                     struct radv_device *device, struct radv_pipeline_cache *cache,
@@ -3729,6 +3749,14 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
       radv_stop_feedback(stage_feedbacks[i], false);
    }
 
+   /* Force per-vertex VRS. */
+   if (radv_consider_force_vrs(pipeline, nir)) {
+      assert(pipeline->graphics.last_vgt_api_stage == MESA_SHADER_VERTEX ||
+             pipeline->graphics.last_vgt_api_stage == MESA_SHADER_GEOMETRY);
+      nir_shader *last_vgt_shader = nir[pipeline->graphics.last_vgt_api_stage];
+      NIR_PASS_V(last_vgt_shader, radv_force_primitive_shading_rate, device);
+   }
+
    bool optimize_conservatively = pipeline_key->optimisations_disabled;
 
    radv_link_shaders(pipeline, pipeline_key, nir, optimize_conservatively);
@@ -4738,10 +4766,8 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
    cull_dist_mask = outinfo->cull_dist_mask;
    total_mask = clip_dist_mask | cull_dist_mask;
 
-   bool writes_primitive_shading_rate =
-      outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
    bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
-                       outinfo->writes_viewport_index || writes_primitive_shading_rate;
+                       outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
    unsigned spi_vs_out_config, nparams;
 
    /* VS is required to export at least one param. */
@@ -4768,7 +4794,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
                           S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
                              S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
                              S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
-                             S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
+                             S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
                              S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
                              S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
                              S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
@@ -4857,13 +4883,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
    cull_dist_mask = outinfo->cull_dist_mask;
    total_mask = clip_dist_mask | cull_dist_mask;
 
-   /* Primitive shading rate is written as a per-primitive output in mesh shaders. */
-   bool force_vrs_per_vertex =
-      pipeline->device->force_vrs != RADV_FORCE_VRS_NONE && es_type != MESA_SHADER_MESH;
-   bool writes_primitive_shading_rate =
-      outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
    bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
-                       outinfo->writes_viewport_index || writes_primitive_shading_rate;
+                       outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
    bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id);
    bool break_wave_at_eoi = false;
    unsigned ge_cntl;
@@ -4906,7 +4927,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
                           S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
                              S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
                              S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
-                             S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
+                             S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
                              S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
                              S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
                              S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 8514c4702af..efd478a9926 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -458,6 +458,90 @@ radv_lower_primitive_shading_rate(nir_shader *nir)
    return progress;
 }
 
+bool
+radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   bool progress = false;
+   unsigned vrs_rate = 0;
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   /* Bits [2:3] = VRS rate X
+    * Bits [4:5] = VRS rate Y
+    *
+    * The range is [-2, 1]. Values:
+    *   1: 2x coarser shading rate in that direction.
+    *   0: normal shading rate
+    *  -1: 2x finer shading rate (sample shading, not directional)
+    *  -2: 4x finer shading rate (sample shading, not directional)
+    *
+    * Sample shading can't go above 8 samples, so both numbers can't be -2
+    * at the same time.
+    */
+   switch (device->force_vrs) {
+   case RADV_FORCE_VRS_2x2:
+      vrs_rate = (1u << 2) | (1u << 4);
+      break;
+   case RADV_FORCE_VRS_2x1:
+      vrs_rate = (1u << 2) | (0u << 4);
+      break;
+   case RADV_FORCE_VRS_1x2:
+      vrs_rate = (0u << 2) | (1u << 4);
+      break;
+   default:
+      unreachable("Invalid RADV_FORCE_VRS value");
+   }
+
+   nir_foreach_block_reverse(block, impl) {
+      nir_foreach_instr_reverse(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         if (intr->intrinsic != nir_intrinsic_store_deref)
+            continue;
+
+         nir_variable *var = nir_intrinsic_get_var(intr, 0);
+         if (var->data.mode != nir_var_shader_out ||
+             var->data.location != VARYING_SLOT_POS)
+            continue;
+
+         b.cursor = nir_after_instr(instr);
+
+         nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intr->src[1].ssa, 3);
+
+         /* Use coarse shading if the value of Pos.W can't be determined or if its value is != 1
+          * (typical for non-GUI elements).
+          */
+         if (!nir_ssa_scalar_is_const(scalar_idx) ||
+             nir_ssa_scalar_as_uint(scalar_idx) != 0x3f800000u) {
+
+            var = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), "vrs rate");
+            var->data.location = VARYING_SLOT_PRIMITIVE_SHADING_RATE;
+            var->data.interpolation = INTERP_MODE_NONE;
+
+            nir_ssa_def *pos_w = nir_channel(&b, intr->src[1].ssa, 3);
+            nir_ssa_def *val = nir_bcsel(&b, nir_fneu(&b, pos_w, nir_imm_float(&b, 1.0f)),
+                                             nir_imm_int(&b, vrs_rate), nir_imm_int(&b, 0));
+
+            nir_deref_instr *deref = nir_build_deref_var(&b, var);
+            nir_store_deref(&b, deref, val, 0x1);
+
+            /* Update outputs_written to reflect that the pass added a new output. */
+            nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE);
+
+            progress = true;
+            if (nir->info.stage == MESA_SHADER_VERTEX)
+               return progress;
+         }
+      }
+   }
+
+   return progress;
+}
+
 nir_shader *
 radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
                            const char *entrypoint_name, gl_shader_stage stage,
@@ -802,6 +886,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
         nir->info.stage == MESA_SHADER_GEOMETRY ||
         nir->info.stage == MESA_SHADER_MESH) &&
        nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) {
+      /* Lower primitive shading rate to match HW requirements. */
       NIR_PASS_V(nir, radv_lower_primitive_shading_rate);
    }
 
@@ -1868,20 +1953,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
    options->debug.func = radv_compiler_debug;
    options->debug.private_data = &debug_data;
 
-   switch (options->key.ps.force_vrs) {
-   case RADV_FORCE_VRS_2x2:
-      options->force_vrs_rates = (1u << 2) | (1u << 4);
-      break;
-   case RADV_FORCE_VRS_2x1:
-      options->force_vrs_rates = (1u << 2) | (0u << 4);
-      break;
-   case RADV_FORCE_VRS_1x2:
-      options->force_vrs_rates = (0u << 2) | (1u << 4);
-      break;
-   default:
-      break;
-   }
-
    struct radv_shader_args args = {0};
    args.is_gs_copy_shader = gs_copy_shader;
    args.is_trap_handler_shader = trap_handler_shader;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 3c5f0996919..7e477a020db 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -127,7 +127,6 @@ struct radv_nir_compiler_options {
    enum chip_class chip_class;
    const struct radeon_info *info;
    uint32_t address32_hi;
-   uint8_t force_vrs_rates;
 
    struct {
       void (*func)(void *private_data, enum radv_compiler_debug_level level, const char *message);
@@ -663,4 +662,6 @@ bool radv_consider_culling(struct radv_device *device, struct nir_shader *nir,
 
 void radv_get_nir_options(struct radv_physical_device *device);
 
+bool radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device);
+
 #endif
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index e0e2bc3940b..03f6df238d3 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -678,15 +678,10 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
             outinfo->writes_layer = true;
       }
 
-      /* VS/TES/GS: shading rate is per-vertex, MS: it's per-primitive. */
-      bool force_vrs_per_vertex =
-         device->force_vrs != RADV_FORCE_VRS_NONE && nir->info.stage != MESA_SHADER_MESH;
-      bool writes_primitive_shading_rate =
-         outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
       int pos_written = 0x1;
 
       if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer ||
-          writes_primitive_shading_rate)
+          outinfo->writes_primitive_shading_rate)
          pos_written |= 1 << 1;
 
       unsigned num_clip_distances = util_bitcount(outinfo->clip_dist_mask);



More information about the mesa-commit mailing list