Mesa (main): radv: rewrite RADV_FORCE_VRS directly in NIR
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Feb 9 19:41:19 UTC 2022
Module: Mesa
Branch: main
Commit: 2451290bc479b419874eb3ba2ab561a660157bba
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2451290bc479b419874eb3ba2ab561a660157bba
Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date: Tue Jul 13 13:29:57 2021 +0200
radv: rewrite RADV_FORCE_VRS directly in NIR
This introduces a small NIR pass that exports
VARYING_SLOT_PRIMITIVE_SHADING_RATE if RADV_FORCE_VRS is used,
instead of doing this in both backend compilers.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14907>
---
src/amd/compiler/aco_instruction_selection.cpp | 29 +-------
src/amd/vulkan/radv_nir_to_llvm.c | 28 +-------
src/amd/vulkan/radv_pipeline.c | 43 ++++++++---
src/amd/vulkan/radv_shader.c | 99 ++++++++++++++++++++++----
src/amd/vulkan/radv_shader.h | 3 +-
src/amd/vulkan/radv_shader_info.c | 7 +-
6 files changed, 123 insertions(+), 86 deletions(-)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 851defefcec..b8b64fc1a00 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10746,30 +10746,6 @@ export_vs_psiz_layer_viewport_vrs(isel_context* ctx, int* next_pos,
if (ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_SHADING_RATE]) {
exp->operands[1] = Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]);
exp->enabled_mask |= 0x2;
- } else if (ctx->options->force_vrs_rates) {
- /* Bits [2:3] = VRS rate X
- * Bits [4:5] = VRS rate Y
- *
- * The range is [-2, 1]. Values:
- * 1: 2x coarser shading rate in that direction.
- * 0: normal shading rate
- * -1: 2x finer shading rate (sample shading, not directional)
- * -2: 4x finer shading rate (sample shading, not directional)
- *
- * Sample shading can't go above 8 samples, so both numbers can't be -2
- * at the same time.
- */
- Builder bld(ctx->program, ctx->block);
- Temp rates = bld.copy(bld.def(v1), Operand::c32((unsigned)ctx->options->force_vrs_rates));
-
- /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
- Temp cond = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), Operand::c32(0x3f800000u),
- Operand(ctx->outputs.temps[VARYING_SLOT_POS + 3]));
- rates = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
- bld.copy(bld.def(v1), Operand::zero()), rates, cond);
-
- exp->operands[1] = Operand(rates);
- exp->enabled_mask |= 0x2;
}
exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0;
@@ -10818,11 +10794,8 @@ create_vs_exports(isel_context* ctx)
int next_pos = 0;
export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
- bool force_vrs_per_vertex = ctx->options->force_vrs_rates && ctx->stage != mesh_ngg;
- bool writes_primitive_shading_rate =
- outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
- writes_primitive_shading_rate) {
+ outinfo->writes_primitive_shading_rate) {
export_vs_psiz_layer_viewport_vrs(ctx, &next_pos, outinfo);
}
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 6aaa6233846..c3779f701e3 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -1257,13 +1257,10 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
pos_args[0].out[3] = ctx->ac.f32_1; /* W */
}
- bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
- ctx->options->force_vrs_rates;
-
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer ||
- outinfo->writes_viewport_index || writes_primitive_shading_rate) {
+ outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate) {
pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
- (writes_primitive_shading_rate == true ? 2 : 0) |
+ (outinfo->writes_primitive_shading_rate == true ? 2 : 0) |
(outinfo->writes_layer == true ? 4 : 0));
pos_args[1].valid_mask = 0;
pos_args[1].done = 0;
@@ -1298,27 +1295,6 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
if (outinfo->writes_primitive_shading_rate) {
pos_args[1].out[1] = primitive_shading_rate;
- } else if (ctx->options->force_vrs_rates) {
- /* Bits [2:3] = VRS rate X
- * Bits [4:5] = VRS rate Y
- *
- * The range is [-2, 1]. Values:
- * 1: 2x coarser shading rate in that direction.
- * 0: normal shading rate
- * -1: 2x finer shading rate (sample shading, not directional)
- * -2: 4x finer shading rate (sample shading, not directional)
- *
- * Sample shading can't go above 8 samples, so both numbers can't be -2 at the same time.
- */
- LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, ctx->options->force_vrs_rates, false);
- LLVMValueRef cond;
- LLVMValueRef v;
-
- /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
- cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE, pos_args[0].out[3], ctx->ac.f32_1, "");
- v = LLVMBuildSelect(ctx->ac.builder, cond, rates, ctx->ac.i32_0, "");
-
- pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
}
}
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index c9bb6712791..dbb75dcd6f9 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3629,6 +3629,26 @@ radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
return VK_SUCCESS;
}
+static bool
+radv_consider_force_vrs(const struct radv_pipeline *pipeline, nir_shader **nir)
+{
+ struct radv_device *device = pipeline->device;
+
+ if (device->force_vrs == RADV_FORCE_VRS_NONE)
+ return false; /* No forced VRS mode is set on the device. */
+
+ /* Only VS and GS are supported for now, so the pipeline's last VGT API
+ * stage has to be one of those two for the forced rate to be considered.
+ */
+ if (pipeline->graphics.last_vgt_api_stage != MESA_SHADER_VERTEX &&
+ pipeline->graphics.last_vgt_api_stage != MESA_SHADER_GEOMETRY)
+ return false;
+
+ nir_shader *last_vgt_shader = nir[pipeline->graphics.last_vgt_api_stage]; /* nir[] is indexed by shader stage */
+ if (last_vgt_shader->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE))
+ return false; /* The shader already writes a primitive shading rate itself. */
+
+ return true; /* Every check passed: the caller may force the shading rate. */
+}
+
VkResult
radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
struct radv_device *device, struct radv_pipeline_cache *cache,
@@ -3729,6 +3749,14 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
radv_stop_feedback(stage_feedbacks[i], false);
}
+ /* Force per-vertex VRS. */
+ if (radv_consider_force_vrs(pipeline, nir)) {
+ assert(pipeline->graphics.last_vgt_api_stage == MESA_SHADER_VERTEX ||
+ pipeline->graphics.last_vgt_api_stage == MESA_SHADER_GEOMETRY);
+ nir_shader *last_vgt_shader = nir[pipeline->graphics.last_vgt_api_stage];
+ NIR_PASS_V(last_vgt_shader, radv_force_primitive_shading_rate, device);
+ }
+
bool optimize_conservatively = pipeline_key->optimisations_disabled;
radv_link_shaders(pipeline, pipeline_key, nir, optimize_conservatively);
@@ -4738,10 +4766,8 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
cull_dist_mask = outinfo->cull_dist_mask;
total_mask = clip_dist_mask | cull_dist_mask;
- bool writes_primitive_shading_rate =
- outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
- outinfo->writes_viewport_index || writes_primitive_shading_rate;
+ outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
unsigned spi_vs_out_config, nparams;
/* VS is required to export at least one param. */
@@ -4768,7 +4794,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
- S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
+ S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
@@ -4857,13 +4883,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
cull_dist_mask = outinfo->cull_dist_mask;
total_mask = clip_dist_mask | cull_dist_mask;
- /* Primitive shading rate is written as a per-primitive output in mesh shaders. */
- bool force_vrs_per_vertex =
- pipeline->device->force_vrs != RADV_FORCE_VRS_NONE && es_type != MESA_SHADER_MESH;
- bool writes_primitive_shading_rate =
- outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
- outinfo->writes_viewport_index || writes_primitive_shading_rate;
+ outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id);
bool break_wave_at_eoi = false;
unsigned ge_cntl;
@@ -4906,7 +4927,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
- S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
+ S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 8514c4702af..efd478a9926 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -458,6 +458,90 @@ radv_lower_primitive_shading_rate(nir_shader *nir)
return progress;
}
+bool
+radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device)
+{
+ nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+ bool progress = false;
+ unsigned vrs_rate = 0;
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ /* This pass adds a store to VARYING_SLOT_PRIMITIVE_SHADING_RATE after
+ * stores to the position output, using the rate selected by
+ * device->force_vrs. It returns true if at least one store was added.
+ *
+ * Encoding of the value that gets written:
+ *
+ * Bits [2:3] = VRS rate X
+ * Bits [4:5] = VRS rate Y
+ *
+ * The range is [-2, 1]. Values:
+ * 1: 2x coarser shading rate in that direction.
+ * 0: normal shading rate
+ * -1: 2x finer shading rate (sample shading, not directional)
+ * -2: 4x finer shading rate (sample shading, not directional)
+ *
+ * Sample shading can't go above 8 samples, so both numbers can't be -2
+ * at the same time.
+ */
+ switch (device->force_vrs) {
+ case RADV_FORCE_VRS_2x2:
+ vrs_rate = (1u << 2) | (1u << 4); /* X = 1, Y = 1: 2x coarser in both directions */
+ break;
+ case RADV_FORCE_VRS_2x1:
+ vrs_rate = (1u << 2) | (0u << 4); /* X = 1, Y = 0: 2x coarser in X only */
+ break;
+ case RADV_FORCE_VRS_1x2:
+ vrs_rate = (0u << 2) | (1u << 4); /* X = 0, Y = 1: 2x coarser in Y only */
+ break;
+ default:
+ unreachable("Invalid RADV_FORCE_VRS value"); /* any other value, RADV_FORCE_VRS_NONE included, is not handled here */
+ }
+
+ nir_foreach_block_reverse(block, impl) { /* walk the entrypoint backwards, last block first */
+ nir_foreach_instr_reverse(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_deref)
+ continue;
+
+ nir_variable *var = nir_intrinsic_get_var(intr, 0);
+ if (var->data.mode != nir_var_shader_out ||
+ var->data.location != VARYING_SLOT_POS)
+ continue; /* only stores to the position output are of interest */
+
+ b.cursor = nir_after_instr(instr); /* new instructions go right after the position store */
+
+ nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intr->src[1].ssa, 3); /* component 3 of the stored value */
+
+ /* Use coarse shading if the value of Pos.W can't be determined or if its value is != 1
+ * (typical for non-GUI elements).
+ *
+ * 0x3f800000 is the IEEE-754 bit pattern of 1.0f, so a store whose W
+ * component is the constant 1.0 gets no shading rate store at all. In
+ * every other case the value is selected at run time: vrs_rate when
+ * Pos.W != 1.0, and 0 (normal shading rate) otherwise.
+ */
+ if (!nir_ssa_scalar_is_const(scalar_idx) ||
+ nir_ssa_scalar_as_uint(scalar_idx) != 0x3f800000u) {
+
+ var = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), "vrs rate");
+ var->data.location = VARYING_SLOT_PRIMITIVE_SHADING_RATE;
+ var->data.interpolation = INTERP_MODE_NONE;
+
+ nir_ssa_def *pos_w = nir_channel(&b, intr->src[1].ssa, 3);
+ nir_ssa_def *val = nir_bcsel(&b, nir_fneu(&b, pos_w, nir_imm_float(&b, 1.0f)),
+ nir_imm_int(&b, vrs_rate), nir_imm_int(&b, 0));
+
+ nir_deref_instr *deref = nir_build_deref_var(&b, var);
+ nir_store_deref(&b, deref, val, 0x1);
+
+ /* Update outputs_written to reflect that the pass added a new output. */
+ nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE);
+
+ progress = true;
+ if (nir->info.stage == MESA_SHADER_VERTEX)
+ return progress; /* NOTE(review): a vertex shader stops at the first store
+ * handled (the last one in program order, given the
+ * reverse walk); other stages keep scanning, presumably
+ * because a GS may store the position once per emitted
+ * vertex - confirm.
+ */
+ }
+ }
+ }
+
+ return progress;
+}
+
nir_shader *
radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
const char *entrypoint_name, gl_shader_stage stage,
@@ -802,6 +886,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
nir->info.stage == MESA_SHADER_GEOMETRY ||
nir->info.stage == MESA_SHADER_MESH) &&
nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) {
+ /* Lower primitive shading rate to match HW requirements. */
NIR_PASS_V(nir, radv_lower_primitive_shading_rate);
}
@@ -1868,20 +1953,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
options->debug.func = radv_compiler_debug;
options->debug.private_data = &debug_data;
- switch (options->key.ps.force_vrs) {
- case RADV_FORCE_VRS_2x2:
- options->force_vrs_rates = (1u << 2) | (1u << 4);
- break;
- case RADV_FORCE_VRS_2x1:
- options->force_vrs_rates = (1u << 2) | (0u << 4);
- break;
- case RADV_FORCE_VRS_1x2:
- options->force_vrs_rates = (0u << 2) | (1u << 4);
- break;
- default:
- break;
- }
-
struct radv_shader_args args = {0};
args.is_gs_copy_shader = gs_copy_shader;
args.is_trap_handler_shader = trap_handler_shader;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 3c5f0996919..7e477a020db 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -127,7 +127,6 @@ struct radv_nir_compiler_options {
enum chip_class chip_class;
const struct radeon_info *info;
uint32_t address32_hi;
- uint8_t force_vrs_rates;
struct {
void (*func)(void *private_data, enum radv_compiler_debug_level level, const char *message);
@@ -663,4 +662,6 @@ bool radv_consider_culling(struct radv_device *device, struct nir_shader *nir,
void radv_get_nir_options(struct radv_physical_device *device);
+bool radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device);
+
#endif
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index e0e2bc3940b..03f6df238d3 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -678,15 +678,10 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
outinfo->writes_layer = true;
}
- /* VS/TES/GS: shading rate is per-vertex, MS: it's per-primitive. */
- bool force_vrs_per_vertex =
- device->force_vrs != RADV_FORCE_VRS_NONE && nir->info.stage != MESA_SHADER_MESH;
- bool writes_primitive_shading_rate =
- outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
int pos_written = 0x1;
if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer ||
- writes_primitive_shading_rate)
+ outinfo->writes_primitive_shading_rate)
pos_written |= 1 << 1;
unsigned num_clip_distances = util_bitcount(outinfo->clip_dist_mask);
More information about the mesa-commit
mailing list