Mesa (main): radv: lower primitive shading rate in NIR
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Jul 12 18:11:13 UTC 2021
Module: Mesa
Branch: main
Commit: ee79b87c62f0187daac3f8498db924429e0b5204
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee79b87c62f0187daac3f8498db924429e0b5204
Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date: Thu Jun 24 15:59:45 2021 +0200
radv: lower primitive shading rate in NIR
This allows more potential compiler optimizations if the value is a
constant or from a scalar load.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11579>
---
src/amd/compiler/aco_instruction_selection.cpp | 28 +-----------
src/amd/vulkan/radv_nir_to_llvm.c | 25 +---------
src/amd/vulkan/radv_shader.c | 63 ++++++++++++++++++++++++++
3 files changed, 65 insertions(+), 51 deletions(-)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 403c785f5c6..2af31108aae 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10355,33 +10355,7 @@ static void export_vs_psiz_layer_viewport_vrs(isel_context *ctx, int *next_pos)
}
}
if (ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_SHADING_RATE]) {
- Builder bld(ctx->program, ctx->block);
- Temp cond;
-
- /* xRate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */
- Temp x_rate = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(12u),
- Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]));
- cond = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), Operand(x_rate));
- x_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
- bld.copy(bld.def(v1), Operand(0u)),
- bld.copy(bld.def(v1), Operand(1u)), cond);
-
- /* yRate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */
- Temp y_rate = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(3u),
- Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]));
- cond = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), Operand(y_rate));
- y_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
- bld.copy(bld.def(v1), Operand(0u)),
- bld.copy(bld.def(v1), Operand(1u)), cond);
-
- /* Bits [2:3] = VRS rate X
- * Bits [4:5] = VRS rate Y
- * HW shading rate = (xRate << 2) | (yRate << 4)
- */
- y_rate = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(4u), Operand(y_rate));
- Temp out = bld.vop3(aco_opcode::v_lshl_or_b32, bld.def(v1), Operand(x_rate), Operand(2u), Operand(y_rate));
-
- exp->operands[1] = Operand(out);
+ exp->operands[1] = Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]);
exp->enabled_mask |= 0x2;
} else if (ctx->options->force_vrs_rates) {
/* Bits [2:3] = VRS rate X
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index caccff0b097..dff9f635756 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -1331,30 +1331,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
}
if (outinfo->writes_primitive_shading_rate) {
- LLVMValueRef v = ac_to_integer(&ctx->ac, primitive_shading_rate);
- LLVMValueRef cond;
-
- /* xRate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */
- LLVMValueRef x_rate =
- LLVMBuildAnd(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 4 | 8, false), "");
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, x_rate, ctx->ac.i32_0, "");
- x_rate = LLVMBuildSelect(ctx->ac.builder, cond, ctx->ac.i32_1, ctx->ac.i32_0, "");
-
- /* yRate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */
- LLVMValueRef y_rate =
- LLVMBuildAnd(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 1 | 2, false), "");
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, y_rate, ctx->ac.i32_0, "");
- y_rate = LLVMBuildSelect(ctx->ac.builder, cond, ctx->ac.i32_1, ctx->ac.i32_0, "");
-
- /* Bits [2:3] = VRS rate X
- * Bits [4:5] = VRS rate Y
- * HW shading rate = (xRate << 2) | (yRate << 4)
- */
- v = LLVMBuildOr(
- ctx->ac.builder,
- LLVMBuildShl(ctx->ac.builder, x_rate, LLVMConstInt(ctx->ac.i32, 2, false), ""),
- LLVMBuildShl(ctx->ac.builder, y_rate, LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
- pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
+ pos_args[1].out[1] = primitive_shading_rate;
} else if (ctx->args->options->force_vrs_rates) {
/* Bits [2:3] = VRS rate X
* Bits [4:5] = VRS rate Y
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 72d793480e7..f1cb00f27ca 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -369,6 +369,62 @@ lower_intrinsics(nir_shader *nir, const struct radv_pipeline_key *key,
return progress;
}
+static bool /* returns true when at least one store was rewritten */
+radv_lower_primitive_shading_rate(nir_shader *nir)
+{ /* Re-encodes values stored to VARYING_SLOT_PRIMITIVE_SHADING_RATE into the HW bit layout. */
+ nir_function_impl *impl = nir_shader_get_entrypoint(nir); /* only the entrypoint is scanned */
+ bool progress = false;
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ /* Iterate in reverse order since there should be only one deref store to PRIMITIVE_SHADING_RATE
+ * after lower_io_to_temporaries for vertex shaders. Walking backwards lets the vertex-shader
+ * case return as soon as that store has been rewritten (see the early return below).
+ */
+ nir_foreach_block_reverse(block, impl) {
+ nir_foreach_instr_reverse(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_deref)
+ continue;
+
+ nir_variable *var = nir_intrinsic_get_var(intr, 0); /* variable looked up through source 0 of the store */
+ if (var->data.mode != nir_var_shader_out ||
+ var->data.location != VARYING_SLOT_PRIMITIVE_SHADING_RATE)
+ continue; /* not a shader-output store to the shading-rate slot */
+
+ b.cursor = nir_before_instr(instr); /* new instructions are emitted ahead of the store */
+
+ nir_ssa_def *val = nir_ssa_for_src(&b, intr->src[1], 1); /* stored value (source 1), one component */
+
+ /* x_rate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0;
+ * The mask 12 is 4 | 8; any non-zero horizontal bit collapses to 1. */
+ nir_ssa_def *x_rate = nir_iand(&b, val, nir_imm_int(&b, 12));
+ x_rate = nir_b2i32(&b, nir_ine(&b, x_rate, nir_imm_int(&b, 0)));
+
+ /* y_rate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0;
+ * The mask 3 is 1 | 2; any non-zero vertical bit collapses to 1. */
+ nir_ssa_def *y_rate = nir_iand(&b, val, nir_imm_int(&b, 3));
+ y_rate = nir_b2i32(&b, nir_ine(&b, y_rate, nir_imm_int(&b, 0)));
+
+ /* Bits [2:3] = VRS rate X
+ * Bits [4:5] = VRS rate Y
+ * HW shading rate = (xRate << 2) | (yRate << 4)
+ */
+ nir_ssa_def *out = nir_ior(&b, nir_ishl(&b, x_rate, nir_imm_int(&b, 2)),
+ nir_ishl(&b, y_rate, nir_imm_int(&b, 4)));
+
+ nir_instr_rewrite_src(&intr->instr, &intr->src[1], nir_src_for_ssa(out)); /* store the HW-encoded value instead */
+
+ progress = true;
+ if (nir->info.stage == MESA_SHADER_VERTEX)
+ return progress; /* vertex shaders stop after the first match; other stages keep scanning */
+ }
+ }
+
+ return progress;
+}
+
nir_shader *
radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
const char *entrypoint_name, gl_shader_stage stage,
@@ -699,6 +755,13 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
*/
NIR_PASS_V(nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
+ /* Lower primitive shading rate to match HW requirements. */
+ if ((nir->info.stage == MESA_SHADER_VERTEX ||
+ nir->info.stage == MESA_SHADER_GEOMETRY) &&
+ nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) {
+ NIR_PASS_V(nir, radv_lower_primitive_shading_rate);
+ }
+
/* Indirect lowering must be called after the radv_optimize_nir() loop
* has been called at least once. Otherwise indirect lowering can
* bloat the instruction count of the loop and cause it to be
More information about the mesa-commit
mailing list