Mesa (main): radv,aco,ac/llvm: use nir_op_f{sin,cos}_amd
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Jul 7 23:01:08 UTC 2022
Module: Mesa
Branch: main
Commit: 48578713b78e20bd1706cf65838fa6586ba35a63
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=48578713b78e20bd1706cf65838fa6586ba35a63
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Mon May 3 11:10:06 2021 +0100
radv,aco,ac/llvm: use nir_op_f{sin,cos}_amd
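This lets NIR optimize the multiplication by 1/(2π), which is now an explicit fmul in NIR rather than being emitted by the backend, particularly for sin/cos(a * #b) where #b is a constant.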
fossil-db (Sienna Cichlid):
Totals from 12306 (7.58% of 162293) affected shaders:
MaxWaves: 224814 -> 224834 (+0.01%)
Instrs: 17365273 -> 17338758 (-0.15%); split: -0.16%, +0.00%
CodeSize: 93478488 -> 93354912 (-0.13%); split: -0.14%, +0.01%
VGPRs: 752080 -> 752072 (-0.00%); split: -0.00%, +0.00%
SpillSGPRs: 8440 -> 8410 (-0.36%)
Latency: 200402154 -> 200279405 (-0.06%); split: -0.06%, +0.00%
InvThroughput: 37588077 -> 37545545 (-0.11%); split: -0.11%, +0.00%
VClause: 293863 -> 293874 (+0.00%); split: -0.03%, +0.03%
SClause: 619539 -> 619064 (-0.08%); split: -0.09%, +0.01%
Copies: 1151591 -> 1151641 (+0.00%); split: -0.04%, +0.05%
Branches: 506434 -> 506437 (+0.00%); split: -0.00%, +0.00%
PreSGPRs: 877609 -> 877517 (-0.01%); split: -0.01%, +0.00%
PreVGPRs: 711938 -> 711940 (+0.00%); split: -0.00%, +0.00%
fossil-db (LLVM, Sienna Cichlid):
Totals from 4377 (3.59% of 121873) affected shaders:
SGPRs: 358960 -> 359176 (+0.06%); split: -0.18%, +0.25%
VGPRs: 319832 -> 319720 (-0.04%); split: -0.18%, +0.15%
SpillSGPRs: 46983 -> 47007 (+0.05%); split: -0.99%, +1.04%
CodeSize: 30872812 -> 30764512 (-0.35%); split: -0.39%, +0.04%
MaxWaves: 73814 -> 73904 (+0.12%); split: +0.25%, -0.13%
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10587>
---
src/amd/compiler/aco_instruction_selection.cpp | 19 +++++++------------
src/amd/compiler/aco_instruction_selection_setup.cpp | 4 ++--
src/amd/llvm/ac_nir_to_llvm.c | 10 ++++++++++
src/amd/vulkan/radv_shader.c | 17 +++++++++++++++++
4 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 34e82a11b7f..442b187fc8f 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2750,27 +2750,22 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
break;
}
- case nir_op_fsin:
- case nir_op_fcos: {
+ case nir_op_fsin_amd:
+ case nir_op_fcos_amd: {
Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
aco_ptr<Instruction> norm;
if (dst.regClass() == v2b) {
- Temp half_pi = bld.copy(bld.def(s1), Operand::c32(0x3118u));
- Temp tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), half_pi, src);
aco_opcode opcode =
- instr->op == nir_op_fsin ? aco_opcode::v_sin_f16 : aco_opcode::v_cos_f16;
- bld.vop1(opcode, Definition(dst), tmp);
+ instr->op == nir_op_fsin_amd ? aco_opcode::v_sin_f16 : aco_opcode::v_cos_f16;
+ bld.vop1(opcode, Definition(dst), src);
} else if (dst.regClass() == v1) {
- Temp half_pi = bld.copy(bld.def(s1), Operand::c32(0x3e22f983u));
- Temp tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), half_pi, src);
-
/* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
if (ctx->options->gfx_level < GFX9)
- tmp = bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), tmp);
+ src = bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), src);
aco_opcode opcode =
- instr->op == nir_op_fsin ? aco_opcode::v_sin_f32 : aco_opcode::v_cos_f32;
- bld.vop1(opcode, Definition(dst), tmp);
+ instr->op == nir_op_fsin_amd ? aco_opcode::v_sin_f32 : aco_opcode::v_cos_f32;
+ bld.vop1(opcode, Definition(dst), src);
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 99ebec2031e..719c5b8b978 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -501,8 +501,8 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_fceil:
case nir_op_ftrunc:
case nir_op_fround_even:
- case nir_op_fsin:
- case nir_op_fcos:
+ case nir_op_fsin_amd:
+ case nir_op_fcos_amd:
case nir_op_f2f16:
case nir_op_f2f16_rtz:
case nir_op_f2f16_rtne:
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index fb09193443b..53f6242ae33 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -854,6 +854,16 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
result =
emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
+ case nir_op_fsin_amd:
+ case nir_op_fcos_amd:
+ /* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
+ if (ctx->ac.gfx_level < GFX9)
+ src[0] = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ result =
+ emit_intrin_1f_param(&ctx->ac, instr->op == nir_op_fsin_amd ? "llvm.amdgcn.sin" : "llvm.amdgcn.cos",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
case nir_op_fsqrt:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 3977ac031bb..afe76d6e65e 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -639,6 +639,21 @@ radv_lower_ms_workgroup_id(nir_shader *nir)
return progress;
}
+static bool
+is_sincos(const nir_instr *instr, const void *_)
+{
+ return instr->type == nir_instr_type_alu &&
+ (nir_instr_as_alu(instr)->op == nir_op_fsin || nir_instr_as_alu(instr)->op == nir_op_fcos);
+}
+
+static nir_ssa_def *
+lower_sincos(struct nir_builder *b, nir_instr *instr, void *_)
+{
+ nir_alu_instr *sincos = nir_instr_as_alu(instr);
+ nir_ssa_def *src = nir_fmul_imm(b, nir_ssa_for_alu_src(b, sincos, 0), 0.15915493667125702);
+ return sincos->op == nir_op_fsin ? nir_fsin_amd(b, src) : nir_fcos_amd(b, src);
+}
+
nir_shader *
radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_stage *stage,
const struct radv_pipeline_key *key)
@@ -849,6 +864,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_
}
NIR_PASS(_, nir, nir_lower_doubles, NULL, lower_doubles);
+
+ NIR_PASS(_, nir, nir_shader_lower_instructions, &is_sincos, &lower_sincos, NULL);
}
NIR_PASS(_, nir, nir_lower_system_values);
More information about the mesa-commit mailing list