Mesa (main): aco: return 0x76543210 for NULL FMASK fetch
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Oct 7 17:52:54 UTC 2021
Module: Mesa
Branch: main
Commit: bf0cc05227d0a4a90f4bec13e826123158c2a27b
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bf0cc05227d0a4a90f4bec13e826123158c2a27b
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Wed Aug 4 16:13:47 2021 +0100
aco: return 0x76543210 for NULL FMASK fetch
This can replace several v_cndmask_b32 instructions with a single one, and
will be useful when we lower sample index adjustment in NIR.
fossil-db (Sienna Cichlid):
Totals from 955 (0.64% of 150170) affected shaders:
VGPRs: 53232 -> 53208 (-0.05%)
CodeSize: 4712548 -> 4722016 (+0.20%); split: -0.02%, +0.23%
MaxWaves: 19052 -> 19056 (+0.02%)
Instrs: 875891 -> 875619 (-0.03%); split: -0.04%, +0.00%
Latency: 14070164 -> 14069089 (-0.01%); split: -0.02%, +0.01%
InvThroughput: 2322982 -> 2321419 (-0.07%); split: -0.08%, +0.01%
VClause: 23070 -> 23080 (+0.04%); split: -0.00%, +0.05%
SClause: 32463 -> 32426 (-0.11%); split: -0.12%, +0.01%
Copies: 42840 -> 42787 (-0.12%); split: -0.19%, +0.07%
Branches: 17907 -> 17900 (-0.04%); split: -0.06%, +0.02%
PreSGPRs: 43585 -> 43229 (-0.82%)
PreVGPRs: 47676 -> 47625 (-0.11%)
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12214>
---
src/amd/compiler/aco_instruction_selection.cpp | 56 +++++++++++++++++---------
1 file changed, 36 insertions(+), 20 deletions(-)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 2f04619a7a3..e1915ce40c6 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6025,6 +6025,17 @@ adjust_sample_index_using_fmask(isel_context* ctx, bool da, std::vector<Temp>& c
load->da = da;
load->dim = dim;
+ /* Don't adjust the sample index if WORD1.DATA_FORMAT of the FMASK
+ * resource descriptor is 0 (invalid),
+ */
+ Temp is_not_null = bld.tmp(bld.lm);
+ bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
+ emit_extract_vector(ctx, fmask_desc_ptr, 1, s1))
+ .def(0)
+ .setHint(vcc);
+ fmask =
+ bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0x76543210), fmask, is_not_null);
+
Operand sample_index4;
if (sample_index.isConstant()) {
if (sample_index.constantValue() < 16) {
@@ -6041,28 +6052,12 @@ adjust_sample_index_using_fmask(isel_context* ctx, bool da, std::vector<Temp>& c
bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), sample_index);
}
- Temp final_sample;
if (sample_index4.isConstant() && sample_index4.constantValue() == 0)
- final_sample = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(15u), fmask);
+ return bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(15u), fmask);
else if (sample_index4.isConstant() && sample_index4.constantValue() == 28)
- final_sample = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(28u), fmask);
+ return bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(28u), fmask);
else
- final_sample =
- bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand::c32(4u));
-
- /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
- * resource descriptor is 0 (invalid),
- */
- Temp compare = bld.tmp(bld.lm);
- bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(compare), Operand::zero(),
- emit_extract_vector(ctx, fmask_desc_ptr, 1, s1))
- .def(0)
- .setHint(vcc);
-
- Temp sample_index_v = bld.copy(bld.def(v1), sample_index);
-
- /* Replace the MSAA sample index. */
- return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), sample_index_v, final_sample, compare);
+ return bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand::c32(4u));
}
static std::vector<Temp>
@@ -9556,7 +9551,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
dmask = 1 << instr->component;
if (tg4_integer_cube_workaround || dst.type() == RegType::sgpr)
tmp_dst = bld.tmp(instr->is_sparse ? v5 : v4);
- } else if (instr->op == nir_texop_samples_identical) {
+ } else if (instr->op == nir_texop_samples_identical ||
+ instr->op == nir_texop_fragment_mask_fetch_amd) {
tmp_dst = bld.tmp(v1);
} else if (util_bitcount(dmask) != instr->dest.ssa.num_components ||
dst.type() == RegType::sgpr) {
@@ -9758,6 +9754,26 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
bld.vopc(aco_opcode::v_cmp_eq_u32, Definition(dst), Operand::zero(), tmp_dst)
.def(0)
.setHint(vcc);
+ } else if (instr->op == nir_texop_fragment_mask_fetch_amd) {
+ /* Use 0x76543210 if the image doesn't have FMASK. */
+ assert(dmask == 1 && dst.bytes() == 4);
+ assert(dst.id() != tmp_dst.id());
+
+ if (dst.regClass() == s1) {
+ Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
+ emit_extract_vector(ctx, resource, 1, s1));
+ bld.sop2(aco_opcode::s_cselect_b32, Definition(dst),
+ bld.as_uniform(tmp_dst), Operand::c32(0x76543210),
+ bld.scc(is_not_null));
+ } else {
+ Temp is_not_null = bld.tmp(bld.lm);
+ bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
+ emit_extract_vector(ctx, resource, 1, s1))
+ .def(0)
+ .setHint(vcc);
+ bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst),
+ bld.copy(bld.def(v1), Operand::c32(0x76543210)), tmp_dst, is_not_null);
+ }
} else {
expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
}
More information about the mesa-commit
mailing list