Mesa (main): nir_to_tgsi: Enable fdot_replicates flag.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jan 7 10:22:51 UTC 2022
Module: Mesa
Branch: main
Commit: 558a6006299544ee5f77843f094015c62558f4ad
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=558a6006299544ee5f77843f094015c62558f4ad
Author: Emma Anholt <emma at anholt.net>
Date: Tue Dec 14 14:35:03 2021 -0800
nir_to_tgsi: Enable fdot_replicates flag.
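TGSI's DP2/DP3/DP4 opcodes replicate their scalar result to every written
destination channel, which is exactly the behavior fdot_replicates describes.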
This lets lower_vec_to_regs coalesce the DP output into the .yzw channels,
giving an impressive shader-db win on softpipe:
total instructions in shared programs: 2929840 -> 2794036 (-4.64%)
instructions in affected programs: 1651438 -> 1515634 (-8.22%)
total temps in shared programs: 372730 -> 332744 (-10.73%)
temps in affected programs: 118151 -> 78165 (-33.84%)
and a minor one on r300:
total instructions in shared programs: 51238 -> 51149 (-0.17%)
instructions in affected programs: 2621 -> 2532 (-3.40%)
total vinst in shared programs: 15655 -> 15618 (-0.24%)
vinst in affected programs: 468 -> 431 (-7.91%)
total temps in shared programs: 9838 -> 9828 (-0.10%)
temps in affected programs: 59 -> 49 (-16.95%)
and a bigger one on i915g:
total instructions in shared programs: 398064 -> 395901 (-0.54%)
instructions in affected programs: 29271 -> 27108 (-7.39%)
total tex_indirect in shared programs: 12261 -> 12233 (-0.23%)
tex_indirect in affected programs: 98 -> 70 (-28.57%)
LOST: 0
GAINED: 5
The r300 improvement is smaller, partly because the r300 backend already does
some copy propagation, and partly because intermediate storage of DP results
now takes a vec4 instead of a scalar.
Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14200>
---
src/compiler/nir/nir_builder_opcodes_h.py | 5 +++++
src/gallium/auxiliary/nir/nir_to_tgsi.c | 4 ++++
src/gallium/drivers/i915/i915_screen.c | 2 ++
src/gallium/drivers/r300/r300_screen.c | 4 ++++
src/gallium/drivers/softpipe/sp_screen.c | 1 +
5 files changed, 16 insertions(+)
diff --git a/src/compiler/nir/nir_builder_opcodes_h.py b/src/compiler/nir/nir_builder_opcodes_h.py
index 35e5ca7a506..7fc6af9c776 100644
--- a/src/compiler/nir/nir_builder_opcodes_h.py
+++ b/src/compiler/nir/nir_builder_opcodes_h.py
@@ -30,9 +30,13 @@ def src_decl_list(num_srcs):
def src_list(num_srcs):
return ', '.join('src' + str(i) for i in range(num_srcs))
+
+def needs_num_components(opcode):
+ return "replicated" in opcode.name
%>
% for name, opcode in sorted(opcodes.items()):
+% if not needs_num_components(opcode):
static inline nir_ssa_def *
nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
{
@@ -43,6 +47,7 @@ nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
return nir_build_alu_src_arr(build, nir_op_${name}, srcs);
% endif
}
+% endif
% endfor
% for name, opcode in sorted(INTR_OPCODES.items()):
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index 5ad01306fbd..e5097d78cac 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -858,6 +858,9 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
[nir_op_fdot2] = { TGSI_OPCODE_DP2 },
[nir_op_fdot3] = { TGSI_OPCODE_DP3 },
[nir_op_fdot4] = { TGSI_OPCODE_DP4 },
+ [nir_op_fdot2_replicated] = { TGSI_OPCODE_DP2 },
+ [nir_op_fdot3_replicated] = { TGSI_OPCODE_DP3 },
+ [nir_op_fdot4_replicated] = { TGSI_OPCODE_DP4 },
[nir_op_ffloor] = { TGSI_OPCODE_FLR, TGSI_OPCODE_DFLR },
[nir_op_ffract] = { TGSI_OPCODE_FRC, TGSI_OPCODE_DFRAC },
[nir_op_fceil] = { TGSI_OPCODE_CEIL, TGSI_OPCODE_DCEIL },
@@ -3191,6 +3194,7 @@ nir_to_tgsi(struct nir_shader *s,
}
static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_extract_byte = true,
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index d2ab19c4cda..75d57e1cbce 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -106,6 +106,7 @@ i915_get_name(struct pipe_screen *screen)
}
static const nir_shader_compiler_options i915_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.lower_bitops = true, /* required for !CAP_INTEGERS nir_to_tgsi */
.lower_extract_byte = true,
@@ -122,6 +123,7 @@ static const nir_shader_compiler_options i915_compiler_options = {
};
static const struct nir_shader_compiler_options gallivm_nir_options = {
+ .fdot_replicates = true,
.lower_bitops = true, /* required for !CAP_INTEGERS nir_to_tgsi */
.lower_scmp = true,
.lower_flrp32 = true,
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 27c6835e339..8d4f902722f 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -474,6 +474,7 @@ static int r300_get_video_param(struct pipe_screen *screen,
}
static const nir_shader_compiler_options r500_vs_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_bitops = true,
@@ -499,6 +500,7 @@ static const nir_shader_compiler_options r500_vs_compiler_options = {
};
static const nir_shader_compiler_options r500_fs_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_bitops = true,
@@ -525,6 +527,7 @@ static const nir_shader_compiler_options r500_fs_compiler_options = {
};
static const nir_shader_compiler_options r300_vs_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_bitops = true,
@@ -549,6 +552,7 @@ static const nir_shader_compiler_options r300_vs_compiler_options = {
};
static const nir_shader_compiler_options r300_fs_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_bitops = true,
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index c87d4067cfc..4984f608011 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -75,6 +75,7 @@ softpipe_get_name(struct pipe_screen *screen)
}
static const nir_shader_compiler_options sp_compiler_options = {
+ .fdot_replicates = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.lower_extract_byte = true,
More information about the mesa-commit
mailing list