Mesa (main): aco: Keep VGPR destinations for uniform shared loads when beneficial.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jun 9 17:25:49 UTC 2021
Module: Mesa
Branch: main
Commit: b4e22eb4822d74a6e981c629ddff9bcd29b9a0ec
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b4e22eb4822d74a6e981c629ddff9bcd29b9a0ec
Author: Timur Kristóf <timur.kristof at gmail.com>
Date: Fri May 28 22:08:45 2021 +0200
aco: Keep VGPR destinations for uniform shared loads when beneficial.
When the result of these loads is only used by cross-lane instructions,
it is beneficial to use a VGPR destination, because this allows the
s_waitcnt to be placed further down, which decreases latency.
Signed-off-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Tony Wasserka <tony.wasserka at gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11072>
---
.../compiler/aco_instruction_selection_setup.cpp | 36 +++++++++++++++++++++-
1 file changed, 35 insertions(+), 1 deletion(-)
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 7141bbf2c93..09536cad5b6 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -89,6 +89,31 @@ is_block_reachable(nir_function_impl *impl, nir_block *known_reachable, nir_bloc
return false;
}
+bool
+only_used_by_readlane_or_phi(nir_dest *dest)
+{
+ nir_src *src = list_first_entry(&dest->ssa.uses, nir_src, use_link);
+
+ switch (src->parent_instr->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);
+ if (alu->op == nir_op_unpack_64_2x32_split_x || alu->op == nir_op_unpack_64_2x32_split_y)
+ return only_used_by_readlane_or_phi(&alu->dest.dest);
+ return false;
+ }
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src->parent_instr);
+ return intrin->intrinsic == nir_intrinsic_read_invocation ||
+ intrin->intrinsic == nir_intrinsic_read_first_invocation ||
+ intrin->intrinsic == nir_intrinsic_lane_permute_16_amd;
+ }
+ case nir_instr_type_phi:
+ return only_used_by_readlane_or_phi(&nir_instr_as_phi(src->parent_instr)->dest);
+ default:
+ return false;
+ }
+}
+
/* If one side of a divergent IF ends in a branch and the other doesn't, we
* might have to emit the contents of the side without the branch at the merge
* block instead. This is so that we can use any SGPR live-out of the side
@@ -830,6 +855,16 @@ void init_context(isel_context *ctx, nir_shader *shader)
case nir_intrinsic_bvh64_intersect_ray_amd:
type = RegType::vgpr;
break;
+ case nir_intrinsic_load_shared:
+ /* When the result of these loads is only used by cross-lane instructions,
+ * it is beneficial to use a VGPR destination. This is because this allows
+ * to put the s_waitcnt further down, which decreases latency.
+ */
+ if (only_used_by_readlane_or_phi(&intrinsic->dest)) {
+ type = RegType::vgpr;
+ break;
+ }
+ FALLTHROUGH;
case nir_intrinsic_shuffle:
case nir_intrinsic_quad_broadcast:
case nir_intrinsic_quad_swap_horizontal:
@@ -844,7 +879,6 @@ void init_context(isel_context *ctx, nir_shader *shader)
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_global:
case nir_intrinsic_vulkan_resource_index:
- case nir_intrinsic_load_shared:
case nir_intrinsic_get_ssbo_size:
type = nir_dest_is_divergent(intrinsic->dest) ? RegType::vgpr : RegType::sgpr;
break;
More information about the mesa-commit
mailing list