Mesa (master): r600/sfn: eliminate loading unused component loads from shared memory
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Mar 5 18:30:18 UTC 2021
Module: Mesa
Branch: master
Commit: f3aa2f15c20d5fa0767dce99b54f74a7eab07b2e
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3aa2f15c20d5fa0767dce99b54f74a7eab07b2e
Author: Gert Wollny <gert.wollny at collabora.com>
Date: Thu Mar 4 19:39:52 2021 +0100
r600/sfn: eliminate loading unused component loads from shared memory
LDS loads are quite expensive, so try to eliminate as many of them as possible.
Signed-off-by: Gert Wollny <gert.wollny at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9416>
---
.../drivers/r600/sfn/sfn_nir_lower_tess_io.cpp | 103 ++++++++++++++++++---
1 file changed, 90 insertions(+), 13 deletions(-)
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
index 14d41f0cb9d..b1b86ffaaa7 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
@@ -131,22 +131,99 @@ static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents)
}
}
+static nir_ssa_def *load_offset_group_from_mask(nir_builder *b, uint32_t mask) // Build an offset vector containing only the entries of (0, 4, 8, 12) whose bit is set in mask.
+{
+ auto full_mask = nir_imm_ivec4(b, 0, 4, 8, 12); // offsets for all four components; NOTE(review): presumably byte offsets of 32-bit components - confirm
+ return nir_channels(b, full_mask, mask); // keep just the channels selected by mask, so the result has one entry per set bit
+}
+
+struct MaskQuery { // State handed to update_alu_mask() through nir_foreach_src() while collecting which components of one SSA value are read.
+ uint32_t mask; // components found to be used so far (accumulated with |=)
+ uint32_t ssa_index; // index of the SSA value whose uses are being inspected
+ nir_alu_instr *alu; // ALU instruction whose sources are currently being visited
+ int index; // position of the source being visited within alu; reset to 0 per instruction
+ uint32_t full_mask; // mask with one bit set per component of the value; reaching it ends the search early
+};
+
+static bool update_alu_mask(nir_src *src, void *data) // nir_foreach_src() callback; data is a MaskQuery. Adds the read mask of every ALU source that refers to the tracked SSA value.
+{
+ auto mq = reinterpret_cast<MaskQuery *>(data);
+
+ if (mq->ssa_index == src->ssa->index) { // this source reads the value we are tracking
+ mq->mask |= nir_alu_instr_src_read_mask(mq->alu, mq->index);
+ }
+ ++mq->index; // count every visited source, matching or not, so index stays aligned with the next callback
+
+ return mq->mask != mq->full_mask; // false once all components are known to be used; the caller treats a false result as "everything is read"
+}
+
+static uint32_t get_dest_usee_mask(nir_intrinsic_instr *op) // Return a bit mask of the destination components of op that its users read; 0xf whenever that cannot be narrowed down.
+{
+ assert(op->dest.is_ssa);
+
+ MaskQuery mq = {0};
+ mq.full_mask = (1 << nir_dest_num_components(op->dest)) - 1; // one bit per destination component
+
+ nir_foreach_use(use_src, &op->dest.ssa) { // inspect every instruction that uses the result
+ auto use_instr = use_src->parent_instr;
+ mq.ssa_index = use_src->ssa->index;
+
+ switch (use_instr->type) {
+ case nir_instr_type_alu: {
+ mq.alu = nir_instr_as_alu(use_instr);
+ mq.index = 0; // restart source numbering for this ALU instruction
+ if (!nir_foreach_src(use_instr, update_alu_mask, &mq)) // false means the callback saw mask == full_mask
+ return 0xf; // NOTE(review): returns all four bits even if op has fewer components
+ break;
+ }
+ case nir_instr_type_intrinsic: {
+ auto intr = nir_instr_as_intrinsic(use_instr);
+ switch (intr->intrinsic) {
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_vertex_output:
+ mq.mask |= nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr); // write mask shifted by the store's component index
+ break;
+ case nir_intrinsic_store_scratch:
+ case nir_intrinsic_store_local_shared_r600:
+ mq.mask |= nir_intrinsic_write_mask(intr); // these stores contribute their write mask unshifted
+ break;
+ default:
+ return 0xf; // any other intrinsic user: assume every component is needed
+ }
+ break;
+ }
+ default:
+ return 0xf; // any other instruction type: assume every component is needed
+ }
+
+ }
+ return mq.mask; // stays 0 when the loop found no uses
+}
+
static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr) // Replace op by a load_local_shared_r600 that fetches only the components its users read, then remove op.
{
- nir_intrinsic_instr *load_tcs_in = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
- load_tcs_in->num_components = op->num_components;
- nir_ssa_dest_init(&load_tcs_in->instr, &load_tcs_in->dest,
- load_tcs_in->num_components, 32, NULL);
-
- nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group(b, load_tcs_in->num_components));
- if (nir_intrinsic_component(op))
- addr_outer = nir_iadd(b, addr_outer, nir_imm_int(b, 4 * nir_intrinsic_component(op)));
-
- load_tcs_in->src[0] = nir_src_for_ssa(addr_outer);
- nir_builder_instr_insert(b, &load_tcs_in->instr);
- nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load_tcs_in->dest.ssa));
+ uint32_t mask = get_dest_usee_mask(op); // which destination components are actually read
+ if (mask) { // mask == 0: no new load is emitted and op is only removed below
+ nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group_from_mask(b, mask)); // one address per used component
+ if (nir_intrinsic_component(op))
+ addr_outer = nir_iadd(b, addr_outer, nir_imm_int(b, 4 * nir_intrinsic_component(op))); // shift by 4 per component of the load's start component
+
+ auto new_load = nir_load_local_shared_r600(b, 32, addr_outer); // NOTE(review): 32 is presumably the bit size, component count presumably follows addr_outer - confirm
+
+ auto undef = nir_ssa_undef(b, 1, 32); // placeholder for channels nobody reads
+ int comps = nir_dest_num_components(op->dest);
+ nir_ssa_def *remix[4] = {undef, undef, undef, undef};
+
+ int chan = 0; // next channel of new_load, which holds only the masked components
+ for (int i = 0; i < comps; ++i) {
+ if (mask & (1 << i)) {
+ remix[i] = nir_channel(b, new_load, chan++); // put the loaded channel back at its original position
+ }
+ }
+ auto new_load_remixed = nir_vec(b, remix, comps); // same component count as the original destination
+ nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(new_load_remixed));
+ }
nir_instr_remove(&op->instr);
-
}
static nir_ssa_def *
More information about the mesa-commit
mailing list