Mesa (main): gallivm/nir: Add a short circuit uniform-offset mode for load_global.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 6 00:37:10 UTC 2022


Module: Mesa
Branch: main
Commit: 181f25aff4054959ae89a4399f01fa860bd4a515
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=181f25aff4054959ae89a4399f01fa860bd4a515

Author: Emma Anholt <emma at anholt.net>
Date:   Thu Feb 10 14:29:13 2022 -0800

gallivm/nir: Add a short circuit uniform-offset mode for load_global.

If we know the offset is constant, we don't have to ask LLVM to loop over the
elements pulling the same value out over and over.

This doesn't seem to have produced a win in the testcase I was looking at,
but it was an easier entrypoint to figuring out how to do scalar memory
access than load_memory, and will probably affect some workload.

Reviewed-by: Dave Airlie <airlied at redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14999>

---

 src/gallium/auxiliary/gallivm/lp_bld_nir.c     |  3 ++-
 src/gallium/auxiliary/gallivm/lp_bld_nir.h     |  1 +
 src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c | 16 ++++++++++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index 72243a18cd2..2b5b45d8b93 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -1695,9 +1695,10 @@ static void visit_load_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMValueRef addr = get_src(bld_base, instr->src[0]);
+   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
    bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                          nir_src_bit_size(instr->src[0]),
-                         addr, result);
+                         offset_is_uniform, addr, result);
 }
 
 static void visit_store_global(struct lp_build_nir_context *bld_base,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
index b68bf073111..895897e4bc8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
@@ -91,6 +91,7 @@ struct lp_build_nir_context
    void (*load_global)(struct lp_build_nir_context *bld_base,
                        unsigned nc, unsigned bit_size,
                        unsigned offset_bit_size,
+                       bool offset_is_global,
                        LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
 
    void (*store_global)(struct lp_build_nir_context *bld_base,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
index de9565a9072..b0ffe89250a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
@@ -816,6 +816,7 @@ static void emit_load_global(struct lp_build_nir_context *bld_base,
                              unsigned nc,
                              unsigned bit_size,
                              unsigned addr_bit_size,
+                             bool offset_is_uniform,
                              LLVMValueRef addr,
                              LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
 {
@@ -827,6 +828,21 @@ static void emit_load_global(struct lp_build_nir_context *bld_base,
 
    res_bld = get_int_bld(bld_base, true, bit_size);
 
+   if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
+      /* If the offset is uniform, then use the address from invocation 0 to
+       * load, and broadcast to all invocations.
+       */
+      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
+                                                      lp_build_const_int32(gallivm, 0), "");
+      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
+
+      for (unsigned c = 0; c < nc; c++) {
+         LLVMValueRef scalar = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
+         outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
+      }
+      return;
+   }
+
    for (unsigned c = 0; c < nc; c++) {
       LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
       struct lp_build_loop_state loop_state;



More information about the mesa-commit mailing list