Mesa (main): gallivm/nir: Add a short circuit uniform-offset mode for load_ssbo/load_shared.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 6 00:37:10 UTC 2022


Module: Mesa
Branch: main
Commit: 591899eedd2d797bfdb7dbb6b3e2169d50d42109
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=591899eedd2d797bfdb7dbb6b3e2169d50d42109

Author: Emma Anholt <emma at anholt.net>
Date:   Thu Feb 10 15:05:19 2022 -0800

gallivm/nir: Add a short circuit uniform-offset mode for load_ssbo/load_shared.

dEQP-VK.binding_model.buffer_device_address.set3.depth3.basessbo.convertuvec2.nostore.multi.scalar.vert
runtime -24.4002% +/- 1.94375% (n=7).  The win (I think) is in LLVM not
having to chew through handling the extra loops on every constant-offset
SSBO load, not in actual rendering time.

Reviewed-by: Dave Airlie <airlied at redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14999>

---

 src/gallium/auxiliary/gallivm/lp_bld_nir.c     |  6 +++--
 src/gallium/auxiliary/gallivm/lp_bld_nir.h     |  1 +
 src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c | 37 ++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index 2b5b45d8b93..d7741abe7c1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -1397,8 +1397,9 @@ static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
 {
    LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
+   bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[0]) && nir_src_is_always_uniform(instr->src[1]);
    bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
-                       idx, offset, result);
+                      index_and_offset_are_uniform, idx, offset, result);
 }
 
 static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
@@ -1634,8 +1635,9 @@ static void visit_shared_load(struct lp_build_nir_context *bld_base,
                                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMValueRef offset = get_src(bld_base, instr->src[0]);
+   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
    bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
-                      NULL, offset, result);
+                      offset_is_uniform, NULL, offset, result);
 }
 
 static void visit_shared_store(struct lp_build_nir_context *bld_base,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
index 895897e4bc8..a71181f9ddb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
@@ -111,6 +111,7 @@ struct lp_build_nir_context
    /* for SSBO and shared memory */
    void (*load_mem)(struct lp_build_nir_context *bld_base,
                     unsigned nc, unsigned bit_size,
+                    bool index_and_offset_are_uniform,
                     LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
    void (*store_mem)(struct lp_build_nir_context *bld_base,
                      unsigned writemask, unsigned nc, unsigned bit_size,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
index b0ffe89250a..558245ca641 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
@@ -1143,6 +1143,7 @@ mem_access_base_pointer(struct lp_build_nir_context *bld_base,
 static void emit_load_mem(struct lp_build_nir_context *bld_base,
                           unsigned nc,
                           unsigned bit_size,
+                          bool index_and_offset_are_uniform,
                           LLVMValueRef index,
                           LLVMValueRef offset,
                           LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
@@ -1158,6 +1159,42 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base,
 
    offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
 
+   /* If the address is uniform, then use the address from invocation 0 to load,
+    * and broadcast to all invocations.
+    */
+   if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
+      LLVMValueRef ssbo_limit;
+      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, index,
+                                                     lp_build_const_int32(gallivm, 0), &ssbo_limit);
+
+      offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
+
+      for (unsigned c = 0; c < nc; c++) {
+         LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
+
+         LLVMValueRef scalar;
+         /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
+         if (ssbo_limit) {
+            LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size);
+            LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
+            LLVMBuildStore(builder, zero, res_store);
+
+            LLVMValueRef fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, ssbo_limit, chan_offset, "");
+            struct lp_build_if_state ifthen;
+            lp_build_if(&ifthen, gallivm, fetch_cond);
+            LLVMBuildStore(builder, lp_build_pointer_get(builder, mem_ptr, chan_offset), res_store);
+            lp_build_endif(&ifthen);
+
+            scalar = LLVMBuildLoad(builder, res_store, "");
+         } else {
+            scalar = lp_build_pointer_get(builder, mem_ptr, chan_offset);
+         }
+
+         outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
+      }
+      return;
+   }
+
    /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
 
    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];



More information about the mesa-commit mailing list