Mesa (master): r600/sfn; go back to not lowering uniforms to UBOs

Mon Sep 28 18:23:24 UTC 2020

Module: Mesa
Branch: master
Commit: 6784cea646b0d5f7dde4827b52ea269f53210d67
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6784cea646b0d5f7dde4827b52ea269f53210d67

Author: Gert Wollny <gert.wollny at collabora.com>
Date:   Sat Sep 26 20:14:00 2020 +0200

r600/sfn; go back to not lowering uniforms to UBOs

Lowering uniforms to UBOs results in an aditional iadd for the
UBO buffer id evaluation, and for indirect buffers access that
results in an unnecessary op that can be avoided by not lowering
uniforms. There is some code duplication when reading the uniforms
but it saves a whole instruction group per indirect cont buffer
access.

This reverts commit 98eb00face93b9af5aac19008ecff5a2bf376745 with
some additional fixes.

Signed-off-by: Gert Wollny <gert.wollny at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6879>

---

 src/gallium/drivers/r600/r600_pipe_common.c      |  2 -
 src/gallium/drivers/r600/sfn/sfn_shader_base.cpp | 51 +++++++++++++++++++++---
 src/gallium/drivers/r600/sfn/sfn_shader_base.h   |  1 +
 src/gallium/drivers/r600/sfn/sfn_value.cpp       |  2 +-
 4 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index eab7ce91dd7..9dc00cf1aef 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -1201,7 +1201,6 @@ const struct nir_shader_compiler_options r600_nir_fs_options = {
 	.vectorize_io = true,
 	.has_umad24 = true,
 	.has_umul24 = true,
-        .lower_uniforms_to_ubo = true
 };
 
 const struct nir_shader_compiler_options r600_nir_options = {
@@ -1224,7 +1223,6 @@ const struct nir_shader_compiler_options r600_nir_options = {
 	.vectorize_io = true,
 	.has_umad24 = true,
 	.has_umul24 = true,
-        .lower_uniforms_to_ubo = true
 };
 
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
index d6ade30d4de..d288b96e09a 100644
--- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
@@ -657,6 +657,8 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins
       return emit_load_scratch(instr);
    case nir_intrinsic_store_deref:
       return emit_store_deref(instr);
+   case nir_intrinsic_load_uniform:
+      return load_uniform(instr);
    case nir_intrinsic_discard:
    case nir_intrinsic_discard_if:
       return emit_discard_if(instr);
@@ -868,7 +870,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
          for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
             int cmp = buf_cmp + i;
             assert(cmp < 4);
-            auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, bufid->u32));
+            auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, bufid->u32 + 1));
             if (instr->dest.is_ssa)
                load_preloaded_value(instr->dest, i, u);
             else {
@@ -881,7 +883,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
          return true;
 
       } else {
-         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32);
+         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32 + 1);
       }
    } else {
       if (buf_offset) {
@@ -915,7 +917,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
       }
 
       auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
-                                     0, bufid, bim_zero);
+                                     1, bufid, bim_zero);
       ir->set_dest_swizzle(swz);
 
       emit_instruction(ir);
@@ -952,6 +954,46 @@ bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
    return do_emit_load_deref(var, instr);
 }
 
+bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr)
+{
+   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
+                 << *reinterpret_cast<nir_instr*>(instr)
+                 << "'\n";
+
+
+   /* If the target register is a SSA register and the loading is not
+    * indirect then we can do lazy loading, i.e. the uniform value can
+    * be used directly. Otherwise we have to load the data for real
+    * rigt away.
+    */
+   auto literal = nir_src_as_const_value(instr->src[0]);
+   int base = nir_intrinsic_base(instr);
+
+   if (literal) {
+      AluInstruction *ir = nullptr;
+
+      for (int i = 0; i < instr->num_components ; ++i) {
+         PValue u = PValue(new UniformValue(512 + literal->u32 + base, i));
+         sfn_log << SfnLog::io << "uniform "
+                 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
+
+         if (instr->dest.is_ssa)
+            load_preloaded_value(instr->dest, i, u);
+         else {
+            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
+                                                   u, {alu_write});
+             emit_instruction(ir);
+         }
+      }
+      if (ir)
+         ir->set_flag(alu_last_instr);
+   } else {
+      PValue addr = from_nir(instr->src[0], 0, 0);
+      return load_uniform_indirect(instr, addr, 16 * base, 0);
+   }
+   return true;
+}
+
 bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid)
 {
    if (!addr) {
@@ -963,7 +1005,7 @@ bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, P
    std::array<int, 4> swz = {7,7,7,7};
    for (int i = 0; i < 4; ++i) {
       trgt.set_reg_i(i, from_nir(instr->dest, i));
-      swz[i] = i + nir_intrinsic_component(instr);
+      swz[i] = i;
    }
 
    if (addr->type() != Value::gpr) {
@@ -971,7 +1013,6 @@ bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, P
       addr = trgt.reg_i(0);
    }
 
-   /* FIXME: buffer index and index mode are not set correctly */
    auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest,
                                   bufferid, PValue(), bim_none);
    ir->set_dest_swizzle(swz);
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/src/gallium/drivers/r600/sfn/sfn_shader_base.h
index 309493f48e6..50a0b9e8a51 100644
--- a/src/gallium/drivers/r600/sfn/sfn_shader_base.h
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.h
@@ -166,6 +166,7 @@ private:
 
    bool emit_store_deref(nir_intrinsic_instr* instr);
 
+   bool load_uniform(nir_intrinsic_instr* instr);
    bool process_uniforms(nir_variable *uniform);
    bool process_inputs(nir_variable *input);
    bool process_outputs(nir_variable *output);
diff --git a/src/gallium/drivers/r600/sfn/sfn_value.cpp b/src/gallium/drivers/r600/sfn/sfn_value.cpp
index 3a5a3ce7129..cdd1367461d 100644
--- a/src/gallium/drivers/r600/sfn/sfn_value.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_value.cpp
@@ -210,7 +210,7 @@ UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank):
 UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr):
    Value(Value::kconst, chan),
    m_index(sel),
-   m_kcache_bank(0),
+   m_kcache_bank(1),
    m_addr(addr)
 {