[Mesa-dev] [PATCH 12/20] i965/fs/nir: Append nir_intrinsic_load_ubo entries to the gather table

Abdiel Janulgue abdiel.janulgue at linux.intel.com
Fri Sep 11 01:33:26 PDT 2015


Do this when the const block and offset are immediate values; otherwise
just fall back to the previous method of uploading the UBO constant data
to the GRF using pull constants.

Cc: kenneth at whitecape.org
Cc: jason at jlekstrand.net
Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         | 17 +++++++
 src/mesa/drivers/dri/i965/brw_fs.h           |  6 +++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp     | 68 ++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  6 ++-
 4 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e39d821..ad084af 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1865,6 +1865,7 @@ fs_visitor::assign_constant_locations()
 
    stage_prog_data->nr_pull_params = num_pull_constants;
    stage_prog_data->nr_params = 0;
+   stage_prog_data->nr_ubo_params = ubo_uniforms;
 
    unsigned const_reg_access[uniforms];
    memset(const_reg_access, 0, sizeof(const_reg_access));
@@ -1899,6 +1900,20 @@ fs_visitor::assign_constant_locations()
       stage_prog_data->gather_table[p].channel_mask =
          const_reg_access[i];
    }
+
+   for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) {
+      int p = stage_prog_data->nr_gather_table++;
+      stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg;
+      stage_prog_data->gather_table[p].channel_mask =
+         this->ubo_gather_table[i].channel_mask;
+      stage_prog_data->gather_table[p].const_block =
+         this->ubo_gather_table[i].const_block;
+      stage_prog_data->gather_table[p].const_offset =
+         this->ubo_gather_table[i].const_offset;
+      stage_prog_data->max_ubo_const_block =
+         MAX2(stage_prog_data->max_ubo_const_block,
+              this->ubo_gather_table[i].const_block);
+   }
 }
 
 /**
@@ -5171,6 +5186,7 @@ brw_wm_fs_emit(struct brw_context *brw,
    fs_visitor v(brw->intelScreen->compiler, brw,
                 mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
                 prog, &fp->Base, 8, st_index8);
+   v.use_gather_constants = brw->fs_ubo_gather && brw->use_resource_streamer;
    if (!v.run_fs(false /* do_rep_send */)) {
       if (prog) {
          prog->LinkStatus = false;
@@ -5187,6 +5203,7 @@ brw_wm_fs_emit(struct brw_context *brw,
    fs_visitor v2(brw->intelScreen->compiler, brw,
                  mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
                  prog, &fp->Base, 16, st_index16);
+   v2.use_gather_constants = v.use_gather_constants;
    if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
       if (!v.simd16_unsupported) {
          /* Try a SIMD16 compile */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index dd0526a..ded007a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -293,6 +293,9 @@ public:
                                          unsigned n);
 
    int implied_mrf_writes(fs_inst *inst);
+   bool nir_generate_ubo_gather_table(const brw::fs_builder &bld,
+                                      nir_intrinsic_instr *instr, fs_reg &dest,
+                                      bool has_indirect);
 
    virtual void dump_instructions();
    virtual void dump_instructions(const char *name);
@@ -316,6 +319,9 @@ public:
    /** Number of uniform variable components visited. */
    unsigned uniforms;
 
+   /** Number of ubo uniform variable components visited. */
+   unsigned ubo_uniforms;
+
    /** Byte-offset for the next available spot in the scratch space buffer. */
    unsigned last_scratch;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a6c6a2f..9a50b99 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1488,6 +1488,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       has_indirect = true;
       /* fallthrough */
    case nir_intrinsic_load_ubo: {
+      if (nir_generate_ubo_gather_table(bld, instr, dest, has_indirect))
+         break;
+
       nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
       fs_reg surf_index;
 
@@ -1874,3 +1877,68 @@ fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr)
       unreachable("unknown jump");
    }
 }
+/**
+ * Try to service a nir_intrinsic_load_ubo through the UBO gather table
+ * instead of the caller's regular UBO load path.
+ *
+ * \param bld           builder used to emit the MOVs and to step registers
+ * \param instr         the load intrinsic; src[0] supplies the constant block,
+ *                      const_index[0] the constant offset and num_components
+ *                      the number of components to load
+ * \param dest          destination register; passed by reference and stepped
+ *                      by one (via offset()) after every emitted MOV, so the
+ *                      caller's copy is modified when this returns true
+ * \param has_indirect  true when the caller reached this through the indirect
+ *                      variant of the intrinsic
+ *
+ * \return true when the load was handled here, so the caller must not emit
+ *         its own load; false when the caller should continue with its
+ *         existing code.  false is returned, before any state is touched,
+ *         when nir_src_as_const_value() yields no constant for src[0], when
+ *         has_indirect is set, when use_gather_constants is off, or when the
+ *         load would exceed the push component budget checked below.
+ *
+ * On success one MOV per component is emitted from the UNIFORM file register
+ * at offset (param_index + j), where param_index is uniforms + ubo_uniforms
+ * sampled before ubo_uniforms is advanced by instr->num_components.
+ *
+ * When a table entry is being built, component j sets bit
+ * ((const_index[0] % 16) / 4 + j) in that entry's channel_mask.  The explicit
+ * "% 16 == 0" arm of the ternary evaluates to the same value (1 << j).
+ * channel_mask is only OR-ed into here, never cleared -- presumably the table
+ * storage starts out zeroed; confirm against its allocation.
+ *
+ * NOTE(review): nr_ubo_gather_table is incremented and used as an index
+ * without a bounds check in this function -- confirm that the allocation of
+ * ubo_gather_table covers the maximum number of entries that can be appended.
+ */
+bool
+fs_visitor::nir_generate_ubo_gather_table(const brw::fs_builder &bld,
+                                          nir_intrinsic_instr *instr,
+                                          fs_reg &dest,
+                                          bool has_indirect)
+{
+   const nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
+
+   if (!const_index || has_indirect || !use_gather_constants)
+      return false;
+
+   /* Only allow 16 registers (128 uniform components) as push constants.
+    * The budget is shared with the regular uniforms: the check is made on
+    * uniforms + ubo_uniforms plus the components of this load, and a load
+    * that does not fit is rejected as a whole.
+    */
+   static const unsigned max_push_components = 16 * 8;
+   const unsigned param_index = uniforms + ubo_uniforms;
+   if ((param_index + instr->num_components) > max_push_components)
+      return false;
+
+   /* Only SIMD8 gets to assign push constant register locations and generate
+    * the gather table masks.
+    *
+    * Any other dispatch width instead searches the existing entries for one
+    * whose const_block and const_offset match this load and reuses its reg.
+    *
+    * NOTE(review): a failed search is only caught by the assert below; with
+    * asserts compiled out, uniform_reg would stay default-constructed.
+    */
+   bool generate_gather_table = (dispatch_width == 8);
+
+   fs_reg uniform_reg;
+   if (!generate_gather_table) {
+      for (int i = 0; i < (int) this->nr_ubo_gather_table; i++) {
+         if ((this->ubo_gather_table[i].const_block ==
+              const_index->u[0]) &&
+             (this->ubo_gather_table[i].const_offset ==
+              (unsigned) instr->const_index[0])) {
+            uniform_reg = fs_reg(UNIFORM, this->ubo_gather_table[i].reg);
+            break;
+         }
+      }
+      assert(uniform_reg.file == UNIFORM);
+   }
+
+   int gather = -1; /* index of the entry built by this call, if any */
+   if (generate_gather_table) {
+      uniform_reg = fs_reg(UNIFORM, 0);
+      gather = this->nr_ubo_gather_table++;
+
+      assert(instr->num_components <= 4);
+      this->ubo_gather_table[gather].reg = uniform_reg.reg;
+      this->ubo_gather_table[gather].const_block = const_index->u[0];
+      this->ubo_gather_table[gather].const_offset = instr->const_index[0];
+   }
+
+   ubo_uniforms += instr->num_components;
+   for (unsigned j = 0; j < instr->num_components; j++) {
+      fs_reg src = offset(retype(uniform_reg, dest.type), bld,
+                          j + param_index);
+      bld.MOV(dest, src);
+      dest = offset(dest, bld, 1);
+
+      if (gather != -1) {
+         unsigned mask = ((instr->const_index[0] % 16) == 0) ?
+            1 << j : 1 << (((instr->const_index[0] % 16) / 4) + j);
+         this->ubo_gather_table[gather].channel_mask |= mask;
+      }
+   }
+
+   return true;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 5cb794b..59eb122 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1109,6 +1109,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
    this->regs_live_at_ip = NULL;
 
    this->uniforms = 0;
+   this->ubo_uniforms = 0;
    this->last_scratch = 0;
    this->pull_constant_loc = NULL;
    this->push_constant_loc = NULL;
@@ -1116,8 +1117,11 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
    this->spilled_any_registers = false;
    this->do_dual_src = false;
 
-   if (dispatch_width == 8)
+   if (dispatch_width == 8) {
       this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
+      this->ubo_gather_table = rzalloc_array(mem_ctx, backend_shader::gather_table,
+                                             stage_prog_data->nr_params);
+   }
 }
 
 fs_visitor::~fs_visitor()
-- 
1.9.1



More information about the mesa-dev mailing list