[Mesa-dev] [PATCH 11/11] i965: Use pushed UBO data in the scalar backend.

Kenneth Graunke kenneth at whitecape.org
Fri Jul 7 00:22:20 UTC 2017


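This makes the scalar backend actually use the newly pushed UBO data:
UBO loads with a constant block index and constant offset that fall within
a pushed range are read directly from the push constants, avoiding pull
loads.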

XXX: quote performance numbers
---
 src/intel/compiler/brw_fs.cpp     | 35 ++++++++++++++++++++++++++++++++++-
 src/intel/compiler/brw_fs.h       |  2 ++
 src/intel/compiler/brw_fs_nir.cpp | 28 ++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 49e714f1c1f..68648eda64d 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1386,7 +1386,9 @@ fs_visitor::assign_curb_setup()
    unsigned uniform_push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
 
    unsigned ubo_push_length = 0;
+   unsigned ubo_push_start[4];
    for (int i = 0; i < 4; i++) {
+      ubo_push_start[i] = 8 * (ubo_push_length + uniform_push_length);
       ubo_push_length += stage_prog_data->ubo_ranges[i].length;
    }
 
@@ -1398,7 +1400,11 @@ fs_visitor::assign_curb_setup()
 	 if (inst->src[i].file == UNIFORM) {
             int uniform_nr = inst->src[i].nr + inst->src[i].offset / 4;
             int constant_nr;
-            if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
+            if (inst->src[i].nr >= UBO_START) {
+               /* constant_nr is in 32-bit units; src[i].offset is in bytes */
+               constant_nr = ubo_push_start[inst->src[i].nr - UBO_START] +
+                             inst->src[i].offset / 4;
+            } else if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
                constant_nr = push_constant_loc[uniform_nr];
             } else {
                /* Section 5.11 of the OpenGL 4.1 spec says:
@@ -2069,6 +2075,20 @@ fs_visitor::assign_constant_locations()
    stage_prog_data->nr_params = num_push_constants;
    stage_prog_data->nr_pull_params = num_pull_constants;
 
+   /* Now that we know how many regular uniforms we'll push, reduce the
+    * UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits.
+    */
+   unsigned push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
+   for (int i = 0; i < 4; i++) {
+      struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
+
+      if (push_length + range->length > 64)
+         range->length = 64 - push_length;
+
+      push_length += range->length;
+   }
+   assert(push_length <= 64);
+
    /* Up until now, the param[] array has been indexed by reg + offset
     * of UNIFORM registers.  Move pull constants into pull_param[] and
     * condense param[] to only contain the uniforms we chose to push.
@@ -2103,6 +2123,19 @@ fs_visitor::get_pull_locs(const fs_reg &src,
 {
    assert(src.file == UNIFORM);
 
+   if (src.nr >= UBO_START) {
+      const struct brw_ubo_range *range =
+         &prog_data->ubo_ranges[src.nr - UBO_START];
+
+      /* If this access is in our (reduced) range, use the push data. */
+      if (src.offset / 32 < range->length && !getenv("PULL"))
+         return false;
+
+      *out_surf_index = prog_data->binding_table.ubo_start + range->block;
+      *out_pull_index = (32 * range->start + src.offset) / 4;
+      return true;
+   }
+
    const unsigned location = src.nr + src.offset / 4;
 
    if (location < uniforms && pull_constant_loc[location] != -1) {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index beb0d971b4c..f1ba193de7e 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -50,6 +50,8 @@ offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
    return offset(reg, bld.dispatch_width(), delta);
 }
 
+#define UBO_START ((1 << 16) - 4)
+
 /**
  * The fragment shader front-end.
  *
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index a9dce42c38d..34e8cd1427d 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3822,6 +3822,34 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           * and we have to split it if necessary.
           */
          const unsigned type_size = type_sz(dest.type);
+
+         /* See if we've selected this as a push constant candidate */
+         if (const_index) {
+            const unsigned ubo_block = const_index->u32[0];
+            const unsigned offset_256b = const_offset->u32[0] / 32;
+
+            fs_reg push_reg;
+            for (int i = 0; i < 4; i++) {
+               const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
+               if (range->block == ubo_block &&
+                   offset_256b >= range->start &&
+                   offset_256b < range->start + range->length) {
+
+                  push_reg = fs_reg(UNIFORM, UBO_START + i, dest.type);
+                  push_reg.offset = const_offset->u32[0] - 32 * range->start;
+                  break;
+               }
+            }
+
+            if (push_reg.file != BAD_FILE) {
+               for (unsigned i = 0; i < instr->num_components; i++) {
+                  bld.MOV(offset(dest, bld, i),
+                          byte_offset(push_reg, i * type_size));
+               }
+               break;
+            }
+         }
+
          const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
          const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0);
          const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD);
-- 
2.13.2



More information about the mesa-dev mailing list