Mesa (master): i965/fs: Use MOV_INDIRECT for all indirect uniform loads

Jason Ekstrand jekstrand at kemper.freedesktop.org
Thu Apr 14 23:00:41 UTC 2016


Module: Mesa
Branch: master
Commit: 3c93cdfaf598bc3c28e3dc288da35675c666602b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c93cdfaf598bc3c28e3dc288da35675c666602b

Author: Jason Ekstrand <jason.ekstrand at intel.com>
Date:   Tue Nov 24 15:12:20 2015 -0800

i965/fs: Use MOV_INDIRECT for all indirect uniform loads

Instead of using reladdr, this commit changes the FS backend to emit a
MOV_INDIRECT whenever we need an indirect uniform load.  We also have to
rework some of the other bits of the backend to handle this new form of
uniform load.  The obvious change is that demote_pull_constants now acts
more like a lowering pass when it hits a MOV_INDIRECT.

Reviewed-by: Kristian Høgsberg <krh at bitplanet.net>
Acked-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_fs.cpp     | 72 +++++++++++++++++++-------------
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 55 +++++++++++++++++++-----
 2 files changed, 87 insertions(+), 40 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0a2d769..38c6e96 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1963,8 +1963,8 @@ fs_visitor::assign_constant_locations()
          if (inst->src[i].file != UNIFORM)
             continue;
 
-         if (inst->src[i].reladdr) {
-            int uniform = inst->src[i].nr;
+         if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
+            int uniform = inst->src[0].nr;
 
             /* If this array isn't already present in the pull constant buffer,
              * add it.
@@ -2046,49 +2046,63 @@ fs_visitor::assign_constant_locations()
 void
 fs_visitor::demote_pull_constants()
 {
-   foreach_block_and_inst (block, fs_inst, inst, cfg) {
+   const unsigned index = stage_prog_data->binding_table.pull_constants_start;
+
+   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
+      /* Set up the annotation tracking for new generated instructions. */
+      const fs_builder ibld(this, block, inst);
+
       for (int i = 0; i < inst->sources; i++) {
 	 if (inst->src[i].file != UNIFORM)
 	    continue;
 
-         int pull_index;
+         /* We'll handle this case later */
+         if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0)
+            continue;
+
          unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
-         if (location >= uniforms) /* Out of bounds access */
-            pull_index = -1;
-         else
-            pull_index = pull_constant_loc[location];
+         if (location >= uniforms)
+            continue; /* Out of bounds access */
+
+         int pull_index = pull_constant_loc[location];
 
          if (pull_index == -1)
 	    continue;
 
-         /* Set up the annotation tracking for new generated instructions. */
-         const fs_builder ibld(this, block, inst);
-         const unsigned index = stage_prog_data->binding_table.pull_constants_start;
-         fs_reg dst = vgrf(glsl_type::float_type);
-
          assert(inst->src[i].stride == 0);
 
-         /* Generate a pull load into dst. */
-         if (inst->src[i].reladdr) {
-            VARYING_PULL_CONSTANT_LOAD(ibld, dst,
-                                       brw_imm_ud(index),
-                                       *inst->src[i].reladdr,
-                                       pull_index * 4);
-            inst->src[i].reladdr = NULL;
-            inst->src[i].stride = 1;
-         } else {
-            const fs_builder ubld = ibld.exec_all().group(8, 0);
-            struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
-            ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                      dst, brw_imm_ud(index), offset);
-            inst->src[i].set_smear(pull_index & 3);
-         }
-         brw_mark_surface_used(prog_data, index);
+         fs_reg dst = vgrf(glsl_type::float_type);
+         const fs_builder ubld = ibld.exec_all().group(8, 0);
+         struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
+         ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+                   dst, brw_imm_ud(index), offset);
 
          /* Rewrite the instruction to use the temporary VGRF. */
          inst->src[i].file = VGRF;
          inst->src[i].nr = dst.nr;
          inst->src[i].reg_offset = 0;
+         inst->src[i].set_smear(pull_index & 3);
+
+         brw_mark_surface_used(prog_data, index);
+      }
+
+      if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
+          inst->src[0].file == UNIFORM) {
+
+         unsigned location = inst->src[0].nr + inst->src[0].reg_offset;
+         if (location >= uniforms)
+            continue; /* Out of bounds access */
+
+         int pull_index = pull_constant_loc[location];
+         assert(pull_index >= 0); /* This had better be pull */
+
+         VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
+                                    brw_imm_ud(index),
+                                    inst->src[1],
+                                    pull_index * 4);
+         inst->remove(block);
+
+         brw_mark_surface_used(prog_data, index);
       }
    }
    invalidate_live_intervals();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 698894b..59b1376 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1266,6 +1266,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
 {
    fs_reg image(UNIFORM, deref->var->data.driver_location / 4,
                 BRW_REGISTER_TYPE_UD);
+   fs_reg indirect;
+   unsigned indirect_max = 0;
 
    for (const nir_deref *tail = &deref->deref; tail->child;
         tail = tail->child) {
@@ -1277,7 +1279,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
       image = offset(image, bld, base * element_size);
 
       if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
-         fs_reg tmp = vgrf(glsl_type::int_type);
+         fs_reg tmp = vgrf(glsl_type::uint_type);
 
          if (devinfo->gen == 7 && !devinfo->is_haswell) {
             /* IVB hangs when trying to access an invalid surface index with
@@ -1295,15 +1297,31 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
             bld.MOV(tmp, get_nir_src(deref_array->indirect));
          }
 
+         indirect_max += element_size * (tail->type->length - 1);
+
          bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4));
-         if (image.reladdr)
-            bld.ADD(*image.reladdr, *image.reladdr, tmp);
-         else
-            image.reladdr = new(mem_ctx) fs_reg(tmp);
+         if (indirect.file == BAD_FILE) {
+            indirect = tmp;
+         } else {
+            bld.ADD(indirect, indirect, tmp);
+         }
       }
    }
 
-   return image;
+   if (indirect.file == BAD_FILE) {
+      return image;
+   } else {
+      /* Emit a pile of MOVs to load the uniform into a temporary.  The
+       * dead-code elimination pass will get rid of what we don't use.
+       */
+      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
+      for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
+         bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                  offset(tmp, bld, j), offset(image, bld, j),
+                  indirect, brw_imm_ud((indirect_max + 1) * 4));
+      }
+      return tmp;
+   }
 }
 
 void
@@ -2678,13 +2696,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          /* Offsets are in bytes but they should always be multiples of 4 */
          assert(const_offset->u32[0] % 4 == 0);
          src.reg_offset = const_offset->u32[0] / 4;
+
+         for (unsigned j = 0; j < instr->num_components; j++) {
+            bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+         }
       } else {
-         src.reladdr = new(mem_ctx) fs_reg(retype(get_nir_src(instr->src[0]),
-                                                  BRW_REGISTER_TYPE_UD));
-      }
+         fs_reg indirect = retype(get_nir_src(instr->src[0]),
+                                  BRW_REGISTER_TYPE_UD);
 
-      for (unsigned j = 0; j < instr->num_components; j++) {
-         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+         /* We need to pass a size to the MOV_INDIRECT but we don't want it to
+          * go past the end of the uniform.  In order to keep the n'th
+          * component from running past, we subtract off the size of all but
+          * one component of the vector.
+          */
+         assert(instr->const_index[1] >= instr->num_components * 4);
+         unsigned read_size = instr->const_index[1] -
+                              (instr->num_components - 1) * 4;
+
+         for (unsigned j = 0; j < instr->num_components; j++) {
+            bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                     offset(dest, bld, j), offset(src, bld, j),
+                     indirect, brw_imm_ud(read_size));
+         }
       }
       break;
    }




More information about the mesa-commit mailing list