[Mesa-dev] [PATCH 19/21] i965/fs: Add an opcode for loading indirect push constants

Jason Ekstrand jason at jlekstrand.net
Wed Aug 19 22:45:54 PDT 2015


This commit adds an FS_OPCODE_PUSH_CONSTANT_LOAD opcode which allows you to
load an indirect push constant.  The first argument to the opcode is a
non-indirect uniform, the second is an immediate base offset, the third is
the indirect offset, and the fourth is an immediate value that provides a
bound on the total offset.  This way we can provide accurate regs_read()
information to optimization passes and things that need to think about
interference.
---
 src/mesa/drivers/dri/i965/brw_defines.h        | 17 +++++++
 src/mesa/drivers/dri/i965/brw_fs.cpp           | 23 +++++++++
 src/mesa/drivers/dri/i965/brw_fs.h             |  4 ++
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp       |  1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 70 ++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_shader.cpp       |  2 +
 6 files changed, 117 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 82a3635..f7f0a2e 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1029,6 +1029,23 @@ enum opcode {
    FS_OPCODE_LINTERP,
    FS_OPCODE_PIXEL_X,
    FS_OPCODE_PIXEL_Y,
+
+   /**
+    * Loads a uniform push constant with an indirect.  This opcode takes four
+    * arguments:
+    *
+    *  0) The uniform register to load from; it must have a NULL reladdr
+    *  1) An immediate base offset (in bytes)
+    *  2) A register indirect offset (in bytes)
+    *  3) The immediate value representing the maximum possible total offset.
+    *
+    * The base offset and indirect offset are added together to get the
+    * total offset which is then added to the starting address of the register
+    * in src0.  The reason for the multiplicity of arguments is so that the
+    * range [reg, reg + regs_read()) is an accurate representation of all of
+    * the values that could be read by the instruction.
+    */
+   FS_OPCODE_PUSH_CONSTANT_LOAD,
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3d55dc8..60c9a0f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -796,6 +796,25 @@ fs_inst::regs_read(int arg) const
    case CS_OPCODE_CS_TERMINATE:
       return 1;
 
+   case FS_OPCODE_PUSH_CONSTANT_LOAD:
+      if (arg == 0) {
+         assert(src[3].file == IMM);
+         unsigned max_indirect = src[3].fixed_hw_reg.dw1.ud;
+
+         if (src[0].file == UNIFORM) {
+            return (max_indirect / 4) + 1;
+         } else {
+            /* This is the case after assign_curb_setup() */
+            assert(src[0].file == HW_REG);
+
+            struct brw_reg reg = src[0].fixed_hw_reg;
+            unsigned base_offset = reg.nr * REG_SIZE + reg.subnr;
+            unsigned max_offset = base_offset + max_indirect;
+            return (max_offset / REG_SIZE) - (base_offset / REG_SIZE) + 1;
+         }
+      }
+      break;
+
    default:
       if (is_tex() && arg == 0 && src[0].file == GRF)
          return mlen;
@@ -4233,6 +4252,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
    case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
       return 8;
 
+   case FS_OPCODE_PUSH_CONSTANT_LOAD:
+      /* Prior to BDW, we only have 8 address registers */
+      return devinfo->gen < 8 ? 8 : inst->exec_size;
+
    default:
       return inst->exec_size;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 90c9756..6bc434a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -444,6 +444,10 @@ private:
    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
    void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
+   void generate_push_constant_load(fs_inst *inst, struct brw_reg dst,
+                                    struct brw_reg reg,
+                                    struct brw_reg base_offset,
+                                    struct brw_reg indirect);
    void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
                                             struct brw_reg index,
                                             struct brw_reg offset);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index c7628dc..cdc6c10 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -71,6 +71,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
    case BRW_OPCODE_PLN:
    case BRW_OPCODE_MAD:
    case BRW_OPCODE_LRP:
+   case FS_OPCODE_PUSH_CONSTANT_LOAD:
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c86ca04..956bfb8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1031,6 +1031,72 @@ fs_generator::generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst)
 }
 
 void
+fs_generator::generate_push_constant_load(fs_inst *inst, struct brw_reg dst,
+                                          struct brw_reg reg,
+                                          struct brw_reg base_offset_reg,
+                                          struct brw_reg indirect)
+{
+   assert(base_offset_reg.file == BRW_IMMEDIATE_VALUE);
+   unsigned base_offset = base_offset_reg.dw1.ud;
+
+   /* Add in the register position to get the absolute byte offset */
+   base_offset += reg.nr * REG_SIZE + reg.subnr;
+
+   assert(indirect.type == BRW_REGISTER_TYPE_D ||
+          indirect.type == BRW_REGISTER_TYPE_UD);
+
+   if (indirect.file == BRW_IMMEDIATE_VALUE) {
+      base_offset += indirect.dw1.d;
+      /* Offset is now fully constant: re-point reg at it and do a MOV. */
+      reg.nr = base_offset / REG_SIZE;
+      reg.subnr = base_offset % REG_SIZE;
+      brw_MOV(p, dst, reg);
+   } else {
+      struct brw_reg addr = vec8(brw_address_reg(0));
+
+      /* The destination stride of an instruction (in bytes) must be greater
+       * than or equal to the size of the rest of the instruction.  Since the
+       * address register is of type UW, we can't use a D-type instruction.
+       * In order to get around this, we re-type to UW and use a stride.
+       */
+      indirect = spread(indirect, 2);
+      indirect.type = BRW_REGISTER_TYPE_UW;
+
+      if (devinfo->gen < 8) {
+         /* Prior to Broadwell, there are a couple silly restrictions that
+          * we have to work around.  First, we only have 8 address register
+          * entries so this is SIMD8-only.
+          */
+         assert(inst->exec_size <= 8);
+
+         /* Finally, the bottom 5 bits of the base offset and the bottom 5
+          * bits of the indirect must add to less than 32.  In other words,
+          * the hardware needs to be able to add the bottom five bits of the
+          * two to get the subnumber and add the next 7 bits of each to get
+          * the actual register number.  Since uniforms frequently cross
+          * register boundaries, this makes it almost useless.  We could try
+          * and do something clever where we use an actual base offset if
+          * base_offset % 32 == 0 but that would mean we were generating
+          * different code depending on the base offset.  Instead, for the
+          * sake of consistency, we'll just do the add ourselves.
+          */
+         brw_ADD(p, addr, indirect, brw_imm_uw(base_offset));
+         base_offset = 0;
+      } else {
+         /* On Broadwell and above, we have 16 address registers and
+          * everything seems to "just work".
+          */
+         brw_MOV(p, addr, indirect);
+      }
+      /* Get a VxH indirect for a0.0.  NOTE(review): on gen8+ base_offset
+       * may be nonzero here; confirm it fits the indirect's offset field. */
+      struct brw_reg src = brw_VxH_indirect(0, base_offset);
+
+      brw_MOV(p, dst, retype(src, dst.type));
+   }
+}
+
+void
 fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
                                                   struct brw_reg dst,
                                                   struct brw_reg index,
@@ -1951,6 +2017,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
 	 generate_urb_write(inst, src[0]);
 	 break;
 
+      case FS_OPCODE_PUSH_CONSTANT_LOAD:
+	 generate_push_constant_load(inst, dst, src[0], src[1], src[2]);
+	 break;
+
       case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
 	 generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index a7453fa..fdbcca5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -695,6 +695,8 @@ brw_instruction_name(enum opcode op)
    case FS_OPCODE_PIXEL_Y:
       return "pixel_y";
 
+   case FS_OPCODE_PUSH_CONSTANT_LOAD:
+      return "push_const";
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return "uniform_pull_const";
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
-- 
2.4.3



More information about the mesa-dev mailing list