[Mesa-dev] [PATCH v2 1/3] i965: Introduce a MOV_INDIRECT opcode.

Kenneth Graunke kenneth at whitecape.org
Wed Nov 11 12:02:01 PST 2015


The geometry and tessellation control shader stages both read from
multiple URB entries (one per vertex).  The thread payload contains
several URB handles which reference these separate memory segments.

In GLSL, these inputs are represented as per-vertex arrays; the
outermost array index selects which vertex's inputs to read.  This
array index does not necessarily need to be constant.

To handle that, we need to use indirect addressing on GRFs to select
which of the thread payload registers has the appropriate URB handle.
(This is before we can even think about applying the pull model!)

This patch introduces a new opcode which performs a MOV from a
source using VxH indirect addressing (which allows each of the 8
SIMD channels to select distinct data.)

Based on a patch by Jason Ekstrand.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h        |  9 +++++++
 src/mesa/drivers/dri/i965/brw_fs.cpp           | 24 ++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h             |  5 ++++
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp       |  1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 34 ++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_shader.cpp       |  2 ++
 6 files changed, 75 insertions(+)

Jason,

I omitted a few things from your patch:

 - UNIFORM handling in regs_read() - didn't want to ship it without testing it
   So you'll need to add this back in when you start using it.
 - Gen7 base_offset munging - on IRC you suggested dropping imm_offset, so it
   didn't appear to actually do anything.  Feel free to put it back in if you
   need it, or switch your code to not use base_offset.

But we're using regs_read() now rather than RA hackery, and using a register
with a HW_REG file instead of an IMM offset, so that should be a lot more
reusable.

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 99a3a2d..6e1cfed 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1268,6 +1268,15 @@ enum opcode {
     * Calculate the high 32-bits of a 32x32 multiply.
     */
    SHADER_OPCODE_MULH,
+
+   /**
+    * A MOV that uses VxH indirect addressing.
+    *
+    * Source 0: A register to start from (HW_REG).
+    * Source 1: An indirect offset (in bytes, UD GRF).
+    * Source 2: The maximum value of the indirect offset (in bytes, UD IMM).
+    */
+   SHADER_OPCODE_MOV_INDIRECT,
 };
 
 enum brw_urb_write_flags {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index be712e5..bf8a4a6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -841,6 +841,30 @@ fs_inst::regs_read(int arg) const
    case SHADER_OPCODE_BARRIER:
       return 1;
 
+   case SHADER_OPCODE_MOV_INDIRECT:
+      if (arg == 0) {
+         assert(src[2].file == IMM);
+         unsigned max_indirect = src[2].fixed_hw_reg.dw1.ud;
+
+         if (src[0].file == HW_REG) {
+            assert(src[0].file == HW_REG);
+
+            unsigned regs = DIV_ROUND_UP(max_indirect, REG_SIZE);
+
+            /* If the start of the region is not register aligned, then
+             * we only read part of the register at the beginning, but
+             * overlap into another register at the end.  So add 1.
+             */
+            if (src[0].fixed_hw_reg.subnr)
+               regs++;
+
+            return regs;
+         } else {
+            assert(!"Invalid register file");
+         }
+      }
+      break;
+
    default:
       if (is_tex() && arg == 0 && src[0].file == GRF)
          return mlen;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8a93b56..c599cde 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -526,6 +526,11 @@ private:
                                  struct brw_reg offset,
                                  struct brw_reg value);
 
+   void generate_mov_indirect(fs_inst *inst,
+                              struct brw_reg dst,
+                              struct brw_reg reg,
+                              struct brw_reg indirect_byte_offset);
+
    bool patch_discard_jumps_to_fb_writes();
 
    const struct brw_compiler *compiler;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 3a28c8d..ffb5f87 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
    case FS_OPCODE_LINTERP:
    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
    case SHADER_OPCODE_BROADCAST:
+   case SHADER_OPCODE_MOV_INDIRECT:
       return true;
    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 974219f..97a85bb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -368,6 +368,36 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
 }
 
 void
+fs_generator::generate_mov_indirect(fs_inst *inst,
+                                    struct brw_reg dst,
+                                    struct brw_reg reg,
+                                    struct brw_reg indirect_byte_offset)
+{
+   assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD);
+   assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE);
+
+   unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
+
+   /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+   struct brw_reg addr = vec8(brw_address_reg(0));
+
+   /* The destination stride of an instruction (in bytes) must be greater
+    * than or equal to the size of the rest of the instruction.  Since the
+    * address register is of type UW, we can't use a D-type instruction.
+    * In order to get around this, re re-type to UW and use a stride.
+    */
+   indirect_byte_offset =
+      retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+   /* Prior to Broadwell, there are only 8 address registers. */
+   assert(inst->exec_size == 8 || devinfo->gen >= 8);
+
+   brw_MOV(p, addr, indirect_byte_offset);
+   brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
+   brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+}
+
+void
 fs_generator::generate_urb_read(fs_inst *inst,
                                 struct brw_reg dst,
                                 struct brw_reg header)
@@ -2070,6 +2100,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          fill_count++;
 	 break;
 
+      case SHADER_OPCODE_MOV_INDIRECT:
+         generate_mov_indirect(inst, dst, src[0], src[1]);
+         break;
+
       case SHADER_OPCODE_URB_READ_SIMD8:
          generate_urb_read(inst, dst, src[0]);
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index a6b57c3..9550a62 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -552,6 +552,8 @@ brw_instruction_name(enum opcode op)
       return "barrier";
    case SHADER_OPCODE_MULH:
       return "mulh";
+   case SHADER_OPCODE_MOV_INDIRECT:
+      return "mov_indirect";
    }
 
    unreachable("not reached");
-- 
2.6.2



More information about the mesa-dev mailing list