[Mesa-dev] [PATCH 2/4] i965: Introduce an INDIRECT_THREAD_PAYLOAD_MOV opcode.

Kenneth Graunke kenneth at whitecape.org
Sat Nov 7 21:03:58 PST 2015


The geometry and tessellation control shader stages both read from
multiple URB entries (one per vertex).  The thread payload contains
several URB handles which reference these separate memory segments.

In GLSL, these inputs are represented as per-vertex arrays; the
outermost array index selects which vertex's inputs to read.  This
array index does not necessarily need to be constant.

To handle that, we need to use indirect addressing on GRFs to select
which of the thread payload registers has the appropriate URB handle.
(This is before we can even think about applying the pull model!)

This patch introduces a new opcode which performs a MOV from a
source using VxH indirect addressing (which allows each of the 8
SIMD channels to select distinct data).  It also marks a whole
segment of the payload as "used", so the register allocator recognizes
the read and avoids reusing those registers.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h           | 11 ++++++++
 src/mesa/drivers/dri/i965/brw_fs.h                |  4 +++
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp          |  1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp    | 32 +++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 10 +++++++
 src/mesa/drivers/dri/i965/brw_shader.cpp          |  2 ++
 6 files changed, 60 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 6433cff..288d8b2 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1264,6 +1264,17 @@ enum opcode {
     * Calculate the high 32-bits of a 32x32 multiply.
     */
    SHADER_OPCODE_MULH,
+
+   /**
+    * A SIMD8 VxH indirect addressed MOV from the thread payload.
+    *
+    * This can be used to select GS or TCS input URB handles.
+    *
+    * Source 0: Immediate offset in bytes (UD immediate).
+    * Source 1: Indirect offset in bytes (UD GRF).
+    * Source 2: Number of indirectly addressable registers (immediate).
+    */
+   SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV,
 };
 
 enum brw_urb_write_flags {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8a93b56..fb70f0c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -526,6 +526,10 @@ private:
                                  struct brw_reg offset,
                                  struct brw_reg value);
 
+   void generate_indirect_thread_payload_mov(struct brw_reg dst,
+                                             struct brw_reg imm_byte_offset,
+                                             struct brw_reg indirect_byte_offset);
+
    bool patch_discard_jumps_to_fb_writes();
 
    const struct brw_compiler *compiler;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 3a28c8d..699baab 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
    case FS_OPCODE_LINTERP:
    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
    case SHADER_OPCODE_BROADCAST:
+   case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV:
       return true;
    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e207a77..7d51c0e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -368,6 +368,33 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
 }
 
 void
+fs_generator::generate_indirect_thread_payload_mov(struct brw_reg dst,
+                                                   struct brw_reg imm_byte_offset_reg,
+                                                   struct brw_reg indirect_byte_offset)
+{
+   assert(imm_byte_offset_reg.type == BRW_REGISTER_TYPE_UD);
+   assert(imm_byte_offset_reg.file == BRW_IMMEDIATE_VALUE);
+   assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD);
+   assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE);
+   unsigned imm_byte_offset = imm_byte_offset_reg.dw1.ud;
+
+   /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+   struct brw_reg addr = vec8(brw_address_reg(0));
+
+   /* The destination stride of an instruction (in bytes) must be greater
+    * than or equal to the size of the rest of the instruction.  Since the
+    * address register is of type UW, we can't use a D-type instruction.
+    * In order to get around this, we re-type to UW and use a stride.
+    */
+   indirect_byte_offset =
+      retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+   brw_MOV(p, addr, indirect_byte_offset);
+   brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
+   brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+}
+
+void
 fs_generator::generate_urb_read(fs_inst *inst,
                                 struct brw_reg dst,
                                 struct brw_reg header)
@@ -2085,6 +2112,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          fill_count++;
 	 break;
 
+      case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV:
+         assert(inst->exec_size == 8);
+         generate_indirect_thread_payload_mov(dst, src[0], src[1]);
+         break;
+
       case SHADER_OPCODE_URB_READ_SIMD8:
          generate_urb_read(inst, dst, src[0]);
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 9251d95..648a0f8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -389,6 +389,16 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count,
       case CS_OPCODE_CS_TERMINATE:
          payload_last_use_ip[0] = use_ip;
          break;
+      case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV: {
+         assert(inst->src[0].file == IMM && inst->src[2].file == IMM);
+         int first_reg = inst->src[0].fixed_hw_reg.dw1.ud / REG_SIZE;
+         int num_regs = inst->src[2].fixed_hw_reg.dw1.ud;
+
+         for (int i = 0; i < num_regs; i++) {
+            payload_last_use_ip[first_reg + i] = use_ip;
+         }
+         break;
+      }
 
       default:
          if (inst->eot) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 4ea297a..5e407e9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -546,6 +546,8 @@ brw_instruction_name(enum opcode op)
       return "barrier";
    case SHADER_OPCODE_MULH:
       return "mulh";
+   case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV:
+      return "indirect_thread_payload_mov";
    }
 
    unreachable("not reached");
-- 
2.6.2



More information about the mesa-dev mailing list