<div dir="ltr">On 20 August 2013 11:30, Paul Berry <span dir="ltr">&lt;<a href="mailto:stereotype441@gmail.com" target="_blank">stereotype441@gmail.com</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote">
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">---<br>
 src/mesa/drivers/dri/i965/brw_defines.h     |  9 +++++++++<br>
 src/mesa/drivers/dri/i965/brw_eu.h          |  6 ++++++<br>
 src/mesa/drivers/dri/i965/brw_eu_emit.c     |  4 ++--<br>
 src/mesa/drivers/dri/i965/brw_shader.cpp    |  5 ++++-<br>
 src/mesa/drivers/dri/i965/brw_vec4.cpp      |  2 ++<br>
 src/mesa/drivers/dri/i965/brw_vec4.h        |  3 ++-<br>
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 23 +++++++++++++++++++++--<br>
 7 files changed, 46 insertions(+), 6 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h<br>
index 2ab0a2b..16a1dbc 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_defines.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_defines.h<br>
@@ -799,6 +799,15 @@ enum opcode {<br>
    VS_OPCODE_PULL_CONSTANT_LOAD,<br>
    VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,<br>
    VS_OPCODE_UNPACK_FLAGS_SIMD4X2,<br>
+<br>
+   /**<br>
+    * Write geometry shader output data to the URB.<br>
+    *<br>
+    * Unlike VS_OPCODE_URB_WRITE, this opcode doesn't do an implied move from<br>
+    * R0 to the first MRF.  This allows the geometry shader to override the<br>
+    * "Slot {0,1} Offset" fields in the message header.<br>
+    */<br>
+   GS_OPCODE_URB_WRITE,<br>
 };<br>
<br>
 #define BRW_PREDICATE_NONE             0<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h<br>
index ae4cab5..9053ea2 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu.h<br>
@@ -252,6 +252,12 @@ enum brw_urb_write_flags {<br>
    BRW_URB_WRITE_COMPLETE = 0x8,<br>
<br>
    /**<br>
+    * Indicates that an additional offset (which may be different for the two<br>
+    * vec4 slots) is stored in the message header (gen == 7).<br>
+    */<br>
+   BRW_URB_WRITE_PER_SLOT_OFFSET = 0x10,<br>
+<br>
+   /**<br>
     * Convenient combination of flags: end the thread while simultaneously<br>
     * marking the given URB entry as complete.<br>
     */<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
index 622b22f..b55b57e 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
@@ -531,8 +531,8 @@ static void brw_set_urb_message( struct brw_compile *p,<br>
       insn->bits3.urb_gen7.offset = offset;<br>
       assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);<br>
       insn->bits3.urb_gen7.swizzle_control = swizzle_control;<br>
-      /* per_slot_offset = 0 makes it ignore offsets in message header */<br>
-      insn->bits3.urb_gen7.per_slot_offset = 0;<br>
+      insn->bits3.urb_gen7.per_slot_offset =<br>
+         flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;<br>
       insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;<br>
    } else if (brw->gen >= 5) {<br>
       insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */<br>
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp<br>
index afa14c5..d3de6ed 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp<br>
@@ -485,7 +485,7 @@ brw_instruction_name(enum opcode op)<br>
       return "placeholder_halt";<br>
<br>
    case VS_OPCODE_URB_WRITE:<br>
-      return "urb_write";<br>
+      return "vs_urb_write";<br>
    case VS_OPCODE_SCRATCH_READ:<br>
       return "scratch_read";<br>
    case VS_OPCODE_SCRATCH_WRITE:<br>
@@ -497,6 +497,9 @@ brw_instruction_name(enum opcode op)<br>
    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:<br>
       return "unpack_flags_simd4x2";<br>
<br>
+   case GS_OPCODE_URB_WRITE:<br>
+      return "gs_urb_write";<br>
+<br>
    default:<br>
       /* Yes, this leaks.  It's in debug code, it should never occur, and if<br>
        * it does, you should just add the case to the list above.<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp<br>
index abdf3ab..c978396 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp<br>
@@ -259,6 +259,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)<br>
       return 2;<br>
    case VS_OPCODE_SCRATCH_WRITE:<br>
       return 3;<br>
+   case GS_OPCODE_URB_WRITE:<br>
+      return 0;<br>
    case SHADER_OPCODE_SHADER_TIME_ADD:<br>
       return 0;<br>
    case SHADER_OPCODE_TEX:<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h<br>
index a398f71..c3e2212 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h<br>
@@ -627,7 +627,8 @@ private:<br>
                     struct brw_reg dst,<br>
                     struct brw_reg src);<br>
<br>
-   void generate_urb_write(vec4_instruction *inst);<br>
+   void generate_vs_urb_write(vec4_instruction *inst);<br>
+   void generate_gs_urb_write(vec4_instruction *inst);<br>
    void generate_oword_dual_block_offsets(struct brw_reg m1,<br>
                                          struct brw_reg index);<br>
    void generate_scratch_write(vec4_instruction *inst,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp<br>
index 89831de..681dbdd 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp<br>
@@ -399,7 +399,7 @@ vec4_generator::generate_tex(vec4_instruction *inst,<br>
 }<br>
<br>
 void<br>
-vec4_generator::generate_urb_write(vec4_instruction *inst)<br>
+vec4_generator::generate_vs_urb_write(vec4_instruction *inst)<br>
 {<br>
    brw_urb_WRITE(p,<br>
                 brw_null_reg(), /* dest */<br>
@@ -413,6 +413,21 @@ vec4_generator::generate_urb_write(vec4_instruction *inst)<br>
 }<br>
<br>
 void<br>
+vec4_generator::generate_gs_urb_write(vec4_instruction *inst)<br>
+{<br>
+   struct brw_reg src = brw_message_reg(inst->base_mrf);<br>
+   brw_urb_WRITE(p,<br>
+                brw_null_reg(), /* dest */<br>
+                inst->base_mrf, /* starting mrf reg nr */<br>
+                src,<br>
+                 inst->urb_write_flags,<br>
+                inst->mlen,<br>
+                0,             /* response len */<br>
+                inst->offset,  /* urb destination offset */<br>
+                BRW_URB_SWIZZLE_INTERLEAVE);<br>
+}<br></blockquote><div><br>From our in-person code review yesterday:<br><br></div><div>There are space vs. tab issues in the code above.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

+<br>
+void<br>
 vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,<br>
                                                   struct brw_reg index)<br>
 {<br>
@@ -861,7 +876,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,<br>
       break;<br>
<br>
    case VS_OPCODE_URB_WRITE:<br>
-      generate_urb_write(inst);<br>
+      generate_vs_urb_write(inst);<br>
       break;<br>
<br>
    case VS_OPCODE_SCRATCH_READ:<br>
@@ -880,6 +895,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,<br>
       generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);<br>
       break;<br>
<br>
+   case GS_OPCODE_URB_WRITE:<br>
+      generate_gs_urb_write(inst);<br>
+      break;<br>
+<br>
    case SHADER_OPCODE_SHADER_TIME_ADD:<br>
       brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);<br>
       mark_surface_used(SURF_INDEX_VS_SHADER_TIME);<br>
<span class=""><font color="#888888">--<br>
1.8.3.4<br>
<br>
</font></span></blockquote></div><br></div></div>