<div dir="ltr">On 15 September 2013 00:19, Francisco Jerez <span dir="ltr"><<a href="mailto:currojerez@riseup.net" target="_blank">currojerez@riseup.net</a>></span> wrote:<br><div class="gmail_extra"><div class="gmail_quote">

<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">This can deal with all the 15 32-bit untyped atomic operations the<br>
hardware supports, but only INC and PREDEC are going to be exposed<br>
through the API for now.<br>
---<br>
 src/mesa/drivers/dri/i965/brw_fs.h           |  7 +++<br>
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 83 ++++++++++++++++++++++++++++<br>
 2 files changed, 90 insertions(+)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h<br>
index dcd489c..44930f7 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.h<br>
@@ -392,6 +392,13 @@ public:<br>
    void emit_shader_time_write(enum shader_time_shader_type type,<br>
                                fs_reg value);<br>
<br>
+   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,<br>
+                            unsigned offset, fs_reg dst, fs_reg src0,<br>
+                            fs_reg src1);<br>
+<br>
+   void emit_untyped_surface_read(unsigned surf_index, unsigned offset,<br>
+                                  fs_reg dst);<br>
+<br>
    bool try_rewrite_rhs_to_dst(ir_assignment *ir,<br>
                               fs_reg dst,<br>
                               fs_reg src,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
index 762832a..412d27a 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
@@ -2112,8 +2112,91 @@ fs_visitor::visit(ir_end_primitive *)<br>
 }<br>
<br>
 void<br>
+fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,<br>
+                                unsigned offset, fs_reg dst, fs_reg src0,<br>
+                                fs_reg src1)<br>
+{<br>
+   const unsigned operand_len = dispatch_width / 8;<br>
+   unsigned mlen = 0;<br>
+<br>
+   /* Initialize the sample mask in the message header. */<br>
+   emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0)))<br>
+      ->force_writemask_all = true;<br>
+   emit(MOV(brw_uvec_mrf(1, mlen, 7),<br>
+            retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))<br>
+      ->force_writemask_all = true;<br></blockquote><div><br></div><div>For fragment shaders that don't use discard (fp->UsesKill == false) this will do the right thing.<br><br></div><div>For fragment shaders that do use discard, we store the current pixel mask in the f0.1 register, so you'll need to do something like this:<br>

<br></div><div>emit(MOV(brw_uvec_mrf(1, mlen, 7), brw_flag_reg(0, 1))->force_writemask_all = true;<br><br></div><div>Otherwise pixels that have been discarded will erroneously get the atomic operation applied to them.<br>

<br></div><div>Note: if all the pixels within a 2x2 subspan get discarded, we disable that subspan in the execution mask.  So in order to test this effectively, you'll probably need to write a piglit test that discards only some of the pixels within a subspan, and not others.<br>

 <br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+   mlen++;<br>
+<br>
+   /* Set the atomic operation offset. */<br>
+   emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), brw_imm_ud(offset)));<br>
+   mlen += operand_len;<br>
+<br>
+   /* Set the atomic operation arguments. */<br>
+   if (src0.file != BAD_FILE) {<br>
+      emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src0));<br>
+      mlen += operand_len;<br>
+   }<br>
+<br>
+   if (src1.file != BAD_FILE) {<br>
+      emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), src1));<br>
+      mlen += operand_len;<br>
+   }<br></blockquote><div><br></div><div>src0 is address and src1 is write data, right?  It would be nice to have that in a comment so that readers don't have to cross reference to the bspec.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">


+<br>
+   /* Emit the instruction. */<br>
+   fs_inst inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst,<br>
+                fs_reg(atomic_op), fs_reg(surf_index));<br>
+   inst.base_mrf = 0;<br>
+   inst.mlen = mlen;<br>
+   emit(inst);<br>
+}<br>
+<br>
+void<br>
+fs_visitor::emit_untyped_surface_read(unsigned surf_index, unsigned offset,<br>
+                                      fs_reg dst)<br>
+{<br>
+   const unsigned operand_len = dispatch_width / 8;<br>
+   unsigned mlen = 0;<br>
+<br>
+   /* Initialize the sample mask in the message header. */<br>
+   emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0)))<br>
+      ->force_writemask_all = true;<br>
+   emit(MOV(brw_uvec_mrf(1, mlen, 7),<br>
+            retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))<br>
+      ->force_writemask_all = true;<br></blockquote><div><br></div><div>Same comment about discard applies here, although it's less critical because this is a read operation (I suspect the only effect will be a tiny performance penalty).<br>

</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+   mlen++;<br>
+<br>
+   /* Set the surface read offset. */<br>
+   emit(MOV(brw_uvec_mrf(dispatch_width, mlen, 0), brw_imm_ud(offset)));<br>
+   mlen += operand_len;<br>
+<br>
+   /* Emit the instruction. */<br>
+   fs_inst inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, fs_reg(surf_index));<br>
+   inst.base_mrf = 0;<br>
+   inst.mlen = mlen;<br>
+   emit(inst);<br>
+}<br>
+<br>
+void<br>
 fs_visitor::visit(ir_atomic *ir)<br>
 {<br>
+   ir_variable *loc = ir->location->variable_referenced();<br>
+   unsigned surf_index = SURF_INDEX_WM_ABO(loc->atomic.buffer_index);<br>
+<br>
+   result = fs_reg(this, ir->type);<br>
+<br>
+   switch (ir->op) {<br>
+   case ir_atomic_read:<br>
+      emit_untyped_surface_read(surf_index, loc->atomic.offset, result);<br>
+      break;<br>
+   case ir_atomic_inc:<br>
+      emit_untyped_atomic(BRW_AOP_INC, surf_index, loc->atomic.offset,<br>
+                          result, fs_reg(), fs_reg());<br>
+      break;<br>
+   case ir_atomic_dec:<br>
+      emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, loc->atomic.offset,<br>
+                          result, fs_reg(), fs_reg());<br></blockquote><div><br></div><div>These calls to fs_reg() don't look right to me.  Don't we need to pass the address and the increment/decrement amount to emit_untyped_atomic()?<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+      break;<br>
+   }<br>
 }<br>
<br>
 fs_inst *<br>
<span><font color="#888888">--<br>
1.8.3.4<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>