<div dir="ltr">On 21 October 2013 17:48, Eric Anholt <span dir="ltr"><<a href="mailto:eric@anholt.net" target="_blank">eric@anholt.net</a>></span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

Things blew up when I enabled the debug register spill code without<br>
disabling 16-wide, so I decided to just fix 16-wide spilling.<br>
<br>
We still don't generate 16-wide when register spilling happens as part of<br>
allocation (since we expect it to be slower), but now we can experiment<br>
with allowing it in some cases in the future.<br></blockquote><div><br></div><div>Also, we'll need the ability to spill in 16-wide when we get to doing compute shaders, because there are certain conditions in compute shaders where SIMD8 isn't an option (basically, if a compute shader uses barriers or shared variables, all the threads within a thread group need to run simultaneously on one subslice; with the maximum possible size thread group (1024), that's only possible if we use SIMD16.  See Graphics BSpec: 3D-Media-GPGPU Engine > Media GPGPU Pipeline > Media GPGPU Pipeline > Programming the GPGPU Pipeline IVB+ > GPGPU Thread Limits [DevHSW+]).<br>

</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
---<br>
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp    |  8 ++++----<br>
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 15 ++++++++-------<br>
 2 files changed, 12 insertions(+), 11 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
index fa15f7b..6c8fb76 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
@@ -718,8 +718,8 @@ fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)<br>
    brw_MOV(p,<br>
           retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),<br>
           retype(src, BRW_REGISTER_TYPE_UD));<br>
-   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,<br>
-                                inst->offset);<br>
+   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf),<br>
+                                 inst->mlen, inst->offset);<br>
 }<br>
<br>
 void<br>
@@ -727,8 +727,8 @@ fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst)<br>
 {<br>
    assert(inst->mlen != 0);<br>
<br>
-   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,<br>
-                               inst->offset);<br>
+   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf),<br>
+                                dispatch_width / 8, inst->offset);<br>
 }<br>
<br>
 void<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp<br>
index 7826cd4..ed0ce0d 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp<br>
@@ -540,7 +540,7 @@ fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset,<br>
       inst->insert_before(unspill_inst);<br>
<br>
       dst.reg_offset++;<br></blockquote><div><br></div>I had to do a lot of digging to reassure myself that this didn't need to change to "dst.reg_offset += dispatch_width / 8;".  Would you mind clarifying the comment above the declaration of reg_offset in brw_fs.h?  Currently it says:<br>
<br>   /**<br>    * For virtual registers, this is a hardware register offset from<br>    * the start of the register block (for example, a constant index<br>    * in an array access).<br>    */<br><br></div><div class="gmail_quote">
which seems to imply that it's measured in increments of a single 256-bit register.  But it appears that for virtual registers, it's actually in increments of (dispatch_width/8) 256-bit registers.<br></div><div class="gmail_quote">
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
-      spill_offset += REG_SIZE;<br>
+      spill_offset += dispatch_width * sizeof(float);<br>
    }<br>
 }<br>
<br>
@@ -624,10 +624,11 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)<br>
 void<br>
 fs_visitor::spill_reg(int spill_reg)<br>
 {<br>
+   int reg_size = dispatch_width * sizeof(float);<br>
    int size = virtual_grf_sizes[spill_reg];<br>
    unsigned int spill_offset = c->last_scratch;<br>
    assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */<br>
-   c->last_scratch += size * REG_SIZE;<br>
+   c->last_scratch += size * reg_size;<br>
<br>
    /* Generate spill/unspill instructions for the objects being<br>
     * spilled.  Right now, we spill or unspill the whole thing to a<br>
@@ -646,7 +647,7 @@ fs_visitor::spill_reg(int spill_reg)<br>
             inst->src[i].reg_offset = 0;<br>
<br>
             emit_unspill(inst, inst->src[i],<br>
-                         spill_offset + REG_SIZE * inst->src[i].reg_offset,<br>
+                         spill_offset + reg_size * inst->src[i].reg_offset,<br>
                          regs_read);<br>
         }<br>
       }<br>
@@ -654,7 +655,7 @@ fs_visitor::spill_reg(int spill_reg)<br>
       if (inst->dst.file == GRF &&<br>
          inst->dst.reg == spill_reg) {<br>
          int subset_spill_offset = (spill_offset +<br>
-                                    REG_SIZE * inst->dst.reg_offset);<br>
+                                    reg_size * inst->dst.reg_offset);<br>
          inst->dst.reg = virtual_grf_alloc(inst->regs_written);<br>
          inst->dst.reg_offset = 0;<br>
<br>
@@ -677,11 +678,11 @@ fs_visitor::spill_reg(int spill_reg)<br>
            fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,<br>
                                                       reg_null_f, spill_src);<br>
            spill_src.reg_offset++;<br>
-           spill_inst->offset = subset_spill_offset + chan * REG_SIZE;<br>
+           spill_inst->offset = subset_spill_offset + chan * reg_size;<br>
            spill_inst->ir = inst->ir;<br>
            spill_inst->annotation = inst->annotation;<br>
-           spill_inst->base_mrf = 14;<br>
-           spill_inst->mlen = 2; /* header, value */<br>
+           spill_inst->mlen = 1 + dispatch_width / 8; /* header, value */<br>
+           spill_inst->base_mrf = 16 - spill_inst->mlen;<br></blockquote><div><br></div><div>This means that for SIMD16 spills, we'll use MRFs 13-15 (previously we used MRFs 14-15).  Do we know for sure that MRF 13 isn't used by any non-spill-related code?  If so, would you mind adding a quick comment to clarify that?<br>
 <br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
            inst->insert_after(spill_inst);<br>
         }<br>
       }<br>
<span><font color="#888888">--<br>
1.8.4.rc3<br></font></span></blockquote><div><br></div><div>With those comments added, the patch is:<br><br>Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>><br></div><div>
 </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><span><font color="#888888">
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>