<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Tue, May 24, 2016 at 12:18 AM, Francisco Jerez <span dir="ltr"><<a href="mailto:currojerez@riseup.net" target="_blank">currojerez@riseup.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">This generalizes the current fs_inst::force_sechalf flag to allow<br>
specifying channel enable groups other than 0 or 8.  At some point it<br>
will likely make sense to fix the vec4 generator to support arbitrary<br>
execution groups and then move the definition of fs_inst::group into<br>
backend_instruction (e.g. so we can do FP64 in the VEC4 back-end).<br>
---<br>
 src/mesa/drivers/dri/i965/brw_fs.cpp              | 20 ++++++++------------<br>
 src/mesa/drivers/dri/i965/brw_fs_builder.h        | 14 +++++++++++---<br>
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp          |  4 ++--<br>
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp    |  7 ++++---<br>
 src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp |  2 +-<br>
 src/mesa/drivers/dri/i965/brw_ir_fs.h             | 20 +++++++++-----------<br>
 6 files changed, 35 insertions(+), 32 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index a59cd3c..92caeaa 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -3638,7 +3638,7 @@ fs_visitor::lower_integer_multiplication()<br>
             mul->src[1].stride *= 2;<br>
<br>
          } else if (devinfo->gen == 7 && !devinfo->is_haswell &&<br>
-                    inst->force_sechalf) {<br>
+                    inst->group > 0) {<br>
             /* Among other things the quarter control bits influence which<br>
              * accumulator register is used by the hardware for instructions<br>
              * that access the accumulator implicitly (e.g. MACH).  A<br>
@@ -3655,7 +3655,7 @@ fs_visitor::lower_integer_multiplication()<br>
              * to get the result masked correctly according to the current<br>
              * channel enables.<br>
              */<br>
-            mach->force_sechalf = false;<br>
+            mach->group = 0;<br>
             mach->force_writemask_all = true;<br>
             mach->dst = ibld.vgrf(inst->dst.type);<br>
             ibld.MOV(inst->dst, mach->dst);<br>
@@ -3791,8 +3791,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,<br>
       sample_mask.stride *= 2;<br>
<br>
       bld.exec_all().annotate("FB write oMask")<br>
-         .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),<br>
-                   inst->force_sechalf),<br>
+         .MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW),<br>
+                           inst->group),<br>
               sample_mask);<br>
       length++;<br>
    }<br>
@@ -5017,10 +5017,10 @@ fs_visitor::lower_simd_width()<br>
           * execution size of the builder to the highest of both for now so<br>
           * we're sure that both cases can be handled.<br>
           */<br>
+         const unsigned max_width = MAX2(inst->exec_size, lower_width);<br>
          const fs_builder ibld = <a href="http://bld.at" rel="noreferrer" target="_blank">bld.at</a>(block, inst)<br>
                                     .exec_all(inst->force_writemask_all)<br>
-                                    .group(MAX2(inst->exec_size, lower_width),<br>
-                                           inst->force_sechalf);<br>
+                                    .group(max_width, inst->group / max_width);<br>
<br>
          /* Split the copies in chunks of the execution width of either the<br>
           * original or the lowered instruction, whichever is lower.<br>
@@ -5352,12 +5352,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)<br>
    if (inst->force_writemask_all)<br>
       fprintf(file, "NoMask ");<br>
<br>
-   if (dispatch_width == 16 && inst->exec_size == 8) {<br>
-      if (inst->force_sechalf)<br>
-         fprintf(file, "2ndhalf ");<br>
-      else<br>
-         fprintf(file, "1sthalf ");<br>
-   }<br>
+   if (inst->exec_size != dispatch_width)<br>
+      fprintf(file, "group%d ", inst->group);<br>
<br>
    fprintf(file, "\n");<br>
 }<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h<br>
index b50dda4..c1d13a2 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h<br>
@@ -72,7 +72,7 @@ namespace brw {<br>
       fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :<br>
          shader(shader), block(block), cursor(inst),<br>
          _dispatch_width(inst->exec_size),<br>
-         _group(inst->force_sechalf ? 8 : 0),<br>
+         _group(inst->group),<br>
          force_writemask_all(inst->force_writemask_all)<br>
       {<br>
          annotation.str = inst->annotation;<br>
@@ -168,6 +168,15 @@ namespace brw {<br>
       }<br>
<br>
       /**<br>
+       * Get the channel group in use.<br>
+       */<br>
+      unsigned<br>
+      group() const<br>
+      {<br>
+         return _group;<br>
+      }<br>
+<br>
+      /**<br>
        * Allocate a virtual register of natural vector size (one for this IR)<br>
        * and SIMD width.  \p n gives the amount of space to allocate in<br>
        * dispatch_width units (which is just enough space for one logical<br>
@@ -353,9 +362,8 @@ namespace brw {<br>
          assert(inst->exec_size <= 32);<br>
          assert(inst->exec_size == dispatch_width() ||<br>
                 force_writemask_all);<br>
-         assert(_group == 0 || _group == 8);<br>
<br>
-         inst->force_sechalf = (_group == 8);<br>
+         inst->group = _group;<br>
          inst->force_writemask_all = force_writemask_all;<br>
          inst->annotation = annotation.str;<br>
          inst->ir = <a href="http://annotation.ir" rel="noreferrer" target="_blank">annotation.ir</a>;<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp<br>
index 9c39106..159bf5d 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp<br>
@@ -162,7 +162,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)<br>
    return a->opcode == b->opcode &&<br>
           a->force_writemask_all == b->force_writemask_all &&<br>
           a->exec_size == b->exec_size &&<br>
-          a->force_sechalf == b->force_sechalf &&<br>
+          a->group == b->group &&<br>
           a->saturate == b->saturate &&<br>
           a->predicate == b->predicate &&<br>
           a->predicate_inverse == b->predicate_inverse &&<br>
@@ -215,7 +215,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)<br>
       copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);<br>
    } else {<br>
       copy = bld.MOV(inst->dst, src);<br>
-      copy->force_sechalf = inst->force_sechalf;<br>
+      copy->group = inst->group;<br>
       copy->force_writemask_all = inst->force_writemask_all;<br>
       copy->src[0].negate = negate;<br>
    }<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
index 804c639..914ec9b 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
@@ -212,7 +212,7 @@ fs_generator::fire_fb_write(fs_inst *inst,<br>
    if (inst->opcode == FS_OPCODE_REP_FB_WRITE)<br>
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;<br>
    else if (prog_data->dual_src_blend) {<br>
-      if (!inst->force_sechalf)<br>
+      if (!inst->group)<br>
          msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;<br>
       else<br>
          msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;<br></blockquote><div><br></div><div>This doesn't look right.  I suppose it's correct for SIMD16-only and that you have a follow-on patch for SIMD32 but it seems wrong.  If it's an easy one-liner, maybe just squash it in?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
@@ -1071,7 +1071,7 @@ fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)<br>
    brw_set_default_compression(p, lower_size > 8);<br>
<br>
    for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {<br>
-      brw_set_default_group(p, (inst->force_sechalf ? 8 : 0) + lower_size * i);<br>
+      brw_set_default_group(p, inst->group + lower_size * i);<br>
<br>
       brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),<br>
               retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));<br>
@@ -1615,7 +1615,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)<br>
       const bool compressed =<br>
            inst->dst.component_size(inst->exec_size) > REG_SIZE;<br>
       brw_set_default_compression(p, compressed);<br>
-      brw_set_default_group(p, inst->force_sechalf ? 8 : 0);<br>
+      brw_set_default_group(p, inst->group);<br>
<br>
       for (unsigned int i = 0; i < inst->sources; i++) {<br>
          src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,<br>
@@ -1643,6 +1643,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)<br>
       brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);<br>
<br>
       assert(inst->force_writemask_all || inst->exec_size >= 8);<br>
+      assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);<br>
       assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));<br>
       assert(inst->mlen <= BRW_MAX_MSG_LENGTH);<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp<br>
index 8613725..8cd897f 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp<br>
@@ -163,7 +163,7 @@ fs_visitor::opt_peephole_sel()<br>
          /* Check that the MOVs are the right form. */<br>
          if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||<br>
              then_mov[i]->exec_size != else_mov[i]->exec_size ||<br>
-             then_mov[i]->force_sechalf != else_mov[i]->force_sechalf ||<br>
+             then_mov[i]->group != else_mov[i]->group ||<br>
              then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all ||<br>
              then_mov[i]->is_partial_write() ||<br>
              else_mov[i]->is_partial_write() ||<br>
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h<br>
index cdb4464..6a93e81 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h<br>
@@ -289,22 +289,20 @@ public:<br>
     */<br>
    uint8_t exec_size;<br>
<br>
+   /**<br>
+    * Channel group from the hardware execution and predication mask that<br>
+    * should be applied to the instruction.  The subset of channel enable<br>
+    * signals (calculated from the EU control flow and predication state)<br>
+    * given by [group, group + exec_size[ will be used to mask GRF writes and<br></blockquote><div><br></div><div>If you're intending to indicate a half-open interval, I believe the notation more people will be familiar with is "[group, group + exec_size)"<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+    * any other side effects of the instruction.<br>
+    */<br>
+   uint8_t group;<br>
+<br>
    bool eot:1;<br>
-   bool force_sechalf:1;<br>
    bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */<br>
 };<br>
<br>
 /**<br>
- * Set second-half quarter control on \p inst.<br>
- */<br>
-static inline fs_inst *<br>
-set_sechalf(fs_inst *inst)<br>
-{<br>
-   inst->force_sechalf = true;<br>
-   return inst;<br>
-}<br>
-<br>
-/**<br>
  * Make the execution of \p inst dependent on the evaluation of a possibly<br>
  * inverted predicate.<br>
  */<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.7.3<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>