<div dir="ltr"><div dir="ltr"><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Fri, Jan 18, 2019 at 6:09 PM Francisco Jerez <<a href="mailto:currojerez@riseup.net">currojerez@riseup.net</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Currently the execution type calculation will return a bogus value in<br>
cases like:<br>
<br>
  mov_indirect(8) vgrf0:w, vgrf1:w, vgrf2:ud, 32u<br>
<br>
Which will be considered to have a 32-bit integer execution type even<br>
though the actual indirect move operation will be carried out with<br>
16-bit precision.<br>
<br>
Similarly there's no need to apply the CHV/BXT double-precision region<br>
alignment restrictions to such control sources, since they aren't<br>
directly involved in the double-precision arithmetic operations<br>
emitted by these virtual instructions.  Applying the CHV/BXT<br>
restrictions to control sources was expected to be harmless if mildly<br>
inefficient, but unfortunately it exposed problems at codegen level<br>
for virtual instructions (namely the SHUFFLE instruction used for the<br>
Vulkan 1.1 subgroup feature) that weren't prepared to accept control<br>
sources with an arbitrary strided region.<br>
<br>
Bugzilla: <a href="https://bugs.freedesktop.org/show_bug.cgi?id=109328" rel="noreferrer" target="_blank">https://bugs.freedesktop.org/show_bug.cgi?id=109328</a><br>
Reported-by: Mark Janes <<a href="mailto:mark.a.janes@intel.com" target="_blank">mark.a.janes@intel.com</a>><br>
Fixes: efa4e4bc5fc "intel/fs: Introduce regioning lowering pass."<br>
---<br>
 src/intel/compiler/brw_fs.cpp                 | 54 +++++++++++++++++++<br>
 src/intel/compiler/brw_fs_lower_regioning.cpp |  6 +--<br>
 src/intel/compiler/brw_ir_fs.h                | 10 +++-<br>
 3 files changed, 66 insertions(+), 4 deletions(-)<br>
<br>
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp<br>
index 0359eb079f7..f475b617df2 100644<br>
--- a/src/intel/compiler/brw_fs.cpp<br>
+++ b/src/intel/compiler/brw_fs.cpp<br>
@@ -271,6 +271,60 @@ fs_inst::is_send_from_grf() const<br>
    }<br>
 }<br>
<br>
+bool<br>
+fs_inst::is_control_source(unsigned arg) const<br>
+{<br>
+   switch (opcode) {<br>
+   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:<br>
+   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:<br>
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:<br>
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:<br>
+      return arg == 0;<br>
+<br>
+   case SHADER_OPCODE_BROADCAST:<br>
+   case SHADER_OPCODE_SHUFFLE:<br>
+   case SHADER_OPCODE_QUAD_SWIZZLE:<br>
+   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:<br>
+   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:<br>
+   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:<br>
+   case SHADER_OPCODE_IMAGE_SIZE:<br>
+   case SHADER_OPCODE_GET_BUFFER_SIZE:<br>
+      return arg == 1;<br>
+<br>
+   case SHADER_OPCODE_MOV_INDIRECT:<br>
+   case SHADER_OPCODE_CLUSTER_BROADCAST:<br>
+   case SHADER_OPCODE_TEX:<br>
+   case FS_OPCODE_TXB:<br>
+   case SHADER_OPCODE_TXD:<br>
+   case SHADER_OPCODE_TXF:<br>
+   case SHADER_OPCODE_TXF_LZ:<br>
+   case SHADER_OPCODE_TXF_CMS:<br>
+   case SHADER_OPCODE_TXF_CMS_W:<br>
+   case SHADER_OPCODE_TXF_UMS:<br>
+   case SHADER_OPCODE_TXF_MCS:<br>
+   case SHADER_OPCODE_TXL:<br>
+   case SHADER_OPCODE_TXL_LZ:<br>
+   case SHADER_OPCODE_TXS:<br>
+   case SHADER_OPCODE_LOD:<br>
+   case SHADER_OPCODE_TG4:<br>
+   case SHADER_OPCODE_TG4_OFFSET:<br>
+   case SHADER_OPCODE_SAMPLEINFO:<br>
+   case SHADER_OPCODE_UNTYPED_ATOMIC:<br>
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:<br>
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ:<br>
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:<br>
+   case SHADER_OPCODE_BYTE_SCATTERED_READ:<br>
+   case SHADER_OPCODE_BYTE_SCATTERED_WRITE:<br>
+   case SHADER_OPCODE_TYPED_ATOMIC:<br>
+   case SHADER_OPCODE_TYPED_SURFACE_READ:<br>
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE:<br></blockquote><div><br></div>As of b284d222db, we are no longer using many of the opcodes in this list (gen7 pull constant loads, [un]typed surface reads/writes, etc.)  It will need to be rebased and we need to add SHADER_OPCODE_SEND to the list.  Fortunately, the changes to add SHADER_OPCODE_SEND landed before the 19.0 cut-off so there is no need to make two versions for backporting.</div><div class="gmail_quote"><br></div><div class="gmail_quote">Other than that, this patch seems perfectly reasonable to me</div><div class="gmail_quote"><br></div><div class="gmail_quote">Reviewed-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>></div><div class="gmail_quote"><br></div><div class="gmail_quote">If you want me to hand-review the new list of opcodes, feel free to send a v2 and cc me.<br></div><div class="gmail_quote"><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+      return arg == 1 || arg == 2;<br>
+<br>
+   default:<br>
+      return false;<br>
+   }<br>
+}<br>
+<br>
 /**<br>
  * Returns true if this instruction's sources and destinations cannot<br>
  * safely be the same register.<br>
diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp<br>
index df50993dee6..6a3c23892b4 100644<br>
--- a/src/intel/compiler/brw_fs_lower_regioning.cpp<br>
+++ b/src/intel/compiler/brw_fs_lower_regioning.cpp<br>
@@ -74,7 +74,7 @@ namespace {<br>
          unsigned stride = inst->dst.stride * type_sz(inst->dst.type);<br>
<br>
          for (unsigned i = 0; i < inst->sources; i++) {<br>
-            if (!is_uniform(inst->src[i]))<br>
+            if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))<br>
                stride = MAX2(stride, inst->src[i].stride *<br>
                              type_sz(inst->src[i].type));<br>
          }<br>
@@ -92,7 +92,7 @@ namespace {<br>
    required_dst_byte_offset(const fs_inst *inst)<br>
    {<br>
       for (unsigned i = 0; i < inst->sources; i++) {<br>
-         if (!is_uniform(inst->src[i]))<br>
+         if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))<br>
             if (reg_offset(inst->src[i]) % REG_SIZE !=<br>
                 reg_offset(inst->dst) % REG_SIZE)<br>
                return 0;<br>
@@ -109,7 +109,7 @@ namespace {<br>
    has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst,<br>
                           unsigned i)<br>
    {<br>
-      if (is_unordered(inst)) {<br>
+      if (is_unordered(inst) || inst->is_control_source(i)) {<br>
          return false;<br>
       } else {<br>
          const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);<br>
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h<br>
index 08e3d83d910..0a0ba1d363a 100644<br>
--- a/src/intel/compiler/brw_ir_fs.h<br>
+++ b/src/intel/compiler/brw_ir_fs.h<br>
@@ -358,6 +358,13 @@ public:<br>
    bool can_change_types() const;<br>
    bool has_source_and_destination_hazard() const;<br>
<br>
+   /**<br>
+    * Return whether \p arg is a control source of a virtual instruction which<br>
+    * shouldn't contribute to the execution type and usual regioning<br>
+    * restriction calculations of arithmetic instructions.<br>
+    */<br>
+   bool is_control_source(unsigned arg) const;<br>
+<br>
    /**<br>
     * Return the subset of flag registers read by the instruction as a bitset<br>
     * with byte granularity.<br>
@@ -462,7 +469,8 @@ get_exec_type(const fs_inst *inst)<br>
    brw_reg_type exec_type = BRW_REGISTER_TYPE_B;<br>
<br>
    for (int i = 0; i < inst->sources; i++) {<br>
-      if (inst->src[i].file != BAD_FILE) {<br>
+      if (inst->src[i].file != BAD_FILE &&<br>
+          !inst->is_control_source(i)) {<br>
          const brw_reg_type t = get_exec_type(inst->src[i].type);<br>
          if (type_sz(t) > type_sz(exec_type))<br>
             exec_type = t;<br>
-- <br>
2.19.2<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div></div>