<div dir="ltr"><div dir="ltr"><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Fri, Jan 18, 2019 at 6:09 PM Francisco Jerez <<a href="mailto:currojerez@riseup.net">currojerez@riseup.net</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Currently the execution type calculation will return a bogus value in<br>
cases like:<br>
<br>
mov_indirect(8) vgrf0:w, vgrf1:w, vgrf2:ud, 32u<br>
<br>
Which will be considered to have a 32-bit integer execution type even<br>
though the actual indirect move operation will be carried out with<br>
16-bit precision.<br>
<br>
Similarly there's no need to apply the CHV/BXT double-precision region<br>
alignment restrictions to such control sources, since they aren't<br>
directly involved in the double-precision arithmetic operations<br>
emitted by these virtual instructions. Applying the CHV/BXT<br>
restrictions to control sources was expected to be harmless if mildly<br>
inefficient, but unfortunately it exposed problems at codegen level<br>
for virtual instructions (namely the SHUFFLE instruction used for the<br>
Vulkan 1.1 subgroup feature) that weren't prepared to accept control<br>
sources with an arbitrary strided region.<br>
<br>
Bugzilla: <a href="https://bugs.freedesktop.org/show_bug.cgi?id=109328" rel="noreferrer" target="_blank">https://bugs.freedesktop.org/show_bug.cgi?id=109328</a><br>
Reported-by: Mark Janes <<a href="mailto:mark.a.janes@intel.com" target="_blank">mark.a.janes@intel.com</a>><br>
Fixes: efa4e4bc5fc "intel/fs: Introduce regioning lowering pass."<br>
---<br>
src/intel/compiler/brw_fs.cpp | 54 +++++++++++++++++++<br>
src/intel/compiler/brw_fs_lower_regioning.cpp | 6 +--<br>
src/intel/compiler/brw_ir_fs.h | 10 +++-<br>
3 files changed, 66 insertions(+), 4 deletions(-)<br>
<br>
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp<br>
index 0359eb079f7..f475b617df2 100644<br>
--- a/src/intel/compiler/brw_fs.cpp<br>
+++ b/src/intel/compiler/brw_fs.cpp<br>
@@ -271,6 +271,60 @@ fs_inst::is_send_from_grf() const<br>
}<br>
}<br>
<br>
+bool<br>
+fs_inst::is_control_source(unsigned arg) const<br>
+{<br>
+ switch (opcode) {<br>
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:<br>
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:<br>
+ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:<br>
+ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:<br>
+ return arg == 0;<br>
+<br>
+ case SHADER_OPCODE_BROADCAST:<br>
+ case SHADER_OPCODE_SHUFFLE:<br>
+ case SHADER_OPCODE_QUAD_SWIZZLE:<br>
+ case FS_OPCODE_INTERPOLATE_AT_SAMPLE:<br>
+ case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:<br>
+ case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:<br>
+ case SHADER_OPCODE_IMAGE_SIZE:<br>
+ case SHADER_OPCODE_GET_BUFFER_SIZE:<br>
+ return arg == 1;<br>
+<br>
+ case SHADER_OPCODE_MOV_INDIRECT:<br>
+ case SHADER_OPCODE_CLUSTER_BROADCAST:<br>
+ case SHADER_OPCODE_TEX:<br>
+ case FS_OPCODE_TXB:<br>
+ case SHADER_OPCODE_TXD:<br>
+ case SHADER_OPCODE_TXF:<br>
+ case SHADER_OPCODE_TXF_LZ:<br>
+ case SHADER_OPCODE_TXF_CMS:<br>
+ case SHADER_OPCODE_TXF_CMS_W:<br>
+ case SHADER_OPCODE_TXF_UMS:<br>
+ case SHADER_OPCODE_TXF_MCS:<br>
+ case SHADER_OPCODE_TXL:<br>
+ case SHADER_OPCODE_TXL_LZ:<br>
+ case SHADER_OPCODE_TXS:<br>
+ case SHADER_OPCODE_LOD:<br>
+ case SHADER_OPCODE_TG4:<br>
+ case SHADER_OPCODE_TG4_OFFSET:<br>
+ case SHADER_OPCODE_SAMPLEINFO:<br>
+ case SHADER_OPCODE_UNTYPED_ATOMIC:<br>
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:<br>
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:<br>
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:<br>
+ case SHADER_OPCODE_BYTE_SCATTERED_READ:<br>
+ case SHADER_OPCODE_BYTE_SCATTERED_WRITE:<br>
+ case SHADER_OPCODE_TYPED_ATOMIC:<br>
+ case SHADER_OPCODE_TYPED_SURFACE_READ:<br>
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:<br></blockquote><div><br></div>As of b284d222db, we are no longer using many of the opcodes in this list (gen7 pull constant loads, [un]typed surface reads/writes, etc.) It will need to be rebased and we need to add SHADER_OPCODE_SEND to the list. Fortunately, the changes to add SHADER_OPCODE_SEND landed before the 19.0 cut-off so there is no need to make two versions for backporting.</div><div class="gmail_quote"><br></div><div class="gmail_quote">Other than that, this patch seems perfectly reasonable to me</div><div class="gmail_quote"><br></div><div class="gmail_quote">Reviewed-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>></div><div class="gmail_quote"><br></div><div class="gmail_quote">If you want me to hand-review the new list of opcodes, feel free to send a v2 and cc me.<br></div><div class="gmail_quote"><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ return arg == 1 || arg == 2;<br>
+<br>
+ default:<br>
+ return false;<br>
+ }<br>
+}<br>
+<br>
/**<br>
* Returns true if this instruction's sources and destinations cannot<br>
* safely be the same register.<br>
diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp<br>
index df50993dee6..6a3c23892b4 100644<br>
--- a/src/intel/compiler/brw_fs_lower_regioning.cpp<br>
+++ b/src/intel/compiler/brw_fs_lower_regioning.cpp<br>
@@ -74,7 +74,7 @@ namespace {<br>
unsigned stride = inst->dst.stride * type_sz(inst->dst.type);<br>
<br>
for (unsigned i = 0; i < inst->sources; i++) {<br>
- if (!is_uniform(inst->src[i]))<br>
+ if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))<br>
stride = MAX2(stride, inst->src[i].stride *<br>
type_sz(inst->src[i].type));<br>
}<br>
@@ -92,7 +92,7 @@ namespace {<br>
required_dst_byte_offset(const fs_inst *inst)<br>
{<br>
for (unsigned i = 0; i < inst->sources; i++) {<br>
- if (!is_uniform(inst->src[i]))<br>
+ if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))<br>
if (reg_offset(inst->src[i]) % REG_SIZE !=<br>
reg_offset(inst->dst) % REG_SIZE)<br>
return 0;<br>
@@ -109,7 +109,7 @@ namespace {<br>
has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst,<br>
unsigned i)<br>
{<br>
- if (is_unordered(inst)) {<br>
+ if (is_unordered(inst) || inst->is_control_source(i)) {<br>
return false;<br>
} else {<br>
const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);<br>
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h<br>
index 08e3d83d910..0a0ba1d363a 100644<br>
--- a/src/intel/compiler/brw_ir_fs.h<br>
+++ b/src/intel/compiler/brw_ir_fs.h<br>
@@ -358,6 +358,13 @@ public:<br>
bool can_change_types() const;<br>
bool has_source_and_destination_hazard() const;<br>
<br>
+ /**<br>
+ * Return whether \p arg is a control source of a virtual instruction which<br>
+ * shouldn't contribute to the execution type and usual regioning<br>
+ * restriction calculations of arithmetic instructions.<br>
+ */<br>
+ bool is_control_source(unsigned arg) const;<br>
+<br>
/**<br>
* Return the subset of flag registers read by the instruction as a bitset<br>
* with byte granularity.<br>
@@ -462,7 +469,8 @@ get_exec_type(const fs_inst *inst)<br>
brw_reg_type exec_type = BRW_REGISTER_TYPE_B;<br>
<br>
for (int i = 0; i < inst->sources; i++) {<br>
- if (inst->src[i].file != BAD_FILE) {<br>
+ if (inst->src[i].file != BAD_FILE &&<br>
+ !inst->is_control_source(i)) {<br>
const brw_reg_type t = get_exec_type(inst->src[i].type);<br>
if (type_sz(t) > type_sz(exec_type))<br>
exec_type = t;<br>
-- <br>
2.19.2<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div></div>